potisanのプログラミングメモ

趣味のプログラマーがプログラミング関係で気になったことや調べたことをいつでも忘れられるようにメモするブログです。はてなブログ無料版なので記事の上の方はたぶん広告です。記事中にも広告挿入されるみたいです。

C++20&Win API&WIL HTTPSプロトコルのコンテンツを取得する

WinINet APIを使用してHTTPSプロトコルのコンテンツ(https://~)を取得するコードです。

#include <bit>
#include <climits>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <vector>

#define STRICT
#define NOMINMAX
#include <Windows.h>
#pragma comment(lib, "wininet.lib")
#include <wininet.h>

#include "wil/resource.h" // wil::unique_hinternet

// URL_COMPONENTSWを簡単に扱うためのクラス。
template <DWORD BUFFER_SIZE = 256>
class URLComponentsW
{
public:
    URLComponentsW(std::wstring_view url, DWORD flags = 0)
    {
        URL_COMPONENTSW urlComps{ sizeof(URL_COMPONENTSW) };
        m_scheme.resize(BUFFER_SIZE);
        m_hostname.resize(BUFFER_SIZE);
        m_username.resize(BUFFER_SIZE);
        m_password.resize(BUFFER_SIZE);
        m_path.resize(BUFFER_SIZE);
        m_extrainfo.resize(BUFFER_SIZE);
        urlComps.lpszScheme = m_scheme.data();
        urlComps.dwSchemeLength = BUFFER_SIZE;
        urlComps.lpszHostName = m_hostname.data();
        urlComps.dwHostNameLength = BUFFER_SIZE;
        urlComps.lpszUserName = m_username.data();
        urlComps.dwUserNameLength = BUFFER_SIZE;
        urlComps.lpszPassword = m_password.data();
        urlComps.dwPasswordLength = BUFFER_SIZE;
        urlComps.lpszUrlPath = m_path.data();
        urlComps.dwUrlPathLength = BUFFER_SIZE;
        urlComps.lpszExtraInfo = m_extrainfo.data();
        urlComps.dwExtraInfoLength = BUFFER_SIZE;
        if (!::InternetCrackUrlW(url.data(), 0, flags, &urlComps))
        {
            m_nscheme = INTERNET_SCHEME_UNKNOWN;
            m_nport = 0;
            m_scheme.clear();
            m_hostname.clear();
            m_username.clear();
            m_password.clear();
            m_path.clear();
            m_extrainfo.clear();
            return;
        }
        m_nscheme = urlComps.nScheme;
        m_nport = urlComps.nPort;

        m_scheme.resize(urlComps.dwSchemeLength);
        m_hostname.resize(urlComps.dwHostNameLength);
        m_username.resize(urlComps.dwUserNameLength);
        m_password.resize(urlComps.dwPasswordLength);
        m_path.resize(urlComps.dwUrlPathLength);
        m_extrainfo.resize(urlComps.dwExtraInfoLength);
        m_scheme.shrink_to_fit();
        m_hostname.shrink_to_fit();
        m_username.shrink_to_fit();
        m_password.shrink_to_fit();
        m_path.shrink_to_fit();
        m_extrainfo.shrink_to_fit();
    }

    constexpr INTERNET_SCHEME nscheme() const noexcept { return m_nscheme; }
    constexpr INTERNET_PORT nport() const noexcept { return m_nport; }
    constexpr const std::wstring& scheme() const noexcept { return m_scheme; }
    constexpr const std::wstring& hostname() const noexcept { return m_hostname; }
    constexpr const std::wstring& username() const noexcept { return m_username; }
    constexpr const std::wstring& password() const noexcept { return m_password; }
    constexpr const std::wstring& path() const noexcept { return m_path; }
    constexpr const std::wstring& extrainfo() const noexcept { return m_extrainfo; }

    constexpr bool empty() const noexcept
    {
        return m_nscheme == INTERNET_SCHEME_UNKNOWN && m_nport == 0
            && m_scheme.empty() && m_hostname.empty() && m_username.empty()
            && m_password.empty() && m_path.empty() && m_extrainfo.empty();
    }

private:
    INTERNET_SCHEME m_nscheme;
    INTERNET_PORT m_nport;
    std::wstring m_scheme;
    std::wstring m_hostname;
    std::wstring m_username;
    std::wstring m_password;
    std::wstring m_path;
    std::wstring m_extrainfo;
};

// Retrieves a string-valued piece of header information for an HTTP request.
//
// hinternet : request handle passed on to ::HttpQueryInfoW.
// infoLevel : HTTP_QUERY_* value selecting the information to return.
// BUFFER_LEN: initial buffer capacity in characters; the buffer is enlarged
//             automatically when the API reports that it is too small.
//
// Returns the queried text, or an empty string if the query fails.
template <DWORD BUFFER_LEN = 1024>
std::wstring HttpQueryInfoW(HINTERNET hinternet, DWORD infoLevel)
{
    std::wstring buffer(BUFFER_LEN, L'\0');
    for (;;)
    {
        // Always query the first matching header, also after a retry.
        DWORD index = 0;
        // The API measures the buffer in bytes, not in characters.
        DWORD bufferBytes = static_cast<DWORD>(buffer.size() * sizeof(wchar_t));
        if (::HttpQueryInfoW(hinternet, infoLevel, buffer.data(), &bufferBytes, &index))
        {
            // On success bufferBytes is the number of bytes written.
            buffer.resize(bufferBytes / sizeof(wchar_t));
            return buffer;
        }
        if (::GetLastError() != ERROR_INSUFFICIENT_BUFFER)
            return {};

        // On ERROR_INSUFFICIENT_BUFFER bufferBytes holds the required size in
        // bytes. Grow the buffer (with one spare character) and try again.
        const std::size_t requiredChars = bufferBytes / sizeof(wchar_t) + 1;
        if (requiredChars <= buffer.size())
            return {}; // No progress possible; avoid looping forever.
        buffer.resize(requiredChars);
    }
}

// Queries a numeric piece of header information for an HTTP request.
//
// hinternet : request handle passed on to ::HttpQueryInfoW.
// infoLevel : HTTP_QUERY_* value; HTTP_QUERY_FLAG_NUMBER is OR-ed in here.
// index     : optional header index, forwarded unchanged to the API.
//
// Returns the value as a DWORD, or std::nullopt if the query fails.
std::optional<DWORD> HttpQueryInfoAsNumber(HINTERNET hinternet, DWORD infoLevel, LPDWORD index = nullptr)
{
    DWORD value = 0;
    DWORD valueSize = sizeof(value);
    const BOOL succeeded = ::HttpQueryInfoW(
        hinternet, infoLevel | HTTP_QUERY_FLAG_NUMBER, &value, &valueSize, index);
    if (succeeded)
        return value;
    return std::nullopt;
}

// Reads everything that remains to be read from an internet handle.
//
// hfile      : handle passed on to ::InternetReadFile.
// BUFFER_SIZE: number of bytes requested per InternetReadFile call.
//
// Returns the bytes read. An empty vector is returned both when there is no
// data and when a read fails.
template <DWORD BUFFER_SIZE = 1024>
std::vector<BYTE> InternetReadAllFile(HINTERNET hfile)
{
    std::vector<BYTE> data;
    // One scratch buffer reused for every read.
    std::vector<BYTE> buffer(BUFFER_SIZE);
    for (;;)
    {
        DWORD bytesRead = 0;
        if (!::InternetReadFile(hfile, buffer.data(), BUFFER_SIZE, &bytesRead))
            return {};
        // A successful read of zero bytes marks the end of the data.
        if (bytesRead == 0) break;
        // Append only the bytes that were actually read; a read may return
        // fewer than BUFFER_SIZE bytes.
        data.insert(data.end(), buffer.cbegin(), buffer.cbegin() + bytesRead);
    }
    data.shrink_to_fit();
    return data;
}

// Sends a GET request for an https:// URL and returns the raw response
// headers and the response body.
//
// hinternet      : session handle used for InternetConnectW.
// url            : URL to fetch; only the HTTPS scheme is accepted.
// responseHeader : receives the raw headers (HTTP_QUERY_RAW_HEADERS_CRLF).
// content        : receives the response body bytes.
//
// Returns true only when the request was sent and the status code is 200.
// Both output parameters are cleared first, so they are empty on failure.
bool FetchHttpsResponseHeaderAndContent(
    HINTERNET hinternet,
    std::wstring_view url,
    std::wstring& responseHeader,
    std::vector<BYTE>& content)
{
    responseHeader.clear();
    content.clear();

    // Split the URL into its components.
    URLComponentsW urlComp(url);
    if (urlComp.empty() || urlComp.nscheme() != INTERNET_SCHEME_HTTPS)
        return false;

    // Use the port from the URL; fall back to the HTTPS default if none was
    // reported.
    const INTERNET_PORT port =
        urlComp.nport() != 0 ? urlComp.nport() : INTERNET_DEFAULT_HTTPS_PORT;
    // Pass nullptr rather than an empty string when the URL carries no
    // credentials, so that the API applies its defaults.
    const wchar_t* const username =
        urlComp.username().empty() ? nullptr : urlComp.username().c_str();
    const wchar_t* const password =
        urlComp.password().empty() ? nullptr : urlComp.password().c_str();

    // Establish the connection.
    wil::unique_hinternet hconnect(::InternetConnectW(hinternet, urlComp.hostname().c_str(),
        port, username, password, INTERNET_SERVICE_HTTP, 0, 0));
    if (!hconnect) return false;

    // Create the request. The request-level flags belong here, where
    // HttpOpenRequestW documents them.
    const std::wstring objectName = urlComp.path() + urlComp.extrainfo();
    wil::unique_hinternet hrequest(::HttpOpenRequestW(hconnect.get(), L"GET",
        objectName.c_str(), nullptr, nullptr, nullptr,
        INTERNET_FLAG_SECURE
        | INTERNET_FLAG_RELOAD            // bypass the cache
        | INTERNET_FLAG_NO_AUTO_REDIRECT, // do not follow redirects
        0));
    if (!hrequest) return false;

    // Send the request.
    if (!::HttpSendRequestW(hrequest.get(), nullptr, 0, nullptr, 0))
        return false;

    // Check the status code; a failed query (no value) is also rejected.
    const auto statusCode = HttpQueryInfoAsNumber(hrequest.get(), HTTP_QUERY_STATUS_CODE);
    if (statusCode != HTTP_STATUS_OK)
        return false;

    responseHeader = HttpQueryInfoW(hrequest.get(), HTTP_QUERY_RAW_HEADERS_CRLF);
    content = InternetReadAllFile(hrequest.get());

    return true;
}

// Converts a UTF-8 byte sequence to a UTF-16 string.
//
// buffer : UTF-8 encoded bytes; no terminating NUL is required.
//
// Returns the converted string (empty for empty input), or std::nullopt when
// the input is longer than INT_MAX bytes or the conversion fails.
std::optional<std::wstring> ConvUTF8ToUTF16(std::span<BYTE> buffer)
{
    // MultiByteToWideChar rejects a source length of 0, so handle empty
    // input here instead of reporting it as an error.
    if (buffer.empty()) return std::wstring{};
    // The API takes the source length as an int.
    if (buffer.size() > INT_MAX) return {};

    const auto source = reinterpret_cast<LPCCH>(buffer.data());
    const int sourceLen = static_cast<int>(buffer.size());

    // First call: ask for the number of UTF-16 code units required.
    const int required = ::MultiByteToWideChar(CP_UTF8, 0, source, sourceLen, nullptr, 0);
    if (required <= 0)
        return {};

    // Second call: perform the conversion into a buffer of that size.
    std::wstring converted(static_cast<std::size_t>(required), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, source, sourceLen, converted.data(), required);
    if (written <= 0)
        return {};
    converted.resize(static_cast<std::size_t>(written));
    return converted;
}

// Sample entry point: downloads one HTTPS URL and converts the body to
// UTF-16. Returns 0 on success and -1 if any step fails.
int main()
{
    // Replace this placeholder with the HTTPS URL to fetch.
    const auto url = L"https://~";

    // Initialize the Windows Internet API.
    wil::unique_hinternet hinternet(
        ::InternetOpenW(nullptr, INTERNET_OPEN_TYPE_PRECONFIG, nullptr, nullptr, 0));
    if (!hinternet) return -1;

    std::wstring responseHeader;
    std::vector<BYTE> content;
    if (!FetchHttpsResponseHeaderAndContent(hinternet.get(), url, responseHeader, content))
        return -1;

    // Convert the body, assuming it is UTF-8 encoded.
    const auto html = ConvUTF8ToUTF16(content);
    if (!html) return -1;

    return 0;
}