potisanのプログラミングメモ

プログラミング素人です。昔の自分を育ててくれたネット情報に少しでも貢献できるよう、情報を貯めていこうと思っています。Windows環境のC++やC#がメインです。

C++20&Win API&WIL HTTPSプロトコルのコンテンツを取得する

WinINet APIを使用してHTTPSプロトコルのコンテンツ(https://~)を取得するコードです。

#include <string>
#include <span>
#include <optional>
#include <vector>

#define STRICT
#define NOMINMAX
#include <Windows.h>
#pragma comment(lib, "wininet.lib")
#include <wininet.h>

#include "wil/resource.h" // wil::unique_hinternet

// Owning wrapper that makes URL_COMPONENTSW easy to work with.
//
// The constructor splits a URL with ::InternetCrackUrlW and keeps every component in its own
// std::wstring. BUFFER_SIZE is the buffer capacity, in wchar_t, handed to InternetCrackUrlW for
// each component. When the URL cannot be cracked, the object is left in its empty state
// (INTERNET_SCHEME_UNKNOWN, port 0, all strings empty) and empty() returns true.
template <DWORD BUFFER_SIZE = 256>
class URLComponentsW
{
public:
    // url:   URL to split. It does not have to be null-terminated.
    // flags: forwarded unchanged as the dwFlags argument of InternetCrackUrlW.
    URLComponentsW(std::wstring_view url, DWORD flags = 0)
    {
        // InternetCrackUrlW interprets a length of zero as "lpszUrl is null-terminated", which a
        // std::wstring_view does not guarantee (an empty view may even carry a null pointer).
        // Reject input that cannot be described by an explicit, non-zero DWORD length.
        if (url.empty() || url.size() > MAXDWORD)
            return;

        URL_COMPONENTSW urlComps{ sizeof(URL_COMPONENTSW) };
        m_scheme.resize(BUFFER_SIZE);
        m_hostname.resize(BUFFER_SIZE);
        m_username.resize(BUFFER_SIZE);
        m_password.resize(BUFFER_SIZE);
        m_path.resize(BUFFER_SIZE);
        m_extrainfo.resize(BUFFER_SIZE);
        urlComps.lpszScheme = m_scheme.data();
        urlComps.dwSchemeLength = BUFFER_SIZE;
        urlComps.lpszHostName = m_hostname.data();
        urlComps.dwHostNameLength = BUFFER_SIZE;
        urlComps.lpszUserName = m_username.data();
        urlComps.dwUserNameLength = BUFFER_SIZE;
        urlComps.lpszPassword = m_password.data();
        urlComps.dwPasswordLength = BUFFER_SIZE;
        urlComps.lpszUrlPath = m_path.data();
        urlComps.dwUrlPathLength = BUFFER_SIZE;
        urlComps.lpszExtraInfo = m_extrainfo.data();
        urlComps.dwExtraInfoLength = BUFFER_SIZE;

        // Pass the real length so the API never reads past the end of the view.
        if (!::InternetCrackUrlW(url.data(), static_cast<DWORD>(url.size()), flags, &urlComps))
        {
            // Failure: drop the scratch buffers so that empty() reports true.
            m_nscheme = INTERNET_SCHEME_UNKNOWN;
            m_nport = 0;
            m_scheme.clear();
            m_hostname.clear();
            m_username.clear();
            m_password.clear();
            m_path.clear();
            m_extrainfo.clear();
            return;
        }
        m_nscheme = urlComps.nScheme;
        m_nport = urlComps.nPort;

        // Trim every buffer down to the component length reported by the API.
        m_scheme.resize(urlComps.dwSchemeLength);
        m_hostname.resize(urlComps.dwHostNameLength);
        m_username.resize(urlComps.dwUserNameLength);
        m_password.resize(urlComps.dwPasswordLength);
        m_path.resize(urlComps.dwUrlPathLength);
        m_extrainfo.resize(urlComps.dwExtraInfoLength);
        m_scheme.shrink_to_fit();
        m_hostname.shrink_to_fit();
        m_username.shrink_to_fit();
        m_password.shrink_to_fit();
        m_path.shrink_to_fit();
        m_extrainfo.shrink_to_fit();
    }

    // Accessors for the values reported by InternetCrackUrlW (nScheme, nPort and the strings).
    constexpr INTERNET_SCHEME nscheme() const noexcept { return m_nscheme; }
    constexpr INTERNET_PORT nport() const noexcept { return m_nport; }
    constexpr const std::wstring& scheme() const noexcept { return m_scheme; }
    constexpr const std::wstring& hostname() const noexcept { return m_hostname; }
    constexpr const std::wstring& username() const noexcept { return m_username; }
    constexpr const std::wstring& password() const noexcept { return m_password; }
    constexpr const std::wstring& path() const noexcept { return m_path; }
    constexpr const std::wstring& extrainfo() const noexcept { return m_extrainfo; }

    // True when no component holds a value, which is the state left behind by a failed crack.
    constexpr bool empty() const noexcept
    {
        return m_nscheme == INTERNET_SCHEME_UNKNOWN && m_nport == 0
            && m_scheme.empty() && m_hostname.empty() && m_username.empty()
            && m_password.empty() && m_path.empty() && m_extrainfo.empty();
    }

private:
    INTERNET_SCHEME m_nscheme = INTERNET_SCHEME_UNKNOWN;
    INTERNET_PORT m_nport = 0;
    std::wstring m_scheme;
    std::wstring m_hostname;
    std::wstring m_username;
    std::wstring m_password;
    std::wstring m_path;
    std::wstring m_extrainfo;
};

// Queries a string-valued item (for example the raw response headers) from an HTTP request
// handle and returns it as a std::wstring.
//
// BUFFER_LEN is the initial buffer capacity in wchar_t. When WinINet reports
// ERROR_INSUFFICIENT_BUFFER the buffer is grown to the size it asks for and the query is
// repeated. Any other failure yields an empty string.
template <DWORD BUFFER_LEN = 1024>
std::wstring HttpQueryInfoW(HINTERNET hinternet, DWORD infoLevel)
{
    std::wstring buffer(BUFFER_LEN, L'\0');
    for (;;)
    {
        // The API takes and reports the buffer size in BYTES, not in characters.
        DWORD bufferBytes = static_cast<DWORD>(buffer.size() * sizeof(wchar_t));
        // Restart from the first matching header on every attempt.
        DWORD index = 0;
        if (::HttpQueryInfoW(hinternet, infoLevel, buffer.data(), &bufferBytes, &index))
        {
            // On success the byte count excludes the terminating null character.
            buffer.resize(bufferBytes / sizeof(wchar_t));
            return buffer;
        }
        if (::GetLastError() != ERROR_INSUFFICIENT_BUFFER)
            return {};

        // bufferBytes now holds the required size in bytes; leave room for the terminator.
        const std::wstring::size_type required = bufferBytes / sizeof(wchar_t) + 1;
        if (required <= buffer.size())
            return {}; // The API asked for no more than it already had; avoid spinning forever.
        buffer.resize(required);
    }
}

// Queries a numeric item from an HTTP request handle.
//
// HTTP_QUERY_FLAG_NUMBER is OR-ed into infoLevel so that ::HttpQueryInfoW writes the value
// into a DWORD. `index` is forwarded unchanged as the lpdwIndex argument.
// Returns the queried value, or std::nullopt when the call fails.
std::optional<DWORD> HttpQueryInfoAsNumber(HINTERNET hinternet, DWORD infoLevel, LPDWORD index = nullptr)
{
    DWORD value;
    DWORD valueSize = sizeof(value);
    const BOOL succeeded = ::HttpQueryInfoW(
        hinternet, infoLevel | HTTP_QUERY_FLAG_NUMBER, &value, &valueSize, index);
    if (succeeded)
        return value;
    return std::nullopt;
}

// Reads everything that remains on an internet file/request handle into a byte vector.
//
// BUFFER_SIZE is the chunk size, in bytes, requested from ::InternetReadFile per call.
// Reading stops when a call succeeds with zero bytes read. Returns the accumulated bytes,
// or an empty vector if any read fails (partial data is discarded in that case).
template <DWORD BUFFER_SIZE = 1024>
std::vector<BYTE> InternetReadAllFile(HINTERNET hfile)
{
    static_assert(BUFFER_SIZE > 0, "BUFFER_SIZE must be positive");

    std::vector<BYTE> data;
    // One scratch buffer reused for every chunk instead of reallocating per iteration.
    std::vector<BYTE> buffer(BUFFER_SIZE);
    for (;;)
    {
        DWORD bytesRead = 0;
        if (!::InternetReadFile(hfile, buffer.data(), BUFFER_SIZE, &bytesRead))
            return {};
        if (bytesRead == 0) break;
        // Append only the bytes actually received; a chunk may be shorter than the buffer.
        data.insert(data.end(), buffer.data(), buffer.data() + bytesRead);
    }
    data.shrink_to_fit();
    return data;
}

// Sends a GET request for an https:// URL and collects the response.
//
// hinternet:      session handle passed to ::InternetConnectW.
// url:            URL to fetch; anything whose scheme is not HTTPS is rejected.
// responseHeader: receives the result of querying HTTP_QUERY_RAW_HEADERS_CRLF.
// content:        receives the response body bytes.
//
// Both output parameters are cleared first. Returns true only when the request was sent and
// the status code is HTTP_STATUS_OK; returns false on every other path.
bool FetchHttpsResponseHeaderAndContent(
    HINTERNET hinternet,
    std::wstring_view url,
    std::wstring& responseHeader,
    std::vector<BYTE>& content)
{
    responseHeader.clear();
    content.clear();

    // Split the URL into its components.
    URLComponentsW urlComp(url);
    if (urlComp.empty() || urlComp.nscheme() != INTERNET_SCHEME_HTTPS)
        return false;

    // Honor an explicit port in the URL (e.g. https://host:8443/); fall back to the HTTPS
    // default only when no port was reported.
    const INTERNET_PORT port =
        urlComp.nport() != 0 ? urlComp.nport() : INTERNET_PORT{ INTERNET_DEFAULT_HTTPS_PORT };

    // Establish the connection.
    // NOTE(review): INTERNET_FLAG_RELOAD / INTERNET_FLAG_NO_AUTO_REDIRECT are documented as
    // HttpOpenRequestW flags; confirm they take effect when passed to InternetConnectW.
    wil::unique_hinternet hconnect;
    hconnect.reset(::InternetConnectW(hinternet, urlComp.hostname().data(),
        port, urlComp.username().data(), urlComp.password().data(), INTERNET_SERVICE_HTTP,
        INTERNET_FLAG_RELOAD              // ignore the cache
        | INTERNET_FLAG_NO_AUTO_REDIRECT, // disable redirects
        0));
    if (!hconnect) return false;

    // Create the request. The object name is the path followed by the extra info part.
    const std::wstring objectName = urlComp.path() + urlComp.extrainfo();
    wil::unique_hinternet hrequest;
    hrequest.reset(::HttpOpenRequestW(hconnect.get(), L"GET",
        objectName.data(), nullptr, nullptr, nullptr, INTERNET_FLAG_SECURE, 0));
    if (!hrequest) return false;

    // Send the request.
    if (!::HttpSendRequestW(hrequest.get(), nullptr, 0, nullptr, 0))
        return false;

    // Check the status code; a failed query (empty optional) also compares unequal.
    auto statusCode = HttpQueryInfoAsNumber(hrequest.get(), HTTP_QUERY_STATUS_CODE);
    if (statusCode != HTTP_STATUS_OK)
        return false;

    responseHeader = HttpQueryInfoW(hrequest.get(), HTTP_QUERY_RAW_HEADERS_CRLF);
    content = InternetReadAllFile(hrequest.get());

    return true;
}

// Converts a UTF-8 byte sequence to a UTF-16 std::wstring with ::MultiByteToWideChar.
//
// Returns an empty string for empty input, the converted text on success, and std::nullopt
// when the input is longer than INT_MAX bytes or the conversion fails.
std::optional<std::wstring> ConvUTF8ToUTF16(std::span<BYTE> buffer)
{
    // MultiByteToWideChar rejects a zero-length source, so handle "nothing to convert" here.
    if (buffer.empty()) return std::wstring{};
    // The API takes the source length as an int.
    if (buffer.size() > static_cast<size_t>(INT_MAX)) return {};

    const auto source = reinterpret_cast<LPCCH>(buffer.data());
    const int sourceLen = static_cast<int>(buffer.size());

    // First pass: ask for the required number of UTF-16 code units. Zero means failure.
    const int len = ::MultiByteToWideChar(CP_UTF8, 0, source, sourceLen, nullptr, 0);
    if (len <= 0)
        return {};

    // Second pass: convert into a buffer of exactly that size.
    std::wstring s(static_cast<std::wstring::size_type>(len), L'\0');
    if (::MultiByteToWideChar(CP_UTF8, 0, source, sourceLen, s.data(), len) == 0)
        return {};
    return s;
}

// Sample entry point: fetches one HTTPS URL and converts the body to UTF-16.
// Returns 0 on success and -1 if initialization, the fetch, or the conversion fails.
int main()
{
    // Put any HTTPS URL to fetch here.
    const auto url = L"https://~";

    // Initialize the Windows Internet API.
    wil::unique_hinternet hinternet;
    hinternet.reset(::InternetOpenW(nullptr, INTERNET_OPEN_TYPE_PRECONFIG, nullptr, nullptr, 0));
    if (!hinternet) return -1;

    std::wstring responseHeader;
    std::vector<BYTE> content;
    if (!FetchHttpsResponseHeaderAndContent(hinternet.get(), url, responseHeader, content))
        return -1;

    // Convert the body on the hard-coded assumption that it is UTF-8 encoded.
    auto html = ConvUTF8ToUTF16(content);
    if (!html)
        return -1;

    return 0;
}