From a75ea9362403a4d9da13bca2e3c1a44814723f0b Mon Sep 17 00:00:00 2001 From: Anhgelus Morhtuuzh Date: Mon, 16 Mar 2026 13:58:21 +0100 Subject: feat(html): retrieve human from http response --- html.go | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 6 deletions(-) (limited to 'html.go') diff --git a/html.go b/html.go index 8c968f5..52460ee 100644 --- a/html.go +++ b/html.go @@ -1,22 +1,68 @@ package human import ( + "context" + "encoding/json" + "errors" + "io" + "net/http" "net/url" "strings" ) +var ErrUnsupportedContentType = errors.New("unsupported content type") + +// GetHumanFromHTML returns the [Human] contained in the [http.Response]. +// +// If Content-Type is unsupported, it returns [ErrUnsupportedContentType]. +// Currently, only `text/html` is supported. +// +// [http.Client] is used to retrieve the `human.json` linked in the [http.Response.Body]. +func GetHumanFromHTML(ctx context.Context, client *http.Client, resp *http.Response) (*Human, error) { + if !strings.Contains(resp.Header.Get("Content-Type"), "text/html") { + return nil, ErrUnsupportedContentType + } + b, err := io.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return nil, err + } + url, err := GetURLFromHTML(b, resp.Request.URL) + if err != nil { + return nil, err + } + if url == nil { + return nil, nil + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url.String(), nil) + if err != nil { + return nil, err + } + resp, err = client.Do(req) + if err != nil { + return nil, err + } + b, err = io.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return nil, err + } + var h Human + return &h, json.Unmarshal(b, &h) +} + // GetURLFromHTML returns the [url.URL] extracted from the raw HTML. // Base is the URL containing the HTML. // // Returns nil if nothing is found. func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) { content := string(b) + runed := []rune(content) i := strings.Index(content, `= 0 { - if i+6 >= len(content) { - return nil, nil - } - args := parseArgs(string([]rune(content)[i+6:])) + for i >= 0 && i+6 < len(runed) { + content = string(runed[i+6:]) + runed = []rune(content) + args := parseArgs(content) if args["rel"] == "human-json" { href, ok := args["href"] if ok { @@ -37,7 +83,7 @@ func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) { return u, nil } } - i = strings.IndexAny(content[i:], `