diff options
| field | value | date |
|---|---|---|
| author | Anhgelus Morhtuuzh <william@herges.fr> | 2026-03-16 13:58:21 +0100 |
| committer | Anhgelus Morhtuuzh <william@herges.fr> | 2026-03-16 13:58:21 +0100 |
| commit | a75ea9362403a4d9da13bca2e3c1a44814723f0b (patch) | |
| tree | e314efdf03a05f0e202d027d784eb2d677c8d824 | |
| parent | d9b66eb7a9a9135462e490e46244dcd1b71c7293 (diff) | |
feat(html): retrieve human from http response
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | html.go | 58 |
| -rw-r--r-- | html_test.go | 25 |
| -rw-r--r-- | human.go | 2 |
3 files changed, 78 insertions, 7 deletions
@@ -1,22 +1,68 @@ package human import ( + "context" + "encoding/json" + "errors" + "io" + "net/http" "net/url" "strings" ) +var ErrUnsupportedContentType = errors.New("unsupported content type") + +// GetHumanFromHTML returns the [Human] contained in the [http.Response]. +// +// If Content-Type is unsupported, it returns [ErrUnsupportedContentType]. +// Currently, only `text/html` is supported. +// +// [http.Client] is used to retrieve the `human.json` linked in the [http.Response.Body]. +func GetHumanFromHTML(ctx context.Context, client *http.Client, resp *http.Response) (*Human, error) { + if !strings.Contains(resp.Header.Get("Content-Type"), "text/html") { + return nil, ErrUnsupportedContentType + } + b, err := io.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return nil, err + } + url, err := GetURLFromHTML(b, resp.Request.URL) + if err != nil { + return nil, err + } + if url == nil { + return nil, nil + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url.String(), nil) + if err != nil { + return nil, err + } + resp, err = client.Do(req) + if err != nil { + return nil, err + } + b, err = io.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return nil, err + } + var h Human + return &h, json.Unmarshal(b, &h) +} + // GetURLFromHTML returns the [url.URL] extracted from the raw HTML. // Base is the URL containing the HTML. // // Returns nil if nothing is found. 
func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) { content := string(b) + runed := []rune(content) i := strings.Index(content, `<link `) - for i >= 0 { - if i+6 >= len(content) { - return nil, nil - } - args := parseArgs(string([]rune(content)[i+6:])) + for i >= 0 && i+6 < len(runed) { + content = string(runed[i+6:]) + runed = []rune(content) + args := parseArgs(content) if args["rel"] == "human-json" { href, ok := args["href"] if ok { @@ -37,7 +83,7 @@ func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) { return u, nil } } - i = strings.IndexAny(content[i:], `<link `) + i = strings.Index(content, `<link `) } return nil, nil } diff --git a/html_test.go b/html_test.go index ab2bda7..b7fdf1a 100644 --- a/html_test.go +++ b/html_test.go @@ -1,10 +1,35 @@ package human import ( + "context" + "encoding/json" + "net/http" "net/url" "testing" ) +func TestGetHumanFromHTML(t *testing.T) { + client := http.DefaultClient + // using human.json author's website + resp, err := client.Get(`https://robida.net/`) + if err != nil { + t.Fatal(err) + } + t.Logf("html fetched") + h, err := GetHumanFromHTML(context.Background(), client, resp) + if err != nil { + t.Fatal(err) + } + if h == nil { + t.Fatal("human.json is nil") + } + b, err := json.Marshal(h) + if err != nil { + t.Fatal(err) + } + t.Logf("%s", b) +} + func TestGetURLFromHTML(t *testing.T) { base, _ := url.Parse(`https://example.org/foo/`) u, err := GetURLFromHTML([]byte(`<link rel=human-json href=/human.json>`), base) @@ -110,7 +110,7 @@ func (v *Vouch) MarshalJSON() ([]byte, error) { // Human represents the human.json file. type Human struct { - Version string `json:"string"` + Version string `json:"version"` URL *URL `json:"url"` Vouches []*Vouch `json:"vouches"` } |
