about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- html.go 58
-rw-r--r-- html_test.go 25
-rw-r--r-- human.go 2
3 files changed, 78 insertions, 7 deletions
diff --git a/html.go b/html.go
index 8c968f5..52460ee 100644
--- a/html.go
+++ b/html.go
@@ -1,22 +1,68 @@
package human
import (
+ "context"
+ "encoding/json"
+ "errors"
+ "io"
+ "net/http"
"net/url"
"strings"
)
+var ErrUnsupportedContentType = errors.New("unsupported content type")
+
+// GetHumanFromHTML returns the [Human] described by the `human.json` file
+// that the HTML page carried by resp links to.
+//
+// The page must be served as `text/html`: the media type of the Content-Type
+// header is compared case-insensitively and its parameters (such as charset)
+// are ignored. For any other type it returns [ErrUnsupportedContentType]
+// and leaves resp.Body untouched, so the caller stays responsible for it.
+// In every other case resp.Body is read to the end and closed.
+//
+// The link is located with [GetURLFromHTML], using resp.Request.URL as the
+// base URL; an error is returned when resp carries no request URL.
+// If the page contains no link, it returns (nil, nil).
+//
+// client is used to retrieve the linked `human.json`, with ctx attached to
+// that request; a nil client falls back to [http.DefaultClient].
+// A response outside the 2xx range is reported as an error instead of being
+// handed to the JSON decoder. On error the returned [Human] is always nil.
+func GetHumanFromHTML(ctx context.Context, client *http.Client, resp *http.Response) (*Human, error) {
+	mediaType, _, _ := strings.Cut(resp.Header.Get("Content-Type"), ";")
+	if !strings.EqualFold(strings.TrimSpace(mediaType), "text/html") {
+		return nil, ErrUnsupportedContentType
+	}
+	page, err := io.ReadAll(resp.Body)
+	// The page is fully buffered (or unreadable) at this point, so a Close
+	// error carries no extra information.
+	resp.Body.Close()
+	if err != nil {
+		return nil, err
+	}
+	// Responses that did not come from a client (for example recorded ones)
+	// have no Request; fail cleanly instead of dereferencing nil.
+	if resp.Request == nil || resp.Request.URL == nil {
+		return nil, errors.New("response has no request URL to resolve the human.json link against")
+	}
+	link, err := GetURLFromHTML(page, resp.Request.URL)
+	if err != nil {
+		return nil, err
+	}
+	if link == nil {
+		return nil, nil
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, link.String(), nil)
+	if err != nil {
+		return nil, err
+	}
+	if client == nil {
+		client = http.DefaultClient
+	}
+	humanResp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer humanResp.Body.Close()
+	if humanResp.StatusCode < 200 || humanResp.StatusCode > 299 {
+		return nil, errors.New("fetching " + link.String() + ": unexpected status " + humanResp.Status)
+	}
+	data, err := io.ReadAll(humanResp.Body)
+	if err != nil {
+		return nil, err
+	}
+	h := new(Human)
+	if err := json.Unmarshal(data, h); err != nil {
+		return nil, err
+	}
+	return h, nil
+}
+
// GetURLFromHTML returns the [url.URL] extracted from the raw HTML.
// Base is the URL containing the HTML.
//
// Returns nil if nothing is found.
func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) {
content := string(b)
+ runed := []rune(content)
i := strings.Index(content, `<link `)
- for i >= 0 {
- if i+6 >= len(content) {
- return nil, nil
- }
- args := parseArgs(string([]rune(content)[i+6:]))
+ for i >= 0 && i+6 < len(runed) {
+ content = string(runed[i+6:])
+ runed = []rune(content)
+ args := parseArgs(content)
if args["rel"] == "human-json" {
href, ok := args["href"]
if ok {
@@ -37,7 +83,7 @@ func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) {
return u, nil
}
}
- i = strings.IndexAny(content[i:], `<link `)
+ i = strings.Index(content, `<link `)
}
return nil, nil
}
diff --git a/html_test.go b/html_test.go
index ab2bda7..b7fdf1a 100644
--- a/html_test.go
+++ b/html_test.go
@@ -1,10 +1,35 @@
package human
import (
+ "context"
+ "encoding/json"
+ "net/http"
"net/url"
"testing"
)
+// TestGetHumanFromHTML is an integration test: it downloads a live page and
+// the human.json that page links to. It needs network access, so it is
+// skipped when tests run with -short.
+func TestGetHumanFromHTML(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping: requires network access")
+	}
+	client := http.DefaultClient
+	// using human.json author's website
+	resp, err := client.Get(`https://robida.net/`)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// GetHumanFromHTML does not close the body when it rejects the content
+	// type; closing here covers that path and is harmless otherwise.
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("unexpected status fetching page: %s", resp.Status)
+	}
+	t.Logf("html fetched")
+	h, err := GetHumanFromHTML(context.Background(), client, resp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if h == nil {
+		t.Fatal("human.json is nil")
+	}
+	b, err := json.Marshal(h)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("%s", b)
+}
+
func TestGetURLFromHTML(t *testing.T) {
base, _ := url.Parse(`https://example.org/foo/`)
u, err := GetURLFromHTML([]byte(`<link rel=human-json href=/human.json>`), base)
diff --git a/human.go b/human.go
index 796d970..2063261 100644
--- a/human.go
+++ b/human.go
@@ -110,7 +110,7 @@ func (v *Vouch) MarshalJSON() ([]byte, error) {
// Human represents the human.json file.
//
// The struct tags map the fields to the JSON keys "version", "url" and
// "vouches"; the Version tag is the one corrected by this change (it
// previously read "string").
type Human struct {
- Version string `json:"string"`
+ Version string `json:"version"`
URL *URL `json:"url"`
Vouches []*Vouch `json:"vouches"`
}