From 0fe92fb4b9344e76111e88b2c95cb8e7c5cc3f85 Mon Sep 17 00:00:00 2001 From: Anhgelus Morhtuuzh Date: Sun, 15 Mar 2026 16:23:06 +0100 Subject: feat(html): parse link rel in html --- html.go | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ html_test.go | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 html.go create mode 100644 html_test.go diff --git a/html.go b/html.go new file mode 100644 index 0000000..8c968f5 --- /dev/null +++ b/html.go @@ -0,0 +1,88 @@ +package human + +import ( + "net/url" + "strings" +) + +// GetURLFromHTML returns the [url.URL] extracted from the raw HTML. +// Base is the URL containing the HTML. +// +// Returns nil if nothing is found. +func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) { + content := string(b) + i := strings.Index(content, `= 0 { + if i+6 >= len(content) { + return nil, nil + } + args := parseArgs(string([]rune(content)[i+6:])) + if args["rel"] == "human-json" { + href, ok := args["href"] + if ok { + u, err := url.Parse(href) + if err != nil { + return nil, err + } + if u.Host != "" { + return u, nil + } + path := u.Path + *u = *base + if strings.HasPrefix(path, "/") { + u.Path = path + } else { + u = u.JoinPath(path) + } + return u, nil + } + } + i = strings.IndexAny(content[i:], ` 0 { + res[key.String()] = "" + } + key.Reset() + default: + key.WriteRune(curr) + } + } else { + if value.Len() == 0 && curr == '"' { + quote = true + } else if (curr == '"' && quote) || curr == ' ' && !quote { + quote = false + sep = false + res[key.String()] = value.String() + key.Reset() + value.Reset() + } else { + value.WriteRune(curr) + } + } + i++ + } + if key.Len() > 0 { + res[key.String()] = value.String() + } + return res +} diff --git a/html_test.go b/html_test.go new file mode 100644 index 0000000..ab2bda7 --- /dev/null +++ b/html_test.go @@ -0,0 +1,97 @@ +package human + +import ( + "net/url" + "testing" +) + +func TestGetURLFromHTML(t *testing.T) { + base, _ := url.Parse(`https://example.org/foo/`) + u, err := GetURLFromHTML([]byte(``), base) + if err != nil { + t.Fatal(err) + } + if u == nil { + t.Fatal("not found") + } + if u.Path != "/human.json" { + t.Errorf("invalid path: %s", u.Path) + } + if u.Host != "example.org" { + t.Errorf("invalid host: %s", u.Host) + } + + u, err = GetURLFromHTML([]byte(``), base) + if err != nil { + t.Fatal(err) + } + if u == nil { + t.Fatal("not found") + } + if u.Path != "/human.json" { + t.Errorf("invalid path: %s", u.Path) + } + if u.Host != "example.org" { + t.Errorf("invalid host: %s", u.Host) + } + + u, err = GetURLFromHTML([]byte(``), base) + if err != nil { + t.Fatal(err) + } + if u == nil { + t.Fatal("not found") + } + if u.Path != "/foo/human.json" { + t.Errorf("invalid path: %s", u.Path) + } + if u.Host != "example.org" { + t.Errorf("invalid host: %s", u.Host) + } + + u, err = GetURLFromHTML([]byte(``), base) + if err != nil { + t.Fatal(err) + } + if u == nil { + t.Fatal("not found") + } + if u.Path != "/human.json" { + t.Errorf("invalid path: %s", u.Path) + } + if u.Host != "example.org" { + t.Errorf("invalid host: %s", u.Host) + } +} + +func TestParseArgs(t *testing.T) { + args := parseArgs(`key="hello world">`) + if args["key"] != "hello world" { + t.Errorf("invalid arg: %v", args) + } + + args = parseArgs(`key=hello>`) + if args["key"] != "hello" { + t.Errorf("invalid arg: %v", args) + } + + args = parseArgs(`key=hello world>`) + if args["key"] != "hello" { + t.Errorf("invalid arg: %v", args) + } + + args = parseArgs(`key=hello/>`) + if args["key"] != "hello" { + t.Errorf("invalid arg: %v", args) + } + + args = parseArgs(`key=hello`) + if args["key"] != "hello" { + t.Errorf("invalid arg: %v", args) + } + + args = parseArgs(`key=word foo=bar`) + if args["key"] != "word" || args["foo"] != "bar" { + t.Errorf("invalid args: %v", args) + } +} -- cgit v1.2.3