aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnhgelus Morhtuuzh <william@herges.fr>2026-03-15 16:23:06 +0100
committerAnhgelus Morhtuuzh <william@herges.fr>2026-03-15 16:23:06 +0100
commit0fe92fb4b9344e76111e88b2c95cb8e7c5cc3f85 (patch)
tree943ff5babd5e0c047f86fc55bb21f77ac0afc5ef
parent15cf6bb73408f9568c335be894d34372b5589a1d (diff)
feat(html): parse link rel in html
-rw-r--r--html.go88
-rw-r--r--html_test.go97
2 files changed, 185 insertions, 0 deletions
diff --git a/html.go b/html.go
new file mode 100644
index 0000000..8c968f5
--- /dev/null
+++ b/html.go
@@ -0,0 +1,88 @@
+package human
+
+import (
+ "net/url"
+ "strings"
+)
+
// GetURLFromHTML returns the [url.URL] extracted from the raw HTML.
// Base is the URL containing the HTML.
//
// The document is scanned for `<link ` tags; the first one whose rel attribute
// contains the human-json token (compared case-insensitively) and which has an
// href attribute wins. Its href is resolved against base following RFC 3986
// (see [url.URL.ResolveReference]), so absolute, host-relative and
// path-relative references are all supported and the query and fragment of
// the href are preserved. When base is nil the href is returned as parsed.
//
// The tag name itself is matched literally: it must be spelled `<link`
// in lower case and be followed by a space.
//
// Returns nil if nothing is found. An error is returned only when the href of
// the matching tag cannot be parsed as a URL.
func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) {
	const tag = "<link "

	content := string(b)
	for {
		i := strings.Index(content, tag)
		if i < 0 {
			return nil, nil
		}
		// Drop everything up to and including the tag opener. Working on the
		// remaining byte slice keeps offsets consistent for non-ASCII input
		// and guarantees that the next search makes progress.
		content = content[i+len(tag):]

		args := parseArgs(content)
		if !hasRelToken(args["rel"], "human-json") {
			continue
		}
		href, ok := args["href"]
		if !ok {
			continue
		}

		u, err := url.Parse(href)
		if err != nil {
			return nil, err
		}
		if base == nil {
			// Nothing to resolve against: hand back the reference untouched.
			return u, nil
		}
		return base.ResolveReference(u), nil
	}
}

// hasRelToken reports whether the whitespace-separated rel attribute value
// contains the link type want, ignoring case.
func hasRelToken(rel, want string) bool {
	for _, tok := range strings.Fields(rel) {
		if strings.EqualFold(tok, want) {
			return true
		}
	}
	return false
}

// parseArgs parses the attributes of an HTML tag.
//
// base is the text following the tag name; parsing stops at the first `>` or
// `/>` found outside a quoted value, or at the end of base. The result maps
// each lower-cased attribute name to its value. Attributes without a value
// map to the empty string. If a name is repeated, the last value wins.
//
// Values may be unquoted, double-quoted or single-quoted. Attributes may be
// separated by any ASCII whitespace, and whitespace is allowed around `=`.
func parseArgs(base string) map[string]string {
	res := map[string]string{}
	n := len(base)

	// All delimiters are ASCII, so scanning bytes is safe for UTF-8 input.
	isSpace := func(c byte) bool {
		switch c {
		case ' ', '\t', '\n', '\r', '\f':
			return true
		}
		return false
	}
	tagEnds := func(i int) bool {
		return base[i] == '>' || (base[i] == '/' && i+1 < n && base[i+1] == '>')
	}
	skipSpaces := func(i int) int {
		for i < n && isSpace(base[i]) {
			i++
		}
		return i
	}

	i := 0
	for {
		i = skipSpaces(i)
		if i >= n || tagEnds(i) {
			break
		}

		// Attribute name.
		start := i
		for i < n && base[i] != '=' && !isSpace(base[i]) && !tagEnds(i) {
			i++
		}
		key := strings.ToLower(base[start:i])

		i = skipSpaces(i)
		if i >= n || base[i] != '=' {
			// Attribute without a value, e.g. `hidden`.
			res[key] = ""
			continue
		}
		i = skipSpaces(i + 1)

		// Attribute value.
		var value string
		if i < n && (base[i] == '"' || base[i] == '\'') {
			quote := base[i]
			i++
			if end := strings.IndexByte(base[i:], quote); end >= 0 {
				value = base[i : i+end]
				i += end + 1
			} else {
				// Unterminated quote: take what is left of the tag.
				start = i
				for i < n && !tagEnds(i) {
					i++
				}
				value = base[start:i]
			}
		} else {
			start = i
			for i < n && !isSpace(base[i]) && !tagEnds(i) {
				i++
			}
			value = base[start:i]
		}

		// A stray `=` without a name carries no usable attribute.
		if key != "" {
			res[key] = value
		}
	}
	return res
}
diff --git a/html_test.go b/html_test.go
new file mode 100644
index 0000000..ab2bda7
--- /dev/null
+++ b/html_test.go
@@ -0,0 +1,97 @@
+package human
+
+import (
+ "net/url"
+ "testing"
+)
+
+func TestGetURLFromHTML(t *testing.T) {
+ base, _ := url.Parse(`https://example.org/foo/`)
+ u, err := GetURLFromHTML([]byte(`<link rel=human-json href=/human.json>`), base)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if u == nil {
+ t.Fatal("not found")
+ }
+ if u.Path != "/human.json" {
+ t.Errorf("invalid path: %s", u.Path)
+ }
+ if u.Host != "example.org" {
+ t.Errorf("invalid host: %s", u.Host)
+ }
+
+ u, err = GetURLFromHTML([]byte(`<link rel="human-json" href="/human.json">`), base)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if u == nil {
+ t.Fatal("not found")
+ }
+ if u.Path != "/human.json" {
+ t.Errorf("invalid path: %s", u.Path)
+ }
+ if u.Host != "example.org" {
+ t.Errorf("invalid host: %s", u.Host)
+ }
+
+ u, err = GetURLFromHTML([]byte(`<link rel="human-json" href="human.json">`), base)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if u == nil {
+ t.Fatal("not found")
+ }
+ if u.Path != "/foo/human.json" {
+ t.Errorf("invalid path: %s", u.Path)
+ }
+ if u.Host != "example.org" {
+ t.Errorf("invalid host: %s", u.Host)
+ }
+
+ u, err = GetURLFromHTML([]byte(`<link rel="human-json" href="../human.json">`), base)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if u == nil {
+ t.Fatal("not found")
+ }
+ if u.Path != "/human.json" {
+ t.Errorf("invalid path: %s", u.Path)
+ }
+ if u.Host != "example.org" {
+ t.Errorf("invalid host: %s", u.Host)
+ }
+}
+
+func TestParseArgs(t *testing.T) {
+ args := parseArgs(`key="hello world">`)
+ if args["key"] != "hello world" {
+ t.Errorf("invalid arg: %v", args)
+ }
+
+ args = parseArgs(`key=hello>`)
+ if args["key"] != "hello" {
+ t.Errorf("invalid arg: %v", args)
+ }
+
+ args = parseArgs(`key=hello world>`)
+ if args["key"] != "hello" {
+ t.Errorf("invalid arg: %v", args)
+ }
+
+ args = parseArgs(`key=hello/>`)
+ if args["key"] != "hello" {
+ t.Errorf("invalid arg: %v", args)
+ }
+
+ args = parseArgs(`key=hello`)
+ if args["key"] != "hello" {
+ t.Errorf("invalid arg: %v", args)
+ }
+
+ args = parseArgs(`key=word foo=bar`)
+ if args["key"] != "word" || args["foo"] != "bar" {
+ t.Errorf("invalid args: %v", args)
+ }
+}