aboutsummaryrefslogtreecommitdiff
path: root/html.go
diff options
context:
space:
mode:
authorAnhgelus Morhtuuzh <william@herges.fr>2026-03-15 16:23:06 +0100
committerAnhgelus Morhtuuzh <william@herges.fr>2026-03-15 16:23:06 +0100
commit0fe92fb4b9344e76111e88b2c95cb8e7c5cc3f85 (patch)
tree943ff5babd5e0c047f86fc55bb21f77ac0afc5ef /html.go
parent15cf6bb73408f9568c335be894d34372b5589a1d (diff)
feat(html): parse link rel in html
Diffstat (limited to 'html.go')
-rw-r--r--html.go88
1 files changed, 88 insertions, 0 deletions
diff --git a/html.go b/html.go
new file mode 100644
index 0000000..8c968f5
--- /dev/null
+++ b/html.go
@@ -0,0 +1,88 @@
+package human
+
+import (
+ "net/url"
+ "strings"
+)
+
+// GetURLFromHTML returns the [url.URL] extracted from the raw HTML.
+// Base is the URL containing the HTML.
+//
+// Returns nil if nothing is found.
+func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) {
+ content := string(b)
+ i := strings.Index(content, `<link `)
+ for i >= 0 {
+ if i+6 >= len(content) {
+ return nil, nil
+ }
+ args := parseArgs(string([]rune(content)[i+6:]))
+ if args["rel"] == "human-json" {
+ href, ok := args["href"]
+ if ok {
+ u, err := url.Parse(href)
+ if err != nil {
+ return nil, err
+ }
+ if u.Host != "" {
+ return u, nil
+ }
+ path := u.Path
+ *u = *base
+ if strings.HasPrefix(path, "/") {
+ u.Path = path
+ } else {
+ u = u.JoinPath(path)
+ }
+ return u, nil
+ }
+ }
+ i = strings.IndexAny(content[i:], `<link `)
+ }
+ return nil, nil
+}
+
// parseArgs parses the attributes of an HTML tag into a name -> value map.
//
// base is the text that follows the tag name (for example everything after
// `<link `). Parsing stops at the end of the tag, i.e. at the first `>` or
// `/>` located outside a quoted value, or at the end of base.
//
// Values may be wrapped in double or single quotes, or left unquoted, in
// which case they end at the next whitespace. Attributes without a value are
// stored with an empty string. Attributes may be separated by any HTML
// whitespace (space, tab, line feed, carriage return, form feed).
func parseArgs(base string) map[string]string {
	res := map[string]string{}

	var key, value strings.Builder
	inValue := false // true once '=' has been seen for the current key
	var quote rune   // quote character enclosing the current value, 0 if none

	isSpace := func(r rune) bool {
		switch r {
		case ' ', '\t', '\n', '\r', '\f':
			return true
		}
		return false
	}
	// store records the current key/value pair and resets the parser so that
	// it expects a new attribute name.
	store := func() {
		res[key.String()] = value.String()
		key.Reset()
		value.Reset()
		inValue = false
		quote = 0
	}

	for i, r := range base {
		// The tag only ends outside of quotes: '>' is legal in a quoted value.
		if quote == 0 && (r == '>' || strings.HasPrefix(base[i:], "/>")) {
			break
		}
		switch {
		case !inValue:
			switch {
			case r == '=':
				inValue = true
			case isSpace(r):
				// A name followed by whitespace is a valueless attribute.
				if key.Len() > 0 {
					res[key.String()] = ""
					key.Reset()
				}
			default:
				key.WriteRune(r)
			}
		case quote != 0:
			// Inside quotes only the matching quote ends the value, which
			// also closes empty values such as title="".
			if r == quote {
				store()
			} else {
				value.WriteRune(r)
			}
		case value.Len() == 0 && (r == '"' || r == '\''):
			quote = r
		case isSpace(r):
			store()
		default:
			value.WriteRune(r)
		}
	}
	// Flush an attribute that was cut short by the end of the tag or input.
	if key.Len() > 0 {
		res[key.String()] = value.String()
	}
	return res
}