// Provenance: html.go of package human, created by commit
// 0fe92fb4b9344e76111e88b2c95cb8e7c5cc3f85 ("feat(html): parse link rel in
// html", Anhgelus Morhtuuzh, 2026-03-15). The code below needs the "net/url"
// and "strings" imports.
//
// NOTE(review): the copy of the patch this was restored from had two spans
// stripped as if they were HTML tags (the tag literal passed to the first
// strings.Index call, and everything between the second search call and the
// middle of parseArgs). Those parts are reconstructed here; confirm against
// the upstream commit.

// GetURLFromHTML returns the [url.URL] extracted from the raw HTML.
// Base is the URL containing the HTML.
//
// The HTML is scanned for <link> tags; the first one whose rel attribute
// contains the token "human-json" and that carries an href attribute wins.
// An absolute href is returned as parsed. Any other href is resolved against
// base with [url.URL.ResolveReference]; when base is nil it is returned
// unresolved instead.
//
// Returns nil if nothing is found.
// Returns an error if the href of the matching tag cannot be parsed.
func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) {
	const tag = "<link"
	content := string(b)
	for start := 0; start < len(content); {
		rel := strings.Index(content[start:], tag)
		if rel < 0 {
			break
		}
		// strings.Index reports an offset inside the searched sub-slice, so
		// the slice start has to be added back. Moving start past the tag
		// name guarantees the next search cannot find the same tag again.
		attrs := start + rel + len(tag)
		start = attrs
		// Require whitespace after the name so "<linker" and a truncated
		// "<link" at the very end of the input are not treated as link tags.
		if attrs >= len(content) || !isHTMLSpace(content[attrs]) {
			continue
		}
		// attrs is a byte offset, so the string is sliced directly; going
		// through []rune here would misplace the cut after multi-byte text.
		args := parseArgs(content[attrs:])
		if !hasRelToken(args["rel"], "human-json") {
			continue
		}
		href, ok := args["href"]
		if !ok {
			continue
		}
		u, err := url.Parse(strings.TrimSpace(href))
		if err != nil {
			return nil, err
		}
		if base == nil || u.IsAbs() {
			return u, nil
		}
		return base.ResolveReference(u), nil
	}
	return nil, nil
}

// hasRelToken reports whether the whitespace-separated token list rel
// contains want, compared case-insensitively.
func hasRelToken(rel, want string) bool {
	for _, tok := range strings.Fields(rel) {
		if strings.EqualFold(tok, want) {
			return true
		}
	}
	return false
}

// isHTMLSpace reports whether c is one of the ASCII whitespace bytes that
// may separate attributes inside a tag.
func isHTMLSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f':
		return true
	}
	return false
}

// parseArgs parses the attributes of a single tag. s is the text that
// follows the tag name; parsing stops at the first '>' found outside a
// quoted value, or at the end of s.
//
// The result maps lower-cased attribute names to their values. Values may be
// double-quoted, single-quoted or unquoted. Attributes without a value map
// to the empty string. When a name occurs more than once the last value is
// kept.
func parseArgs(s string) map[string]string {
	res := make(map[string]string)
	i := 0
	for i < len(s) {
		// Skip separators between attributes; '/' covers the "/>" ending.
		for i < len(s) && (isHTMLSpace(s[i]) || s[i] == '/') {
			i++
		}
		if i >= len(s) || s[i] == '>' {
			break
		}

		nameStart := i
		for i < len(s) && !isHTMLSpace(s[i]) && s[i] != '=' && s[i] != '>' && s[i] != '/' {
			i++
		}
		key := strings.ToLower(s[nameStart:i])

		// Look past optional whitespace for the '=' that introduces a value.
		j := i
		for j < len(s) && isHTMLSpace(s[j]) {
			j++
		}
		if j >= len(s) || s[j] != '=' {
			if key != "" {
				res[key] = ""
			}
			continue
		}
		i = j + 1
		for i < len(s) && isHTMLSpace(s[i]) {
			i++
		}

		var val string
		switch {
		case i >= len(s):
			// "name=" at the very end of the input: empty value.
		case s[i] == '"' || s[i] == '\'':
			quote := s[i]
			i++
			// Searching for the closing quote (instead of tracking whether
			// anything was collected yet) lets name="" terminate correctly.
			if end := strings.IndexByte(s[i:], quote); end < 0 {
				val = s[i:]
				i = len(s)
			} else {
				val = s[i : i+end]
				i += end + 1
			}
		default:
			valStart := i
			for i < len(s) && !isHTMLSpace(s[i]) && s[i] != '>' {
				i++
			}
			val = s[valStart:i]
		}
		if key != "" {
			res[key] = val
		}
	}
	return res
}