package human
import (
"net/url"
"strings"
)
// GetURLFromHTML returns the [url.URL] referenced by the first
// <link rel="human-json" href="..."> tag found in the raw HTML b.
//
// base is the URL the HTML was served from. The href is resolved against
// it with [url.URL.ResolveReference], so absolute, scheme-relative,
// root-relative and document-relative references all yield an absolute
// URL. If base is nil the parsed href is returned as written.
//
// It returns (nil, nil) if no matching tag with an href attribute is
// found, and a non-nil error if the href of the matching tag cannot be
// parsed as a URL. Tag and attribute names are matched case-sensitively.
func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) {
	const tag = "<link "

	content := string(b)
	// pos is the absolute byte offset at which the next search starts. It
	// always moves past the tag just inspected, so the scan terminates.
	pos := 0
	for {
		i := strings.Index(content[pos:], tag)
		if i < 0 {
			return nil, nil
		}
		// Keep working in byte offsets: strings.Index reports bytes, so
		// slicing the string directly stays correct for non-ASCII input.
		pos += i + len(tag)

		args := parseArgs(content[pos:])
		if args["rel"] != "human-json" {
			continue
		}
		href, ok := args["href"]
		if !ok {
			continue
		}

		u, err := url.Parse(href)
		if err != nil {
			return nil, err
		}
		if base == nil {
			return u, nil
		}
		return base.ResolveReference(u), nil
	}
}

// parseArgs parses the attributes of an HTML tag. content must start right
// after the tag name and may extend past the end of the tag: parsing stops
// at the first ">" or "/>" found outside a quoted value, or at the end of
// content.
//
// Values may be unquoted, double-quoted or single-quoted. Attributes
// without a value are mapped to the empty string. Keys and values are
// returned exactly as written (no case folding, no entity decoding).
func parseArgs(content string) map[string]string {
	res := make(map[string]string)

	var key, value strings.Builder
	inValue := false // an '=' has been read and the value is being collected
	var quote byte   // quote character enclosing the current value, 0 if none

	// store records the attribute collected so far and resets the state so
	// that the next byte starts a new attribute name.
	store := func() {
		res[key.String()] = value.String()
		key.Reset()
		value.Reset()
		inValue = false
		quote = 0
	}

	// All delimiters are ASCII and can never occur inside a multi-byte
	// UTF-8 sequence, so scanning bytes is safe and keeps offsets simple.
	for i := 0; i < len(content); i++ {
		c := content[i]

		if quote == 0 {
			if c == '>' {
				break
			}
			if c == '/' && i+1 < len(content) && content[i+1] == '>' {
				break
			}
		}

		switch {
		case !inValue:
			switch {
			case c == '=':
				inValue = true
			case isHTMLSpace(c):
				// A name followed by whitespace is a valueless attribute.
				if key.Len() > 0 {
					res[key.String()] = ""
				}
				key.Reset()
			default:
				key.WriteByte(c)
			}
		case quote != 0:
			// Inside a quoted value only the matching quote is special;
			// this also closes empty values such as title="".
			if c == quote {
				store()
			} else {
				value.WriteByte(c)
			}
		case value.Len() == 0 && (c == '"' || c == '\''):
			quote = c
		case isHTMLSpace(c):
			store()
		default:
			value.WriteByte(c)
		}
	}

	// Flush an attribute that was cut short by the end of the tag or input.
	if key.Len() > 0 {
		res[key.String()] = value.String()
	}
	return res
}

// isHTMLSpace reports whether c is one of the ASCII whitespace characters
// that may separate attributes inside an HTML tag.
func isHTMLSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f':
		return true
	}
	return false
}