blob: 8c968f54e502fc4993b2e505c53cb5d6b03dfa2a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
package human
import (
"net/url"
"strings"
)
// GetURLFromHTML returns the [url.URL] extracted from the raw HTML.
// Base is the URL containing the HTML.
//
// It scans the document for `<link rel="human-json" href="...">` tags.
// An absolute href (one with a host) is returned as-is; a relative href
// is resolved against base (rooted paths replace base's path, others are
// joined onto it).
//
// Returns nil if nothing is found.
func GetURLFromHTML(b []byte, base *url.URL) (*url.URL, error) {
	content := string(b)
	for i := strings.Index(content, `<link `); i >= 0; {
		if i+6 >= len(content) {
			return nil, nil
		}
		// i is a byte offset from strings.Index, so slice bytes directly;
		// converting through []rune would misalign on multibyte input.
		args := parseArgs(content[i+6:])
		if args["rel"] == "human-json" {
			if href, ok := args["href"]; ok {
				u, err := url.Parse(href)
				if err != nil {
					return nil, err
				}
				if u.Host != "" {
					// Already absolute; no resolution needed.
					return u, nil
				}
				path := u.Path
				*u = *base
				if strings.HasPrefix(path, "/") {
					u.Path = path
				} else {
					u = u.JoinPath(path)
				}
				return u, nil
			}
		}
		// Advance past the current tag prefix, then look for the next
		// occurrence; Index (not IndexAny) matches the full substring,
		// and the relative offset must be rebased onto i.
		next := strings.Index(content[i+6:], `<link `)
		if next < 0 {
			break
		}
		i += 6 + next
	}
	return nil, nil
}
// parseArgs parses HTML-style attribute assignments from base, stopping
// at the first ">" or "/>" (or the end of the string).
//
// Values may be double-quoted or bare (terminated by a space); an
// attribute written without "=" maps to the empty string. Keys and
// values are returned verbatim, quotes stripped.
func parseArgs(base string) map[string]string {
	content := []rune(base)
	i := 0
	res := map[string]string{}
	var sep bool // true once '=' is seen; currently reading a value
	var key strings.Builder
	var value strings.Builder
	var quote bool // inside a double-quoted value
	for i < len(content) && content[i] != '>' && (i+1 == len(content) || string(content[i:i+2]) != "/>") {
		curr := content[i]
		if !sep {
			switch curr {
			case '=':
				sep = true
			case ' ':
				// Attribute with no value, e.g. <link async ...>.
				if key.Len() > 0 {
					res[key.String()] = ""
				}
				key.Reset()
			default:
				key.WriteRune(curr)
			}
		} else {
			// The !quote guard keeps an empty quoted value (href="")
			// from re-opening on its closing quote: the second '"' must
			// fall through to the closing branch below and store "".
			if !quote && value.Len() == 0 && curr == '"' {
				quote = true
			} else if (curr == '"' && quote) || (curr == ' ' && !quote) {
				// Closing quote, or end of a bare value.
				quote = false
				sep = false
				res[key.String()] = value.String()
				key.Reset()
				value.Reset()
			} else {
				value.WriteRune(curr)
			}
		}
		i++
	}
	// Flush a trailing attribute terminated by '>' / "/>" / EOF.
	if key.Len() > 0 {
		res[key.String()] = value.String()
	}
	return res
}
|