1 // Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
2 // See LICENSE for licensing information
// testCase is a single table entry consumed by doTest, which reads its in and
// want fields.
// NOTE(review): the field declarations are elided from this excerpt; judging
// by the tables in this file, want is presumably nil, true, or a string —
// confirm against wantStr.
12 type testCase struct {
// wantStr turns the want value of a test case into the string that doTest
// reports as the expected match for the input in. It dispatches on the
// dynamic type of want.
// NOTE(review): the switch cases are elided from this excerpt; presumably nil
// means "no match" and true means "the whole input matches" — confirm.
17 func wantStr(in string, want interface{}) string {
18 switch x := want.(type) {
// doTest runs every entry of cases against re, each as its own subtest named
// "<name>/<index>" with the index zero-padded to three digits.
//
// Every input is searched twice with re.FindString: once as-is and once with
// a newline added on both sides. Failures are reported through t.Errorf with
// the searched text, the match that was found, and the expectation computed
// by wantStr.
// NOTE(review): the condition guarding the t.Errorf call is elided from this
// excerpt; presumably it compares got against want — confirm.
29 func doTest(t *testing.T, name string, re *regexp.Regexp, cases []testCase) {
30 for i, c := range cases {
31 t.Run(fmt.Sprintf("%s/%03d", name, i), func(t *testing.T) {
32 want := wantStr(c.in, c.want)
// want is computed once from the bare input, so the same expectation is
// applied to both the bare and the newline-wrapped variant.
33 for _, surround := range []string{"", "\n"} {
34 in := surround + c.in + surround
35 got := re.FindString(in)
37 t.Errorf(`FindString(%q) got %q, want %q`, in, got, want)
// constantTestCases is the table that TestRegexes runs against both the
// Relaxed() and the Strict() regexp. Each entry is an input followed by its
// expectation, which is nil, true, or a literal string and is resolved by
// doTest through wantStr.
// NOTE(review): presumably nil means no match is expected and true means the
// whole input is expected to match — confirm against wantStr.
44 var constantTestCases = []testCase{
70 {`foorandom:bar`, nil},
71 {`foo.randombar`, nil},
86 {`https://localhost`, true},
90 {`xmpp:foo@bar`, true},
91 {`bitcoin:Addr23?amount=1&message=foo`, true},
92 {`http://foo.com`, true},
93 {`http://foo.co.uk`, true},
94 {`http://foo.random`, true},
95 {` http://foo.com/bar `, `http://foo.com/bar`},
96 {` http://foo.com/bar more`, `http://foo.com/bar`},
97 {`<http://foo.com/bar>`, `http://foo.com/bar`},
98 {`<http://foo.com/bar>more`, `http://foo.com/bar`},
99 {`.http://foo.com/bar.`, `http://foo.com/bar`},
100 {`.http://foo.com/bar.more`, `http://foo.com/bar.more`},
101 {`,http://foo.com/bar,`, `http://foo.com/bar`},
102 {`,http://foo.com/bar,more`, `http://foo.com/bar,more`},
103 {`*http://foo.com/bar*`, `http://foo.com/bar`},
104 {`*http://foo.com/bar*more`, `http://foo.com/bar*more`},
105 {`_http://foo.com/bar_`, `http://foo.com/bar_`},
106 {`_http://foo.com/bar_more`, `http://foo.com/bar_more`},
107 {`(http://foo.com/bar)`, `http://foo.com/bar`},
108 {`(http://foo.com/bar)more`, `http://foo.com/bar`},
109 {`[http://foo.com/bar]`, `http://foo.com/bar`},
110 {`[http://foo.com/bar]more`, `http://foo.com/bar`},
111 {`'http://foo.com/bar'`, `http://foo.com/bar`},
112 {`'http://foo.com/bar'more`, `http://foo.com/bar'more`},
113 {`"http://foo.com/bar"`, `http://foo.com/bar`},
114 {`"http://foo.com/bar"more`, `http://foo.com/bar"more`},
115 {`http://a.b/a0/-+_&~*%=#@.,:;'?![]()a`, true},
116 {`http://a.b/a0/$€¥`, true},
117 {`http://✪foo.bar/pa✪th©more`, true},
118 {`http://foo.bar/path/`, true},
119 {`http://foo.bar/path-`, true},
120 {`http://foo.bar/path+`, true},
121 {`http://foo.bar/path&`, true},
122 {`http://foo.bar/path~`, true},
123 {`http://foo.bar/path%`, true},
124 {`http://foo.bar/path=`, true},
125 {`http://foo.bar/path#`, true},
126 {`http://foo.bar/path.`, `http://foo.bar/path`},
127 {`http://foo.bar/path,`, `http://foo.bar/path`},
128 {`http://foo.bar/path:`, `http://foo.bar/path`},
129 {`http://foo.bar/path;`, `http://foo.bar/path`},
130 {`http://foo.bar/path'`, `http://foo.bar/path`},
131 {`http://foo.bar/path?`, `http://foo.bar/path`},
132 {`http://foo.bar/path!`, `http://foo.bar/path`},
133 {`http://foo.bar/path@`, `http://foo.bar/path`},
134 {`http://foo.bar/path|`, `http://foo.bar/path`},
135 {`http://foo.bar/path|more`, `http://foo.bar/path`},
136 {`http://foo.bar/path<`, `http://foo.bar/path`},
137 {`http://foo.bar/path<more`, `http://foo.bar/path`},
138 {`http://foo.com/path_(more)`, true},
139 {`(http://foo.com/path_(more))`, `http://foo.com/path_(more)`},
140 {`http://foo.com/path_(even)-(more)`, true},
141 {`http://foo.com/path_(even)(more)`, true},
142 {`http://foo.com/path_(even_(nested))`, true},
143 {`(http://foo.com/path_(even_(nested)))`, `http://foo.com/path_(even_(nested))`},
144 {`http://foo.com/path_[more]`, true},
145 {`[http://foo.com/path_[more]]`, `http://foo.com/path_[more]`},
146 {`http://foo.com/path_[even]-[more]`, true},
147 {`http://foo.com/path_[even][more]`, true},
148 {`http://foo.com/path_[even_[nested]]`, true},
149 {`[http://foo.com/path_[even_[nested]]]`, `http://foo.com/path_[even_[nested]]`},
150 {`http://foo.com/path_{more}`, true},
151 {`{http://foo.com/path_{more}}`, `http://foo.com/path_{more}`},
152 {`http://foo.com/path_{even}-{more}`, true},
153 {`http://foo.com/path_{even}{more}`, true},
154 {`http://foo.com/path_{even_{nested}}`, true},
155 {`{http://foo.com/path_{even_{nested}}}`, `http://foo.com/path_{even_{nested}}`},
156 {`http://foo.com/path#fragment`, true},
157 {`http://foo.com/emptyfrag#`, true},
158 {`http://foo.com/spaced%20path`, true},
159 {`http://foo.com/?p=spaced%20param`, true},
160 {`http://test.foo.com/`, true},
161 {`http://foo.com/path`, true},
162 {`http://foo.com:8080/path`, true},
163 {`http://1.1.1.1/path`, true},
164 {`http://1080::8:800:200c:417a/path`, true},
165 {`http://中国.中国/中国`, true},
166 {`http://中国.中国/foo中国`, true},
167 {`http://उदाहरण.परीकषा`, true},
168 {`http://xn-foo.xn--p1acf/path`, true},
169 {`what is http://foo.com?`, `http://foo.com`},
170 {`go visit http://foo.com/path.`, `http://foo.com/path`},
171 {`go visit http://foo.com/path...`, `http://foo.com/path`},
172 {`what is http://foo.com/path?`, `http://foo.com/path`},
173 {`the http://foo.com!`, `http://foo.com`},
174 {`https://test.foo.bar/path?a=b`, `https://test.foo.bar/path?a=b`},
175 {`ftp://user@foo.bar`, true},
176 {`http://foo.com/base64-bCBwbGVhcw==`, true},
177 {`http://foo.com/🐼`, true},
178 {`https://shmibbles.me/tmp/自殺でも?.png`, true},
179 {`randomtexthttp://foo.bar/etc`, "http://foo.bar/etc"},
// TestRegexes runs the shared constantTestCases table against both Relaxed()
// and Strict(), then runs one additional table against each of them
// ("Relaxed2" and "Strict2") through doTest.
182 func TestRegexes(t *testing.T) {
183 doTest(t, "Relaxed", Relaxed(), constantTestCases)
184 doTest(t, "Strict", Strict(), constantTestCases)
185 doTest(t, "Relaxed2", Relaxed(), []testCase{
188 {`foo.com bar.com`, `foo.com`},
189 {`foo.com-foo`, `foo.com`},
190 {`foo.company`, true},
191 // We can't use \b to not match foo.com here, because that
192 // doesn't work for non-ASCII TLDs.
193 {`foo.comrandom`, `foo.com`},
194 {`foo.example`, true},
199 {`中国.中国/foo中国`, true},
201 {`test.联通 extra`, `test.联通`},
202 {`test.xn--8y0a063a`, true},
203 {`test.xn--8y0a063a/foobar`, true},
204 {`test.xn-foo`, nil},
208 {`10.50.23.250`, true},
213 {`foo@1.2.3.4`, `1.2.3.4`},
214 {`1080:0:0:0:8:800:200C:4171`, true},
215 {`3ffe:2a00:100:7031::1`, true},
216 {`1080::8:800:200c:417a`, true},
217 {`foo.com:8080`, true},
218 {`foo.com:8080/path`, true},
219 {`test.foo.com`, true},
220 {`test.foo.com/path`, true},
221 {`test.foo.com/path/more/`, true},
222 {`TEST.FOO.COM/PATH`, true},
223 {`TEST.FÓO.COM/PÁTH`, true},
224 {`foo.com/path_(more)`, true},
225 {`foo.com/path_(even)_(more)`, true},
226 {`foo.com/path_(more)/more`, true},
227 {`foo.com/path_(more)/end)`, `foo.com/path_(more)/end`},
228 {`www.foo.com`, true},
229 {` foo.com/bar `, `foo.com/bar`},
230 {` foo.com/bar more`, `foo.com/bar`},
231 {`<foo.com/bar>`, `foo.com/bar`},
232 {`<foo.com/bar>more`, `foo.com/bar`},
233 {`,foo.com/bar.`, `foo.com/bar`},
234 {`,foo.com/bar.more`, `foo.com/bar.more`},
235 {`,foo.com/bar,`, `foo.com/bar`},
236 {`,foo.com/bar,more`, `foo.com/bar,more`},
237 {`(foo.com/bar)`, `foo.com/bar`},
238 {`"foo.com/bar'`, `foo.com/bar`},
239 {`"foo.com/bar'more`, `foo.com/bar'more`},
240 {`"foo.com/bar"`, `foo.com/bar`},
241 {`what is foo.com?`, `foo.com`},
242 {`the foo.com!`, `foo.com`},
246 {`foo@bar.com`, "bar.com"},
247 {`foo@sub.bar.com`, "sub.bar.com"},
248 {`foo@中国.中国`, "中国.中国"},
250 doTest(t, "Strict2", Strict(), []testCase{
251 {`http:// foo.com`, nil},
256 {`3ffe:2a00:100:7031::1`, nil},
257 {`test.foo.com:8080/path`, nil},
258 {`foo@bar.com`, nil},
// TestStrictMatchingSchemeError passes each scheme expression in its table to
// StrictMatchingScheme and checks that an error is returned exactly when the
// entry's wantErr is set: a missing expected error and an unexpected error
// are both reported with t.Errorf.
262 func TestStrictMatchingSchemeError(t *testing.T) {
263 for _, c := range []struct {
268 {`https?://`, false},
269 {`http://|mailto:`, false},
// Only the returned error is inspected; the compiled regexp is discarded.
272 _, err := StrictMatchingScheme(c.exp)
273 if c.wantErr && err == nil {
274 t.Errorf(`StrictMatchingScheme("%s") did not error as expected`, c.exp)
275 } else if !c.wantErr && err != nil {
276 t.Errorf(`StrictMatchingScheme("%s") unexpectedly errored`, c.exp)
// TestStrictMatchingScheme builds a regexp with StrictMatchingScheme from the
// expression "http://|ftps?://|mailto:" and runs its table through doTest.
// The table includes inputs whose scheme is written in mixed or upper case
// (Http://, MAILTO:) as well as https://, which is not part of the expression.
// NOTE(review): the error returned by StrictMatchingScheme is discarded here.
281 func TestStrictMatchingScheme(t *testing.T) {
282 strictMatching, _ := StrictMatchingScheme("http://|ftps?://|mailto:")
283 doTest(t, "StrictMatchingScheme", strictMatching, []testCase{
285 {`foo@bar.com`, nil},
286 {`http://foo`, true},
287 {`Http://foo`, true},
288 {`https://foo`, nil},
290 {`ftps://foo`, true},
291 {`mailto:foo`, true},
292 {`MAILTO:foo`, true},
// TestStrictMatchingSchemeAny builds a regexp with StrictMatchingScheme from
// AnyScheme and runs its table through doTest.
// NOTE(review): the error returned by StrictMatchingScheme is discarded, and
// the subtest name prefix passed to doTest is "StrictMatchingScheme", the
// same prefix TestStrictMatchingScheme uses.
297 func TestStrictMatchingSchemeAny(t *testing.T) {
298 strictMatching, _ := StrictMatchingScheme(AnyScheme)
299 doTest(t, "StrictMatchingScheme", strictMatching, []testCase{
300 {`http://foo`, true},
301 {`git+https://foo`, true},
302 {`randomtexthttp://foo.bar/etc`, true},
303 {`mailto:foo`, true},
// bench is the shared benchmark body: it calls re.FindAllString on str b.N
// times, passing -1 as the match limit, and discards the result.
307 func bench(b *testing.B, re *regexp.Regexp, str string) {
308 for i := 0; i < b.N; i++ {
309 re.FindAllString(str, -1)
// BenchmarkStrictEmpty benchmarks the Strict() regexp on the input "foo".
313 func BenchmarkStrictEmpty(b *testing.B) {
314 bench(b, Strict(), "foo")
// BenchmarkStrictSingle benchmarks the Strict() regexp on the input
// "http://foo.foo foo.com".
317 func BenchmarkStrictSingle(b *testing.B) {
318 bench(b, Strict(), "http://foo.foo foo.com")
// BenchmarkStrictMany benchmarks the Strict() regexp on a multi-line raw
// string that mixes plain words with several URL-like tokens. The remainder
// of the literal is not visible in this excerpt.
321 func BenchmarkStrictMany(b *testing.B) {
322 bench(b, Strict(), ` foo bar http://foo.foo
323 foo.com bitcoin:address ftp://
// BenchmarkRelaxedEmpty benchmarks the Relaxed() regexp on the input "foo".
327 func BenchmarkRelaxedEmpty(b *testing.B) {
328 bench(b, Relaxed(), "foo")
// BenchmarkRelaxedSingle benchmarks the Relaxed() regexp on the input
// "http://foo.foo foo.com".
331 func BenchmarkRelaxedSingle(b *testing.B) {
332 bench(b, Relaxed(), "http://foo.foo foo.com")
// BenchmarkRelaxedMany benchmarks the Relaxed() regexp on a multi-line raw
// string that mixes plain words with several URL-like tokens. The remainder
// of the literal is not visible in this excerpt.
335 func BenchmarkRelaxedMany(b *testing.B) {
336 bench(b, Relaxed(), ` foo bar http://foo.foo
337 foo.com bitcoin:address ftp://