--- /dev/null
+// Copyright (c) 2015, Daniel Martà <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "net/url"
+ "os"
+ "regexp"
+ "strings"
+ "time"
+
+ "mvdan.cc/xurls/v2"
+)
+
+var (
+ matching = flag.String("m", "", "")
+ relaxed = flag.Bool("r", false, "")
+ fix = flag.Bool("fix", false, "")
+)
+
+func init() {
+ flag.Usage = func() {
+ p := func(format string, a ...interface{}) {
+ fmt.Fprintf(os.Stderr, format, a...)
+ }
+ p("Usage: xurls [-h] [files]\n\n")
+ p("If no files are given, it reads from standard input.\n\n")
+ p(" -m <regexp> only match urls whose scheme matches a regexp\n")
+ p(" example: 'https?://|mailto:'\n")
+ p(" -r also match urls without a scheme (relaxed)\n")
+ p(" -fix overwrite urls that redirect\n")
+ }
+}
+
+func scanPath(re *regexp.Regexp, path string) error {
+ f := os.Stdin
+ if path != "-" {
+ var err error
+ f, err = os.Open(path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ }
+ bufr := bufio.NewReader(f)
+ var fixedBuf bytes.Buffer
+ anyFixed := false
+ var broken []string
+ for {
+ line, err := bufr.ReadBytes('\n')
+ offset := 0
+ for _, pair := range re.FindAllIndex(line, -1) {
+ // The indexes are based on the original line.
+ pair[0] += offset
+ pair[1] += offset
+ match := line[pair[0]:pair[1]]
+ if !*fix {
+ fmt.Printf("%s\n", match)
+ continue
+ }
+ u, err := url.Parse(string(match))
+ if err != nil {
+ continue
+ }
+ fixed := u.String()
+ switch u.Scheme {
+ case "http", "https":
+ // See if the URL redirects somewhere.
+ client := &http.Client{
+ Timeout: 10 * time.Second,
+ CheckRedirect: func(req *http.Request, via []*http.Request) error {
+ if len(via) >= 10 {
+ return errors.New("stopped after 10 redirects")
+ }
+ // Keep the fragment around.
+ req.URL.Fragment = u.Fragment
+ fixed = req.URL.String()
+ return nil
+ },
+ }
+ resp, err := client.Get(fixed)
+ if err != nil {
+ continue
+ }
+ if resp.StatusCode >= 400 {
+ broken = append(broken, string(match))
+ }
+ resp.Body.Close()
+ }
+ if fixed != string(match) {
+ // Replace the url, and update the offset.
+ newLine := line[:pair[0]]
+ newLine = append(newLine, fixed...)
+ newLine = append(newLine, line[pair[1]:]...)
+ offset += len(newLine) - len(line)
+ line = newLine
+ anyFixed = true
+ }
+ }
+ if *fix {
+ if path == "-" {
+ os.Stdout.Write(line)
+ } else {
+ fixedBuf.Write(line)
+ }
+ }
+ if err == io.EOF {
+ break
+ } else if err != nil {
+ return err
+ }
+ }
+ if anyFixed && path != "-" {
+ f.Close()
+ // Overwrite the file, if we weren't reading stdin. Report its
+ // path too.
+ fmt.Println(path)
+ if err := ioutil.WriteFile(path, fixedBuf.Bytes(), 0666); err != nil {
+ return err
+ }
+ }
+ if len(broken) > 0 {
+ return fmt.Errorf("found %d broken urls in %q:\n%s", len(broken),
+ path, strings.Join(broken, "\n"))
+ }
+ return nil
+}
+
+func main() { os.Exit(main1()) }
+
+func main1() int {
+ flag.Parse()
+ if *relaxed && *matching != "" {
+ fmt.Fprintln(os.Stderr, "-r and -m at the same time don't make much sense")
+ return 1
+ }
+ var re *regexp.Regexp
+ if *relaxed {
+ re = xurls.Relaxed()
+ } else if *matching != "" {
+ var err error
+ if re, err = xurls.StrictMatchingScheme(*matching); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ return 1
+ }
+ } else {
+ re = xurls.Strict()
+ }
+ args := flag.Args()
+ if len(args) == 0 {
+ args = []string{"-"}
+ }
+ for _, path := range args {
+ if err := scanPath(re, path); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ return 1
+ }
+ }
+ return 0
+}