Giant blob of minor changes
[dotfiles/.git] / .config / coc / extensions / coc-go-data / tools / pkg / mod / golang.org / x / tools@v0.0.0-20201105173854-bc9fc8d8c4bc / cmd / html2article / conv.go
diff --git a/.config/coc/extensions/coc-go-data/tools/pkg/mod/golang.org/x/tools@v0.0.0-20201105173854-bc9fc8d8c4bc/cmd/html2article/conv.go b/.config/coc/extensions/coc-go-data/tools/pkg/mod/golang.org/x/tools@v0.0.0-20201105173854-bc9fc8d8c4bc/cmd/html2article/conv.go
new file mode 100644 (file)
index 0000000..4ef4f6c
--- /dev/null
@@ -0,0 +1,331 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This program takes an HTML file and outputs a corresponding article file in
+// present format. See: golang.org/x/tools/present
+package main // import "golang.org/x/tools/cmd/html2article"
+
+import (
+       "bytes"
+       "errors"
+       "flag"
+       "fmt"
+       "io"
+       "log"
+       "net/url"
+       "os"
+       "regexp"
+       "strings"
+
+       "golang.org/x/net/html"
+       "golang.org/x/net/html/atom"
+)
+
+func main() {
+       flag.Parse()
+
+       err := convert(os.Stdout, os.Stdin)
+       if err != nil {
+               log.Fatal(err)
+       }
+}
+
+func convert(w io.Writer, r io.Reader) error {
+       root, err := html.Parse(r)
+       if err != nil {
+               return err
+       }
+
+       style := find(root, isTag(atom.Style))
+       if err := parseStyles(style); err != nil {
+               log.Printf("couldn't parse all styles: %v", err)
+       }
+
+       body := find(root, isTag(atom.Body))
+       if body == nil {
+               return errors.New("couldn't find body")
+       }
+       article := limitNewlineRuns(makeHeadings(strings.TrimSpace(text(body))))
+       _, err = fmt.Fprintf(w, "Title\n\n%s", article)
+       return err
+}
+
+// Style identifies a kind of inline formatting. Each value is the marker
+// string that text uses for that formatting in the generated article.
+type Style string
+
+// The inline formatting kinds recognised in the document's style rules.
+const (
+       Bold   Style = "*"
+       Italic Style = "_"
+       Code   Style = "`"
+)
+
+// cssRules maps a CSS selector taken from the document's style element to
+// the Style its declarations imply. parseStyles fills it in and hasStyle
+// reads it.
+var cssRules = make(map[string]Style)
+
+func parseStyles(style *html.Node) error {
+       if style == nil || style.FirstChild == nil {
+               return errors.New("couldn't find styles")
+       }
+
+       styles := style.FirstChild.Data
+       readUntil := func(end rune) (string, bool) {
+               i := strings.IndexRune(styles, end)
+               if i < 0 {
+                       return "", false
+               }
+               s := styles[:i]
+               styles = styles[i:]
+               return s, true
+       }
+
+       for {
+               sel, ok := readUntil('{')
+               if !ok && sel == "" {
+                       break
+               } else if !ok {
+                       return fmt.Errorf("could not parse selector %q", styles)
+               }
+
+               value, ok := readUntil('}')
+               if !ok {
+                       return fmt.Errorf("couldn't parse style body for %s", sel)
+               }
+               switch {
+               case strings.Contains(value, "italic"):
+                       cssRules[sel] = Italic
+               case strings.Contains(value, "bold"):
+                       cssRules[sel] = Bold
+               case strings.Contains(value, "Consolas") || strings.Contains(value, "Courier New"):
+                       cssRules[sel] = Code
+               }
+       }
+       return nil
+}
+
+// newlineRun matches a run of two or more consecutive newlines.
+var newlineRun = regexp.MustCompile(`\n\n+`)
+
+// limitNewlineRuns collapses every run of two or more newlines in s into
+// exactly two, leaving at most one blank line between pieces of text.
+func limitNewlineRuns(s string) string {
+       const blankLine = "\n\n"
+       return newlineRun.ReplaceAllString(s, blankLine)
+}
+
+// makeHeadings rewrites body line by line, turning every line that
+// isBoldTitle accepts into a section heading of the form "* Heading". If the
+// first line is not such a title, an "* Introduction" heading is inserted in
+// front of it. Every line of the result, the last included, ends in a
+// newline.
+func makeHeadings(body string) string {
+       var out strings.Builder
+       for n, line := range strings.Split(body, "\n") {
+               switch {
+               case isBoldTitle(line):
+                       // Turn every asterisk into a space and trim, which removes
+                       // the surrounding markers and restores the spaces that
+                       // highlight replaced inside the title.
+                       line = "* " + strings.TrimSpace(strings.Replace(line, "*", " ", -1))
+               case n == 0:
+                       out.WriteString("* Introduction\n\n")
+               }
+               out.WriteString(line)
+               out.WriteByte('\n')
+       }
+       return out.String()
+}
+
+func isBoldTitle(s string) bool {
+       return !strings.Contains(s, " ") &&
+               strings.HasPrefix(s, "*") &&
+               strings.HasSuffix(s, "*")
+}
+
+func indent(buf *bytes.Buffer, s string) {
+       for _, l := range strings.Split(s, "\n") {
+               if l != "" {
+                       buf.WriteByte('\t')
+                       buf.WriteString(l)
+               }
+               buf.WriteByte('\n')
+       }
+}
+
+func unwrap(buf *bytes.Buffer, s string) {
+       var cont bool
+       for _, l := range strings.Split(s, "\n") {
+               l = strings.TrimSpace(l)
+               if len(l) == 0 {
+                       if cont {
+                               buf.WriteByte('\n')
+                               buf.WriteByte('\n')
+                       }
+                       cont = false
+               } else {
+                       if cont {
+                               buf.WriteByte(' ')
+                       }
+                       buf.WriteString(l)
+                       cont = true
+               }
+       }
+}
+
+// text renders the subtree rooted at n as present-format markup and returns
+// the result.
+//
+// Text nodes are copied verbatim. Elements with a dedicated case below (line
+// breaks, paragraphs, list items, preformatted blocks, links, inline
+// formatting, images, iframes and old-style movie params) are translated and
+// their children are not visited again. Title elements produce no output.
+// Every other node is transparent: only its children contribute. A span is
+// handled as code, bold or italic when a rule in cssRules gives it that
+// style.
+func text(n *html.Node) string {
+       var buf bytes.Buffer
+       walk(n, func(n *html.Node) bool {
+               switch n.Type {
+               case html.TextNode:
+                       buf.WriteString(n.Data)
+                       return false
+               case html.ElementNode:
+                       // Translated below.
+               default:
+                       return true
+               }
+               a := n.DataAtom
+               if a == atom.Span {
+                       // Spans carry their formatting in CSS; map them onto the
+                       // equivalent element. Code is checked first, then bold, then
+                       // italic.
+                       switch {
+                       case hasStyle(Code)(n):
+                               a = atom.Code
+                       case hasStyle(Bold)(n):
+                               a = atom.B
+                       case hasStyle(Italic)(n):
+                               a = atom.I
+                       }
+               }
+               switch a {
+               case atom.Br:
+                       buf.WriteByte('\n')
+               case atom.P:
+                       unwrap(&buf, childText(n))
+                       buf.WriteString("\n\n")
+               case atom.Li:
+                       buf.WriteString("- ")
+                       unwrap(&buf, childText(n))
+                       buf.WriteByte('\n')
+               case atom.Pre:
+                       indent(&buf, childText(n))
+                       buf.WriteByte('\n')
+               case atom.A:
+                       href, label := attr(n, "href"), childText(n)
+                       // Skip links with no text.
+                       if strings.TrimSpace(label) == "" {
+                               break
+                       }
+                       // Don't emit empty links.
+                       if strings.TrimSpace(href) == "" {
+                               buf.WriteString(label)
+                               break
+                       }
+                       // Use original url for Google Docs redirections, but only
+                       // when the redirect actually carries one; otherwise the link
+                       // would be written with an empty target.
+                       if u, err := url.Parse(href); err != nil {
+                               log.Printf("parsing url %q: %v", href, err)
+                       } else if u.Host == "www.google.com" && u.Path == "/url" {
+                               if q := u.Query().Get("q"); q != "" {
+                                       href = q
+                               }
+                       }
+                       fmt.Fprintf(&buf, "[[%s][%s]]", href, label)
+               case atom.Code:
+                       buf.WriteString(highlight(n, "`"))
+               case atom.B:
+                       buf.WriteString(highlight(n, "*"))
+               case atom.I:
+                       buf.WriteString(highlight(n, "_"))
+               case atom.Img:
+                       src := attr(n, "src")
+                       fmt.Fprintf(&buf, ".image %s\n", src)
+               case atom.Iframe:
+                       // The height is written before the width.
+                       src, w, h := attr(n, "src"), attr(n, "width"), attr(n, "height")
+                       fmt.Fprintf(&buf, "\n.iframe %s %s %s\n", src, h, w)
+               case atom.Param:
+                       if attr(n, "name") == "movie" {
+                               // Old style YouTube embed.
+                               u := attr(n, "value")
+                               u = strings.Replace(u, "/v/", "/embed/", 1)
+                               if i := strings.Index(u, "&"); i >= 0 {
+                                       u = u[:i]
+                               }
+                               // Height first, as in the iframe case above: a 16:9
+                               // player that is 540 wide is 304 high.
+                               fmt.Fprintf(&buf, "\n.iframe %s 304 540\n", u)
+                       }
+               case atom.Title:
+                       // The document title is not part of the article text.
+               default:
+                       return true
+               }
+               return false
+       })
+       return buf.String()
+}
+
+// childText returns the concatenation of text applied to each child of node,
+// in sibling order. node itself is not rendered.
+func childText(node *html.Node) string {
+       var out bytes.Buffer
+       child := node.FirstChild
+       for child != nil {
+               out.WriteString(text(child))
+               child = child.NextSibling
+       }
+       return out.String()
+}
+
+func highlight(node *html.Node, char string) string {
+       t := strings.Replace(childText(node), " ", char, -1)
+       return fmt.Sprintf("%s%s%s", char, t, char)
+}
+
+// selector reports whether a node satisfies some condition. walk also takes
+// a selector as its visitor; there the result decides whether the node's
+// children are visited.
+type selector func(*html.Node) bool
+
+func isTag(a atom.Atom) selector {
+       return func(n *html.Node) bool {
+               return n.DataAtom == a
+       }
+}
+
+func hasClass(name string) selector {
+       return func(n *html.Node) bool {
+               for _, a := range n.Attr {
+                       if a.Key == "class" {
+                               for _, c := range strings.Fields(a.Val) {
+                                       if c == name {
+                                               return true
+                                       }
+                               }
+                       }
+               }
+               return false
+       }
+}
+
+func hasStyle(s Style) selector {
+       return func(n *html.Node) bool {
+               for rule, s2 := range cssRules {
+                       if s2 != s {
+                               continue
+                       }
+                       if strings.HasPrefix(rule, ".") && hasClass(rule[1:])(n) {
+                               return true
+                       }
+                       if n.DataAtom.String() == rule {
+                               return true
+                       }
+               }
+               return false
+       }
+}
+
+func attr(node *html.Node, key string) (value string) {
+       for _, attr := range node.Attr {
+               if attr.Key == key {
+                       return attr.Val
+               }
+       }
+       return ""
+}
+
+// find returns the first node for which fn reports true, visiting n before
+// its children and children in sibling order. It returns nil if no node
+// matches. fn is not called again once a match has been found.
+func find(n *html.Node, fn selector) *html.Node {
+       var found *html.Node
+       visit := func(c *html.Node) bool {
+               switch {
+               case found != nil:
+                       return false
+               case fn(c):
+                       found = c
+                       return false
+               }
+               return true
+       }
+       walk(n, visit)
+       return found
+}
+
+func walk(n *html.Node, fn selector) {
+       if fn(n) {
+               for c := n.FirstChild; c != nil; c = c.NextSibling {
+                       walk(c, fn)
+               }
+       }
+}