--- /dev/null
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package span
+
+import (
+ "fmt"
+ "unicode/utf16"
+ "unicode/utf8"
+)
+
+// ToUTF16Column calculates the utf16 column expressed by the point given the
+// supplied file contents.
+// This is used to convert from the native (always in bytes) column
+// representation and the utf16 counts used by some editors.
+func ToUTF16Column(p Point, content []byte) (int, error) {
+ if !p.HasPosition() {
+ return -1, fmt.Errorf("ToUTF16Column: point is missing position")
+ }
+ if !p.HasOffset() {
+ return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
+ }
+ offset := p.Offset() // 0-based
+ colZero := p.Column() - 1 // 0-based
+ if colZero == 0 {
+ // 0-based column 0, so it must be chr 1
+ return 1, nil
+ } else if colZero < 0 {
+ return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero)
+ }
+ // work out the offset at the start of the line using the column
+ lineOffset := offset - colZero
+ if lineOffset < 0 || offset > len(content) {
+ return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
+ }
+ // Use the offset to pick out the line start.
+ // This cannot panic: offset > len(content) and lineOffset < offset.
+ start := content[lineOffset:]
+
+ // Now, truncate down to the supplied column.
+ start = start[:colZero]
+
+ // and count the number of utf16 characters
+ // in theory we could do this by hand more efficiently...
+ return len(utf16.Encode([]rune(string(start)))) + 1, nil
+}
+
+// FromUTF16Column advances the point by the utf16 character offset given the
+// supplied line contents.
+// This is used to convert from the utf16 counts used by some editors to the
+// native (always in bytes) column representation.
+func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
+ if !p.HasOffset() {
+ return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
+ }
+ // if chr is 1 then no adjustment needed
+ if chr <= 1 {
+ return p, nil
+ }
+ if p.Offset() >= len(content) {
+ return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content))
+ }
+ remains := content[p.Offset():]
+ // scan forward the specified number of characters
+ for count := 1; count < chr; count++ {
+ if len(remains) <= 0 {
+ return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
+ }
+ r, w := utf8.DecodeRune(remains)
+ if r == '\n' {
+ // Per the LSP spec:
+ //
+ // > If the character value is greater than the line length it
+ // > defaults back to the line length.
+ break
+ }
+ remains = remains[w:]
+ if r >= 0x10000 {
+ // a two point rune
+ count++
+ // if we finished in a two point rune, do not advance past the first
+ if count >= chr {
+ break
+ }
+ }
+ p.v.Column += w
+ p.v.Offset += w
+ }
+ return p, nil
+}