// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Binary package export.
// This file was derived from $GOROOT/src/cmd/compile/internal/gc/bexport.go;
// see that file for specification of the format.

package gcimporter

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"go/ast"
	"go/constant"
	"go/token"
	"go/types"
	"math"
	"math/big"
	"sort"
	"strings"
)

// If debugFormat is set, each integer and string value is preceded by a marker
// and position information in the encoding. This mechanism permits an importer
// to recognize immediately when it is out of sync. The importer recognizes this
// mode automatically (i.e., it can import export data produced with debugging
// support even if debugFormat is not set at the time of import). This mode will
// lead to massively larger export data (by a factor of 2 to 3) and should only
// be enabled during development and debugging.
//
// NOTE: This flag is the first flag to enable if importing dies because of
// (suspected) format errors, and whenever a change is made to the format.
const debugFormat = false // default: false

// If trace is set, debugging output is printed to std out.
const trace = false // default: false

// Current export format version. Increase with each format change.
// Note: The latest binary (non-indexed) export format is at version 6.
//       This exporter is still at level 4, but it doesn't matter since
//       the binary importer can handle older versions just fine.
// 6: package height (CL 105038) -- NOT IMPLEMENTED HERE
// 5: improved position encoding efficiency (issue 20080, CL 41619) -- NOT IMPLEMEMTED HERE
// 4: type name objects support type aliases, uses aliasTag
// 3: Go1.8 encoding (same as version 2, aliasTag defined but never used)
// 2: removed unused bool in ODCL export (compiler only)
// 1: header format change (more regular), export package for _ struct fields
// 0: Go1.7 encoding
const exportVersion = 4

// trackAllTypes enables cycle tracking for all types, not just named
// types. The existing compiler invariants assume that unnamed types
// that are not completely set up are not used, or else there are spurious
// errors.
// If disabled, only named types are tracked, possibly leading to slightly
// less efficient encoding in rare cases. It also prevents the export of
// some corner-case type declarations (but those are not handled correctly
// with with the textual export format either).
// TODO(gri) enable and remove once issues caused by it are fixed
const trackAllTypes = false

type exporter struct {
	fset *token.FileSet
	out  bytes.Buffer

	// object -> index maps, indexed in order of serialization
	strIndex map[string]int
	pkgIndex map[*types.Package]int
	typIndex map[types.Type]int

	// position encoding
	posInfoFormat bool
	prevFile      string
	prevLine      int

	// debugging support
	written int // bytes written
	indent  int // for trace
}

// internalError represents an error generated inside this package.
type internalError string

func (e internalError) Error() string { return "gcimporter: " + string(e) }

func internalErrorf(format string, args ...interface{}) error {
	return internalError(fmt.Sprintf(format, args...))
}

// BExportData returns binary export data for pkg.
// If no file set is provided, position info will be missing.
func BExportData(fset *token.FileSet, pkg *types.Package) (b []byte, err error) {
	defer func() {
		if e := recover(); e != nil {
			if ierr, ok := e.(internalError); ok {
				err = ierr
				return
			}
			// Not an internal error; panic again.
			panic(e)
		}
	}()

	p := exporter{
		fset:          fset,
		strIndex:      map[string]int{"": 0}, // empty string is mapped to 0
		pkgIndex:      make(map[*types.Package]int),
		typIndex:      make(map[types.Type]int),
		posInfoFormat: true, // TODO(gri) might become a flag, eventually
	}

	// write version info
	// The version string must start with "version %d" where %d is the version
	// number. Additional debugging information may follow after a blank; that
	// text is ignored by the importer.
	p.rawStringln(fmt.Sprintf("version %d", exportVersion))
	var debug string
	if debugFormat {
		debug = "debug"
	}
	p.rawStringln(debug) // cannot use p.bool since it's affected by debugFormat; also want to see this clearly
	p.bool(trackAllTypes)
	p.bool(p.posInfoFormat)

	// --- generic export data ---

	// populate type map with predeclared "known" types
	for index, typ := range predeclared() {
		p.typIndex[typ] = index
	}
	if len(p.typIndex) != len(predeclared()) {
		return nil, internalError("duplicate entries in type map?")
	}

	// write package data
	p.pkg(pkg, true)
	if trace {
		p.tracef("\n")
	}

	// write objects
	objcount := 0
	scope := pkg.Scope()
	for _, name := range scope.Names() {
		if !ast.IsExported(name) {
			continue
		}
		if trace {
			p.tracef("\n")
		}
		p.obj(scope.Lookup(name))
		objcount++
	}

	// indicate end of list
	if trace {
		p.tracef("\n")
	}
	p.tag(endTag)

	// for self-verification only (redundant)
	p.int(objcount)

	if trace {
		p.tracef("\n")
	}

	// --- end of export data ---

	return p.out.Bytes(), nil
}

func (p *exporter) pkg(pkg *types.Package, emptypath bool) {
	if pkg == nil {
		panic(internalError("unexpected nil pkg"))
	}

	// if we saw the package before, write its index (>= 0)
	if i, ok := p.pkgIndex[pkg]; ok {
		p.index('P', i)
		return
	}

	// otherwise, remember the package, write the package tag (< 0) and package data
	if trace {
		p.tracef("P%d = { ", len(p.pkgIndex))
		defer p.tracef("} ")
	}
	p.pkgIndex[pkg] = len(p.pkgIndex)

	p.tag(packageTag)
	p.string(pkg.Name())
	if emptypath {
		p.string("")
	} else {
		p.string(pkg.Path())
	}
}

func (p *exporter) obj(obj types.Object) {
	switch obj := obj.(type) {
	case *types.Const:
		p.tag(constTag)
		p.pos(obj)
		p.qualifiedName(obj)
		p.typ(obj.Type())
		p.value(obj.Val())

	case *types.TypeName:
		if obj.IsAlias() {
			p.tag(aliasTag)
			p.pos(obj)
			p.qualifiedName(obj)
		} else {
			p.tag(typeTag)
		}
		p.typ(obj.Type())

	case *types.Var:
		p.tag(varTag)
		p.pos(obj)
		p.qualifiedName(obj)
		p.typ(obj.Type())

	case *types.Func:
		p.tag(funcTag)
		p.pos(obj)
		p.qualifiedName(obj)
		sig := obj.Type().(*types.Signature)
		p.paramList(sig.Params(), sig.Variadic())
		p.paramList(sig.Results(), false)

	default:
		panic(internalErrorf("unexpected object %v (%T)", obj, obj))
	}
}

func (p *exporter) pos(obj types.Object) {
	if !p.posInfoFormat {
		return
	}

	file, line := p.fileLine(obj)
	if file == p.prevFile {
		// common case: write line delta
		// delta == 0 means different file or no line change
		delta := line - p.prevLine
		p.int(delta)
		if delta == 0 {
			p.int(-1) // -1 means no file change
		}
	} else {
		// different file
		p.int(0)
		// Encode filename as length of common prefix with previous
		// filename, followed by (possibly empty) suffix. Filenames
		// frequently share path prefixes, so this can save a lot
		// of space and make export data size less dependent on file
		// path length. The suffix is unlikely to be empty because
		// file names tend to end in ".go".
		n := commonPrefixLen(p.prevFile, file)
		p.int(n)           // n >= 0
		p.string(file[n:]) // write suffix only
		p.prevFile = file
		p.int(line)
	}
	p.prevLine = line
}

func (p *exporter) fileLine(obj types.Object) (file string, line int) {
	if p.fset != nil {
		pos := p.fset.Position(obj.Pos())
		file = pos.Filename
		line = pos.Line
	}
	return
}

func commonPrefixLen(a, b string) int {
	if len(a) > len(b) {
		a, b = b, a
	}
	// len(a) <= len(b)
	i := 0
	for i < len(a) && a[i] == b[i] {
		i++
	}
	return i
}

func (p *exporter) qualifiedName(obj types.Object) {
	p.string(obj.Name())
	p.pkg(obj.Pkg(), false)
}

func (p *exporter) typ(t types.Type) {
	if t == nil {
		panic(internalError("nil type"))
	}

	// Possible optimization: Anonymous pointer types *T where
	// T is a named type are common. We could canonicalize all
	// such types *T to a single type PT = *T. This would lead
	// to at most one *T entry in typIndex, and all future *T's
	// would be encoded as the respective index directly. Would
	// save 1 byte (pointerTag) per *T and reduce the typIndex
	// size (at the cost of a canonicalization map). We can do
	// this later, without encoding format change.

	// if we saw the type before, write its index (>= 0)
	if i, ok := p.typIndex[t]; ok {
		p.index('T', i)
		return
	}

	// otherwise, remember the type, write the type tag (< 0) and type data
	if trackAllTypes {
		if trace {
			p.tracef("T%d = {>\n", len(p.typIndex))
			defer p.tracef("<\n} ")
		}
		p.typIndex[t] = len(p.typIndex)
	}

	switch t := t.(type) {
	case *types.Named:
		if !trackAllTypes {
			// if we don't track all types, track named types now
			p.typIndex[t] = len(p.typIndex)
		}

		p.tag(namedTag)
		p.pos(t.Obj())
		p.qualifiedName(t.Obj())
		p.typ(t.Underlying())
		if !types.IsInterface(t) {
			p.assocMethods(t)
		}

	case *types.Array:
		p.tag(arrayTag)
		p.int64(t.Len())
		p.typ(t.Elem())

	case *types.Slice:
		p.tag(sliceTag)
		p.typ(t.Elem())

	case *dddSlice:
		p.tag(dddTag)
		p.typ(t.elem)

	case *types.Struct:
		p.tag(structTag)
		p.fieldList(t)

	case *types.Pointer:
		p.tag(pointerTag)
		p.typ(t.Elem())

	case *types.Signature:
		p.tag(signatureTag)
		p.paramList(t.Params(), t.Variadic())
		p.paramList(t.Results(), false)

	case *types.Interface:
		p.tag(interfaceTag)
		p.iface(t)

	case *types.Map:
		p.tag(mapTag)
		p.typ(t.Key())
		p.typ(t.Elem())

	case *types.Chan:
		p.tag(chanTag)
		p.int(int(3 - t.Dir())) // hack
		p.typ(t.Elem())

	default:
		panic(internalErrorf("unexpected type %T: %s", t, t))
	}
}

func (p *exporter) assocMethods(named *types.Named) {
	// Sort methods (for determinism).
	var methods []*types.Func
	for i := 0; i < named.NumMethods(); i++ {
		methods = append(methods, named.Method(i))
	}
	sort.Sort(methodsByName(methods))

	p.int(len(methods))

	if trace && methods != nil {
		p.tracef("associated methods {>\n")
	}

	for i, m := range methods {
		if trace && i > 0 {
			p.tracef("\n")
		}

		p.pos(m)
		name := m.Name()
		p.string(name)
		if !exported(name) {
			p.pkg(m.Pkg(), false)
		}

		sig := m.Type().(*types.Signature)
		p.paramList(types.NewTuple(sig.Recv()), false)
		p.paramList(sig.Params(), sig.Variadic())
		p.paramList(sig.Results(), false)
		p.int(0) // dummy value for go:nointerface pragma - ignored by importer
	}

	if trace && methods != nil {
		p.tracef("<\n} ")
	}
}

type methodsByName []*types.Func

func (x methodsByName) Len() int           { return len(x) }
func (x methodsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
func (x methodsByName) Less(i, j int) bool { return x[i].Name() < x[j].Name() }

func (p *exporter) fieldList(t *types.Struct) {
	if trace && t.NumFields() > 0 {
		p.tracef("fields {>\n")
		defer p.tracef("<\n} ")
	}

	p.int(t.NumFields())
	for i := 0; i < t.NumFields(); i++ {
		if trace && i > 0 {
			p.tracef("\n")
		}
		p.field(t.Field(i))
		p.string(t.Tag(i))
	}
}

func (p *exporter) field(f *types.Var) {
	if !f.IsField() {
		panic(internalError("field expected"))
	}

	p.pos(f)
	p.fieldName(f)
	p.typ(f.Type())
}

func (p *exporter) iface(t *types.Interface) {
	// TODO(gri): enable importer to load embedded interfaces,
	// then emit Embeddeds and ExplicitMethods separately here.
	p.int(0)

	n := t.NumMethods()
	if trace && n > 0 {
		p.tracef("methods {>\n")
		defer p.tracef("<\n} ")
	}
	p.int(n)
	for i := 0; i < n; i++ {
		if trace && i > 0 {
			p.tracef("\n")
		}
		p.method(t.Method(i))
	}
}

func (p *exporter) method(m *types.Func) {
	sig := m.Type().(*types.Signature)
	if sig.Recv() == nil {
		panic(internalError("method expected"))
	}

	p.pos(m)
	p.string(m.Name())
	if m.Name() != "_" && !ast.IsExported(m.Name()) {
		p.pkg(m.Pkg(), false)
	}

	// interface method; no need to encode receiver.
	p.paramList(sig.Params(), sig.Variadic())
	p.paramList(sig.Results(), false)
}

func (p *exporter) fieldName(f *types.Var) {
	name := f.Name()

	if f.Anonymous() {
		// anonymous field - we distinguish between 3 cases:
		// 1) field name matches base type name and is exported
		// 2) field name matches base type name and is not exported
		// 3) field name doesn't match base type name (alias name)
		bname := basetypeName(f.Type())
		if name == bname {
			if ast.IsExported(name) {
				name = "" // 1) we don't need to know the field name or package
			} else {
				name = "?" // 2) use unexported name "?" to force package export
			}
		} else {
			// 3) indicate alias and export name as is
			// (this requires an extra "@" but this is a rare case)
			p.string("@")
		}
	}

	p.string(name)
	if name != "" && !ast.IsExported(name) {
		p.pkg(f.Pkg(), false)
	}
}

func basetypeName(typ types.Type) string {
	switch typ := deref(typ).(type) {
	case *types.Basic:
		return typ.Name()
	case *types.Named:
		return typ.Obj().Name()
	default:
		return "" // unnamed type
	}
}

func (p *exporter) paramList(params *types.Tuple, variadic bool) {
	// use negative length to indicate unnamed parameters
	// (look at the first parameter only since either all
	// names are present or all are absent)
	n := params.Len()
	if n > 0 && params.At(0).Name() == "" {
		n = -n
	}
	p.int(n)
	for i := 0; i < params.Len(); i++ {
		q := params.At(i)
		t := q.Type()
		if variadic && i == params.Len()-1 {
			t = &dddSlice{t.(*types.Slice).Elem()}
		}
		p.typ(t)
		if n > 0 {
			name := q.Name()
			p.string(name)
			if name != "_" {
				p.pkg(q.Pkg(), false)
			}
		}
		p.string("") // no compiler-specific info
	}
}

func (p *exporter) value(x constant.Value) {
	if trace {
		p.tracef("= ")
	}

	switch x.Kind() {
	case constant.Bool:
		tag := falseTag
		if constant.BoolVal(x) {
			tag = trueTag
		}
		p.tag(tag)

	case constant.Int:
		if v, exact := constant.Int64Val(x); exact {
			// common case: x fits into an int64 - use compact encoding
			p.tag(int64Tag)
			p.int64(v)
			return
		}
		// uncommon case: large x - use float encoding
		// (powers of 2 will be encoded efficiently with exponent)
		p.tag(floatTag)
		p.float(constant.ToFloat(x))

	case constant.Float:
		p.tag(floatTag)
		p.float(x)

	case constant.Complex:
		p.tag(complexTag)
		p.float(constant.Real(x))
		p.float(constant.Imag(x))

	case constant.String:
		p.tag(stringTag)
		p.string(constant.StringVal(x))

	case constant.Unknown:
		// package contains type errors
		p.tag(unknownTag)

	default:
		panic(internalErrorf("unexpected value %v (%T)", x, x))
	}
}

func (p *exporter) float(x constant.Value) {
	if x.Kind() != constant.Float {
		panic(internalErrorf("unexpected constant %v, want float", x))
	}
	// extract sign (there is no -0)
	sign := constant.Sign(x)
	if sign == 0 {
		// x == 0
		p.int(0)
		return
	}
	// x != 0

	var f big.Float
	if v, exact := constant.Float64Val(x); exact {
		// float64
		f.SetFloat64(v)
	} else if num, denom := constant.Num(x), constant.Denom(x); num.Kind() == constant.Int {
		// TODO(gri): add big.Rat accessor to constant.Value.
		r := valueToRat(num)
		f.SetRat(r.Quo(r, valueToRat(denom)))
	} else {
		// Value too large to represent as a fraction => inaccessible.
		// TODO(gri): add big.Float accessor to constant.Value.
		f.SetFloat64(math.MaxFloat64) // FIXME
	}

	// extract exponent such that 0.5 <= m < 1.0
	var m big.Float
	exp := f.MantExp(&m)

	// extract mantissa as *big.Int
	// - set exponent large enough so mant satisfies mant.IsInt()
	// - get *big.Int from mant
	m.SetMantExp(&m, int(m.MinPrec()))
	mant, acc := m.Int(nil)
	if acc != big.Exact {
		panic(internalError("internal error"))
	}

	p.int(sign)
	p.int(exp)
	p.string(string(mant.Bytes()))
}

func valueToRat(x constant.Value) *big.Rat {
	// Convert little-endian to big-endian.
	// I can't believe this is necessary.
	bytes := constant.Bytes(x)
	for i := 0; i < len(bytes)/2; i++ {
		bytes[i], bytes[len(bytes)-1-i] = bytes[len(bytes)-1-i], bytes[i]
	}
	return new(big.Rat).SetInt(new(big.Int).SetBytes(bytes))
}

func (p *exporter) bool(b bool) bool {
	if trace {
		p.tracef("[")
		defer p.tracef("= %v] ", b)
	}

	x := 0
	if b {
		x = 1
	}
	p.int(x)
	return b
}

// ----------------------------------------------------------------------------
// Low-level encoders

func (p *exporter) index(marker byte, index int) {
	if index < 0 {
		panic(internalError("invalid index < 0"))
	}
	if debugFormat {
		p.marker('t')
	}
	if trace {
		p.tracef("%c%d ", marker, index)
	}
	p.rawInt64(int64(index))
}

func (p *exporter) tag(tag int) {
	if tag >= 0 {
		panic(internalError("invalid tag >= 0"))
	}
	if debugFormat {
		p.marker('t')
	}
	if trace {
		p.tracef("%s ", tagString[-tag])
	}
	p.rawInt64(int64(tag))
}

func (p *exporter) int(x int) {
	p.int64(int64(x))
}

func (p *exporter) int64(x int64) {
	if debugFormat {
		p.marker('i')
	}
	if trace {
		p.tracef("%d ", x)
	}
	p.rawInt64(x)
}

func (p *exporter) string(s string) {
	if debugFormat {
		p.marker('s')
	}
	if trace {
		p.tracef("%q ", s)
	}
	// if we saw the string before, write its index (>= 0)
	// (the empty string is mapped to 0)
	if i, ok := p.strIndex[s]; ok {
		p.rawInt64(int64(i))
		return
	}
	// otherwise, remember string and write its negative length and bytes
	p.strIndex[s] = len(p.strIndex)
	p.rawInt64(-int64(len(s)))
	for i := 0; i < len(s); i++ {
		p.rawByte(s[i])
	}
}

// marker emits a marker byte and position information which makes
// it easy for a reader to detect if it is "out of sync". Used for
// debugFormat format only.
func (p *exporter) marker(m byte) {
	p.rawByte(m)
	// Enable this for help tracking down the location
	// of an incorrect marker when running in debugFormat.
	if false && trace {
		p.tracef("#%d ", p.written)
	}
	p.rawInt64(int64(p.written))
}

// rawInt64 should only be used by low-level encoders.
func (p *exporter) rawInt64(x int64) {
	var tmp [binary.MaxVarintLen64]byte
	n := binary.PutVarint(tmp[:], x)
	for i := 0; i < n; i++ {
		p.rawByte(tmp[i])
	}
}

// rawStringln should only be used to emit the initial version string.
func (p *exporter) rawStringln(s string) {
	for i := 0; i < len(s); i++ {
		p.rawByte(s[i])
	}
	p.rawByte('\n')
}

// rawByte is the bottleneck interface to write to p.out.
// rawByte escapes b as follows (any encoding does that
// hides '$'):
//
//	'$'  => '|' 'S'
//	'|'  => '|' '|'
//
// Necessary so other tools can find the end of the
// export data by searching for "$$".
// rawByte should only be used by low-level encoders.
func (p *exporter) rawByte(b byte) {
	switch b {
	case '$':
		// write '$' as '|' 'S'
		b = 'S'
		fallthrough
	case '|':
		// write '|' as '|' '|'
		p.out.WriteByte('|')
		p.written++
	}
	p.out.WriteByte(b)
	p.written++
}

// tracef is like fmt.Printf but it rewrites the format string
// to take care of indentation.
func (p *exporter) tracef(format string, args ...interface{}) {
	if strings.ContainsAny(format, "<>\n") {
		var buf bytes.Buffer
		for i := 0; i < len(format); i++ {
			// no need to deal with runes
			ch := format[i]
			switch ch {
			case '>':
				p.indent++
				continue
			case '<':
				p.indent--
				continue
			}
			buf.WriteByte(ch)
			if ch == '\n' {
				for j := p.indent; j > 0; j-- {
					buf.WriteString(".  ")
				}
			}
		}
		format = buf.String()
	}
	fmt.Printf(format, args...)
}

// Debugging support.
// (tagString is only used when tracing is enabled)
var tagString = [...]string{
	// Packages
	-packageTag: "package",

	// Types
	-namedTag:     "named type",
	-arrayTag:     "array",
	-sliceTag:     "slice",
	-dddTag:       "ddd",
	-structTag:    "struct",
	-pointerTag:   "pointer",
	-signatureTag: "signature",
	-interfaceTag: "interface",
	-mapTag:       "map",
	-chanTag:      "chan",

	// Values
	-falseTag:    "false",
	-trueTag:     "true",
	-int64Tag:    "int64",
	-floatTag:    "float",
	-fractionTag: "fraction",
	-complexTag:  "complex",
	-stringTag:   "string",
	-unknownTag:  "unknown",

	// Type aliases
	-aliasTag: "alias",
}