+++ /dev/null
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sumdb
-
-import (
- "bytes"
- "errors"
- "fmt"
- "path"
- "strings"
- "sync"
- "sync/atomic"
-
- "golang.org/x/mod/module"
- "golang.org/x/mod/sumdb/note"
- "golang.org/x/mod/sumdb/tlog"
-)
-
-// A ClientOps provides the external operations
-// (file caching, HTTP fetches, and so on) needed by the Client.
-// The methods must be safe for concurrent use by multiple goroutines.
-type ClientOps interface {
- // ReadRemote reads and returns the content served at the given path
- // on the remote database server. The path begins with "/lookup" or "/tile/",
- // and there is no need to parse the path in any way.
- // It is the implementation's responsibility to turn that path into a full URL
- // and make the HTTP request. ReadRemote should return an error for
- // any non-200 HTTP response status.
- ReadRemote(path string) ([]byte, error)
-
- // ReadConfig reads and returns the content of the named configuration file.
- // There are only a fixed set of configuration files.
- //
- // "key" returns a file containing the verifier key for the server.
- //
- // serverName + "/latest" returns a file containing the latest known
- // signed tree from the server.
- // To signal that the client wishes to start with an "empty" signed tree,
- // ReadConfig can return a successful empty result (0 bytes of data).
- ReadConfig(file string) ([]byte, error)
-
- // WriteConfig updates the content of the named configuration file,
- // changing it from the old []byte to the new []byte.
- // If the old []byte does not match the stored configuration,
- // WriteConfig must return ErrWriteConflict.
- // Otherwise, WriteConfig should atomically replace old with new.
- // The "key" configuration file is never written using WriteConfig.
- WriteConfig(file string, old, new []byte) error
-
- // ReadCache reads and returns the content of the named cache file.
- // Any returned error will be treated as equivalent to the file not existing.
- // There can be arbitrarily many cache files, such as:
- // serverName/lookup/pkg@version
- // serverName/tile/8/1/x123/456
- ReadCache(file string) ([]byte, error)
-
- // WriteCache writes the named cache file.
- WriteCache(file string, data []byte)
-
- // Log prints the given log message (such as with log.Print)
- Log(msg string)
-
- // SecurityError prints the given security error log message.
- // The Client returns ErrSecurity from any operation that invokes SecurityError,
- // but the return value is mainly for testing. In a real program,
- // SecurityError should typically print the message and call log.Fatal or os.Exit.
- SecurityError(msg string)
-}
-
-// ErrWriteConflict signals a write conflict during Client.WriteConfig.
-var ErrWriteConflict = errors.New("write conflict")
-
-// ErrSecurity is returned by Client operations that invoke Client.SecurityError.
-var ErrSecurity = errors.New("security error: misbehaving server")
-
-// A Client is a client connection to a checksum database.
-// All the methods are safe for simultaneous use by multiple goroutines.
-type Client struct {
- ops ClientOps // access to operations in the external world
-
- didLookup uint32
-
- // one-time initialized data
- initOnce sync.Once
- initErr error // init error, if any
- name string // name of accepted verifier
- verifiers note.Verifiers // accepted verifiers (just one, but Verifiers for note.Open)
- tileReader tileReader
- tileHeight int
- nosumdb string
-
- record parCache // cache of record lookup, keyed by path@vers
- tileCache parCache // cache of c.readTile, keyed by tile
-
- latestMu sync.Mutex
- latest tlog.Tree // latest known tree head
- latestMsg []byte // encoded signed note for latest
-
- tileSavedMu sync.Mutex
- tileSaved map[tlog.Tile]bool // which tiles have been saved using c.ops.WriteCache already
-}
-
-// NewClient returns a new Client using the given Client.
-func NewClient(ops ClientOps) *Client {
- return &Client{
- ops: ops,
- }
-}
-
-// init initiailzes the client (if not already initialized)
-// and returns any initialization error.
-func (c *Client) init() error {
- c.initOnce.Do(c.initWork)
- return c.initErr
-}
-
-// initWork does the actual initialization work.
-func (c *Client) initWork() {
- defer func() {
- if c.initErr != nil {
- c.initErr = fmt.Errorf("initializing sumdb.Client: %v", c.initErr)
- }
- }()
-
- c.tileReader.c = c
- if c.tileHeight == 0 {
- c.tileHeight = 8
- }
- c.tileSaved = make(map[tlog.Tile]bool)
-
- vkey, err := c.ops.ReadConfig("key")
- if err != nil {
- c.initErr = err
- return
- }
- verifier, err := note.NewVerifier(strings.TrimSpace(string(vkey)))
- if err != nil {
- c.initErr = err
- return
- }
- c.verifiers = note.VerifierList(verifier)
- c.name = verifier.Name()
-
- data, err := c.ops.ReadConfig(c.name + "/latest")
- if err != nil {
- c.initErr = err
- return
- }
- if err := c.mergeLatest(data); err != nil {
- c.initErr = err
- return
- }
-}
-
-// SetTileHeight sets the tile height for the Client.
-// Any call to SetTileHeight must happen before the first call to Lookup.
-// If SetTileHeight is not called, the Client defaults to tile height 8.
-// SetTileHeight can be called at most once,
-// and if so it must be called before the first call to Lookup.
-func (c *Client) SetTileHeight(height int) {
- if atomic.LoadUint32(&c.didLookup) != 0 {
- panic("SetTileHeight used after Lookup")
- }
- if height <= 0 {
- panic("invalid call to SetTileHeight")
- }
- if c.tileHeight != 0 {
- panic("multiple calls to SetTileHeight")
- }
- c.tileHeight = height
-}
-
-// SetGONOSUMDB sets the list of comma-separated GONOSUMDB patterns for the Client.
-// For any module path matching one of the patterns,
-// Lookup will return ErrGONOSUMDB.
-// SetGONOSUMDB can be called at most once,
-// and if so it must be called before the first call to Lookup.
-func (c *Client) SetGONOSUMDB(list string) {
- if atomic.LoadUint32(&c.didLookup) != 0 {
- panic("SetGONOSUMDB used after Lookup")
- }
- if c.nosumdb != "" {
- panic("multiple calls to SetGONOSUMDB")
- }
- c.nosumdb = list
-}
-
-// ErrGONOSUMDB is returned by Lookup for paths that match
-// a pattern listed in the GONOSUMDB list (set by SetGONOSUMDB,
-// usually from the environment variable).
-var ErrGONOSUMDB = errors.New("skipped (listed in GONOSUMDB)")
-
-func (c *Client) skip(target string) bool {
- return globsMatchPath(c.nosumdb, target)
-}
-
-// globsMatchPath reports whether any path prefix of target
-// matches one of the glob patterns (as defined by path.Match)
-// in the comma-separated globs list.
-// It ignores any empty or malformed patterns in the list.
-func globsMatchPath(globs, target string) bool {
- for globs != "" {
- // Extract next non-empty glob in comma-separated list.
- var glob string
- if i := strings.Index(globs, ","); i >= 0 {
- glob, globs = globs[:i], globs[i+1:]
- } else {
- glob, globs = globs, ""
- }
- if glob == "" {
- continue
- }
-
- // A glob with N+1 path elements (N slashes) needs to be matched
- // against the first N+1 path elements of target,
- // which end just before the N+1'th slash.
- n := strings.Count(glob, "/")
- prefix := target
- // Walk target, counting slashes, truncating at the N+1'th slash.
- for i := 0; i < len(target); i++ {
- if target[i] == '/' {
- if n == 0 {
- prefix = target[:i]
- break
- }
- n--
- }
- }
- if n > 0 {
- // Not enough prefix elements.
- continue
- }
- matched, _ := path.Match(glob, prefix)
- if matched {
- return true
- }
- }
- return false
-}
-
-// Lookup returns the go.sum lines for the given module path and version.
-// The version may end in a /go.mod suffix, in which case Lookup returns
-// the go.sum lines for the module's go.mod-only hash.
-func (c *Client) Lookup(path, vers string) (lines []string, err error) {
- atomic.StoreUint32(&c.didLookup, 1)
-
- if c.skip(path) {
- return nil, ErrGONOSUMDB
- }
-
- defer func() {
- if err != nil {
- err = fmt.Errorf("%s@%s: %v", path, vers, err)
- }
- }()
-
- if err := c.init(); err != nil {
- return nil, err
- }
-
- // Prepare encoded cache filename / URL.
- epath, err := module.EscapePath(path)
- if err != nil {
- return nil, err
- }
- evers, err := module.EscapeVersion(strings.TrimSuffix(vers, "/go.mod"))
- if err != nil {
- return nil, err
- }
- remotePath := "/lookup/" + epath + "@" + evers
- file := c.name + remotePath
-
- // Fetch the data.
- // The lookupCache avoids redundant ReadCache/GetURL operations
- // (especially since go.sum lines tend to come in pairs for a given
- // path and version) and also avoids having multiple of the same
- // request in flight at once.
- type cached struct {
- data []byte
- err error
- }
- result := c.record.Do(file, func() interface{} {
- // Try the on-disk cache, or else get from web.
- writeCache := false
- data, err := c.ops.ReadCache(file)
- if err != nil {
- data, err = c.ops.ReadRemote(remotePath)
- if err != nil {
- return cached{nil, err}
- }
- writeCache = true
- }
-
- // Validate the record before using it for anything.
- id, text, treeMsg, err := tlog.ParseRecord(data)
- if err != nil {
- return cached{nil, err}
- }
- if err := c.mergeLatest(treeMsg); err != nil {
- return cached{nil, err}
- }
- if err := c.checkRecord(id, text); err != nil {
- return cached{nil, err}
- }
-
- // Now that we've validated the record,
- // save it to the on-disk cache (unless that's where it came from).
- if writeCache {
- c.ops.WriteCache(file, data)
- }
-
- return cached{data, nil}
- }).(cached)
- if result.err != nil {
- return nil, result.err
- }
-
- // Extract the lines for the specific version we want
- // (with or without /go.mod).
- prefix := path + " " + vers + " "
- var hashes []string
- for _, line := range strings.Split(string(result.data), "\n") {
- if strings.HasPrefix(line, prefix) {
- hashes = append(hashes, line)
- }
- }
- return hashes, nil
-}
-
-// mergeLatest merges the tree head in msg
-// with the Client's current latest tree head,
-// ensuring the result is a consistent timeline.
-// If the result is inconsistent, mergeLatest calls c.ops.SecurityError
-// with a detailed security error message and then
-// (only if c.ops.SecurityError does not exit the program) returns ErrSecurity.
-// If the Client's current latest tree head moves forward,
-// mergeLatest updates the underlying configuration file as well,
-// taking care to merge any independent updates to that configuration.
-func (c *Client) mergeLatest(msg []byte) error {
- // Merge msg into our in-memory copy of the latest tree head.
- when, err := c.mergeLatestMem(msg)
- if err != nil {
- return err
- }
- if when != msgFuture {
- // msg matched our present or was in the past.
- // No change to our present, so no update of config file.
- return nil
- }
-
- // Flush our extended timeline back out to the configuration file.
- // If the configuration file has been updated in the interim,
- // we need to merge any updates made there as well.
- // Note that writeConfig is an atomic compare-and-swap.
- for {
- msg, err := c.ops.ReadConfig(c.name + "/latest")
- if err != nil {
- return err
- }
- when, err := c.mergeLatestMem(msg)
- if err != nil {
- return err
- }
- if when != msgPast {
- // msg matched our present or was from the future,
- // and now our in-memory copy matches.
- return nil
- }
-
- // msg (== config) is in the past, so we need to update it.
- c.latestMu.Lock()
- latestMsg := c.latestMsg
- c.latestMu.Unlock()
- if err := c.ops.WriteConfig(c.name+"/latest", msg, latestMsg); err != ErrWriteConflict {
- // Success or a non-write-conflict error.
- return err
- }
- }
-}
-
-const (
- msgPast = 1 + iota
- msgNow
- msgFuture
-)
-
-// mergeLatestMem is like mergeLatest but is only concerned with
-// updating the in-memory copy of the latest tree head (c.latest)
-// not the configuration file.
-// The when result explains when msg happened relative to our
-// previous idea of c.latest:
-// msgPast means msg was from before c.latest,
-// msgNow means msg was exactly c.latest, and
-// msgFuture means msg was from after c.latest, which has now been updated.
-func (c *Client) mergeLatestMem(msg []byte) (when int, err error) {
- if len(msg) == 0 {
- // Accept empty msg as the unsigned, empty timeline.
- c.latestMu.Lock()
- latest := c.latest
- c.latestMu.Unlock()
- if latest.N == 0 {
- return msgNow, nil
- }
- return msgPast, nil
- }
-
- note, err := note.Open(msg, c.verifiers)
- if err != nil {
- return 0, fmt.Errorf("reading tree note: %v\nnote:\n%s", err, msg)
- }
- tree, err := tlog.ParseTree([]byte(note.Text))
- if err != nil {
- return 0, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text)
- }
-
- // Other lookups may be calling mergeLatest with other heads,
- // so c.latest is changing underfoot. We don't want to hold the
- // c.mu lock during tile fetches, so loop trying to update c.latest.
- c.latestMu.Lock()
- latest := c.latest
- latestMsg := c.latestMsg
- c.latestMu.Unlock()
-
- for {
- // If the tree head looks old, check that it is on our timeline.
- if tree.N <= latest.N {
- if err := c.checkTrees(tree, msg, latest, latestMsg); err != nil {
- return 0, err
- }
- if tree.N < latest.N {
- return msgPast, nil
- }
- return msgNow, nil
- }
-
- // The tree head looks new. Check that we are on its timeline and try to move our timeline forward.
- if err := c.checkTrees(latest, latestMsg, tree, msg); err != nil {
- return 0, err
- }
-
- // Install our msg if possible.
- // Otherwise we will go around again.
- c.latestMu.Lock()
- installed := false
- if c.latest == latest {
- installed = true
- c.latest = tree
- c.latestMsg = msg
- } else {
- latest = c.latest
- latestMsg = c.latestMsg
- }
- c.latestMu.Unlock()
-
- if installed {
- return msgFuture, nil
- }
- }
-}
-
-// checkTrees checks that older (from olderNote) is contained in newer (from newerNote).
-// If an error occurs, such as malformed data or a network problem, checkTrees returns that error.
-// If on the other hand checkTrees finds evidence of misbehavior, it prepares a detailed
-// message and calls log.Fatal.
-func (c *Client) checkTrees(older tlog.Tree, olderNote []byte, newer tlog.Tree, newerNote []byte) error {
- thr := tlog.TileHashReader(newer, &c.tileReader)
- h, err := tlog.TreeHash(older.N, thr)
- if err != nil {
- if older.N == newer.N {
- return fmt.Errorf("checking tree#%d: %v", older.N, err)
- }
- return fmt.Errorf("checking tree#%d against tree#%d: %v", older.N, newer.N, err)
- }
- if h == older.Hash {
- return nil
- }
-
- // Detected a fork in the tree timeline.
- // Start by reporting the inconsistent signed tree notes.
- var buf bytes.Buffer
- fmt.Fprintf(&buf, "SECURITY ERROR\n")
- fmt.Fprintf(&buf, "go.sum database server misbehavior detected!\n\n")
- indent := func(b []byte) []byte {
- return bytes.Replace(b, []byte("\n"), []byte("\n\t"), -1)
- }
- fmt.Fprintf(&buf, "old database:\n\t%s\n", indent(olderNote))
- fmt.Fprintf(&buf, "new database:\n\t%s\n", indent(newerNote))
-
- // The notes alone are not enough to prove the inconsistency.
- // We also need to show that the newer note's tree hash for older.N
- // does not match older.Hash. The consumer of this report could
- // of course consult the server to try to verify the inconsistency,
- // but we are holding all the bits we need to prove it right now,
- // so we might as well print them and make the report not depend
- // on the continued availability of the misbehaving server.
- // Preparing this data only reuses the tiled hashes needed for
- // tlog.TreeHash(older.N, thr) above, so assuming thr is caching tiles,
- // there are no new access to the server here, and these operations cannot fail.
- fmt.Fprintf(&buf, "proof of misbehavior:\n\t%v", h)
- if p, err := tlog.ProveTree(newer.N, older.N, thr); err != nil {
- fmt.Fprintf(&buf, "\tinternal error: %v\n", err)
- } else if err := tlog.CheckTree(p, newer.N, newer.Hash, older.N, h); err != nil {
- fmt.Fprintf(&buf, "\tinternal error: generated inconsistent proof\n")
- } else {
- for _, h := range p {
- fmt.Fprintf(&buf, "\n\t%v", h)
- }
- }
- c.ops.SecurityError(buf.String())
- return ErrSecurity
-}
-
-// checkRecord checks that record #id's hash matches data.
-func (c *Client) checkRecord(id int64, data []byte) error {
- c.latestMu.Lock()
- latest := c.latest
- c.latestMu.Unlock()
-
- if id >= latest.N {
- return fmt.Errorf("cannot validate record %d in tree of size %d", id, latest.N)
- }
- hashes, err := tlog.TileHashReader(latest, &c.tileReader).ReadHashes([]int64{tlog.StoredHashIndex(0, id)})
- if err != nil {
- return err
- }
- if hashes[0] == tlog.RecordHash(data) {
- return nil
- }
- return fmt.Errorf("cannot authenticate record data in server response")
-}
-
-// tileReader is a *Client wrapper that implements tlog.TileReader.
-// The separate type avoids exposing the ReadTiles and SaveTiles
-// methods on Client itself.
-type tileReader struct {
- c *Client
-}
-
-func (r *tileReader) Height() int {
- return r.c.tileHeight
-}
-
-// ReadTiles reads and returns the requested tiles,
-// either from the on-disk cache or the server.
-func (r *tileReader) ReadTiles(tiles []tlog.Tile) ([][]byte, error) {
- // Read all the tiles in parallel.
- data := make([][]byte, len(tiles))
- errs := make([]error, len(tiles))
- var wg sync.WaitGroup
- for i, tile := range tiles {
- wg.Add(1)
- go func(i int, tile tlog.Tile) {
- defer wg.Done()
- data[i], errs[i] = r.c.readTile(tile)
- }(i, tile)
- }
- wg.Wait()
-
- for _, err := range errs {
- if err != nil {
- return nil, err
- }
- }
-
- return data, nil
-}
-
-// tileCacheKey returns the cache key for the tile.
-func (c *Client) tileCacheKey(tile tlog.Tile) string {
- return c.name + "/" + tile.Path()
-}
-
-// tileRemotePath returns the remote path for the tile.
-func (c *Client) tileRemotePath(tile tlog.Tile) string {
- return "/" + tile.Path()
-}
-
-// readTile reads a single tile, either from the on-disk cache or the server.
-func (c *Client) readTile(tile tlog.Tile) ([]byte, error) {
- type cached struct {
- data []byte
- err error
- }
-
- result := c.tileCache.Do(tile, func() interface{} {
- // Try the requested tile in on-disk cache.
- data, err := c.ops.ReadCache(c.tileCacheKey(tile))
- if err == nil {
- c.markTileSaved(tile)
- return cached{data, nil}
- }
-
- // Try the full tile in on-disk cache (if requested tile not already full).
- // We only save authenticated tiles to the on-disk cache,
- // so the recreated prefix is equally authenticated.
- full := tile
- full.W = 1 << uint(tile.H)
- if tile != full {
- data, err := c.ops.ReadCache(c.tileCacheKey(full))
- if err == nil {
- c.markTileSaved(tile) // don't save tile later; we already have full
- return cached{data[:len(data)/full.W*tile.W], nil}
- }
- }
-
- // Try requested tile from server.
- data, err = c.ops.ReadRemote(c.tileRemotePath(tile))
- if err == nil {
- return cached{data, nil}
- }
-
- // Try full tile on server.
- // If the partial tile does not exist, it should be because
- // the tile has been completed and only the complete one
- // is available.
- if tile != full {
- data, err := c.ops.ReadRemote(c.tileRemotePath(full))
- if err == nil {
- // Note: We could save the full tile in the on-disk cache here,
- // but we don't know if it is valid yet, and we will only find out
- // about the partial data, not the full data. So let SaveTiles
- // save the partial tile, and we'll just refetch the full tile later
- // once we can validate more (or all) of it.
- return cached{data[:len(data)/full.W*tile.W], nil}
- }
- }
-
- // Nothing worked.
- // Return the error from the server fetch for the requested (not full) tile.
- return cached{nil, err}
- }).(cached)
-
- return result.data, result.err
-}
-
-// markTileSaved records that tile is already present in the on-disk cache,
-// so that a future SaveTiles for that tile can be ignored.
-func (c *Client) markTileSaved(tile tlog.Tile) {
- c.tileSavedMu.Lock()
- c.tileSaved[tile] = true
- c.tileSavedMu.Unlock()
-}
-
-// SaveTiles saves the now validated tiles.
-func (r *tileReader) SaveTiles(tiles []tlog.Tile, data [][]byte) {
- c := r.c
-
- // Determine which tiles need saving.
- // (Tiles that came from the cache need not be saved back.)
- save := make([]bool, len(tiles))
- c.tileSavedMu.Lock()
- for i, tile := range tiles {
- if !c.tileSaved[tile] {
- save[i] = true
- c.tileSaved[tile] = true
- }
- }
- c.tileSavedMu.Unlock()
-
- for i, tile := range tiles {
- if save[i] {
- // If WriteCache fails here (out of disk space? i/o error?),
- // c.tileSaved[tile] is still true and we will not try to write it again.
- // Next time we run maybe we'll redownload it again and be
- // more successful.
- c.ops.WriteCache(c.name+"/"+tile.Path(), data[i])
- }
- }
-}