1 // Copyright 2019 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 "golang.org/x/mod/module"
17 "golang.org/x/mod/sumdb/note"
18 "golang.org/x/mod/sumdb/tlog"
// A ClientOps provides the external operations
// (file caching, HTTP fetches, and so on) needed by the Client.
// The methods must be safe for concurrent use by multiple goroutines.
type ClientOps interface {
	// ReadRemote reads and returns the content served at the given path
	// on the remote database server. The path begins with "/lookup" or "/tile/",
	// and there is no need to parse the path in any way.
	// It is the implementation's responsibility to turn that path into a full URL
	// and make the HTTP request. ReadRemote should return an error for
	// any non-200 HTTP response status.
	ReadRemote(path string) ([]byte, error)

	// ReadConfig reads and returns the content of the named configuration file.
	// There are only a fixed set of configuration files.
	//
	// "key" returns a file containing the verifier key for the server.
	//
	// serverName + "/latest" returns a file containing the latest known
	// signed tree from the server.
	// To signal that the client wishes to start with an "empty" signed tree,
	// ReadConfig can return a successful empty result (0 bytes of data).
	ReadConfig(file string) ([]byte, error)

	// WriteConfig updates the content of the named configuration file,
	// changing it from the old []byte to the new []byte.
	// If the old []byte does not match the stored configuration,
	// WriteConfig must return ErrWriteConflict.
	// Otherwise, WriteConfig should atomically replace old with new.
	// The "key" configuration file is never written using WriteConfig.
	WriteConfig(file string, old, new []byte) error

	// ReadCache reads and returns the content of the named cache file.
	// Any returned error will be treated as equivalent to the file not existing.
	// There can be arbitrarily many cache files, such as:
	//	serverName/lookup/pkg@version
	//	serverName/tile/8/1/x123/456
	ReadCache(file string) ([]byte, error)

	// WriteCache writes the named cache file.
	// WriteCache has no error result: a failed write is treated as a cache miss
	// on the next ReadCache rather than as a fatal condition.
	WriteCache(file string, data []byte)

	// Log prints the given log message (such as with log.Print)

	// SecurityError prints the given security error log message.
	// The Client returns ErrSecurity from any operation that invokes SecurityError,
	// but the return value is mainly for testing. In a real program,
	// SecurityError should typically print the message and call log.Fatal or os.Exit.
	SecurityError(msg string)
// ErrWriteConflict signals a write conflict during ClientOps.WriteConfig:
// the old content passed to WriteConfig no longer matches the stored configuration.
var ErrWriteConflict = errors.New("write conflict")
// ErrSecurity is returned by Client operations that invoke ClientOps.SecurityError,
// indicating detected misbehavior by the checksum database server.
var ErrSecurity = errors.New("security error: misbehaving server")
// A Client is a client connection to a checksum database.
// All the methods are safe for simultaneous use by multiple goroutines.

	ops ClientOps // access to operations in the external world

	// one-time initialized data

	initErr   error          // init error, if any
	name      string         // name of accepted verifier
	verifiers note.Verifiers // accepted verifiers (just one, but Verifiers for note.Open)

	record    parCache // cache of record lookup, keyed by path@vers
	tileCache parCache // cache of c.readTile, keyed by tile

	latest    tlog.Tree // latest known tree head
	latestMsg []byte    // encoded signed note for latest

	tileSavedMu sync.Mutex         // guards tileSaved
	tileSaved   map[tlog.Tile]bool // which tiles have been saved using c.ops.WriteCache already
// NewClient returns a new Client using the given ClientOps.
func NewClient(ops ClientOps) *Client {
// init initializes the client (if not already initialized)
// and returns any initialization error.
func (c *Client) init() error {
	// initOnce guarantees initWork runs exactly once, even with
	// concurrent callers; later calls just observe c.initErr.
	c.initOnce.Do(c.initWork)
// initWork does the actual initialization work.
// It records any failure in c.initErr, which init returns to all callers.
func (c *Client) initWork() {
	// Wrap any initialization error with package-level context.
	if c.initErr != nil {
		c.initErr = fmt.Errorf("initializing sumdb.Client: %v", c.initErr)
	// Apply the default tile height if SetTileHeight was never called.
	if c.tileHeight == 0 {
		c.tileSaved = make(map[tlog.Tile]bool)
	// Load and parse the server's verifier key from configuration.
	vkey, err := c.ops.ReadConfig("key")
	verifier, err := note.NewVerifier(strings.TrimSpace(string(vkey)))
	c.verifiers = note.VerifierList(verifier)
	c.name = verifier.Name()
	// Load the latest known signed tree head for this server, if any,
	// and merge it into the in-memory state.
	data, err := c.ops.ReadConfig(c.name + "/latest")
	if err := c.mergeLatest(data); err != nil {
// SetTileHeight sets the tile height for the Client.
// If SetTileHeight is not called, the Client defaults to tile height 8.
// SetTileHeight can be called at most once,
// and it must be called before the first call to Lookup.
func (c *Client) SetTileHeight(height int) {
	// didLookup is set (atomically) by Lookup; changing the tile height
	// after tiles have been fetched would corrupt the tile cache.
	if atomic.LoadUint32(&c.didLookup) != 0 {
		panic("SetTileHeight used after Lookup")
		panic("invalid call to SetTileHeight")
	if c.tileHeight != 0 {
		panic("multiple calls to SetTileHeight")
	c.tileHeight = height
// SetGONOSUMDB sets the list of comma-separated GONOSUMDB patterns for the Client.
// For any module path matching one of the patterns,
// Lookup will return ErrGONOSUMDB.
// SetGONOSUMDB can be called at most once,
// and it must be called before the first call to Lookup.
func (c *Client) SetGONOSUMDB(list string) {
	// Changing the skip list after lookups have begun would make
	// Lookup results inconsistent, so this is a programmer error.
	if atomic.LoadUint32(&c.didLookup) != 0 {
		panic("SetGONOSUMDB used after Lookup")
		panic("multiple calls to SetGONOSUMDB")
// ErrGONOSUMDB is returned by Lookup for paths that match
// a pattern listed in the GONOSUMDB list (set by SetGONOSUMDB,
// usually from the GONOSUMDB environment variable).
var ErrGONOSUMDB = errors.New("skipped (listed in GONOSUMDB)")
// skip reports whether the module path target matches one of the
// GONOSUMDB patterns and should therefore be skipped by Lookup.
func (c *Client) skip(target string) bool {
	return globsMatchPath(c.nosumdb, target)
// globsMatchPath reports whether any path prefix of target
// matches one of the glob patterns (as defined by path.Match)
// in the comma-separated globs list.
// It ignores any empty or malformed patterns in the list.
func globsMatchPath(globs, target string) bool {
		// Extract next non-empty glob in comma-separated list.
		if i := strings.Index(globs, ","); i >= 0 {
			glob, globs = globs[:i], globs[i+1:]
			// Last (or only) pattern in the list.
			glob, globs = globs, ""
		// A glob with N+1 path elements (N slashes) needs to be matched
		// against the first N+1 path elements of target,
		// which end just before the N+1'th slash.
		n := strings.Count(glob, "/")

		// Walk target, counting slashes, truncating at the N+1'th slash.
		for i := 0; i < len(target); i++ {
			if target[i] == '/' {
		// Not enough prefix elements.
		// Malformed patterns make path.Match return an error;
		// the error is deliberately ignored so bad patterns never match.
		matched, _ := path.Match(glob, prefix)
// Lookup returns the go.sum lines for the given module path and version.
// The version may end in a /go.mod suffix, in which case Lookup returns
// the go.sum lines for the module's go.mod-only hash.
// Lookup returns ErrGONOSUMDB for paths matching the GONOSUMDB list,
// and ErrSecurity if server misbehavior is detected during validation.
func (c *Client) Lookup(path, vers string) (lines []string, err error) {
	// Record that a lookup has started, so SetTileHeight/SetGONOSUMDB
	// can panic if called from now on.
	atomic.StoreUint32(&c.didLookup, 1)
		return nil, ErrGONOSUMDB
		// Annotate any returned error with the module being looked up.
		err = fmt.Errorf("%s@%s: %v", path, vers, err)
	if err := c.init(); err != nil {
	// Prepare encoded cache filename / URL.
	epath, err := module.EscapePath(path)
	evers, err := module.EscapeVersion(strings.TrimSuffix(vers, "/go.mod"))
	remotePath := "/lookup/" + epath + "@" + evers
	file := c.name + remotePath

	// The record parCache avoids redundant ReadCache/ReadRemote operations
	// (especially since go.sum lines tend to come in pairs for a given
	// path and version) and also avoids having multiple of the same
	// request in flight at once.
	result := c.record.Do(file, func() interface{} {
		// Try the on-disk cache, or else get from web.
		data, err := c.ops.ReadCache(file)
			data, err = c.ops.ReadRemote(remotePath)
				return cached{nil, err}

		// Validate the record before using it for anything.
		id, text, treeMsg, err := tlog.ParseRecord(data)
			return cached{nil, err}
		// Check the accompanying signed tree head against our timeline.
		if err := c.mergeLatest(treeMsg); err != nil {
			return cached{nil, err}
		// Check the record hash against the (now merged) tree.
		if err := c.checkRecord(id, text); err != nil {
			return cached{nil, err}

		// Now that we've validated the record,
		// save it to the on-disk cache (unless that's where it came from).
			c.ops.WriteCache(file, data)

		return cached{data, nil}
	if result.err != nil {
		return nil, result.err

	// Extract the lines for the specific version we want
	// (with or without /go.mod).
	prefix := path + " " + vers + " "
	for _, line := range strings.Split(string(result.data), "\n") {
		if strings.HasPrefix(line, prefix) {
			hashes = append(hashes, line)
// mergeLatest merges the tree head in msg
// with the Client's current latest tree head,
// ensuring the result is a consistent timeline.
// If the result is inconsistent, mergeLatest calls c.ops.SecurityError
// with a detailed security error message and then
// (only if c.ops.SecurityError does not exit the program) returns ErrSecurity.
// If the Client's current latest tree head moves forward,
// mergeLatest updates the underlying configuration file as well,
// taking care to merge any independent updates to that configuration.
func (c *Client) mergeLatest(msg []byte) error {
	// Merge msg into our in-memory copy of the latest tree head.
	when, err := c.mergeLatestMem(msg)
	if when != msgFuture {
		// msg matched our present or was in the past.
		// No change to our present, so no update of config file.

	// Flush our extended timeline back out to the configuration file.
	// If the configuration file has been updated in the interim,
	// we need to merge any updates made there as well.
	// Note that writeConfig is an atomic compare-and-swap.
		// Re-read the stored latest note to use as the compare-and-swap "old" value.
		msg, err := c.ops.ReadConfig(c.name + "/latest")
		when, err := c.mergeLatestMem(msg)
			// msg matched our present or was from the future,
			// and now our in-memory copy matches.

		// msg (== config) is in the past, so we need to update it.
		latestMsg := c.latestMsg
		if err := c.ops.WriteConfig(c.name+"/latest", msg, latestMsg); err != ErrWriteConflict {
			// Success or a non-write-conflict error.
			// On ErrWriteConflict we loop and re-merge the newer stored config.
// mergeLatestMem is like mergeLatest but is only concerned with
// updating the in-memory copy of the latest tree head (c.latest)
// not the configuration file.
// The when result explains when msg happened relative to our
// previous idea of c.latest:
// msgPast means msg was from before c.latest,
// msgNow means msg was exactly c.latest, and
// msgFuture means msg was from after c.latest, which has now been updated.
func (c *Client) mergeLatestMem(msg []byte) (when int, err error) {
	// Accept empty msg as the unsigned, empty timeline.
	// Verify the note signature before trusting its contents.
	// (The local variable shadows the note package, which is not
	// needed again after this call.)
	note, err := note.Open(msg, c.verifiers)
		return 0, fmt.Errorf("reading tree note: %v\nnote:\n%s", err, msg)
	tree, err := tlog.ParseTree([]byte(note.Text))
		return 0, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text)

	// Other lookups may be calling mergeLatest with other heads,
	// so c.latest is changing underfoot. We don't want to hold the
	// c.mu lock during tile fetches, so loop trying to update c.latest.
		latestMsg := c.latestMsg

		// If the tree head looks old, check that it is on our timeline.
		if tree.N <= latest.N {
			if err := c.checkTrees(tree, msg, latest, latestMsg); err != nil {
			if tree.N < latest.N {

		// The tree head looks new. Check that we are on its timeline and try to move our timeline forward.
		if err := c.checkTrees(latest, latestMsg, tree, msg); err != nil {

		// Install our msg if possible.
		// Otherwise we will go around again.
		// Only swap if no other goroutine advanced c.latest meanwhile.
		if c.latest == latest {
		// Lost the race: reload the newer head and retry the merge.
		latestMsg = c.latestMsg

	return msgFuture, nil
// checkTrees checks that older (from olderNote) is contained in newer (from newerNote).
// If an error occurs, such as malformed data or a network problem, checkTrees returns that error.
// If on the other hand checkTrees finds evidence of misbehavior, it prepares a detailed
// message and calls c.ops.SecurityError.
func (c *Client) checkTrees(older tlog.Tree, olderNote []byte, newer tlog.Tree, newerNote []byte) error {
	// Recompute the hash of the older tree using tiles served for the newer tree.
	thr := tlog.TileHashReader(newer, &c.tileReader)
	h, err := tlog.TreeHash(older.N, thr)
		if older.N == newer.N {
			return fmt.Errorf("checking tree#%d: %v", older.N, err)
		return fmt.Errorf("checking tree#%d against tree#%d: %v", older.N, newer.N, err)

	// Detected a fork in the tree timeline.
	// Start by reporting the inconsistent signed tree notes.
	fmt.Fprintf(&buf, "SECURITY ERROR\n")
	fmt.Fprintf(&buf, "go.sum database server misbehavior detected!\n\n")
	indent := func(b []byte) []byte {
		return bytes.Replace(b, []byte("\n"), []byte("\n\t"), -1)
	fmt.Fprintf(&buf, "old database:\n\t%s\n", indent(olderNote))
	fmt.Fprintf(&buf, "new database:\n\t%s\n", indent(newerNote))

	// The notes alone are not enough to prove the inconsistency.
	// We also need to show that the newer note's tree hash for older.N
	// does not match older.Hash. The consumer of this report could
	// of course consult the server to try to verify the inconsistency,
	// but we are holding all the bits we need to prove it right now,
	// so we might as well print them and make the report not depend
	// on the continued availability of the misbehaving server.
	// Preparing this data only reuses the tiled hashes needed for
	// tlog.TreeHash(older.N, thr) above, so assuming thr is caching tiles,
	// there are no new accesses to the server here, and these operations cannot fail.
	fmt.Fprintf(&buf, "proof of misbehavior:\n\t%v", h)
	if p, err := tlog.ProveTree(newer.N, older.N, thr); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: %v\n", err)
	} else if err := tlog.CheckTree(p, newer.N, newer.Hash, older.N, h); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: generated inconsistent proof\n")
		for _, h := range p {
			fmt.Fprintf(&buf, "\n\t%v", h)
	c.ops.SecurityError(buf.String())
// checkRecord checks that record #id's hash matches data,
// verified against the latest known tree head.
func (c *Client) checkRecord(id int64, data []byte) error {
		// The tree must be large enough to contain the record at all.
		return fmt.Errorf("cannot validate record %d in tree of size %d", id, latest.N)
	// Fetch the single stored hash corresponding to record id.
	hashes, err := tlog.TileHashReader(latest, &c.tileReader).ReadHashes([]int64{tlog.StoredHashIndex(0, id)})
	if hashes[0] == tlog.RecordHash(data) {
	return fmt.Errorf("cannot authenticate record data in server response")
// tileReader is a *Client wrapper that implements tlog.TileReader.
// The separate type avoids exposing the ReadTiles and SaveTiles
// methods on Client itself.
type tileReader struct {

// Height returns the tile height configured on the underlying Client,
// implementing part of tlog.TileReader.
func (r *tileReader) Height() int {
	return r.c.tileHeight
// ReadTiles reads and returns the requested tiles,
// either from the on-disk cache or the server.
// It implements part of tlog.TileReader.
func (r *tileReader) ReadTiles(tiles []tlog.Tile) ([][]byte, error) {
	// Read all the tiles in parallel.
	data := make([][]byte, len(tiles))
	errs := make([]error, len(tiles))
	var wg sync.WaitGroup
	for i, tile := range tiles {
		// Each goroutine writes only its own index in data/errs,
		// so no additional locking is needed.
		go func(i int, tile tlog.Tile) {
			data[i], errs[i] = r.c.readTile(tile)
	// Report the first error encountered, if any.
	for _, err := range errs {
// tileCacheKey returns the cache key for the tile,
// namespaced by the server (verifier) name.
func (c *Client) tileCacheKey(tile tlog.Tile) string {
	return c.name + "/" + tile.Path()
// tileRemotePath returns the remote path for the tile,
// suitable for passing to ClientOps.ReadRemote.
func (c *Client) tileRemotePath(tile tlog.Tile) string {
	return "/" + tile.Path()
// readTile reads a single tile, either from the on-disk cache or the server.
// Results are deduplicated through c.tileCache so each tile is fetched at most once.
func (c *Client) readTile(tile tlog.Tile) ([]byte, error) {
	result := c.tileCache.Do(tile, func() interface{} {
		// Try the requested tile in on-disk cache.
		data, err := c.ops.ReadCache(c.tileCacheKey(tile))
			// Already on disk; no need to save it again later.
			c.markTileSaved(tile)
			return cached{data, nil}

		// Try the full tile in on-disk cache (if requested tile not already full).
		// We only save authenticated tiles to the on-disk cache,
		// so the recreated prefix is equally authenticated.
		full.W = 1 << uint(tile.H)
		data, err := c.ops.ReadCache(c.tileCacheKey(full))
			c.markTileSaved(tile) // don't save tile later; we already have full
			// Slice the requested prefix out of the full tile.
			return cached{data[:len(data)/full.W*tile.W], nil}

		// Try requested tile from server.
		data, err = c.ops.ReadRemote(c.tileRemotePath(tile))
			return cached{data, nil}

		// Try full tile on server.
		// If the partial tile does not exist, it should be because
		// the tile has been completed and only the complete one
		data, err := c.ops.ReadRemote(c.tileRemotePath(full))
			// Note: We could save the full tile in the on-disk cache here,
			// but we don't know if it is valid yet, and we will only find out
			// about the partial data, not the full data. So let SaveTiles
			// save the partial tile, and we'll just refetch the full tile later
			// once we can validate more (or all) of it.
			return cached{data[:len(data)/full.W*tile.W], nil}

		// Return the error from the server fetch for the requested (not full) tile.
		return cached{nil, err}

	return result.data, result.err
// markTileSaved records that tile is already present in the on-disk cache,
// so that a future SaveTiles for that tile can be ignored.
func (c *Client) markTileSaved(tile tlog.Tile) {
	c.tileSaved[tile] = true // write protected by tileSavedMu
	c.tileSavedMu.Unlock()
// SaveTiles saves the now validated tiles to the on-disk cache.
// It implements part of tlog.TileReader.
func (r *tileReader) SaveTiles(tiles []tlog.Tile, data [][]byte) {
	// Determine which tiles need saving.
	// (Tiles that came from the cache need not be saved back.)
	save := make([]bool, len(tiles))
	for i, tile := range tiles {
		if !c.tileSaved[tile] {
			// Mark saved eagerly, under the lock, so concurrent
			// SaveTiles calls do not both write the same tile.
			c.tileSaved[tile] = true
	c.tileSavedMu.Unlock()

	// Perform the actual writes outside the lock.
	for i, tile := range tiles {
			// If WriteCache fails here (out of disk space? i/o error?),
			// c.tileSaved[tile] is still true and we will not try to write it again.
			// Next time we run maybe we'll redownload it again and be
			c.ops.WriteCache(c.name+"/"+tile.Path(), data[i])