1 // Copyright 2019 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package zip provides functions for creating and extracting module zip files.
7 // Module zip files have several restrictions listed below. These are necessary
8 // to ensure that module zip files can be extracted consistently on supported
9 // platforms and file systems.
11 // • All file paths within a zip file must start with "<module>@<version>/",
12 // where "<module>" is the module path and "<version>" is the version.
13 // The module path must be valid (see golang.org/x/mod/module.CheckPath).
14 // The version must be valid and canonical (see
15 // golang.org/x/mod/module.CanonicalVersion). The path must have a major
16 // version suffix consistent with the version (see
17 // golang.org/x/mod/module.Check). The part of the file path after the
18 // "<module>@<version>/" prefix must be valid (see
19 // golang.org/x/mod/module.CheckFilePath).
21 // • No two file paths may be equal under Unicode case-folding (see
22 // strings.EqualFold).
24 // • A go.mod file may or may not appear in the top-level directory. If present,
25 // it must be named "go.mod", not any other case. Files named "go.mod"
26 // are not allowed in any other directory.
28 // • The total size in bytes of a module zip file may be at most MaxZipFile
29 // bytes (500 MiB). The total uncompressed size of the files within the
30 // zip may also be at most MaxZipFile bytes.
32 // • Each file's uncompressed size must match its declared 64-bit uncompressed
33 // size in the zip file header.
35 // • If the zip contains files named "<module>@<version>/go.mod" or
36 // "<module>@<version>/LICENSE", their sizes in bytes may be at most
37 // MaxGoMod or MaxLICENSE, respectively (both are 16 MiB).
39 // • Empty directories are ignored. File permissions and timestamps are also ignored.
42 // • Symbolic links and other irregular files are not allowed.
44 // Note that this package does not provide hashing functionality. See
45 // golang.org/x/mod/sumdb/dirhash.
61 "golang.org/x/mod/module"
const (
	// MaxZipFile is the maximum size in bytes of a module zip file (500 MiB).
	// The go command will report an error if either the zip file or its
	// extracted content is larger than this.
	MaxZipFile = 500 << 20

	// MaxGoMod is the maximum size in bytes of a go.mod file within a
	// module zip file (16 MiB).
	MaxGoMod = 16 << 20

	// MaxLICENSE is the maximum size in bytes of a LICENSE file within a
	// module zip file (16 MiB).
	MaxLICENSE = 16 << 20
)
// File provides an abstraction for a file in a directory, zip, or anything
// else that looks like a file.
type File interface {
	// Path returns a clean slash-separated relative path from the module root
	// directory to the file.
	Path() string

	// Lstat returns information about the file. If the file is a symbolic link,
	// Lstat returns information about the link itself, not the file it points to.
	Lstat() (os.FileInfo, error)

	// Open provides access to the data within a regular file. Open may return
	// an error if called on a directory or symbolic link.
	Open() (io.ReadCloser, error)
}
95 // Create builds a zip archive for module m from an abstract list of files
96 // and writes it to w.
98 // Create verifies the restrictions described in the package documentation
99 // and should not produce an archive that Unzip cannot extract. Create does not
100 // include files in the output archive if they don't belong in the module zip.
101 // In particular, Create will not include files in modules found in
102 // subdirectories, most files in vendor directories, or irregular files (such
103 // as symbolic links) in the output archive.
104 func Create(w io.Writer, m module.Version, files []File) (err error) {
107 err = &zipError{verb: "create zip", err: err} // label the failure with the operation name
111 // Check that the version is canonical, the module path is well-formed, and
112 // the major version suffix matches the major version.
113 if vers := module.CanonicalVersion(m.Version); vers != m.Version {
114 return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
116 if err := module.Check(m.Path, m.Version); err != nil {
120 // Find directories containing go.mod files (other than the root).
121 // These directories will not be included in the output zip.
122 haveGoMod := make(map[string]bool)
123 for _, f := range files {
124 dir, base := path.Split(f.Path())
125 if strings.EqualFold(base, "go.mod") { // any capitalization of go.mod is considered here
126 info, err := f.Lstat()
130 if info.Mode().IsRegular() { // only a regular file marks its directory
131 haveGoMod[dir] = true
136 inSubmodule := func(p string) bool {
138 dir, _ := path.Split(p)
149 // Create the module zip file.
150 zw := zip.NewWriter(w)
151 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
153 addFile := func(f File, path string, size int64) error {
159 w, err := zw.Create(prefix + path)
163 lr := &io.LimitedReader{R: rc, N: size + 1} // one byte past size, so oversized content can be detected
164 if _, err := io.Copy(w, lr); err != nil {
168 return fmt.Errorf("file %q is larger than declared size", path)
173 collisions := make(collisionChecker)
174 maxSize := int64(MaxZipFile) // limit that each file's size is compared against below
175 for _, f := range files {
177 if p != path.Clean(p) {
178 return fmt.Errorf("file path %s is not clean", p)
181 return fmt.Errorf("file path %s is not relative", p)
183 if isVendoredPackage(p) || inSubmodule(p) {
186 if p == ".hg_archival.txt" {
187 // Inserted by hg archive.
188 // The go command drops this regardless of the VCS being used.
191 if err := module.CheckFilePath(p); err != nil {
194 if strings.ToLower(p) == "go.mod" && p != "go.mod" {
195 return fmt.Errorf("found file named %s, want all lower-case go.mod", p)
197 info, err := f.Lstat()
201 if err := collisions.check(p, info.IsDir()); err != nil {
204 if !info.Mode().IsRegular() {
205 // Skip symbolic links (golang.org/issue/27093).
209 if size < 0 || maxSize < size {
210 return fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
213 if p == "go.mod" && size > MaxGoMod {
214 return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
216 if p == "LICENSE" && size > MaxLICENSE {
217 return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
220 if err := addFile(f, p, size); err != nil {
228 // CreateFromDir creates a module zip file for module m from the contents of
229 // a directory, dir. The zip content is written to w.
231 // CreateFromDir verifies the restrictions described in the package
232 // documentation and should not produce an archive that Unzip cannot extract.
233 // CreateFromDir does not include files in the output archive if they don't
234 // belong in the module zip. In particular, CreateFromDir will not include
235 // files in modules found in subdirectories, most files in vendor directories,
236 // or irregular files (such as symbolic links) in the output archive.
237 // Additionally, unlike Create, CreateFromDir will not include directories
238 // named ".bzr", ".git", ".hg", or ".svn".
239 func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
241 if zerr, ok := err.(*zipError); ok {
243 } else if err != nil {
244 err = &zipError{verb: "create zip", path: dir, err: err} // any other failure is wrapped, recording dir as the path
249 err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
253 relPath, err := filepath.Rel(dir, filePath)
257 slashPath := filepath.ToSlash(relPath) // slash-separated form of the path relative to dir
261 // Don't skip the top-level directory.
265 // Skip VCS directories.
266 // fossil repos are regular files with arbitrary names, so we don't try to exclude them.
268 switch filepath.Base(filePath) {
269 case ".bzr", ".git", ".hg", ".svn":
270 return filepath.SkipDir
273 // Skip some subdirectories inside vendor, but maintain bug
274 // golang.org/issue/31562, described in isVendoredPackage.
275 // We would like Create and CreateFromDir to produce the same result
276 // for a set of files, whether expressed as a directory tree or zip.
277 if isVendoredPackage(slashPath) {
278 return filepath.SkipDir
281 // Skip submodules (directories containing go.mod files).
282 if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() { // a directory named go.mod does not count
283 return filepath.SkipDir
288 if info.Mode().IsRegular() {
289 if !isVendoredPackage(slashPath) {
290 files = append(files, dirFile{
292 slashPath: slashPath,
299 // Not a regular file or a directory. Probably a symbolic link.
300 // Irregular files are ignored, so skip it.
307 return Create(w, m, files) // validation and archive writing are delegated to Create
// dirFile is the File implementation CreateFromDir builds for each regular
// file it collects while walking a directory tree.
type dirFile struct {
	// filePath is the path passed to os.Open; slashPath is the
	// slash-separated path reported by Path.
	filePath, slashPath string
	// info is the file information returned by Lstat.
	info os.FileInfo
}

// Path returns the slash-separated path recorded for the file.
func (f dirFile) Path() string {
	return f.slashPath
}

// Lstat returns the file information stored in f. It never returns an error.
func (f dirFile) Lstat() (os.FileInfo, error) {
	return f.info, nil
}

// Open opens the file at f.filePath for reading.
//
// On failure it returns a nil io.ReadCloser. Returning os.Open's *os.File
// result directly would wrap a nil pointer in a non-nil interface value.
func (f dirFile) Open() (io.ReadCloser, error) {
	file, err := os.Open(f.filePath)
	if err != nil {
		return nil, err
	}
	return file, nil
}
// isVendoredPackage attempts to report whether the given filename is contained
// in a package whose import path contains (but does not end with) the component
// "vendor".
//
// Unfortunately, isVendoredPackage reports false positives for files in any
// non-top-level package whose import path ends in "vendor".
func isVendoredPackage(name string) bool {
	// i is the offset in name after which a further "/" means the file sits
	// in a package below the vendor component.
	var i int
	if strings.HasPrefix(name, "vendor/") {
		i += len("vendor/")
	} else if j := strings.Index(name, "/vendor/"); j >= 0 {
		// This offset looks incorrect; this should probably be
		//
		// 	i = j + len("/vendor/")
		//
		// (See https://golang.org/issue/31562 and https://golang.org/issue/37397.)
		// Unfortunately, we can't fix it without invalidating module checksums.
		i += len("/vendor/")
	} else {
		// No vendor component at all.
		return false
	}
	return strings.Contains(name[i:], "/")
}
343 // Unzip extracts the contents of a module zip file to a directory.
345 // Unzip checks all restrictions listed in the package documentation and returns
346 // an error if the zip archive is not valid. In some cases, files may be written
347 // to dir before an error is returned (for example, if a file's uncompressed
348 // size does not match its declared size).
350 // dir may or may not exist: Unzip will create it and any missing parent
351 // directories if it doesn't exist. If dir exists, it must be empty.
352 func Unzip(dir string, m module.Version, zipFile string) (err error) {
355 err = &zipError{verb: "unzip", path: zipFile, err: err} // label the failure with the operation and the zip file path
359 if vers := module.CanonicalVersion(m.Version); vers != m.Version {
360 return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
362 if err := module.Check(m.Path, m.Version); err != nil {
366 // Check that the directory is empty. Don't create it yet in case there's
367 // an error reading the zip.
368 files, _ := ioutil.ReadDir(dir) // NOTE(review): error ignored, presumably because dir may legitimately not exist yet — confirm
370 return fmt.Errorf("target directory %v exists and is not empty", dir)
373 // Open the zip file and ensure it's under the size limit.
374 f, err := os.Open(zipFile)
379 info, err := f.Stat()
383 zipSize := info.Size()
384 if zipSize > MaxZipFile {
385 return fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)
388 z, err := zip.NewReader(f, zipSize)
393 // Check total size, valid file names.
394 collisions := make(collisionChecker)
395 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
397 for _, zf := range z.File {
398 if !strings.HasPrefix(zf.Name, prefix) {
399 return fmt.Errorf("unexpected file name %s", zf.Name)
401 name := zf.Name[len(prefix):]
405 isDir := strings.HasSuffix(name, "/") // a trailing slash marks a directory entry
407 name = name[:len(name)-1] // drops the final byte (NOTE(review): presumably the trailing "/" of a directory entry — confirm the guard)
409 if path.Clean(name) != name {
410 return fmt.Errorf("invalid file name %s", zf.Name)
412 if err := module.CheckFilePath(name); err != nil {
415 if err := collisions.check(name, isDir); err != nil {
421 if base := path.Base(name); strings.EqualFold(base, "go.mod") {
423 return fmt.Errorf("found go.mod file not in module root directory (%s)", zf.Name)
424 } else if name != "go.mod" {
425 return fmt.Errorf("found file named %s, want all lower-case go.mod", zf.Name)
428 s := int64(zf.UncompressedSize64) // a declared size above MaxInt64 becomes negative and is rejected below
429 if s < 0 || MaxZipFile-size < s {
430 return fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
433 if name == "go.mod" && s > MaxGoMod {
434 return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
436 if name == "LICENSE" && s > MaxLICENSE {
437 return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
441 // Unzip, enforcing sizes checked earlier.
442 if err := os.MkdirAll(dir, 0777); err != nil {
445 for _, zf := range z.File {
446 name := zf.Name[len(prefix):]
447 if name == "" || strings.HasSuffix(name, "/") { // matches the root entry and directory entries
450 dst := filepath.Join(dir, name)
451 if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
454 w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444) // O_EXCL refuses to overwrite an existing file; 0444 creates it read-only
463 lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1} // one byte past the declared size, so oversized content can be detected
464 _, err = io.Copy(w, lr)
470 if err := w.Close(); err != nil {
474 return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
481 // collisionChecker finds case-insensitive name collisions and paths that
482 // are listed as both files and directories.
484 // The keys of this map are processed with strToFold. pathInfo has the original
485 // path for each folded path.
486 type collisionChecker map[string]pathInfo
488 type pathInfo struct {
493 func (cc collisionChecker) check(p string, isDir bool) error {
495 if other, ok := cc[fold]; ok {
497 return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
499 if isDir != other.isDir {
500 return fmt.Errorf("entry %q is both a file and a directory", p)
503 return fmt.Errorf("multiple entries for file %q", p)
505 // It's not an error if check is called with the same directory multiple
506 // times. check is called recursively on parent directories, so check
507 // may be called on the same directory many times.
509 cc[fold] = pathInfo{path: p, isDir: isDir}
512 if parent := path.Dir(p); parent != "." {
513 return cc.check(parent, true)
// zipError wraps an error with the operation (verb) that failed and,
// when one was recorded, the path the operation applied to.
type zipError struct {
	verb, path string
	err        error
}

// Error formats e as "<verb>: <err>", or as "<verb> <path>: <err>" when a
// path was recorded.
func (e *zipError) Error() string {
	if e.path == "" {
		return fmt.Sprintf("%s: %v", e.verb, e.err)
	}
	return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
}

// Unwrap returns the wrapped error, so errors.Is and errors.As can inspect it.
func (e *zipError) Unwrap() error {
	return e.err
}
535 // strToFold returns a string with the property that
536 // strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
537 // This lets us test a large set of strings for fold-equivalent
538 // duplicates without making a quadratic number of calls
539 // to EqualFold. Note that strings.ToUpper and strings.ToLower
540 // do not have the desired property in some corner cases.
541 func strToFold(s string) string {
542 // Fast path: all ASCII, no upper case.
543 // Most paths look like this already.
544 for i := 0; i < len(s); i++ {
546 if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' {
554 for _, r := range s {
555 // SimpleFold(x) cycles to the next equivalent rune > x
556 // or wraps around to smaller values. Iterate until it wraps,
557 // and we've found the minimum value.
560 r = unicode.SimpleFold(r0)
565 // Exception to allow fast path above: A-Z => a-z
566 if 'A' <= r && r <= 'Z' {