1 // Copyright 2019 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package zip provides functions for creating and extracting module zip files.
7 // Module zip files have several restrictions listed below. These are necessary
8 // to ensure that module zip files can be extracted consistently on supported
9 // platforms and file systems.
11 // • All file paths within a zip file must start with "<module>@<version>/",
12 // where "<module>" is the module path and "<version>" is the version.
13 // The module path must be valid (see golang.org/x/mod/module.CheckPath).
14 // The version must be valid and canonical (see
15 // golang.org/x/mod/module.CanonicalVersion). The path must have a major
16 // version suffix consistent with the version (see
17 // golang.org/x/mod/module.Check). The part of the file path after the
18 // "<module>@<version>/" prefix must be valid (see
19 // golang.org/x/mod/module.CheckFilePath).
21 // • No two file paths may be equal under Unicode case-folding (see
22 // strings.EqualFold).
24 // • A go.mod file may or may not appear in the top-level directory. If present,
25 // it must be named "go.mod", not any other case. Files named "go.mod"
26 // are not allowed in any other directory.
28 // • The total size in bytes of a module zip file may be at most MaxZipFile
29 // bytes (500 MiB). The total uncompressed size of the files within the
30 // zip may also be at most MaxZipFile bytes.
32 // • Each file's uncompressed size must match its declared 64-bit uncompressed
33 // size in the zip file header.
35 // • If the zip contains files named "<module>@<version>/go.mod" or
36 // "<module>@<version>/LICENSE", their sizes in bytes may be at most
37 // MaxGoMod or MaxLICENSE, respectively (both are 16 MiB).
39 // • Empty directories are ignored. File permissions and timestamps are also ignored.
42 // • Symbolic links and other irregular files are not allowed.
44 // Note that this package does not provide hashing functionality. See
45 // golang.org/x/mod/sumdb/dirhash.
62 "golang.org/x/mod/module"
66 // MaxZipFile is the maximum size in bytes of a module zip file. The
67 // go command will report an error if either the zip file or its extracted
68 // content is larger than this.
// 500 << 20 bytes is 500 MiB.
69 MaxZipFile = 500 << 20
71 // MaxGoMod is the maximum size in bytes of a go.mod file within a module zip.
75 // MaxLICENSE is the maximum size in bytes of a LICENSE file within a module zip.
80 // File provides an abstraction for a file in a directory, zip, or anything
81 // else that looks like a file.
83 // Path returns a clean slash-separated relative path from the module root
84 // directory to the file.
// checkFiles reports a path that differs from path.Clean(path) as invalid.
87 // Lstat returns information about the file. If the file is a symbolic link,
88 // Lstat returns information about the link itself, not the file it points to.
89 Lstat() (os.FileInfo, error)
91 // Open provides access to the data within a regular file. Open may return
92 // an error if called on a directory or symbolic link.
93 Open() (io.ReadCloser, error)
96 // CheckedFiles reports whether a set of files satisfy the name and size
97 // constraints required by module zip files. The constraints are listed in the
98 // package documentation.
100 // Functions that produce this report may include slightly different sets of
101 // files. See documentation for CheckFiles, CheckDir, and CheckZip for details.
102 type CheckedFiles struct {
103 // Valid is a list of file paths that should be included in a zip file.
// CheckDir rewrites these to file system paths under dir; checkZip records
// the full entry names as they appear in the zip.
106 // Omitted is a list of files that are ignored when creating a module zip
107 // file, along with the reason each file is ignored.
110 // Invalid is a list of files that should not be included in a module zip
111 // file, along with the reason each file is invalid.
// Err returns this list as a FileErrorList when it is non-empty.
114 // SizeError is non-nil if the total uncompressed size of the valid files
115 // exceeds the module zip size limit or if the zip file itself exceeds the limit.
120 // Err returns an error if CheckedFiles does not describe a valid module zip
121 // file. SizeError is returned if that field is set. A FileErrorList is returned
122 // if there are one or more invalid files. Other errors may be returned in the future.
124 func (cf CheckedFiles) Err() error {
// The size error is checked first, so it takes precedence over invalid files.
125 if cf.SizeError != nil {
// Otherwise the invalid entries, if any, are returned as one FileErrorList.
128 if len(cf.Invalid) > 0 {
129 return FileErrorList(cf.Invalid)
// FileErrorList is a list of per-file errors. CheckedFiles.Err returns the
// Invalid list converted to this type.
134 type FileErrorList []FileError
// Error builds a single message by writing the Error text of each element
// into a strings.Builder.
136 func (el FileErrorList) Error() string {
137 buf := &strings.Builder{}
139 for _, e := range el {
141 buf.WriteString(e.Error())
// FileError associates a file path (Path) with the error reported for it (Err).
147 type FileError struct {
// Error formats the error as "<Path>: <Err>".
152 func (e FileError) Error() string {
153 return fmt.Sprintf("%s: %s", e.Path, e.Err)
// Unwrap supports error unwrapping.
// NOTE(review): presumably returns e.Err; the body is not visible here — confirm.
156 func (e FileError) Unwrap() error {
161 // Predefined error messages for invalid files. Not exhaustive.
// checkFiles also reports errors returned by module.CheckFilePath, File.Lstat,
// and collisionChecker.check as invalid files.
162 errPathNotClean = errors.New("file path is not clean")
163 errPathNotRelative = errors.New("file path is not relative")
164 errGoModCase = errors.New("go.mod files must have lowercase names")
165 errGoModSize = fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
166 errLICENSESize = fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
168 // Predefined error messages for omitted files. Not exhaustive.
// errVCS and errSubmoduleDir are reported for directories by listFilesInDir;
// the others are reported per file by checkFiles and/or listFilesInDir.
169 errVCS = errors.New("directory is a version control repository")
170 errVendored = errors.New("file is in vendor directory")
171 errSubmoduleFile = errors.New("file is in another module")
172 errSubmoduleDir = errors.New("directory is in another module")
173 errHgArchivalTxt = errors.New("file is inserted by 'hg archive' and is always omitted")
174 errSymlink = errors.New("file is a symbolic link")
175 errNotRegular = errors.New("not a regular file")
178 // CheckFiles reports whether a list of files satisfy the name and size
179 // constraints listed in the package documentation. The returned CheckedFiles
180 // record contains lists of valid, invalid, and omitted files. Every file in
181 // the given list will be included in exactly one of those lists.
183 // CheckFiles returns an error if the returned CheckedFiles does not describe
184 // a valid module zip file (according to CheckedFiles.Err). The returned
185 // CheckedFiles is still populated when an error is returned.
187 // Note that CheckFiles will not open any files, so Create may still fail when
188 // CheckFiles is successful due to I/O errors and reported size differences.
189 func CheckFiles(files []File) (CheckedFiles, error) {
// The valid-file and size lists returned by checkFiles are only needed by
// Create, so they are discarded here.
190 cf, _, _ := checkFiles(files)
194 // checkFiles implements CheckFiles and also returns lists of valid files and
195 // their sizes, corresponding to cf.Valid. These lists are used in Create to
196 // avoid repeated calls to File.Lstat.
197 func checkFiles(files []File) (cf CheckedFiles, validFiles []File, validSizes []int64) {
// errPaths holds the paths that already have an entry in cf.Omitted or
// cf.Invalid.
198 errPaths := make(map[string]struct{})
// addError records err for path in cf.Omitted (omitted == true) or
// cf.Invalid (omitted == false).
// NOTE(review): the branch taken when path is already in errPaths is not
// visible here; presumably it returns so each path is reported once — confirm.
199 addError := func(path string, omitted bool, err error) {
200 if _, ok := errPaths[path]; ok {
203 errPaths[path] = struct{}{}
204 fe := FileError{Path: path, Err: err}
206 cf.Omitted = append(cf.Omitted, fe)
208 cf.Invalid = append(cf.Invalid, fe)
212 // Find directories containing go.mod files (other than the root).
213 // Files in these directories will be omitted.
214 // These directories will not be included in the output zip.
215 haveGoMod := make(map[string]bool)
216 for _, f := range files {
218 dir, base := path.Split(p)
// The base name is compared case-insensitively, so any case variant of
// go.mod is considered here.
219 if strings.EqualFold(base, "go.mod") {
220 info, err := f.Lstat()
222 addError(p, false, err)
// Only a regular go.mod file marks its directory.
225 if info.Mode().IsRegular() {
226 haveGoMod[dir] = true
231 inSubmodule := func(p string) bool {
233 dir, _ := path.Split(p)
// Second pass: classify each file as invalid, omitted, or valid.
244 collisions := make(collisionChecker)
// maxSize starts at the overall limit for the uncompressed contents.
245 maxSize := int64(MaxZipFile)
246 for _, f := range files {
248 if p != path.Clean(p) {
249 addError(p, false, errPathNotClean)
253 addError(p, false, errPathNotRelative)
256 if isVendoredPackage(p) {
257 addError(p, true, errVendored)
261 addError(p, true, errSubmoduleFile)
264 if p == ".hg_archival.txt" {
265 // Inserted by hg archive.
266 // The go command drops this regardless of the VCS being used.
267 addError(p, true, errHgArchivalTxt)
270 if err := module.CheckFilePath(p); err != nil {
271 addError(p, false, err)
// A top-level go.mod must be spelled exactly "go.mod".
274 if strings.ToLower(p) == "go.mod" && p != "go.mod" {
275 addError(p, false, errGoModCase)
278 info, err := f.Lstat()
280 addError(p, false, err)
283 if err := collisions.check(p, info.IsDir()); err != nil {
284 addError(p, false, err)
287 if info.Mode()&os.ModeType == os.ModeSymlink {
288 // Skip symbolic links (golang.org/issue/27093).
289 addError(p, true, errSymlink)
292 if !info.Mode().IsRegular() {
293 addError(p, true, errNotRegular)
// Only the first size violation is recorded in cf.SizeError.
297 if size >= 0 && size <= maxSize {
299 } else if cf.SizeError == nil {
300 cf.SizeError = fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
// go.mod and LICENSE at the module root have their own size limits.
302 if p == "go.mod" && size > MaxGoMod {
303 addError(p, false, errGoModSize)
306 if p == "LICENSE" && size > MaxLICENSE {
307 addError(p, false, errLICENSESize)
// The three slices are appended together so that validFiles[i] and
// validSizes[i] correspond to cf.Valid[i].
311 cf.Valid = append(cf.Valid, p)
312 validFiles = append(validFiles, f)
313 validSizes = append(validSizes, info.Size())
316 return cf, validFiles, validSizes
319 // CheckDir reports whether the files in dir satisfy the name and size
320 // constraints listed in the package documentation. The returned CheckedFiles
321 // record contains lists of valid, invalid, and omitted files. If a directory is
322 // omitted (for example, a nested module or vendor directory), it will appear in
323 // the omitted list, but its files won't be listed.
325 // CheckDir returns an error if it encounters an I/O error or if the returned
326 // CheckedFiles does not describe a valid module zip file (according to
327 // CheckedFiles.Err). The returned CheckedFiles is still populated when such
328 // an error is returned.
330 // Note that CheckDir will not open any files, so CreateFromDir may still fail
331 // when CheckDir is successful due to I/O errors.
332 func CheckDir(dir string) (CheckedFiles, error) {
333 // List files (as CreateFromDir would) and check which ones are omitted or invalid.
335 files, omitted, err := listFilesInDir(dir)
// If listing the directory fails, an empty CheckedFiles is returned with the error.
337 return CheckedFiles{}, err
339 cf, cfErr := CheckFiles(files)
340 _ = cfErr // ignore this error; we'll generate our own after rewriting paths.
342 // Replace all paths with file system paths.
343 // Paths returned by CheckFiles will be slash-separated paths relative to dir.
344 // That's probably not appropriate for error messages.
345 for i := range cf.Valid {
346 cf.Valid[i] = filepath.Join(dir, cf.Valid[i])
// Entries skipped by listFilesInDir are merged in before the rewrite, so
// their paths are joined with dir as well.
348 cf.Omitted = append(cf.Omitted, omitted...)
349 for i := range cf.Omitted {
350 cf.Omitted[i].Path = filepath.Join(dir, cf.Omitted[i].Path)
352 for i := range cf.Invalid {
353 cf.Invalid[i].Path = filepath.Join(dir, cf.Invalid[i].Path)
358 // CheckZip reports whether the files contained in a zip file satisfy the name
359 // and size constraints listed in the package documentation.
361 // CheckZip returns an error if the returned CheckedFiles does not describe
362 // a valid module zip file (according to CheckedFiles.Err). The returned
363 // CheckedFiles is still populated when an error is returned. CheckZip will
364 // also return an error if the module path or version is malformed or if it
365 // encounters an error reading the zip file.
367 // Note that CheckZip does not read individual files, so Unzip may still fail
368 // when CheckZip is successful due to I/O errors.
369 func CheckZip(m module.Version, zipFile string) (CheckedFiles, error) {
370 f, err := os.Open(zipFile)
// If the zip file cannot be opened, an empty CheckedFiles is returned.
372 return CheckedFiles{}, err
// checkZip also returns the *zip.Reader, which is not needed here.
375 _, cf, err := checkZip(m, f)
379 // checkZip implements CheckZip and also returns the *zip.Reader. This is
380 // used in Unzip to avoid redundant I/O.
381 func checkZip(m module.Version, f *os.File) (*zip.Reader, CheckedFiles, error) {
382 // Make sure the module path and version are valid.
383 if vers := module.CanonicalVersion(m.Version); vers != m.Version {
384 return nil, CheckedFiles{}, fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
386 if err := module.Check(m.Path, m.Version); err != nil {
387 return nil, CheckedFiles{}, err
390 // Check the total file size.
391 info, err := f.Stat()
393 return nil, CheckedFiles{}, err
395 zipSize := info.Size()
// An oversized zip is rejected here, before the archive is parsed.
396 if zipSize > MaxZipFile {
397 cf := CheckedFiles{SizeError: fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)}
398 return nil, cf, cf.Err()
401 // Check for valid file names, collisions.
// Invalid entries are reported under their full name in the zip (zf.Name),
// including the module prefix.
403 addError := func(zf *zip.File, err error) {
404 cf.Invalid = append(cf.Invalid, FileError{Path: zf.Name, Err: err})
406 z, err := zip.NewReader(f, zipSize)
408 return nil, CheckedFiles{}, err
// Every entry must be named "<module>@<version>/...".
410 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
411 collisions := make(collisionChecker)
413 for _, zf := range z.File {
414 if !strings.HasPrefix(zf.Name, prefix) {
415 addError(zf, fmt.Errorf("path does not have prefix %q", prefix))
418 name := zf.Name[len(prefix):]
// A trailing slash marks a directory entry; the slash is removed before
// the path checks below.
422 isDir := strings.HasSuffix(name, "/")
424 name = name[:len(name)-1]
426 if path.Clean(name) != name {
427 addError(zf, errPathNotClean)
430 if err := module.CheckFilePath(name); err != nil {
434 if err := collisions.check(name, isDir); err != nil {
// Any case variant of go.mod is examined: it must be in the module root
// and spelled exactly "go.mod".
441 if base := path.Base(name); strings.EqualFold(base, "go.mod") {
443 addError(zf, fmt.Errorf("go.mod file not in module root directory"))
446 if name != "go.mod" {
447 addError(zf, errGoModCase)
// UncompressedSize64 is unsigned; a declared size beyond the int64 range
// converts to a negative value, which the sz >= 0 test rejects.
// Only the first size violation is recorded in cf.SizeError.
451 sz := int64(zf.UncompressedSize64)
452 if sz >= 0 && MaxZipFile-size >= sz {
454 } else if cf.SizeError == nil {
455 cf.SizeError = fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
457 if name == "go.mod" && sz > MaxGoMod {
458 addError(zf, fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod))
461 if name == "LICENSE" && sz > MaxLICENSE {
462 addError(zf, fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE))
// Valid holds the full in-zip name, not the prefix-stripped one.
465 cf.Valid = append(cf.Valid, zf.Name)
468 return z, cf, cf.Err()
471 // Create builds a zip archive for module m from an abstract list of files
472 // and writes it to w.
474 // Create verifies the restrictions described in the package documentation
475 // and should not produce an archive that Unzip cannot extract. Create does not
476 // include files in the output archive if they don't belong in the module zip.
477 // In particular, Create will not include files in modules found in
478 // subdirectories, most files in vendor directories, or irregular files (such
479 // as symbolic links) in the output archive.
480 func Create(w io.Writer, m module.Version, files []File) (err error) {
// The named result err is rewritten as a zipError with the verb "create zip".
// NOTE(review): the enclosing defer/condition is not visible here — confirm.
483 err = &zipError{verb: "create zip", err: err}
487 // Check that the version is canonical, the module path is well-formed, and
488 // the major version suffix matches the major version.
489 if vers := module.CanonicalVersion(m.Version); vers != m.Version {
490 return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
492 if err := module.Check(m.Path, m.Version); err != nil {
496 // Check whether files are valid, not valid, or should be omitted.
497 // Also check that the valid files don't exceed the maximum size.
498 cf, validFiles, validSizes := checkFiles(files)
499 if err := cf.Err(); err != nil {
503 // Create the module zip file.
504 zw := zip.NewWriter(w)
505 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
// addFile writes one file into the archive under prefix+path. Inside this
// closure the path parameter shadows the path package.
507 addFile := func(f File, path string, size int64) error {
513 w, err := zw.Create(prefix + path)
// Copy at most size+1 bytes so that a file larger than its declared size
// can be detected.
517 lr := &io.LimitedReader{R: rc, N: size + 1}
518 if _, err := io.Copy(w, lr); err != nil {
522 return fmt.Errorf("file %q is larger than declared size", path)
// validSizes[i] is the size checkFiles recorded for validFiles[i].
527 for i, f := range validFiles {
529 size := validSizes[i]
530 if err := addFile(f, p, size); err != nil {
538 // CreateFromDir creates a module zip file for module m from the contents of
539 // a directory, dir. The zip content is written to w.
541 // CreateFromDir verifies the restrictions described in the package
542 // documentation and should not produce an archive that Unzip cannot extract.
543 // CreateFromDir does not include files in the output archive if they don't
544 // belong in the module zip. In particular, CreateFromDir will not include
545 // files in modules found in subdirectories, most files in vendor directories,
546 // or irregular files (such as symbolic links) in the output archive.
547 // Additionally, unlike Create, CreateFromDir will not include directories
548 // named ".bzr", ".git", ".hg", or ".svn".
549 func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
// An error that is already a *zipError is handled in its own branch (body
// not visible here); any other non-nil error is wrapped with dir as its path.
551 if zerr, ok := err.(*zipError); ok {
553 } else if err != nil {
554 err = &zipError{verb: "create zip", path: dir, err: err}
// The list of omitted entries is not needed here and is discarded.
558 files, _, err := listFilesInDir(dir)
563 return Create(w, m, files)
// dirFile provides the Path, Lstat, and Open methods for a file on disk.
// listFilesInDir appends dirFile values to its []File result.
566 type dirFile struct {
// filePath is the path passed to os.Open; slashPath is the value returned
// by Path.
567 filePath, slashPath string
// Path returns the stored slash-separated path.
571 func (f dirFile) Path() string { return f.slashPath }
// Lstat returns the stored FileInfo without touching the file system.
572 func (f dirFile) Lstat() (os.FileInfo, error) { return f.info, nil }
// Open opens the file at filePath with os.Open.
573 func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) }
575 // isVendoredPackage attempts to report whether the given filename is contained
576 // in a package whose import path contains (but does not end with) the component "vendor".
579 // Unfortunately, isVendoredPackage reports false positives for files in any
580 // non-top-level package whose import path ends in "vendor".
581 func isVendoredPackage(name string) bool {
// Two cases are recognized: a leading "vendor/" and an interior "/vendor/".
583 if strings.HasPrefix(name, "vendor/") {
585 } else if j := strings.Index(name, "/vendor/"); j >= 0 {
586 // This offset looks incorrect; this should probably be
588 // i = j + len("/vendor/")
590 // (See https://golang.org/issue/31562 and https://golang.org/issue/37397.)
591 // Unfortunately, we can't fix it without invalidating module checksums.
// The result is true when the remainder of name after offset i still
// contains a slash.
596 return strings.Contains(name[i:], "/")
599 // Unzip extracts the contents of a module zip file to a directory.
601 // Unzip checks all restrictions listed in the package documentation and returns
602 // an error if the zip archive is not valid. In some cases, files may be written
603 // to dir before an error is returned (for example, if a file's uncompressed
604 // size does not match its declared size).
606 // dir may or may not exist: Unzip will create it and any missing parent
607 // directories if it doesn't exist. If dir exists, it must be empty.
608 func Unzip(dir string, m module.Version, zipFile string) (err error) {
// The named result err is rewritten as a zipError with the verb "unzip" and
// the zip file as its path.
// NOTE(review): the enclosing defer/condition is not visible here — confirm.
611 err = &zipError{verb: "unzip", path: zipFile, err: err}
615 // Check that the directory is empty. Don't create it yet in case there's
616 // an error reading the zip.
// The ReadDir error is ignored here; dir is created with MkdirAll below.
617 if files, _ := ioutil.ReadDir(dir); len(files) > 0 {
618 return fmt.Errorf("target directory %v exists and is not empty", dir)
621 // Open the zip and check that it satisfies all restrictions.
622 f, err := os.Open(zipFile)
627 z, cf, err := checkZip(m, f)
631 if err := cf.Err(); err != nil {
635 // Unzip, enforcing sizes declared in the zip file.
636 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
637 if err := os.MkdirAll(dir, 0777); err != nil {
640 for _, zf := range z.File {
// checkZip reports any entry whose name lacks prefix as invalid, and
// cf.Err is checked above, so slicing off len(prefix) is safe here.
641 name := zf.Name[len(prefix):]
642 if name == "" || strings.HasSuffix(name, "/") {
645 dst := filepath.Join(dir, name)
646 if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
// O_EXCL makes the open fail if dst already exists; the file is created
// with read-only permission bits (0444).
649 w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
// Allow one byte beyond the declared size so that oversized content can
// be detected.
658 lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
659 _, err = io.Copy(w, lr)
665 if err := w.Close(); err != nil {
669 return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
676 // collisionChecker finds case-insensitive name collisions and paths that
677 // are listed as both files and directories.
679 // The keys of this map are processed with strToFold. pathInfo has the original
680 // path for each folded path.
681 type collisionChecker map[string]pathInfo
// pathInfo is the value stored for each folded key: the original path and
// whether it was registered as a directory (set in collisionChecker.check).
683 type pathInfo struct {
// check registers p (a file, or a directory when isDir is true) and returns
// an error if an earlier entry with the same folded key conflicts with it:
// a case-insensitive collision with a different path, the same path seen as
// both a file and a directory, or a repeated file entry. It then checks p's
// parent directory, recursing until path.Dir returns ".".
688 func (cc collisionChecker) check(p string, isDir bool) error {
690 if other, ok := cc[fold]; ok {
692 return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
694 if isDir != other.isDir {
695 return fmt.Errorf("entry %q is both a file and a directory", p)
698 return fmt.Errorf("multiple entries for file %q", p)
700 // It's not an error if check is called with the same directory multiple
701 // times. check is called recursively on parent directories, so check
702 // may be called on the same directory many times.
// First time this folded key is seen: remember the original path and kind.
704 cc[fold] = pathInfo{path: p, isDir: isDir}
// Parents are always registered as directories.
707 if parent := path.Dir(p); parent != "." {
708 return cc.check(parent, true)
713 // listFilesInDir walks the directory tree rooted at dir and returns a list of
714 // files, as well as a list of directories and files that were skipped (for
715 // example, nested modules and symbolic links).
716 func listFilesInDir(dir string) (files []File, omitted []FileError, err error) {
717 err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
// Paths are recorded relative to dir and slash-separated, the form that
// File.Path is documented to return.
721 relPath, err := filepath.Rel(dir, filePath)
725 slashPath := filepath.ToSlash(relPath)
727 // Skip some subdirectories inside vendor, but maintain bug
728 // golang.org/issue/31562, described in isVendoredPackage.
729 // We would like Create and CreateFromDir to produce the same result
730 // for a set of files, whether expressed as a directory tree or zip.
731 if isVendoredPackage(slashPath) {
732 omitted = append(omitted, FileError{Path: slashPath, Err: errVendored})
738 // Don't skip the top-level directory.
742 // Skip VCS directories.
743 // fossil repos are regular files with arbitrary names, so we don't try to exclude them.
745 switch filepath.Base(filePath) {
746 case ".bzr", ".git", ".hg", ".svn":
747 omitted = append(omitted, FileError{Path: slashPath, Err: errVCS})
748 return filepath.SkipDir
751 // Skip submodules (directories containing go.mod files).
// A go.mod entry that is itself a directory does not mark a submodule.
752 if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
753 omitted = append(omitted, FileError{Path: slashPath, Err: errSubmoduleDir})
754 return filepath.SkipDir
759 // Skip irregular files and files in vendor directories.
760 // Irregular files are ignored. They're typically symbolic links.
761 if !info.Mode().IsRegular() {
762 omitted = append(omitted, FileError{Path: slashPath, Err: errNotRegular})
// Everything that reaches this point is recorded as a dirFile.
766 files = append(files, dirFile{
768 slashPath: slashPath,
776 return files, omitted, nil
// zipError wraps an error with the operation being performed (verb) and,
// optionally, the path involved. Create, CreateFromDir, and Unzip construct
// values of this type.
779 type zipError struct {
// Error formats the error as "<verb>: <err>" or "<verb> <path>: <err>".
// NOTE(review): the condition choosing between the two forms is not visible
// here; presumably it depends on whether path is empty — confirm.
784 func (e *zipError) Error() string {
786 return fmt.Sprintf("%s: %v", e.verb, e.err)
788 return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
// Unwrap supports error unwrapping.
// NOTE(review): presumably returns e.err; the body is not visible here — confirm.
792 func (e *zipError) Unwrap() error {
796 // strToFold returns a string with the property that
797 // strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
798 // This lets us test a large set of strings for fold-equivalent
799 // duplicates without making a quadratic number of calls
800 // to EqualFold. Note that strings.ToUpper and strings.ToLower
801 // do not have the desired property in some corner cases.
802 func strToFold(s string) string {
803 // Fast path: all ASCII, no upper case.
804 // Most paths look like this already.
805 for i := 0; i < len(s); i++ {
807 if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' {
815 for _, r := range s {
816 // SimpleFold(x) cycles to the next equivalent rune > x
817 // or wraps around to smaller values. Iterate until it wraps,
818 // and we've found the minimum value.
821 r = unicode.SimpleFold(r0)
826 // Exception to allow fast path above: A-Z => a-z
827 if 'A' <= r && r <= 'Z' {