+++ /dev/null
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !js,!nacl,!plan9,!solaris,!windows
-
-/*
-
-Splitdwarf uncompresses and copies the DWARF segment of a Mach-O
-executable into the "dSYM" file expected by lldb and ports of gdb
-on OSX.
-
-Usage: splitdwarf osxMachoFile [ osxDsymFile ]
-
-Unless a dSYM file name is provided on the command line,
-splitdwarf will place it where the OSX tools expect it, in
-"<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>",
-creating directories as necessary.
-
-*/
-package main // import "golang.org/x/tools/cmd/splitdwarf"
-
-import (
- "crypto/sha256"
- "fmt"
- "io"
- "os"
- "path/filepath"
- "strings"
- "syscall"
-
- "golang.org/x/tools/cmd/splitdwarf/internal/macho"
-)
-
-const (
- pageAlign = 12 // log2 of the alignment applied to segment offsets, addresses and sizes: 1 << 12 = 4096 bytes
-)
-
-// note formats a diagnostic message from format and why, appends a newline,
-// and writes it to standard error. Write errors are ignored.
-func note(format string, why ...interface{}) {
-	line := fmt.Sprintf(format+"\n", why...)
-	_, _ = os.Stderr.WriteString(line) // diagnostics are best effort
-}
-
-// fail writes the formatted message, followed by a newline, to standard
-// error and then terminates the process with exit status 1. It never returns.
-func fail(format string, why ...interface{}) {
-	fmt.Fprintf(os.Stderr, format+"\n", why...)
-	os.Exit(1)
-}
-
-// main implements the command
-//
-//	splitdwarf inputexe [ outputdwarf ]
-//
-// It parses inputexe as a Mach-O file, builds a new table of contents of type
-// macho.MhDsym holding a UUID, an abbreviated symbol table, zeroed copies of
-// the __TEXT and __DATA segments, a rebuilt __LINKEDIT segment and the
-// uncompressed __DWARF segment, and writes the result through a memory-mapped
-// output file. If inputexe has no UUID load command, one is derived from its
-// content and is additionally written back into inputexe itself.
-//
-// Every error is reported through fail, which exits with status 1.
-func main() {
-	if len(os.Args) < 2 || len(os.Args) > 3 {
-		fmt.Printf(`
-Usage: %s input_exe [ output_dsym ]
-Reads the executable input_exe, uncompresses and copies debugging
-information into output_dsym. If output_dsym is not specified,
-the path
- input_exe.dSYM/Contents/Resources/DWARF/input_exe
-is used instead. That is the path that gdb and lldb expect
-on OSX. Input_exe needs a UUID segment; if that is missing,
-then one is created and added. In that case, the permissions
-for input_exe need to allow writing.
-`, os.Args[0])
-		return
-	}
-
-	// Read input, find DWARF, be sure it looks right
-	inputExe := os.Args[1]
-	exeFile, err := os.Open(inputExe)
-	if err != nil {
-		fail("%v", err)
-	}
-	// Section contents are still pulled out of exeMacho further down
-	// (presumably read from exeFile on demand), so the file stays open until
-	// main returns. fail exits the process directly and skips this defer,
-	// which is harmless.
-	defer exeFile.Close()
-
-	exeMacho, err := macho.NewFile(exeFile)
-	if err != nil {
-		fail("(internal) Couldn't create macho, %v", err)
-	}
-	// Postpone dealing with output till input is known-good
-
-	// describe(&exeMacho.FileTOC)
-
-	// Offsets into __LINKEDIT:
-	//
-	// Command LC_SYMTAB =
-	//  (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries
-	// struct {
-	//  StringTableIndex uint32
-	//  Type, SectionIndex uint8
-	//  Description uint16
-	//  Value uint64
-	// }
-	//
-	// (2) string table offset and size.  Strings are zero-byte terminated.  First must be " ".
-	//
-	// Command LC_DYSYMTAB = indices within symtab (above), except for IndSym
-	//   IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab.
-	//
-	// Section __TEXT.__symbol_stub1.
-	//   Offset and size (Reserved2) locate and describe a table for this section.
-	//   Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table.
-	//   (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline])
-	//
-	// Section __DATA.__nl_symbol_ptr.
-	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
-	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
-	//
-	// Section __DATA.__la_symbol_ptr.
-	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
-	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
-	//
-
-	// Create a File for the output dwarf.
-	// Copy header, file type is MH_DSYM
-	// Copy the relevant load commands
-
-	// LoadCmdUuid
-	// Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these).
-	// Segment __PAGEZERO
-	// Segment __TEXT (zero the size, zero the offset of each section)
-	// Segment __DATA (zero the size, zero the offset of each section)
-	// Segment __LINKEDIT (contains the symbols and strings from Symtab)
-	// Segment __DWARF (uncompressed)
-
-	// Remember the UUID load command, if the input has one.
-	var uuid *macho.Uuid
-	for _, l := range exeMacho.Loads {
-		if l.Command() == macho.LcUuid {
-			uuid = l.(*macho.Uuid)
-		}
-	}
-
-	// Find a segment by name and ensure it is not nil
-	nonnilS := func(s string) *macho.Segment {
-		l := exeMacho.Segment(s)
-		if l == nil {
-			fail("input file %s lacks segment %s", inputExe, s)
-		}
-		return l
-	}
-
-	newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0)
-
-	symtab := exeMacho.Symtab
-	dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output
-	// Compare the concrete values with nil directly. Passing them through a
-	// macho.Load interface parameter first would wrap a nil pointer in a
-	// non-nil interface value, and the check could never fire.
-	if symtab == nil {
-		fail("input file %s lacks load command %s", inputExe, "symtab")
-	}
-	if dysymtab == nil {
-		fail("input file %s lacks load command %s", inputExe, "dysymtab")
-	}
-	text := nonnilS("__TEXT")
-	data := nonnilS("__DATA")
-	linkedit := nonnilS("__LINKEDIT")
-	pagezero := nonnilS("__PAGEZERO")
-
-	// The range of defined external symbols comes from the input file; reject
-	// a range that does not fit in the symbol table instead of panicking on
-	// the index below.
-	if uint64(dysymtab.Iextdefsym)+uint64(dysymtab.Nextdefsym) > uint64(len(symtab.Syms)) {
-		fail("input file %s: dysymtab refers to symbols [%d, %d+%d) but symtab has only %d symbols",
-			inputExe, dysymtab.Iextdefsym, dysymtab.Iextdefsym, dysymtab.Nextdefsym, len(symtab.Syms))
-	}
-
-	newtext := text.CopyZeroed()
-	newdata := data.CopyZeroed()
-	newsymtab := symtab.Copy()
-
-	// Linkedit segment contain symbols and strings;
-	// Symtab refers to offsets into linkedit.
-	// This next bit initializes newsymtab and sets up data structures for the linkedit segment
-	linkeditsyms := make([]macho.Nlist64, 0, dysymtab.Nextdefsym)
-	linkeditstrings := make([]string, 0, dysymtab.Nextdefsym)
-
-	// Linkedit will begin at the second page, i.e., offset is one page from beginning
-	// Symbols come first
-	linkeditsymbase := uint32(1) << pageAlign
-
-	// Strings come second, offset by the number of symbols times their size.
-	// Only those symbols from dysymtab.defsym are written into the debugging information.
-	linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym
-
-	// The first two bytes of the strings are reserved for space, null (' ', \000)
-	linkeditstringcur := uint32(2)
-
-	newsymtab.Syms = newsymtab.Syms[:0]
-	newsymtab.Symoff = linkeditsymbase
-	newsymtab.Stroff = linkeditstringbase
-	newsymtab.Nsyms = dysymtab.Nextdefsym
-	for i := uint32(0); i < dysymtab.Nextdefsym; i++ {
-		ii := i + dysymtab.Iextdefsym
-		oldsym := symtab.Syms[ii]
-		newsymtab.Syms = append(newsymtab.Syms, oldsym)
-
-		// Name is the symbol's offset within the new string table.
-		linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: linkeditstringcur,
-			Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value})
-		linkeditstringcur += uint32(len(oldsym.Name)) + 1 // +1 for the terminating zero byte
-		linkeditstrings = append(linkeditstrings, oldsym.Name)
-	}
-	newsymtab.Strsize = linkeditstringcur
-
-	exeNeedsUuid := uuid == nil
-	if exeNeedsUuid {
-		uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}}
-		uuid.Len = uuid.LoadSize(newtoc)
-		copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16])
-		uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3
-		uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1
-	}
-	newtoc.AddLoad(uuid)
-
-	// For the specified segment (assumed to be in exeMacho) make a copy of its
-	// sections with appropriate fields zeroed out, and append them to the
-	// currently-last segment in newtoc.
-	copyZOdSections := func(g *macho.Segment) {
-		for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ {
-			s := exeMacho.Sections[i].Copy()
-			s.Offset = 0
-			s.Reloff = 0
-			s.Nreloc = 0
-			newtoc.AddSection(s)
-		}
-	}
-
-	newtoc.AddLoad(newsymtab)
-	newtoc.AddSegment(pagezero)
-	newtoc.AddSegment(newtext)
-	copyZOdSections(text)
-	newtoc.AddSegment(newdata)
-	copyZOdSections(data)
-
-	newlinkedit := linkedit.Copy()
-	newlinkedit.Offset = uint64(linkeditsymbase)
-	newlinkedit.Filesz = uint64(linkeditstringcur)
-	newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file
-	newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign)
-	// The rest should copy over fine.
-	newtoc.AddSegment(newlinkedit)
-
-	dwarf := nonnilS("__DWARF")
-	newdwarf := dwarf.CopyZeroed()
-	newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign)
-	newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1)
-	newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file.
-	newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign)
-	newtoc.AddSegment(newdwarf)
-
-	// Map out Dwarf sections (that is, this is section descriptors, not their contents).
-	offset := uint32(newdwarf.Offset)
-	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
-		o := exeMacho.Sections[i]
-		s := o.Copy()
-		s.Offset = offset
-		us := o.UncompressedSize()
-		if s.Size < us {
-			s.Size = uint64(us)
-			s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes.
-		}
-		offset += uint32(us)
-		if strings.HasPrefix(s.Name, "__z") {
-			s.Name = "__" + s.Name[3:] // remove "z"
-		}
-		s.Reloff = 0
-		s.Nreloc = 0
-		newtoc.AddSection(s)
-	}
-
-	// Write segments/sections.
-	// Only dwarf and linkedit contain anything interesting.
-
-	// Memory map the output file to get the buffer directly.
-	outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF"
-	if len(os.Args) > 2 {
-		outDwarf = os.Args[2]
-	} else {
-		if err := os.MkdirAll(outDwarf, 0755); err != nil {
-			fail("%v", err)
-		}
-		outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe))
-	}
-	dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize()))
-
-	// (1) Linkedit segment
-	// Symbol table
-	offset = uint32(newlinkedit.Offset)
-	for i := range linkeditsyms {
-		if exeMacho.Magic == macho.Magic64 {
-			offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder)
-		} else {
-			offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder)
-		}
-	}
-
-	// Initial two bytes of string table, followed by actual zero-terminated strings.
-	buffer[linkeditstringbase] = ' '
-	buffer[linkeditstringbase+1] = 0
-	offset = linkeditstringbase + 2
-	for _, str := range linkeditstrings {
-		offset += uint32(copy(buffer[offset:], str))
-		buffer[offset] = 0
-		offset++
-	}
-
-	// (2) DWARF segment
-	// ioff translates a section index in the input into the index of the
-	// corresponding section in newtoc.
-	ioff := newdwarf.Firstsect - dwarf.Firstsect
-	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
-		s := exeMacho.Sections[i]
-		j := i + ioff
-		s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:])
-	}
-
-	// Because "text" overlaps the header and the loads, write them afterwards, just in case.
-	// Write header.
-	newtoc.Put(buffer)
-
-	err = syscall.Munmap(buffer)
-	if err != nil {
-		fail("Munmap %s for dwarf output failed, %v", outDwarf, err)
-	}
-	err = dwarfFile.Close()
-	if err != nil {
-		fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err)
-	}
-
-	if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command
-		hdr := exeMacho.FileTOC.FileHeader
-		oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
-		hdr.NCommands++
-		hdr.SizeCommands += uuid.LoadSize(newtoc)
-		// The new command is written at oldCommandEnd, i.e. after the file
-		// header AND the existing commands, so the mapping has to reach the
-		// end of the header plus the enlarged command area.
-		// NOTE(review): nothing here checks that the input has unused space
-		// after its load commands for the extra command - confirm.
-		newCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
-
-		mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0)
-		if err != nil {
-			fail("Updating UUID in binary failed, %v", err)
-		}
-		exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(newCommandEnd), 1<<pageAlign)),
-			syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
-		if err != nil {
-			fail("Mmap of %s for UUID update failed, %v", inputExe, err)
-		}
-		_ = hdr.Put(exebuf, newtoc.ByteOrder)
-		_ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder)
-		err = syscall.Munmap(exebuf)
-		if err != nil {
-			fail("Munmap of %s for UUID update failed, %v", inputExe, err)
-		}
-		if err := mapf.Close(); err != nil {
-			fail("Close of %s after UUID update failed, %v", inputExe, err)
-		}
-	}
-}
-
-// CreateMmapFile creates the file outDwarf (truncating any existing file),
-// sets its length to size bytes, and maps those bytes read/write with
-// MAP_SHARED. It returns the open file and the mapped buffer; the caller
-// unmaps the buffer and closes the file when done.
-//
-// Any failure is reported through fail, which exits the process.
-func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) {
-	dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
-	if err != nil {
-		fail("Open for mmap failed, %v", err)
-	}
-	// Resize through the handle that was just opened, so the file being
-	// extended is the same one that gets mapped below.
-	if err := dwarfFile.Truncate(size); err != nil {
-		fail("Truncate/extend of %s to %d bytes failed, %v", outDwarf, size, err)
-	}
-	buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
-	if err != nil {
-		fail("Mmap %s for dwarf output update failed, %v", outDwarf, err)
-	}
-	return dwarfFile, buffer
-}
-
-// describe dumps a table of contents for debugging: the file type and flags,
-// one line per load command (plus one line per section for segments), and
-// the recorded versus computed load-command size. It exits through fail if
-// those two sizes disagree.
-func describe(exem *macho.FileTOC) {
-	note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags))
-	for idx, load := range exem.Loads {
-		seg, isSegment := load.(*macho.Segment)
-		if !isSegment {
-			fmt.Printf("Load %d is %v\n", idx, load)
-			continue
-		}
-		fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", idx, seg.Name,
-			seg.Offset, seg.Filesz, seg.Addr, seg.Memsz, seg.Nsect)
-		// The segment's sections are the Nsect entries starting at Firstsect.
-		for k := uint32(0); k < seg.Nsect; k++ {
-			sect := exem.Sections[k+seg.Firstsect]
-			fmt.Printf("   Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", sect.Name, sect.Offset, sect.Size, sect.Addr, sect.Flags, sect.Nreloc, sect.Reserved1, sect.Reserved2, sect.Reserved3)
-		}
-	}
-	// fail does not return, so the note below is reached only when the sizes agree.
-	if exem.SizeCommands != exem.LoadSize() {
-		fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize())
-	}
-	note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize())
-	note("File size is %d", exem.FileSize())
-}
-
-// contentuuid returns a UUID derived from (some of) the content of an executable.
-// Specifically included are the non-DWARF sections; specifically excluded are things
-// that surely depend on the presence or absence of DWARF sections (e.g., section
-// numbers, positions within file, number of load commands).
-// (It was considered desirable if this was insensitive to the presence of the
-// __DWARF segment, however because it is not last, it moves other segments,
-// whose contents appear to contain file offset references.)
-//
-// The result is the full SHA-256 digest of the section contents of every
-// segment other than __DWARF and __PAGEZERO, in load-command order; the caller
-// takes the bytes it needs from the front. A section that cannot be read is
-// reported through fail, which exits, rather than hashing partial content.
-func contentuuid(exem *macho.FileTOC) []byte {
-	h := sha256.New()
-	for _, l := range exem.Loads {
-		if l.Command() == macho.LcUuid {
-			continue
-		}
-		s, ok := l.(*macho.Segment)
-		if !ok {
-			// Getting dependence on other load commands right is fiddly.
-			continue
-		}
-		if s.Name == "__DWARF" || s.Name == "__PAGEZERO" {
-			continue
-		}
-		for j := uint32(0); j < s.Nsect; j++ {
-			c := exem.Sections[j+s.Firstsect]
-			if _, err := io.Copy(h, c.Open()); err != nil {
-				fail("reading section %s.%s to compute UUID failed, %v", s.Name, c.Name, err)
-			}
-		}
-	}
-	return h.Sum(nil)
-}