1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // +build !js,!nacl,!plan9,!solaris,!windows
9 Splitdwarf uncompresses and copies the DWARF segment of a Mach-O
10 executable into the "dSYM" file expected by lldb and ports of gdb
13 Usage: splitdwarf osxMachoFile [ osxDsymFile ]
15 Unless a dSYM file name is provided on the command line,
16 splitdwarf will place it where the OSX tools expect it, in
17 "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>",
18 creating directories as necessary.
21 package main // import "golang.org/x/tools/cmd/splitdwarf"
32 "golang.org/x/tools/cmd/splitdwarf/internal/macho"
36 pageAlign = 12 // 4096 = 1 << 12
// note writes one diagnostic line to standard error. The message is built
// printf-style from format and why, with a newline appended to the format
// string before it is expanded.
func note(format string, why ...interface{}) {
	withNewline := format + "\n"
	fmt.Fprintf(os.Stderr, withNewline, why...)
}
// fail takes a printf-style format and arguments, with the same signature as note.
// NOTE(review): the body is not shown in this excerpt. Callers in this file
// invoke it on unrecoverable errors, so it presumably reports the message
// and terminates the program -- confirm.
43 func fail(format string, why ...interface{}) {
// main implements the splitdwarf command. It parses the input Mach-O
// executable, builds the table of contents for the dSYM companion file,
// writes the symbol table, string table and uncompressed DWARF sections
// into a memory-mapped output file and, when the input had no UUID load
// command, writes a content-derived UUID back into the input executable.
//
48 // splitdwarf inputexe [ outputdwarf ]
// Anything other than one or two command-line arguments gets the usage text below.
50 if len(os.Args) < 2 || len(os.Args) > 3 {
52 Usage: %s input_exe [ output_dsym ]
53 Reads the executable input_exe, uncompresses and copies debugging
54 information into output_dsym. If output_dsym is not specified,
56 input_exe.dSYM/Contents/Resources/DWARF/input_exe
57 is used instead. That is the path that gdb and lldb expect
58 on OSX. Input_exe needs a UUID segment; if that is missing,
59 then one is created and added. In that case, the permissions
60 for input_exe need to allow writing.
65 // Read input, find DWARF, be sure it looks right
66 inputExe := os.Args[1]
67 exeFile, err := os.Open(inputExe)
71 exeMacho, err := macho.NewFile(exeFile)
73 fail("(internal) Couldn't create macho, %v", err)
75 // Postpone dealing with output till input is known-good
77 // describe(&exeMacho.FileTOC)
79 // Offsets into __LINKEDIT:
81 // Command LC_SYMTAB =
82 // (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries
84 // StringTableIndex uint32
85 // Type, SectionIndex uint8
90 // (2) string table offset and size. Strings are zero-byte terminated. First must be " ".
92 // Command LC_DYSYMTAB = indices within symtab (above), except for IndSym
93 // IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab.
95 // Section __TEXT.__symbol_stub1.
96 // Offset and size (Reserved2) locate and describe a table for this section.
97 // Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table.
98 // (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline])
100 // Section __DATA.__nl_symbol_ptr.
101 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
102 // Some of these symbols appear to be duplicates of other indirect symbols appearing early
104 // Section __DATA.__la_symbol_ptr.
105 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
106 // Some of these symbols appear to be duplicates of other indirect symbols appearing early
109 // Create a File for the output dwarf.
110 // Copy header, file type is MH_DSYM
111 // Copy the relevant load commands
114 // Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these).
115 // Segment __PAGEZERO
116 // Segment __TEXT (zero the size, zero the offset of each section)
117 // Segment __DATA (zero the size, zero the offset of each section)
118 // Segment __LINKEDIT (contains the symbols and strings from Symtab)
119 // Segment __DWARF (uncompressed)
// Scan the input's load commands, remembering its UUID load command if one is found.
122 for _, l := range exeMacho.Loads {
125 uuid = l.(*macho.Uuid)
129 // Ensure a given load is not nil
130 nonnilC := func(l macho.Load, s string) {
132 fail("input file %s lacks load command %s", inputExe, s)
136 // Find a segment by name and ensure it is not nil
137 nonnilS := func(s string) *macho.Segment {
138 l := exeMacho.Segment(s)
140 fail("input file %s lacks segment %s", inputExe, s)
// newtoc is the table of contents for the dSYM output, derived from the
// input's table of contents with the file type set to MhDsym.
145 newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0)
147 symtab := exeMacho.Symtab
148 dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output
149 nonnilC(symtab, "symtab")
150 nonnilC(dysymtab, "dysymtab")
151 text := nonnilS("__TEXT")
152 data := nonnilS("__DATA")
153 linkedit := nonnilS("__LINKEDIT")
154 pagezero := nonnilS("__PAGEZERO")
156 newtext := text.CopyZeroed()
157 newdata := data.CopyZeroed()
158 newsymtab := symtab.Copy()
160 // The linkedit segment contains symbols and strings;
161 // Symtab refers to offsets into linkedit.
162 // This next bit initializes newsymtab and sets up data structures for the linkedit segment
163 linkeditsyms := []macho.Nlist64{}
164 linkeditstrings := []string{}
166 // Linkedit will begin at the second page, i.e., offset is one page from beginning
167 // Symbols come first
168 linkeditsymbase := uint32(1) << pageAlign
170 // Strings come second, offset by the number of symbols times their size.
171 // Only those symbols from dysymtab.defsym are written into the debugging information.
172 linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym
174 // The first two bytes of the strings are reserved for space, null (' ', \000)
175 linkeditstringcur := uint32(2)
177 newsymtab.Syms = newsymtab.Syms[:0]
178 newsymtab.Symoff = linkeditsymbase
179 newsymtab.Stroff = linkeditstringbase
180 newsymtab.Nsyms = dysymtab.Nextdefsym
// Copy the Nextdefsym symbols starting at index Iextdefsym. Each new symbol's
// Name field is its offset within the new string table, which grows by the
// name length plus one byte for the terminating zero.
181 for i := uint32(0); i < dysymtab.Nextdefsym; i++ {
182 ii := i + dysymtab.Iextdefsym
183 oldsym := symtab.Syms[ii]
184 newsymtab.Syms = append(newsymtab.Syms, oldsym)
186 linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: uint32(linkeditstringcur),
187 Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value})
188 linkeditstringcur += uint32(len(oldsym.Name)) + 1
189 linkeditstrings = append(linkeditstrings, oldsym.Name)
191 newsymtab.Strsize = linkeditstringcur
// exeNeedsUuid records that the input had no UUID load command. The
// replacement built below takes its 16 bytes from contentuuid and then has
// its version and variant bits forced.
193 exeNeedsUuid := uuid == nil
195 uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}}
196 uuid.Len = uuid.LoadSize(newtoc)
197 copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16])
198 uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see RFC 4122 section 4.1.3
199 uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see RFC 4122 section 4.1.1
203 // For the specified segment (assumed to be in exeMacho) make a copy of its
204 // sections with appropriate fields zeroed out, and append them to the
205 // currently-last segment in newtoc.
206 copyZOdSections := func(g *macho.Segment) {
207 for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ {
208 s := exeMacho.Sections[i].Copy()
// Populate the output table of contents. The AddSegment calls for __TEXT and
// __DATA are each followed by zeroed copies of that segment's sections.
216 newtoc.AddLoad(newsymtab)
217 newtoc.AddSegment(pagezero)
218 newtoc.AddSegment(newtext)
219 copyZOdSections(text)
220 newtoc.AddSegment(newdata)
221 copyZOdSections(data)
223 newlinkedit := linkedit.Copy()
224 newlinkedit.Offset = uint64(linkeditsymbase)
225 newlinkedit.Filesz = uint64(linkeditstringcur)
226 newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file
227 newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign)
228 // The rest should copy over fine.
229 newtoc.AddSegment(newlinkedit)
231 dwarf := nonnilS("__DWARF")
232 newdwarf := dwarf.CopyZeroed()
233 newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign)
234 newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1)
235 newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file.
236 newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign)
237 newtoc.AddSegment(newdwarf)
239 // Map out Dwarf sections (that is, this is section descriptors, not their contents).
240 offset := uint32(newdwarf.Offset)
241 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
242 o := exeMacho.Sections[i]
245 us := o.UncompressedSize()
248 s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes.
// A section name starting with "__z" is renamed by dropping the 'z'.
251 if strings.HasPrefix(s.Name, "__z") {
252 s.Name = "__" + s.Name[3:] // remove "z"
259 // Write segments/sections.
260 // Only dwarf and linkedit contain anything interesting.
262 // Memory map the output file to get the buffer directly.
// The default output directory is derived from the input name; an explicit
// second command-line argument replaces it.
263 outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF"
264 if len(os.Args) > 2 {
265 outDwarf = os.Args[2]
267 err := os.MkdirAll(outDwarf, 0755)
271 outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe))
273 dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize()))
275 // (1) Linkedit segment
// Symbol entries are written with Put64 when the input's magic is Magic64 and with Put32 otherwise.
277 offset = uint32(newlinkedit.Offset)
278 for i := range linkeditsyms {
279 if exeMacho.Magic == macho.Magic64 {
280 offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder)
282 offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder)
286 // Initial two bytes of string table, followed by actual zero-terminated strings.
287 buffer[linkeditstringbase] = ' '
288 buffer[linkeditstringbase+1] = 0
289 offset = linkeditstringbase + 2
290 for _, str := range linkeditstrings {
291 for i := 0; i < len(str); i++ {
292 buffer[offset] = str[i]
// ioff is the difference between the index of the first DWARF section in the
// output and in the input. Each input DWARF section is written, uncompressed,
// at the file offset recorded in an output section descriptor.
300 ioff := newdwarf.Firstsect - dwarf.Firstsect
301 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
302 s := exeMacho.Sections[i]
304 s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:])
307 // Because "text" overlaps the header and the loads, write them afterwards, just in case.
311 err = syscall.Munmap(buffer)
313 fail("Munmap %s for dwarf output failed, %v", outDwarf, err)
315 err = dwarfFile.Close()
317 fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err)
320 if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command
321 hdr := exeMacho.FileTOC.FileHeader
// oldCommandEnd is the offset just past the existing load commands; the new
// UUID command is written there.
322 oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
324 hdr.SizeCommands += uuid.LoadSize(newtoc)
326 mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0)
328 fail("Updating UUID in binary failed, %v", err)
// The mapping length is the enlarged command size rounded up to a whole page.
330 exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)),
331 syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
333 fail("Mmap of %s for UUID update failed, %v", inputExe, err)
335 _ = hdr.Put(exebuf, newtoc.ByteOrder)
336 _ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder)
337 err = syscall.Munmap(exebuf)
339 fail("Munmap of %s for UUID update failed, %v", inputExe, err)
344 // CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file,
345 // and returns the file descriptor and mapped buffer.
346 func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) {
347 dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
349 fail("Open for mmap failed, %v", err)
351 err = os.Truncate(outDwarf, size)
353 fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err)
355 buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
357 fail("Mmap %s for dwarf output update failed, %v", outDwarf, err)
359 return dwarfFile, buffer
362 func describe(exem *macho.FileTOC) {
363 note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags))
364 for i, l := range exem.Loads {
365 if s, ok := l.(*macho.Segment); ok {
366 fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name,
367 s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect)
368 for j := uint32(0); j < s.Nsect; j++ {
369 c := exem.Sections[j+s.Firstsect]
370 fmt.Printf(" Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3)
373 fmt.Printf("Load %d is %v\n", i, l)
376 if exem.SizeCommands != exem.LoadSize() {
377 fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize())
379 note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize())
381 note("File size is %d", exem.FileSize())
384 // contentuuid returns a UUID derived from (some of) the content of an executable.
385 // Specifically included are the non-DWARF sections; specifically excluded are things
386 // that surely depend on the presence or absence of DWARF sections (e.g., section
387 // numbers, positions within the file, number of load commands).
388 // (It was considered desirable for this to be insensitive to the presence of the
389 // __DWARF segment; however, because that segment is not last, it moves other segments,
390 // whose contents appear to contain file offset references.)
// NOTE(review): the statements that accumulate section content and produce
// the returned bytes are not shown in this excerpt; main uses the first 16
// bytes of the result.
391 func contentuuid(exem *macho.FileTOC) []byte {
393 for _, l := range exem.Loads {
// UUID load commands are singled out here; presumably they are skipped so
// that an existing UUID does not influence the result -- confirm.
394 if l.Command() == macho.LcUuid {
397 if s, ok := l.(*macho.Segment); ok {
// The __DWARF and __PAGEZERO segments are singled out by name; presumably
// they are skipped, matching the exclusions described above -- confirm.
398 if s.Name == "__DWARF" || s.Name == "__PAGEZERO" {
// Visit each of the Nsect sections of this segment, starting at Firstsect.
401 for j := uint32(0); j < s.Nsect; j++ {
402 c := exem.Sections[j+s.Firstsect]
405 } // Getting dependence on other load commands right is fiddly.