1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 //go:build !js && !nacl && !plan9 && !solaris && !windows
6 // +build !js,!nacl,!plan9,!solaris,!windows
10 Splitdwarf uncompresses and copies the DWARF segment of a Mach-O
11 executable into the "dSYM" file expected by lldb and ports of gdb
14 Usage: splitdwarf osxMachoFile [ osxDsymFile ]
16 Unless a dSYM file name is provided on the command line,
17 splitdwarf will place it where the OSX tools expect it, in
18 "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>",
19 creating directories as necessary.
22 package main // import "golang.org/x/tools/cmd/splitdwarf"
33 "golang.org/x/tools/cmd/splitdwarf/internal/macho"
37 pageAlign = 12 // 4096 = 1 << 12
// note formats its arguments the way fmt.Fprintf does, with a newline appended
// to the format, and writes the resulting line to standard error.
40 func note(format string, why ...interface{}) {
41 fmt.Fprintf(os.Stderr, format+"\n", why...)
// fail accepts the same printf-style arguments as note.
// NOTE(review): every call site sits on an error path and the code after it
// assumes success, so fail presumably reports the message and exits — confirm.
44 func fail(format string, why ...interface{}) {
49 // splitdwarf inputexe [ outputdwarf ]
51 if len(os.Args) < 2 || len(os.Args) > 3 {
53 Usage: %s input_exe [ output_dsym ]
54 Reads the executable input_exe, uncompresses and copies debugging
55 information into output_dsym. If output_dsym is not specified,
57 input_exe.dSYM/Contents/Resources/DWARF/input_exe
58 is used instead. That is the path that gdb and lldb expect
59 on OSX. Input_exe needs a UUID segment; if that is missing,
60 then one is created and added. In that case, the permissions
61 for input_exe need to allow writing.
66 // Read input, find DWARF, be sure it looks right
67 inputExe := os.Args[1]
68 exeFile, err := os.Open(inputExe)
72 exeMacho, err := macho.NewFile(exeFile)
74 fail("(internal) Couldn't create macho, %v", err)
76 // Postpone dealing with output till input is known-good
78 // describe(&exeMacho.FileTOC)
80 // Offsets into __LINKEDIT:
82 // Command LC_SYMTAB =
83 // (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries
85 // StringTableIndex uint32
86 // Type, SectionIndex uint8
91 // (2) string table offset and size. Strings are zero-byte terminated. First must be " ".
93 // Command LC_DYSYMTAB = indices within symtab (above), except for IndSym
94 // IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab.
96 // Section __TEXT.__symbol_stub1.
97 // Offset and size (Reserved2) locate and describe a table for this section.
98 // Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table.
99 // (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline])
101 // Section __DATA.__nl_symbol_ptr.
102 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
103 // Some of these symbols appear to be duplicates of other indirect symbols appearing early
105 // Section __DATA.__la_symbol_ptr.
106 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
107 // Some of these symbols appear to be duplicates of other indirect symbols appearing early
110 // Create a File for the output dwarf.
111 // Copy header, file type is MH_DSYM
112 // Copy the relevant load commands
115 // Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these).
116 // Segment __PAGEZERO
117 // Segment __TEXT (zero the size, zero the offset of each section)
118 // Segment __DATA (zero the size, zero the offset of each section)
119 // Segment __LINKEDIT (contains the symbols and strings from Symtab)
120 // Segment __DWARF (uncompressed)
123 for _, l := range exeMacho.Loads {
126 uuid = l.(*macho.Uuid)
130 // Ensure a given load is not nil
131 nonnilC := func(l macho.Load, s string) {
133 fail("input file %s lacks load command %s", inputExe, s)
137 // Find a segment by name and ensure it is not nil
138 nonnilS := func(s string) *macho.Segment {
139 l := exeMacho.Segment(s)
141 fail("input file %s lacks segment %s", inputExe, s)
146 newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0)
148 symtab := exeMacho.Symtab
149 dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output
150 nonnilC(symtab, "symtab")
151 nonnilC(dysymtab, "dysymtab")
152 text := nonnilS("__TEXT")
153 data := nonnilS("__DATA")
154 linkedit := nonnilS("__LINKEDIT")
155 pagezero := nonnilS("__PAGEZERO")
157 newtext := text.CopyZeroed()
158 newdata := data.CopyZeroed()
159 newsymtab := symtab.Copy()
161 // Linkedit segment contains symbols and strings;
162 // Symtab refers to offsets into linkedit.
163 // This next bit initializes newsymtab and sets up data structures for the linkedit segment
164 linkeditsyms := []macho.Nlist64{}
165 linkeditstrings := []string{}
167 // Linkedit will begin at the second page, i.e., offset is one page from beginning
168 // Symbols come first
169 linkeditsymbase := uint32(1) << pageAlign
171 // Strings come second, offset by the number of symbols times their size.
172 // Only those symbols from dysymtab.defsym are written into the debugging information.
173 linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym
175 // The first two bytes of the strings are reserved for space, null (' ', \000)
176 linkeditstringcur := uint32(2)
178 newsymtab.Syms = newsymtab.Syms[:0]
179 newsymtab.Symoff = linkeditsymbase
180 newsymtab.Stroff = linkeditstringbase
181 newsymtab.Nsyms = dysymtab.Nextdefsym
182 for i := uint32(0); i < dysymtab.Nextdefsym; i++ {
183 ii := i + dysymtab.Iextdefsym
184 oldsym := symtab.Syms[ii]
185 newsymtab.Syms = append(newsymtab.Syms, oldsym)
187 linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: uint32(linkeditstringcur),
188 Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value})
189 linkeditstringcur += uint32(len(oldsym.Name)) + 1
190 linkeditstrings = append(linkeditstrings, oldsym.Name)
192 newsymtab.Strsize = linkeditstringcur
194 exeNeedsUuid := uuid == nil
196 uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}}
197 uuid.Len = uuid.LoadSize(newtoc)
198 copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16])
199 uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3
200 uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1
204 // For the specified segment (assumed to be in exeMacho) make a copy of its
205 // sections with appropriate fields zeroed out, and append them to the
206 // currently-last segment in newtoc.
207 copyZOdSections := func(g *macho.Segment) {
208 for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ {
209 s := exeMacho.Sections[i].Copy()
217 newtoc.AddLoad(newsymtab)
218 newtoc.AddSegment(pagezero)
219 newtoc.AddSegment(newtext)
220 copyZOdSections(text)
221 newtoc.AddSegment(newdata)
222 copyZOdSections(data)
224 newlinkedit := linkedit.Copy()
225 newlinkedit.Offset = uint64(linkeditsymbase)
226 newlinkedit.Filesz = uint64(linkeditstringcur)
227 newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file
228 newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign)
229 // The rest should copy over fine.
230 newtoc.AddSegment(newlinkedit)
232 dwarf := nonnilS("__DWARF")
233 newdwarf := dwarf.CopyZeroed()
234 newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign)
235 newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1)
236 newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file.
237 newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign)
238 newtoc.AddSegment(newdwarf)
240 // Map out Dwarf sections (that is, this is section descriptors, not their contents).
241 offset := uint32(newdwarf.Offset)
242 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
243 o := exeMacho.Sections[i]
246 us := o.UncompressedSize()
249 s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes.
252 if strings.HasPrefix(s.Name, "__z") {
253 s.Name = "__" + s.Name[3:] // remove "z"
260 // Write segments/sections.
261 // Only dwarf and linkedit contain anything interesting.
263 // Memory map the output file to get the buffer directly.
264 outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF"
265 if len(os.Args) > 2 {
266 outDwarf = os.Args[2]
268 err := os.MkdirAll(outDwarf, 0755)
272 outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe))
274 dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize()))
276 // (1) Linkedit segment
278 offset = uint32(newlinkedit.Offset)
279 for i := range linkeditsyms {
280 if exeMacho.Magic == macho.Magic64 {
281 offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder)
283 offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder)
287 // Initial two bytes of string table, followed by actual zero-terminated strings.
288 buffer[linkeditstringbase] = ' '
289 buffer[linkeditstringbase+1] = 0
290 offset = linkeditstringbase + 2
291 for _, str := range linkeditstrings {
292 for i := 0; i < len(str); i++ {
293 buffer[offset] = str[i]
301 ioff := newdwarf.Firstsect - dwarf.Firstsect
302 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
303 s := exeMacho.Sections[i]
305 s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:])
308 // Because "text" overlaps the header and the loads, write them afterwards, just in case.
312 err = syscall.Munmap(buffer)
314 fail("Munmap %s for dwarf output failed, %v", outDwarf, err)
316 err = dwarfFile.Close()
318 fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err)
321 if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command
322 hdr := exeMacho.FileTOC.FileHeader
323 oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
325 hdr.SizeCommands += uuid.LoadSize(newtoc)
327 mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0)
329 fail("Updating UUID in binary failed, %v", err)
331 exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)),
332 syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
334 fail("Mmap of %s for UUID update failed, %v", inputExe, err)
336 _ = hdr.Put(exebuf, newtoc.ByteOrder)
337 _ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder)
338 err = syscall.Munmap(exebuf)
340 fail("Munmap of %s for UUID update failed, %v", inputExe, err)
345 // CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file,
346 // and returns the file descriptor and mapped buffer.
347 func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) {
348 dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
350 fail("Open for mmap failed, %v", err)
352 err = os.Truncate(outDwarf, size)
354 fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err)
356 buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
358 fail("Mmap %s for dwarf output update failed, %v", outDwarf, err)
360 return dwarfFile, buffer
// describe dumps a human-readable summary of the Mach-O table of contents exem.
// It reports the file type and flags, walks every load command (expanding
// segment loads with their offset/size/address fields and one line per
// section), compares the recorded command size against the computed one, and
// reports the file size. Its only call in main is commented out, so it looks
// like a debugging aid.
363 func describe(exem *macho.FileTOC) {
364 note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags))
365 for i, l := range exem.Loads {
// Segment and section details are written to standard output with
// fmt.Printf, whereas note writes to standard error.
366 if s, ok := l.(*macho.Segment); ok {
367 fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name,
368 s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect)
// A segment's sections are the s.Nsect entries of exem.Sections
// starting at index s.Firstsect.
369 for j := uint32(0); j < s.Nsect; j++ {
370 c := exem.Sections[j+s.Firstsect]
371 fmt.Printf(" Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3)
// NOTE(review): presumably reached only for loads that are not segments — confirm.
374 fmt.Printf("Load %d is %v\n", i, l)
// Cross-check the header's recorded command size (SizeCommands) against
// the size computed from the load commands (LoadSize).
377 if exem.SizeCommands != exem.LoadSize() {
378 fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize())
380 note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize())
382 note("File size is %d", exem.FileSize())
385 // contentuuid returns a UUID derived from (some of) the content of an executable.
386 // Specifically included are the non-DWARF sections; specifically excluded are things
387 // that surely depend on the presence or absence of DWARF sections (e.g., section
388 // numbers, positions within the file, number of load commands).
389 // (It was considered desirable if this was insensitive to the presence of the
390 // __DWARF segment, however because it is not last, it moves other segments,
391 // whose contents appear to contain file offset references.)
392 func contentuuid(exem *macho.FileTOC) []byte {
394 for _, l := range exem.Loads {
395 if l.Command() == macho.LcUuid {
398 if s, ok := l.(*macho.Segment); ok {
399 if s.Name == "__DWARF" || s.Name == "__PAGEZERO" {
402 for j := uint32(0); j < s.Nsect; j++ {
403 c := exem.Sections[j+s.Firstsect]
406 } // Getting dependence on other load commands right is fiddly.