// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package dwarf import ( "errors" "fmt" "io" "path" "strings" ) // A LineReader reads a sequence of LineEntry structures from a DWARF // "line" section for a single compilation unit. LineEntries occur in // order of increasing PC and each LineEntry gives metadata for the // instructions from that LineEntry's PC to just before the next // LineEntry's PC. The last entry will have its EndSequence field set. type LineReader struct { buf buf // Original .debug_line section data. Used by Seek. section []byte str []byte // .debug_str lineStr []byte // .debug_line_str // Header information version uint16 addrsize int segmentSelectorSize int minInstructionLength int maxOpsPerInstruction int defaultIsStmt bool lineBase int lineRange int opcodeBase int opcodeLengths []int directories []string fileEntries []*LineFile programOffset Offset // section offset of line number program endOffset Offset // section offset of byte following program initialFileEntries int // initial length of fileEntries // Current line number program state machine registers state LineEntry // public state fileIndex int // private state } // A LineEntry is a row in a DWARF line table. type LineEntry struct { // Address is the program-counter value of a machine // instruction generated by the compiler. This LineEntry // applies to each instruction from Address to just before the // Address of the next LineEntry. Address uint64 // OpIndex is the index of an operation within a VLIW // instruction. The index of the first operation is 0. For // non-VLIW architectures, it will always be 0. Address and // OpIndex together form an operation pointer that can // reference any individual operation within the instruction // stream. OpIndex int // File is the source file corresponding to these // instructions. 
File *LineFile // Line is the source code line number corresponding to these // instructions. Lines are numbered beginning at 1. It may be // 0 if these instructions cannot be attributed to any source // line. Line int // Column is the column number within the source line of these // instructions. Columns are numbered beginning at 1. It may // be 0 to indicate the "left edge" of the line. Column int // IsStmt indicates that Address is a recommended breakpoint // location, such as the beginning of a line, statement, or a // distinct subpart of a statement. IsStmt bool // BasicBlock indicates that Address is the beginning of a // basic block. BasicBlock bool // PrologueEnd indicates that Address is one (of possibly // many) PCs where execution should be suspended for a // breakpoint on entry to the containing function. // // Added in DWARF 3. PrologueEnd bool // EpilogueBegin indicates that Address is one (of possibly // many) PCs where execution should be suspended for a // breakpoint on exit from this function. // // Added in DWARF 3. EpilogueBegin bool // ISA is the instruction set architecture for these // instructions. Possible ISA values should be defined by the // applicable ABI specification. // // Added in DWARF 3. ISA int // Discriminator is an arbitrary integer indicating the block // to which these instructions belong. It serves to // distinguish among multiple blocks that may all have with // the same source file, line, and column. Where only one // block exists for a given source position, it should be 0. // // Added in DWARF 3. Discriminator int // EndSequence indicates that Address is the first byte after // the end of a sequence of target machine instructions. If it // is set, only this and the Address field are meaningful. A // line number table may contain information for multiple // potentially disjoint instruction sequences. The last entry // in a line table should always have EndSequence set. 
EndSequence bool } // A LineFile is a source file referenced by a DWARF line table entry. type LineFile struct { Name string Mtime uint64 // Implementation defined modification time, or 0 if unknown Length int // File length, or 0 if unknown } // LineReader returns a new reader for the line table of compilation // unit cu, which must be an Entry with tag TagCompileUnit. // // If this compilation unit has no line table, it returns nil, nil. func (d *Data) LineReader(cu *Entry) (*LineReader, error) { if d.line == nil { // No line tables available. return nil, nil } // Get line table information from cu. off, ok := cu.Val(AttrStmtList).(int64) if !ok { // cu has no line table. return nil, nil } if off > int64(len(d.line)) { return nil, errors.New("AttrStmtList value out of range") } // AttrCompDir is optional if all file names are absolute. Use // the empty string if it's not present. compDir, _ := cu.Val(AttrCompDir).(string) // Create the LineReader. u := &d.unit[d.offsetToUnit(cu.Offset)] buf := makeBuf(d, u, "line", Offset(off), d.line[off:]) // The compilation directory is implicitly directories[0]. r := LineReader{ buf: buf, section: d.line, str: d.str, lineStr: d.lineStr, } // Read the header. if err := r.readHeader(compDir); err != nil { return nil, err } // Initialize line reader state. r.Reset() return &r, nil } // readHeader reads the line number program header from r.buf and sets // all of the header fields in r. func (r *LineReader) readHeader(compDir string) error { buf := &r.buf // Read basic header fields [DWARF2 6.2.4]. 
hdrOffset := buf.off unitLength, dwarf64 := buf.unitLength() r.endOffset = buf.off + unitLength if r.endOffset > buf.off+Offset(len(buf.data)) { return DecodeError{"line", hdrOffset, fmt.Sprintf("line table end %d exceeds section size %d", r.endOffset, buf.off+Offset(len(buf.data)))} } r.version = buf.uint16() if buf.err == nil && (r.version < 2 || r.version > 5) { // DWARF goes to all this effort to make new opcodes // backward-compatible, and then adds fields right in // the middle of the header in new versions, so we're // picky about only supporting known line table // versions. return DecodeError{"line", hdrOffset, fmt.Sprintf("unknown line table version %d", r.version)} } if r.version >= 5 { r.addrsize = int(buf.uint8()) r.segmentSelectorSize = int(buf.uint8()) } else { r.addrsize = buf.format.addrsize() r.segmentSelectorSize = 0 } var headerLength Offset if dwarf64 { headerLength = Offset(buf.uint64()) } else { headerLength = Offset(buf.uint32()) } r.programOffset = buf.off + headerLength r.minInstructionLength = int(buf.uint8()) if r.version >= 4 { // [DWARF4 6.2.4] r.maxOpsPerInstruction = int(buf.uint8()) } else { r.maxOpsPerInstruction = 1 } r.defaultIsStmt = buf.uint8() != 0 r.lineBase = int(int8(buf.uint8())) r.lineRange = int(buf.uint8()) // Validate header. if buf.err != nil { return buf.err } if r.maxOpsPerInstruction == 0 { return DecodeError{"line", hdrOffset, "invalid maximum operations per instruction: 0"} } if r.lineRange == 0 { return DecodeError{"line", hdrOffset, "invalid line range: 0"} } // Read standard opcode length table. This table starts with opcode 1. r.opcodeBase = int(buf.uint8()) r.opcodeLengths = make([]int, r.opcodeBase) for i := 1; i < r.opcodeBase; i++ { r.opcodeLengths[i] = int(buf.uint8()) } // Validate opcode lengths. 
if buf.err != nil { return buf.err } for i, length := range r.opcodeLengths { if known, ok := knownOpcodeLengths[i]; ok && known != length { return DecodeError{"line", hdrOffset, fmt.Sprintf("opcode %d expected to have length %d, but has length %d", i, known, length)} } } if r.version < 5 { // Read include directories table. r.directories = []string{compDir} for { directory := buf.string() if buf.err != nil { return buf.err } if len(directory) == 0 { break } if !pathIsAbs(directory) { // Relative paths are implicitly relative to // the compilation directory. directory = pathJoin(compDir, directory) } r.directories = append(r.directories, directory) } // Read file name list. File numbering starts with 1, // so leave the first entry nil. r.fileEntries = make([]*LineFile, 1) for { if done, err := r.readFileEntry(); err != nil { return err } else if done { break } } } else { dirFormat := r.readLNCTFormat() c := buf.uint() r.directories = make([]string, c) for i := range r.directories { dir, _, _, err := r.readLNCT(dirFormat, dwarf64) if err != nil { return err } r.directories[i] = dir } fileFormat := r.readLNCTFormat() c = buf.uint() r.fileEntries = make([]*LineFile, c) for i := range r.fileEntries { name, mtime, size, err := r.readLNCT(fileFormat, dwarf64) if err != nil { return err } r.fileEntries[i] = &LineFile{name, mtime, int(size)} } } r.initialFileEntries = len(r.fileEntries) return buf.err } // lnctForm is a pair of an LNCT code and a form. This represents an // entry in the directory name or file name description in the DWARF 5 // line number program header. type lnctForm struct { lnct int form format } // readLNCTFormat reads an LNCT format description. func (r *LineReader) readLNCTFormat() []lnctForm { c := r.buf.uint8() ret := make([]lnctForm, c) for i := range ret { ret[i].lnct = int(r.buf.uint()) ret[i].form = format(r.buf.uint()) } return ret } // readLNCT reads a sequence of LNCT entries and returns path information. 
func (r *LineReader) readLNCT(s []lnctForm, dwarf64 bool) (path string, mtime uint64, size uint64, err error) { var dir string for _, lf := range s { var str string var val uint64 switch lf.form { case formString: str = r.buf.string() case formStrp, formLineStrp: var off uint64 if dwarf64 { off = r.buf.uint64() } else { off = uint64(r.buf.uint32()) } if uint64(int(off)) != off { return "", 0, 0, DecodeError{"line", r.buf.off, "strp/line_strp offset out of range"} } var b1 buf if lf.form == formStrp { b1 = makeBuf(r.buf.dwarf, r.buf.format, "str", 0, r.str) } else { b1 = makeBuf(r.buf.dwarf, r.buf.format, "line_str", 0, r.lineStr) } b1.skip(int(off)) str = b1.string() if b1.err != nil { return "", 0, 0, DecodeError{"line", r.buf.off, b1.err.Error()} } case formStrpSup: // Supplemental sections not yet supported. if dwarf64 { r.buf.uint64() } else { r.buf.uint32() } case formStrx: // .debug_line.dwo sections not yet supported. r.buf.uint() case formStrx1: r.buf.uint8() case formStrx2: r.buf.uint16() case formStrx3: r.buf.uint24() case formStrx4: r.buf.uint32() case formData1: val = uint64(r.buf.uint8()) case formData2: val = uint64(r.buf.uint16()) case formData4: val = uint64(r.buf.uint32()) case formData8: val = r.buf.uint64() case formData16: r.buf.bytes(16) case formDwarfBlock: r.buf.bytes(int(r.buf.uint())) case formUdata: val = r.buf.uint() } switch lf.lnct { case lnctPath: path = str case lnctDirectoryIndex: if val >= uint64(len(r.directories)) { return "", 0, 0, DecodeError{"line", r.buf.off, "directory index out of range"} } dir = r.directories[val] case lnctTimestamp: mtime = val case lnctSize: size = val case lnctMD5: // Ignored. } } if dir != "" && path != "" { path = pathJoin(dir, path) } return path, mtime, size, nil } // readFileEntry reads a file entry from either the header or a // DW_LNE_define_file extended opcode and adds it to r.fileEntries. A // true return value indicates that there are no more entries to read. 
func (r *LineReader) readFileEntry() (bool, error) { name := r.buf.string() if r.buf.err != nil { return false, r.buf.err } if len(name) == 0 { return true, nil } off := r.buf.off dirIndex := int(r.buf.uint()) if !pathIsAbs(name) { if dirIndex >= len(r.directories) { return false, DecodeError{"line", off, "directory index too large"} } name = pathJoin(r.directories[dirIndex], name) } mtime := r.buf.uint() length := int(r.buf.uint()) // If this is a dynamically added path and the cursor was // backed up, we may have already added this entry. Avoid // updating existing line table entries in this case. This // avoids an allocation and potential racy access to the slice // backing store if the user called Files. if len(r.fileEntries) < cap(r.fileEntries) { fe := r.fileEntries[:len(r.fileEntries)+1] if fe[len(fe)-1] != nil { // We already processed this addition. r.fileEntries = fe return false, nil } } r.fileEntries = append(r.fileEntries, &LineFile{name, mtime, length}) return false, nil } // updateFile updates r.state.File after r.fileIndex has // changed or r.fileEntries has changed. func (r *LineReader) updateFile() { if r.fileIndex < len(r.fileEntries) { r.state.File = r.fileEntries[r.fileIndex] } else { r.state.File = nil } } // Next sets *entry to the next row in this line table and moves to // the next row. If there are no more entries and the line table is // properly terminated, it returns io.EOF. // // Rows are always in order of increasing entry.Address, but // entry.Line may go forward or backward. func (r *LineReader) Next(entry *LineEntry) error { if r.buf.err != nil { return r.buf.err } // Execute opcodes until we reach an opcode that emits a line // table entry. for { if len(r.buf.data) == 0 { return io.EOF } emit := r.step(entry) if r.buf.err != nil { return r.buf.err } if emit { return nil } } } // knownOpcodeLengths gives the opcode lengths (in varint arguments) // of known standard opcodes. 
var knownOpcodeLengths = map[int]int{ lnsCopy: 0, lnsAdvancePC: 1, lnsAdvanceLine: 1, lnsSetFile: 1, lnsNegateStmt: 0, lnsSetBasicBlock: 0, lnsConstAddPC: 0, lnsSetPrologueEnd: 0, lnsSetEpilogueBegin: 0, lnsSetISA: 1, // lnsFixedAdvancePC takes a uint8 rather than a varint; it's // unclear what length the header is supposed to claim, so // ignore it. } // step processes the next opcode and updates r.state. If the opcode // emits a row in the line table, this updates *entry and returns // true. func (r *LineReader) step(entry *LineEntry) bool { opcode := int(r.buf.uint8()) if opcode >= r.opcodeBase { // Special opcode [DWARF2 6.2.5.1, DWARF4 6.2.5.1] adjustedOpcode := opcode - r.opcodeBase r.advancePC(adjustedOpcode / r.lineRange) lineDelta := r.lineBase + adjustedOpcode%r.lineRange r.state.Line += lineDelta goto emit } switch opcode { case 0: // Extended opcode [DWARF2 6.2.5.3] length := Offset(r.buf.uint()) startOff := r.buf.off opcode := r.buf.uint8() switch opcode { case lneEndSequence: r.state.EndSequence = true *entry = r.state r.resetState() case lneSetAddress: switch r.addrsize { case 1: r.state.Address = uint64(r.buf.uint8()) case 2: r.state.Address = uint64(r.buf.uint16()) case 4: r.state.Address = uint64(r.buf.uint32()) case 8: r.state.Address = r.buf.uint64() default: r.buf.error("unknown address size") } case lneDefineFile: if done, err := r.readFileEntry(); err != nil { r.buf.err = err return false } else if done { r.buf.err = DecodeError{"line", startOff, "malformed DW_LNE_define_file operation"} return false } r.updateFile() case lneSetDiscriminator: // [DWARF4 6.2.5.3] r.state.Discriminator = int(r.buf.uint()) } r.buf.skip(int(startOff + length - r.buf.off)) if opcode == lneEndSequence { return true } // Standard opcodes [DWARF2 6.2.5.2] case lnsCopy: goto emit case lnsAdvancePC: r.advancePC(int(r.buf.uint())) case lnsAdvanceLine: r.state.Line += int(r.buf.int()) case lnsSetFile: r.fileIndex = int(r.buf.uint()) r.updateFile() case lnsSetColumn: 
r.state.Column = int(r.buf.uint()) case lnsNegateStmt: r.state.IsStmt = !r.state.IsStmt case lnsSetBasicBlock: r.state.BasicBlock = true case lnsConstAddPC: r.advancePC((255 - r.opcodeBase) / r.lineRange) case lnsFixedAdvancePC: r.state.Address += uint64(r.buf.uint16()) // DWARF3 standard opcodes [DWARF3 6.2.5.2] case lnsSetPrologueEnd: r.state.PrologueEnd = true case lnsSetEpilogueBegin: r.state.EpilogueBegin = true case lnsSetISA: r.state.ISA = int(r.buf.uint()) default: // Unhandled standard opcode. Skip the number of // arguments that the prologue says this opcode has. for i := 0; i < r.opcodeLengths[opcode]; i++ { r.buf.uint() } } return false emit: *entry = r.state r.state.BasicBlock = false r.state.PrologueEnd = false r.state.EpilogueBegin = false r.state.Discriminator = 0 return true } // advancePC advances "operation pointer" (the combination of Address // and OpIndex) in r.state by opAdvance steps. func (r *LineReader) advancePC(opAdvance int) { opIndex := r.state.OpIndex + opAdvance r.state.Address += uint64(r.minInstructionLength * (opIndex / r.maxOpsPerInstruction)) r.state.OpIndex = opIndex % r.maxOpsPerInstruction } // A LineReaderPos represents a position in a line table. type LineReaderPos struct { // off is the current offset in the DWARF line section. off Offset // numFileEntries is the length of fileEntries. numFileEntries int // state and fileIndex are the statement machine state at // offset off. state LineEntry fileIndex int } // Tell returns the current position in the line table. func (r *LineReader) Tell() LineReaderPos { return LineReaderPos{r.buf.off, len(r.fileEntries), r.state, r.fileIndex} } // Seek restores the line table reader to a position returned by Tell. // // The argument pos must have been returned by a call to Tell on this // line table. 
func (r *LineReader) Seek(pos LineReaderPos) { r.buf.off = pos.off r.buf.data = r.section[r.buf.off:r.endOffset] r.fileEntries = r.fileEntries[:pos.numFileEntries] r.state = pos.state r.fileIndex = pos.fileIndex } // Reset repositions the line table reader at the beginning of the // line table. func (r *LineReader) Reset() { // Reset buffer to the line number program offset. r.buf.off = r.programOffset r.buf.data = r.section[r.buf.off:r.endOffset] // Reset file entries list. r.fileEntries = r.fileEntries[:r.initialFileEntries] // Reset line number program state. r.resetState() } // resetState resets r.state to its default values func (r *LineReader) resetState() { // Reset the state machine registers to the defaults given in // [DWARF4 6.2.2]. r.state = LineEntry{ Address: 0, OpIndex: 0, File: nil, Line: 1, Column: 0, IsStmt: r.defaultIsStmt, BasicBlock: false, PrologueEnd: false, EpilogueBegin: false, ISA: 0, Discriminator: 0, } r.fileIndex = 1 r.updateFile() } // Files returns the file name table of this compilation unit as of // the current position in the line table. The file name table may be // referenced from attributes in this compilation unit such as // AttrDeclFile. // // Entry 0 is always nil, since file index 0 represents "no file". // // The file name table of a compilation unit is not fixed. Files // returns the file table as of the current position in the line // table. This may contain more entries than the file table at an // earlier position in the line table, though existing entries never // change. func (r *LineReader) Files() []*LineFile { return r.fileEntries } // ErrUnknownPC is the error returned by LineReader.ScanPC when the // seek PC is not covered by any entry in the line table. var ErrUnknownPC = errors.New("ErrUnknownPC") // SeekPC sets *entry to the LineEntry that includes pc and positions // the reader on the next entry in the line table. If necessary, this // will seek backwards to find pc. 
// // If pc is not covered by any entry in this line table, SeekPC // returns ErrUnknownPC. In this case, *entry and the final seek // position are unspecified. // // Note that DWARF line tables only permit sequential, forward scans. // Hence, in the worst case, this takes time linear in the size of the // line table. If the caller wishes to do repeated fast PC lookups, it // should build an appropriate index of the line table. func (r *LineReader) SeekPC(pc uint64, entry *LineEntry) error { if err := r.Next(entry); err != nil { return err } if entry.Address > pc { // We're too far. Start at the beginning of the table. r.Reset() if err := r.Next(entry); err != nil { return err } if entry.Address > pc { // The whole table starts after pc. r.Reset() return ErrUnknownPC } } // Scan until we pass pc, then back up one. for { var next LineEntry pos := r.Tell() if err := r.Next(&next); err != nil { if err == io.EOF { return ErrUnknownPC } return err } if next.Address > pc { if entry.EndSequence { // pc is in a hole in the table. return ErrUnknownPC } // entry is the desired entry. Back up the // cursor to "next" and return success. r.Seek(pos) return nil } *entry = next } } // pathIsAbs reports whether path is an absolute path (or "full path // name" in DWARF parlance). This is in "whatever form makes sense for // the host system", so this accepts both UNIX-style and DOS-style // absolute paths. We avoid the filepath package because we want this // to behave the same regardless of our host system and because we // don't know what system the paths came from. func pathIsAbs(path string) bool { _, path = splitDrive(path) return len(path) > 0 && (path[0] == '/' || path[0] == '\\') } // pathJoin joins dirname and filename. filename must be relative. // DWARF paths can be UNIX-style or DOS-style, so this handles both. 
func pathJoin(dirname, filename string) string {
	if len(dirname) == 0 {
		return filename
	}
	// dirname should be absolute, which means we can determine
	// whether it's a DOS path reasonably reliably by looking for
	// a drive letter or UNC path.
	drive, dirname := splitDrive(dirname)
	if drive == "" {
		// UNIX-style path.
		return path.Join(dirname, filename)
	}
	// DOS-style path.
	drive2, filename := splitDrive(filename)
	if drive2 != "" {
		if !strings.EqualFold(drive, drive2) {
			// Different drives. There's not much we can
			// do here, so just ignore the directory.
			return drive2 + filename
		}
		// Drives are the same. Ignore drive on filename.
	}
	// Add a separator unless dirname is empty or already ends in
	// one. Reuse the directory's own separator style: forward
	// slash if it starts with one, backslash otherwise.
	if !(strings.HasSuffix(dirname, "/") || strings.HasSuffix(dirname, `\`)) && dirname != "" {
		sep := `\`
		if strings.HasPrefix(dirname, "/") {
			sep = `/`
		}
		dirname += sep
	}
	return drive + dirname + filename
}

// splitDrive splits the DOS drive letter or UNC share point from
// path, if any. path == drive + rest
//
// A drive is either a letter followed by a colon ("C:") or a UNC
// prefix of the form \\host\share, where either kind of slash may be
// used, host and share are both non-empty, and share is followed by a
// separator. If path has neither, drive is empty and rest is path.
func splitDrive(path string) (drive, rest string) {
	if len(path) >= 2 && path[1] == ':' {
		if c := path[0]; 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
			return path[:2], path[2:]
		}
	}
	if len(path) > 3 && (path[0] == '\\' || path[0] == '/') && (path[1] == '\\' || path[1] == '/') {
		const separators = `\/`
		// Get the host part, which must be non-empty.
		hostLen := strings.IndexAny(path[2:], separators)
		if hostLen > 0 {
			// Get the mount-point part, which must be
			// non-empty and terminated by a separator.
			shareStart := 2 + hostLen + 1
			shareLen := strings.IndexAny(path[shareStart:], separators)
			if shareLen > 0 {
				end := shareStart + shareLen
				return path[:end], path[end:]
			}
		}
	}
	return "", path
}