archive.go

Documentation: cmd/internal/archive

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package archive implements reading of archive files generated by the Go
     6  // toolchain.
     7  package archive
     8  
     9  import (
    10  	"bufio"
    11  	"bytes"
    12  	"cmd/internal/bio"
    13  	"cmd/internal/goobj"
    14  	"errors"
    15  	"fmt"
    16  	"io"
    17  	"log"
    18  	"os"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  	"unicode/utf8"
    23  )
    24  
    25  /*
    26  The archive format is:
    27  
    28  First, on a line by itself
    29  	!<arch>
    30  
    31  Then zero or more file records. Each file record has a fixed-size one-line header
    32  followed by data bytes followed by an optional padding byte. The header is:
    33  
    34  	%-16s%-12d%-6d%-6d%-8o%-10d`
    35  	name mtime uid gid mode size
    36  
    37  (note the trailing backquote). The %-16s here means at most 16 *bytes* of
    38  the name, and if shorter, space padded on the right.
    39  */
    40  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	// Offset is the position in the file at which the data starts.
	Offset int64
	// Size is the length of the data in bytes.
	Size   int64
}
    48  
// An Archive pairs the file backing an archive with the entries
// recorded for it, either by Parse or by AddEntry.
type Archive struct {
	// f is the underlying file; it is exposed through File.
	f       *os.File
	// Entries lists the file records in the order they were parsed or added.
	Entries []Entry
}
    53  
    54  func (a *Archive) File() *os.File { return a.f }
    55  
// An Entry describes one file record of an archive.
// Mtime, Uid, Gid and Mode are filled in by Parse only when it is
// called in verbose mode; otherwise they keep their zero values.
type Entry struct {
	// Name is the record name with trailing padding spaces removed.
	Name  string
	// Type classifies the record (package definition, Go object, native object).
	Type  EntryType
	// Mtime is the modification time in seconds since the Unix epoch.
	Mtime int64
	Uid   int
	Gid   int
	Mode  os.FileMode
	// Data locates the record's contents within the file.
	Data
	Obj *GoObj // nil if this entry is not a Go object file
}
    66  
// An EntryType classifies an archive entry.
type EntryType int

const (
	// EntryPkgDef marks the entry named "__.PKGDEF".
	EntryPkgDef EntryType = iota
	// EntryGoObj marks an entry whose data starts with the Go object header.
	EntryGoObj
	// EntryNativeObj marks any other entry.
	EntryNativeObj
)
    74  
    75  func (e *Entry) String() string {
    76  	return fmt.Sprintf("%s %6d/%-6d %12d %s %s",
    77  		(e.Mode & 0777).String(),
    78  		e.Uid,
    79  		e.Gid,
    80  		e.Size,
    81  		time.Unix(e.Mtime, 0).Format(timeFormat),
    82  		e.Name)
    83  }
    84  
// A GoObj describes the Go object file held by an archive entry
// (or by a stand-alone object file).
type GoObj struct {
	// TextHeader is the textual header of the object, up to and
	// including the terminating "\n!\n".
	TextHeader []byte
	// Arch is the fourth whitespace-separated field of the text header,
	// or empty if the header has fewer than four fields.
	Arch       string
	// Data locates the part of the object that follows the text header.
	Data
}
    90  
const (
	// entryHeader is the fmt layout of an entry header:
	// name, mtime, uid, gid, mode (octal), size, then a backquote and newline.
	entryHeader = "%s%-12d%-6d%-6d%-8o%-10d`\n"
	// In entryHeader the first entry, the name, is always printed as 16 bytes right-padded.
	// entryLen is the total header length in bytes: the six field widths
	// plus one byte each for the trailing backquote and newline (60 in all).
	entryLen   = 16 + 12 + 6 + 6 + 8 + 10 + 1 + 1
	// timeFormat is the layout used by Entry.String for modification times.
	timeFormat = "Jan _2 15:04 2006"
)
    97  
var (
	// archiveHeader is the 8-byte line that starts every archive file.
	archiveHeader = []byte("!<arch>\n")
	// archiveMagic is the two-byte terminator of every entry header.
	archiveMagic  = []byte("`\n")
	// goobjHeader is compared against the first 8 bytes of a file or
	// entry to recognize a Go object.
	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

	// Errors reported while parsing archives and object files.
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
   108  
   109  type ErrGoObjOtherVersion struct{ magic []byte }
   110  
   111  func (e ErrGoObjOtherVersion) Error() string {
   112  	return fmt.Sprintf("go object of a different version: %q", e.magic)
   113  }
   114  
// An objReader is an object file reader.
// It tracks its position in the file itself and keeps the first error
// it encounters, so that callers may defer error checking.
type objReader struct {
	// a is the archive being populated.
	a      *Archive
	// b is the buffered reader over the underlying file.
	b      *bio.Reader
	// err is the first error recorded via the error method, if any.
	err    error
	// offset is the file position of the next byte to be read.
	offset int64
	// limit is the file position at which the input ends.
	limit  int64
	// tmp is scratch space for headers and for skipping small amounts of data.
	tmp    [256]byte
}
   124  
   125  func (r *objReader) init(f *os.File) {
   126  	r.a = &Archive{f, nil}
   127  	r.offset, _ = f.Seek(0, os.SEEK_CUR)
   128  	r.limit, _ = f.Seek(0, os.SEEK_END)
   129  	f.Seek(r.offset, os.SEEK_SET)
   130  	r.b = bio.NewReader(f)
   131  }
   132  
   133  // error records that an error occurred.
   134  // It returns only the first error, so that an error
   135  // caused by an earlier error does not discard information
   136  // about the earlier error.
   137  func (r *objReader) error(err error) error {
   138  	if r.err == nil {
   139  		if err == io.EOF {
   140  			err = io.ErrUnexpectedEOF
   141  		}
   142  		r.err = err
   143  	}
   144  	// panic("corrupt") // useful for debugging
   145  	return r.err
   146  }
   147  
   148  // peek returns the next n bytes without advancing the reader.
   149  func (r *objReader) peek(n int) ([]byte, error) {
   150  	if r.err != nil {
   151  		return nil, r.err
   152  	}
   153  	if r.offset >= r.limit {
   154  		r.error(io.ErrUnexpectedEOF)
   155  		return nil, r.err
   156  	}
   157  	b, err := r.b.Peek(n)
   158  	if err != nil {
   159  		if err != bufio.ErrBufferFull {
   160  			r.error(err)
   161  		}
   162  	}
   163  	return b, err
   164  }
   165  
   166  // readByte reads and returns a byte from the input file.
   167  // On I/O error or EOF, it records the error but returns byte 0.
   168  // A sequence of 0 bytes will eventually terminate any
   169  // parsing state in the object file. In particular, it ends the
   170  // reading of a varint.
   171  func (r *objReader) readByte() byte {
   172  	if r.err != nil {
   173  		return 0
   174  	}
   175  	if r.offset >= r.limit {
   176  		r.error(io.ErrUnexpectedEOF)
   177  		return 0
   178  	}
   179  	b, err := r.b.ReadByte()
   180  	if err != nil {
   181  		if err == io.EOF {
   182  			err = io.ErrUnexpectedEOF
   183  		}
   184  		r.error(err)
   185  		b = 0
   186  	} else {
   187  		r.offset++
   188  	}
   189  	return b
   190  }
   191  
   192  // read reads exactly len(b) bytes from the input file.
   193  // If an error occurs, read returns the error but also
   194  // records it, so it is safe for callers to ignore the result
   195  // as long as delaying the report is not a problem.
   196  func (r *objReader) readFull(b []byte) error {
   197  	if r.err != nil {
   198  		return r.err
   199  	}
   200  	if r.offset+int64(len(b)) > r.limit {
   201  		return r.error(io.ErrUnexpectedEOF)
   202  	}
   203  	n, err := io.ReadFull(r.b, b)
   204  	r.offset += int64(n)
   205  	if err != nil {
   206  		return r.error(err)
   207  	}
   208  	return nil
   209  }
   210  
   211  // skip skips n bytes in the input.
   212  func (r *objReader) skip(n int64) {
   213  	if n < 0 {
   214  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   215  	}
   216  	if n < int64(len(r.tmp)) {
   217  		// Since the data is so small, a just reading from the buffered
   218  		// reader is better than flushing the buffer and seeking.
   219  		r.readFull(r.tmp[:n])
   220  	} else if n <= int64(r.b.Buffered()) {
   221  		// Even though the data is not small, it has already been read.
   222  		// Advance the buffer instead of seeking.
   223  		for n > int64(len(r.tmp)) {
   224  			r.readFull(r.tmp[:])
   225  			n -= int64(len(r.tmp))
   226  		}
   227  		r.readFull(r.tmp[:n])
   228  	} else {
   229  		// Seek, giving up buffered data.
   230  		r.b.MustSeek(r.offset+n, os.SEEK_SET)
   231  		r.offset += n
   232  	}
   233  }
   234  
   235  // New writes to f to make a new archive.
   236  func New(f *os.File) (*Archive, error) {
   237  	_, err := f.Write(archiveHeader)
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  	return &Archive{f: f}, nil
   242  }
   243  
   244  // Parse parses an object file or archive from f.
   245  func Parse(f *os.File, verbose bool) (*Archive, error) {
   246  	var r objReader
   247  	r.init(f)
   248  	t, err := r.peek(8)
   249  	if err != nil {
   250  		if err == io.EOF {
   251  			err = io.ErrUnexpectedEOF
   252  		}
   253  		return nil, err
   254  	}
   255  
   256  	switch {
   257  	default:
   258  		return nil, errNotObject
   259  
   260  	case bytes.Equal(t, archiveHeader):
   261  		if err := r.parseArchive(verbose); err != nil {
   262  			return nil, err
   263  		}
   264  	case bytes.Equal(t, goobjHeader):
   265  		off := r.offset
   266  		o := &GoObj{}
   267  		if err := r.parseObject(o, r.limit-off); err != nil {
   268  			return nil, err
   269  		}
   270  		r.a.Entries = []Entry{{
   271  			Name: f.Name(),
   272  			Type: EntryGoObj,
   273  			Data: Data{off, r.limit - off},
   274  			Obj:  o,
   275  		}}
   276  	}
   277  
   278  	return r.a, nil
   279  }
   280  
   281  // trimSpace removes trailing spaces from b and returns the corresponding string.
   282  // This effectively parses the form used in archive headers.
   283  func trimSpace(b []byte) string {
   284  	return string(bytes.TrimRight(b, " "))
   285  }
   286  
   287  // parseArchive parses a Unix archive of Go object files.
   288  func (r *objReader) parseArchive(verbose bool) error {
   289  	r.readFull(r.tmp[:8]) // consume header (already checked)
   290  	for r.offset < r.limit {
   291  		if err := r.readFull(r.tmp[:60]); err != nil {
   292  			return err
   293  		}
   294  		data := r.tmp[:60]
   295  
   296  		// Each file is preceded by this text header (slice indices in first column):
   297  		//	 0:16	name
   298  		//	16:28 date
   299  		//	28:34 uid
   300  		//	34:40 gid
   301  		//	40:48 mode
   302  		//	48:58 size
   303  		//	58:60 magic - `\n
   304  		// We only care about name, size, and magic, unless in verbose mode.
   305  		// The fields are space-padded on the right.
   306  		// The size is in decimal.
   307  		// The file data - size bytes - follows the header.
   308  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   309  		// byte sits between the file data and the next header.
   310  		// The file data that follows is padded to an even number of bytes:
   311  		// if size is odd, an extra padding byte is inserted betw the next header.
   312  		if len(data) < 60 {
   313  			return errTruncatedArchive
   314  		}
   315  		if !bytes.Equal(data[58:60], archiveMagic) {
   316  			return errCorruptArchive
   317  		}
   318  		name := trimSpace(data[0:16])
   319  		var err error
   320  		get := func(start, end, base, bitsize int) int64 {
   321  			if err != nil {
   322  				return 0
   323  			}
   324  			var v int64
   325  			v, err = strconv.ParseInt(trimSpace(data[start:end]), base, bitsize)
   326  			return v
   327  		}
   328  		size := get(48, 58, 10, 64)
   329  		var (
   330  			mtime    int64
   331  			uid, gid int
   332  			mode     os.FileMode
   333  		)
   334  		if verbose {
   335  			mtime = get(16, 28, 10, 64)
   336  			uid = int(get(28, 34, 10, 32))
   337  			gid = int(get(34, 40, 10, 32))
   338  			mode = os.FileMode(get(40, 48, 8, 32))
   339  		}
   340  		if err != nil {
   341  			return errCorruptArchive
   342  		}
   343  		data = data[60:]
   344  		fsize := size + size&1
   345  		if fsize < 0 || fsize < size {
   346  			return errCorruptArchive
   347  		}
   348  		switch name {
   349  		case "__.PKGDEF":
   350  			r.a.Entries = append(r.a.Entries, Entry{
   351  				Name:  name,
   352  				Type:  EntryPkgDef,
   353  				Mtime: mtime,
   354  				Uid:   uid,
   355  				Gid:   gid,
   356  				Mode:  mode,
   357  				Data:  Data{r.offset, size},
   358  			})
   359  			r.skip(size)
   360  		default:
   361  			var typ EntryType
   362  			var o *GoObj
   363  			offset := r.offset
   364  			p, err := r.peek(8)
   365  			if err != nil {
   366  				return err
   367  			}
   368  			if bytes.Equal(p, goobjHeader) {
   369  				typ = EntryGoObj
   370  				o = &GoObj{}
   371  				r.parseObject(o, size)
   372  			} else {
   373  				typ = EntryNativeObj
   374  				r.skip(size)
   375  			}
   376  			r.a.Entries = append(r.a.Entries, Entry{
   377  				Name:  name,
   378  				Type:  typ,
   379  				Mtime: mtime,
   380  				Uid:   uid,
   381  				Gid:   gid,
   382  				Mode:  mode,
   383  				Data:  Data{offset, size},
   384  				Obj:   o,
   385  			})
   386  		}
   387  		if size&1 != 0 {
   388  			r.skip(1)
   389  		}
   390  	}
   391  	return nil
   392  }
   393  
   394  // parseObject parses a single Go object file.
   395  // The object file consists of a textual header ending in "\n!\n"
   396  // and then the part we want to parse begins.
   397  // The format of that part is defined in a comment at the top
   398  // of cmd/internal/goobj/objfile.go.
   399  func (r *objReader) parseObject(o *GoObj, size int64) error {
   400  	h := make([]byte, 0, 256)
   401  	var c1, c2, c3 byte
   402  	for {
   403  		c1, c2, c3 = c2, c3, r.readByte()
   404  		h = append(h, c3)
   405  		// The new export format can contain 0 bytes.
   406  		// Don't consider them errors, only look for r.err != nil.
   407  		if r.err != nil {
   408  			return errCorruptObject
   409  		}
   410  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   411  			break
   412  		}
   413  	}
   414  	o.TextHeader = h
   415  	hs := strings.Fields(string(h))
   416  	if len(hs) >= 4 {
   417  		o.Arch = hs[3]
   418  	}
   419  	o.Offset = r.offset
   420  	o.Size = size - int64(len(h))
   421  
   422  	p, err := r.peek(8)
   423  	if err != nil {
   424  		return err
   425  	}
   426  	if !bytes.Equal(p, []byte(goobj.Magic)) {
   427  		if bytes.HasPrefix(p, []byte("\x00go1")) && bytes.HasSuffix(p, []byte("ld")) {
   428  			return r.error(ErrGoObjOtherVersion{p[1:]}) // strip the \x00 byte
   429  		}
   430  		return r.error(errCorruptObject)
   431  	}
   432  	r.skip(o.Size)
   433  	return nil
   434  }
   435  
   436  // AddEntry adds an entry to the end of a, with the content from r.
   437  func (a *Archive) AddEntry(typ EntryType, name string, mtime int64, uid, gid int, mode os.FileMode, size int64, r io.Reader) {
   438  	off, err := a.f.Seek(0, os.SEEK_END)
   439  	if err != nil {
   440  		log.Fatal(err)
   441  	}
   442  	n, err := fmt.Fprintf(a.f, entryHeader, exactly16Bytes(name), mtime, uid, gid, mode, size)
   443  	if err != nil || n != entryLen {
   444  		log.Fatal("writing entry header: ", err)
   445  	}
   446  	n1, _ := io.CopyN(a.f, r, size)
   447  	if n1 != size {
   448  		log.Fatal(err)
   449  	}
   450  	if (off+size)&1 != 0 {
   451  		a.f.Write([]byte{0}) // pad to even byte
   452  	}
   453  	a.Entries = append(a.Entries, Entry{
   454  		Name:  name,
   455  		Type:  typ,
   456  		Mtime: mtime,
   457  		Uid:   uid,
   458  		Gid:   gid,
   459  		Mode:  mode,
   460  		Data:  Data{off + entryLen, size},
   461  	})
   462  }
   463  
   464  // exactly16Bytes truncates the string if necessary so it is at most 16 bytes long,
   465  // then pads the result with spaces to be exactly 16 bytes.
   466  // Fmt uses runes for its width calculation, but we need bytes in the entry header.
   467  func exactly16Bytes(s string) string {
   468  	for len(s) > 16 {
   469  		_, wid := utf8.DecodeLastRuneInString(s)
   470  		s = s[:len(s)-wid]
   471  	}
   472  	const sixteenSpaces = "                "
   473  	s += sixteenSpaces[:16-len(s)]
   474  	return s
   475  }
   476  
   477  // architecture-independent object file output
   478  const HeaderSize = 60
   479  
   480  func ReadHeader(b *bufio.Reader, name string) int {
   481  	var buf [HeaderSize]byte
   482  	if _, err := io.ReadFull(b, buf[:]); err != nil {
   483  		return -1
   484  	}
   485  	aname := strings.Trim(string(buf[0:16]), " ")
   486  	if !strings.HasPrefix(aname, name) {
   487  		return -1
   488  	}
   489  	asize := strings.Trim(string(buf[48:58]), " ")
   490  	i, _ := strconv.Atoi(asize)
   491  	return i
   492  }
   493  
   494  func FormatHeader(arhdr []byte, name string, size int64) {
   495  	copy(arhdr[:], fmt.Sprintf("%-16s%-12d%-6d%-6d%-8o%-10d`\n", name, 0, 0, 0, 0644, size))
   496  }
   497
View as plain text