diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 69b6e85d0..def6e997a 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -76,20 +76,18 @@ func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { return nil, err } - return p.GetByOffset(offset) + return p.objectAtOffset(offset, h) } -// GetByOffset retrieves the encoded object from the packfile with the given +// GetByOffset retrieves the encoded object from the packfile at the given // offset. func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { hash, err := p.FindHash(o) - if err == nil { - if obj, ok := p.deltaBaseCache.Get(hash); ok { - return obj, nil - } + if err != nil { + return nil, err } - return p.objectAtOffset(o) + return p.objectAtOffset(o, hash) } // GetSizeByOffset retrieves the size of the encoded object from the @@ -122,6 +120,13 @@ func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { return h, err } +func (p *Packfile) getDeltaObjectSize(buf *bytes.Buffer) int64 { + delta := buf.Bytes() + _, delta = decodeLEB128(delta) // skip src size + sz, _ := decodeLEB128(delta) + return int64(sz) +} + func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: @@ -135,10 +140,7 @@ func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { return 0, err } - delta := buf.Bytes() - _, delta = decodeLEB128(delta) // skip src size - sz, _ := decodeLEB128(delta) - return int64(sz), nil + return p.getDeltaObjectSize(buf), nil default: return 0, ErrInvalidObject.AddDetails("type %q", h.Type) } @@ -176,10 +178,16 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err err = ErrInvalidObject.AddDetails("type %q", h.Type) } + p.offsetToType[h.Offset] = typ + return } -func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) { +func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) { + if obj, ok := p.cacheGet(hash); ok { + return obj, nil + } + h, err := p.objectHeaderAtOffset(offset) if err != nil { if err == io.EOF || isInvalid(err) { @@ -188,27 +196,54 @@ func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) return nil, err } + return p.getNextObject(h, hash) +} + +func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing.EncodedObject, error) { + var err error + // If we have no filesystem, we will return a MemoryObject instead // of an FSObject. if p.fs == nil { - return p.getNextObject(h) + return p.getNextMemoryObject(h) } - // If the object is not a delta and it's small enough then read it - // completely into memory now since it is already read from disk - // into buffer anyway. - if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject { - return p.getNextObject(h) - } + // If the object is small enough then read it completely into memory now since + // it is already read from disk into buffer anyway. For delta objects we want + // to perform the optimization too, but we have to be careful about applying + // small deltas on big objects. + var size int64 + if h.Length <= smallObjectThreshold { + if h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject { + return p.getNextMemoryObject(h) + } - hash, err := p.FindHash(h.Offset) - if err != nil { - return nil, err - } + // For delta objects we read the delta data and apply the small object + // optimization only if the expanded version of the object still meets + // the small object threshold condition. + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + if _, _, err := p.s.NextObject(buf); err != nil { + return nil, err + } + defer bufPool.Put(buf) - size, err := p.getObjectSize(h) - if err != nil { - return nil, err + size = p.getDeltaObjectSize(buf) + if size <= smallObjectThreshold { + var obj = new(plumbing.MemoryObject) + obj.SetSize(size) + if h.Type == plumbing.REFDeltaObject { + err = p.fillREFDeltaObjectContentWithBuffer(obj, h.Reference, buf) + } else { + err = p.fillOFSDeltaObjectContentWithBuffer(obj, h.OffsetReference, buf) + } + return obj, err + } + } else { + size, err = p.getObjectSize(h) + if err != nil { + return nil, err + } } typ, err := p.getObjectType(h) @@ -231,25 +266,14 @@ func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) } func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { - ref, err := p.FindHash(offset) - if err == nil { - obj, ok := p.cacheGet(ref) - if ok { - reader, err := obj.Reader() - if err != nil { - return nil, err - } - - return reader, nil - } - } - h, err := p.objectHeaderAtOffset(offset) if err != nil { return nil, err } - obj, err := p.getNextObject(h) + // getObjectContent is called from FSObject, so we have to explicitly + // get memory object here to avoid recursive cycle + obj, err := p.getNextMemoryObject(h) if err != nil { return nil, err } @@ -257,7 +281,7 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return obj.Reader() } -func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) { +func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) obj.SetType(h.Type) @@ -278,6 +302,8 @@ func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error return nil, err } + p.offsetToType[h.Offset] = obj.Type() + return obj, nil } @@ -300,6 +326,13 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu if err != nil { return err } + defer bufPool.Put(buf) + + return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf) +} + +func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error { + var err error base, ok := p.cacheGet(ref) if !ok { @@ -312,30 +345,31 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) p.cachePut(obj) - bufPool.Put(buf) return err } func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error { - buf := bytes.NewBuffer(nil) + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() _, _, err := p.s.NextObject(buf) if err != nil { return err } + defer bufPool.Put(buf) - var base plumbing.EncodedObject - var ok bool + return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf) +} + +func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error { hash, err := p.FindHash(offset) - if err == nil { - base, ok = p.cacheGet(hash) + if err != nil { + return err } - if !ok { - base, err = p.GetByOffset(offset) - if err != nil { - return err - } + base, err := p.objectAtOffset(offset, hash) + if err != nil { + return err } obj.SetType(base.Type()) @@ -437,14 +471,50 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) { return nil, err } - obj, err := i.p.GetByOffset(int64(e.Offset)) + if i.typ != plumbing.AnyObject { + if typ, ok := i.p.offsetToType[int64(e.Offset)]; ok { + if typ != i.typ { + continue + } + } else if obj, ok := i.p.cacheGet(e.Hash); ok { + if obj.Type() != i.typ { + i.p.offsetToType[int64(e.Offset)] = obj.Type() + continue + } + return obj, nil + } else { + h, err := i.p.objectHeaderAtOffset(int64(e.Offset)) + if err != nil { + return nil, err + } + + if h.Type == plumbing.REFDeltaObject || h.Type == plumbing.OFSDeltaObject { + typ, err := i.p.getObjectType(h) + if err != nil { + return nil, err + } + if typ != i.typ { + i.p.offsetToType[int64(e.Offset)] = typ + continue + } + // getObjectType will seek in the file so we cannot use getNextObject safely + return i.p.objectAtOffset(int64(e.Offset), e.Hash) + } else { + if h.Type != i.typ { + i.p.offsetToType[int64(e.Offset)] = h.Type + continue + } + return i.p.getNextObject(h, e.Hash) + } + } + } + + obj, err := i.p.objectAtOffset(int64(e.Offset), e.Hash) if err != nil { return nil, err } - if i.typ == plumbing.AnyObject || obj.Type() == i.typ { - return obj, nil - } + return obj, nil } }