Skip to content
This repository has been archived by the owner on Sep 11, 2020. It is now read-only.

plumbing: format/commitgraph, add APIs for reading and writing commit-graph files #1128

Merged
merged 5 commits into from
Apr 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions plumbing/format/commitgraph/commitgraph.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package commitgraph

import (
"time"

"gopkg.in/src-d/go-git.v4/plumbing"
)

// Node is a reduced representation of Commit as presented in the commit graph
// file. It carries only what is needed to walk the graph (root tree, parents,
// generation and commit time) and is merely useful as an optimization for
// walking the commit graphs.
type Node struct {
// TreeHash is the hash of the root tree of the commit.
TreeHash plumbing.Hash
// ParentIndexes are the indexes of the parent commits of the commit, as
// accepted by Index.GetNodeByIndex.
ParentIndexes []int
// ParentHashes are the hashes of the parent commits of the commit. The
// encoder in this package resolves parents through this field.
ParentHashes []plumbing.Hash
// Generation is the pre-computed generation number of the commit in the
// commit graph, or zero if not available.
Generation int
// When is the timestamp of the commit. The encoder stores it with
// one-second resolution (time.Time.Unix).
When time.Time
}

// Index is a view of a commit graph that allows indexed access to its nodes,
// with indexes looked up by commit object hash.
type Index interface {
// GetIndexByHash returns the index in the commit graph of the commit with
// hash h, if available; otherwise it returns an error.
GetIndexByHash(h plumbing.Hash) (int, error)
// GetNodeByIndex returns the commit node stored at index i. The index is
// either obtained from GetIndexByHash or taken from the ParentIndexes of a
// child node. An error is returned if the node is not available.
GetNodeByIndex(i int) (*Node, error)
// Hashes returns all the hashes that are available in the index.
// NOTE(review): Encoder.prepare sorts the returned slice in place, so
// implementations presumably should not hand out storage whose order they
// depend on; confirm for each implementation.
Hashes() []plumbing.Hash
}
135 changes: 135 additions & 0 deletions plumbing/format/commitgraph/commitgraph_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package commitgraph_test

import (
"io/ioutil"
"os"
"path"
"testing"

"golang.org/x/exp/mmap"

. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph"
)

// Test is the "go test" entry point; it hands control to gocheck, which runs
// the suites registered in this package.
func Test(t *testing.T) {
	TestingT(t)
}

// CommitgraphSuite is the gocheck suite holding the commit-graph tests. It
// embeds fixtures.Suite from go-git-fixtures.
type CommitgraphSuite struct {
fixtures.Suite
}

// Register the suite with gocheck.
var _ = Suite(&CommitgraphSuite{})

// testDecodeHelper opens the commit-graph file at path and checks that a set
// of well-known commits from the fixture repository decode with the expected
// parents, and that the index lists the expected hashes.
func testDecodeHelper(c *C, path string) {
	reader, err := mmap.Open(path)
	c.Assert(err, IsNil)
	defer reader.Close()
	index, err := commitgraph.OpenFileIndex(reader)
	c.Assert(err, IsNil)

	// Each case names a commit and the parent hashes it must decode to, in
	// order. The cases are visited in declaration order.
	cases := []struct {
		commit  string
		parents []string
	}{
		// Root commit
		{
			commit: "347c91919944a68e9413581a1bc15519550a3afe",
		},
		// Regular commit
		{
			commit:  "e713b52d7e13807e87a002e812041f248db3f643",
			parents: []string{"347c91919944a68e9413581a1bc15519550a3afe"},
		},
		// Merge commit
		{
			commit: "b29328491a0682c259bcce28741eac71f3499f7d",
			parents: []string{
				"e713b52d7e13807e87a002e812041f248db3f643",
				"03d2c021ff68954cf3ef0a36825e194a4b98f981",
			},
		},
		// Octopus merge commit
		{
			commit: "6f6c5d2be7852c782be1dd13e36496dd7ad39560",
			parents: []string{
				"ce275064ad67d51e99f026084e20827901a8361c",
				"bb13916df33ed23004c3ce9ed3b8487528e655c1",
				"a45273fe2d63300e1962a9e26a6b15c276cd7082",
			},
		},
	}

	for _, tc := range cases {
		nodeIndex, err := index.GetIndexByHash(plumbing.NewHash(tc.commit))
		c.Assert(err, IsNil)
		node, err := index.GetNodeByIndex(nodeIndex)
		c.Assert(err, IsNil)
		c.Assert(len(node.ParentIndexes), Equals, len(tc.parents))
		c.Assert(len(node.ParentHashes), Equals, len(tc.parents))
		for i, want := range tc.parents {
			c.Assert(node.ParentHashes[i].String(), Equals, want)
		}
	}

	// Check all hashes
	all := index.Hashes()
	c.Assert(len(all), Equals, 11)
	c.Assert(all[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
	c.Assert(all[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
}

// TestDecode decodes the commit-graph file shipped with every fixture tagged
// "commit-graph" and verifies its contents.
func (s *CommitgraphSuite) TestDecode(c *C) {
	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
		graphPath := path.Join(f.DotGit().Root(), "objects", "info", "commit-graph")
		testDecodeHelper(c, graphPath)
	})
}

// TestReencode reads the fixture's commit-graph file, encodes the resulting
// file index into a temporary file and verifies that the copy decodes to the
// same contents.
func (s *CommitgraphSuite) TestReencode(c *C) {
	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
		repo := f.DotGit()

		// Open the original commit-graph file.
		src, err := mmap.Open(path.Join(repo.Root(), "objects", "info", "commit-graph"))
		c.Assert(err, IsNil)
		defer src.Close()
		fileIndex, err := commitgraph.OpenFileIndex(src)
		c.Assert(err, IsNil)

		// Write it back out to a scratch file that is removed afterwards.
		out, err := ioutil.TempFile(repo.Root(), "commit-graph")
		c.Assert(err, IsNil)
		scratch := out.Name()
		defer os.Remove(scratch)
		err = commitgraph.NewEncoder(out).Encode(fileIndex)
		c.Assert(err, IsNil)
		out.Close()

		// The re-encoded file must pass the same checks as the original.
		testDecodeHelper(c, scratch)
	})
}

// TestReencodeInMemory copies every node of the fixture's commit-graph file
// into a memory index, closes the source file, encodes the memory index into a
// temporary file and verifies that the copy decodes to the same contents.
func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
		repo := f.DotGit()

		// Load the original file and mirror it node by node in memory.
		src, err := mmap.Open(path.Join(repo.Root(), "objects", "info", "commit-graph"))
		c.Assert(err, IsNil)
		fileIndex, err := commitgraph.OpenFileIndex(src)
		c.Assert(err, IsNil)
		inMemory := commitgraph.NewMemoryIndex()
		hashes := fileIndex.Hashes()
		for pos := range hashes {
			node, err := fileIndex.GetNodeByIndex(pos)
			c.Assert(err, IsNil)
			err = inMemory.Add(hashes[pos], node)
			c.Assert(err, IsNil)
		}
		// The source is closed before encoding: from here on only the memory
		// index is used.
		src.Close()

		// Encode the memory index to a scratch file that is removed afterwards.
		out, err := ioutil.TempFile(repo.Root(), "commit-graph")
		c.Assert(err, IsNil)
		scratch := out.Name()
		defer os.Remove(scratch)
		err = commitgraph.NewEncoder(out).Encode(inMemory)
		c.Assert(err, IsNil)
		out.Close()

		testDecodeHelper(c, scratch)
	})
}
189 changes: 189 additions & 0 deletions plumbing/format/commitgraph/encoder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package commitgraph

import (
"crypto/sha1"
"hash"
"io"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)

// Encoder writes an Index to an output stream in the commit-graph file
// format. Every byte written through the embedded io.Writer is also fed to
// hash, which is used to produce the checksum that ends the file.
type Encoder struct {
	io.Writer
	hash hash.Hash
}

// NewEncoder returns a new stream encoder that writes to w while accumulating
// a SHA-1 digest of the same bytes.
func NewEncoder(w io.Writer) *Encoder {
	digest := sha1.New()
	return &Encoder{
		Writer: io.MultiWriter(w, digest),
		hash:   digest,
	}
}

func (e *Encoder) Encode(idx Index) error {
filipnavara marked this conversation as resolved.
Show resolved Hide resolved
var err error

// Get all the hashes in the input index
hashes := idx.Hashes()

// Sort the inout and prepare helper structures we'll need for encoding
hashToIndex, fanout, largeEdgesCount := e.prepare(idx, hashes)

chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36}
if largeEdgesCount > 0 {
chunkSignatures = append(chunkSignatures, largeEdgeListSignature)
chunkSizes = append(chunkSizes, uint64(largeEdgesCount)*4)
}

if err = e.encodeFileHeader(len(chunkSignatures)); err != nil {
return err
}
if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil {
return err
}
if err = e.encodeFanout(fanout); err != nil {
return err
}
if err = e.encodeOidLookup(hashes); err != nil {
return err
}
if largeEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil {
if err = e.encodeLargeEdges(largeEdges); err != nil {
return err
}
}
if err != nil {
return err
}
return e.encodeChecksum()
}

// prepare sorts hashes in place and derives the lookup structures needed to
// encode them.
//
// It returns:
//   - hashToIndex: the position of every hash in the sorted order;
//   - fanout: 256 cumulative counts, where fanout[b] is the number of hashes
//     whose first byte is less than or equal to b;
//   - largeEdgesCount: the number of entries the large edge list will hold,
//     i.e. for every commit with more than two parents, all of its parents
//     except the first.
func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, largeEdgesCount uint32) {
	// Sort the hashes and build our index
	plumbing.HashesSort(hashes)
	hashToIndex = make(map[plumbing.Hash]uint32, len(hashes))
	fanout = make([]uint32, 256)
	for i, h := range hashes {
		hashToIndex[h] = uint32(i)
		fanout[h[0]]++
	}

	// Convert the fanout to cumulative values
	for i := 1; i < len(fanout); i++ {
		fanout[i] += fanout[i-1]
	}

	// Size the large edge table. Every commit with more than two parents
	// contributes entries, so all nodes must be visited: stopping at the
	// first one would under-report the chunk size whenever the graph
	// contains several such commits.
	for i := range hashes {
		node, err := idx.GetNodeByIndex(i)
		if err != nil || node == nil {
			// prepare has no error return; a node that cannot be loaded
			// is skipped here instead of being dereferenced.
			continue
		}
		if n := len(node.ParentHashes); n > 2 {
			largeEdgesCount += uint32(n - 1)
		}
	}

	return hashToIndex, fanout, largeEdgesCount
}

// encodeFileHeader writes the file signature followed by four header bytes:
// 1, 1, chunkCount truncated to a byte, and 0.
// NOTE(review): in the Git commit-graph format these four positions are the
// file version, the hash version, the number of chunks and the number of base
// graphs; confirm against the format specification.
func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
	if _, err = e.Write(commitFileSignature); err != nil {
		return err
	}
	header := []byte{1, 1, byte(chunkCount), 0}
	_, err = e.Write(header)
	return err
}

// encodeChunkHeaders writes the chunk lookup table: for each chunk its
// signature followed by the 8-byte file offset at which the chunk starts, and
// finally a terminating entry made of lastSignature and the offset just past
// the last chunk. chunkSizes[i] is the size in bytes of the chunk identified
// by chunkSignatures[i].
func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) {
	// The first chunk starts after the 8-byte file header and the table
	// itself, which takes 12 bytes per chunk entry plus 12 bytes for the
	// terminator.
	offset := uint64(8 + len(chunkSignatures)*12 + 12)
	for i := range chunkSignatures {
		if _, err = e.Write(chunkSignatures[i]); err != nil {
			return err
		}
		if err = binary.WriteUint64(e, offset); err != nil {
			return err
		}
		offset += chunkSizes[i]
	}

	if _, err = e.Write(lastSignature); err != nil {
		return err
	}
	return binary.WriteUint64(e, offset)
}

// encodeFanout writes the first 256 entries of fanout as 32-bit values, in
// index order, stopping at the first write error.
func (e *Encoder) encodeFanout(fanout []uint32) (err error) {
	for bucket := 0; bucket < 256; bucket++ {
		if err = binary.WriteUint32(e, fanout[bucket]); err != nil {
			return err
		}
	}
	return nil
}

// encodeOidLookup writes the raw bytes of every hash, in the order given,
// stopping at the first write error.
func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) {
	for i := range hashes {
		if _, err = e.Write(hashes[i][:]); err != nil {
			return err
		}
	}
	return err
}

// encodeCommitData writes one commit data record per entry of hashes, in the
// given order. Each record consists of the root tree hash, two 32-bit parent
// slots and a 64-bit word holding the generation number (shifted left by 34
// bits) together with the commit time in seconds (low 34 bits).
//
// Parents are written as positions taken from hashToIndex. A commit with no
// (second) parent stores parentNone in the unused slot(s). A commit with more
// than two parents stores its first parent in the first slot and, in the
// second slot, the start position of its remaining parents within the large
// edge list, flagged with parentOctopusUsed; the final entry of each such run
// is flagged with parentLast.
//
// It returns the accumulated large edge list, to be written afterwards, or
// the first error reported by idx or by the underlying writer.
func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (largeEdges []uint32, err error) {
	// The commit time shares a 64-bit word with the generation number and
	// owns only the low 34 bits; masking keeps an out-of-range or negative
	// timestamp from spilling into the generation bits.
	const commitTimeMask = uint64(1)<<34 - 1

	for _, h := range hashes {
		var origIndex int
		if origIndex, err = idx.GetIndexByHash(h); err != nil {
			return nil, err
		}
		var commitData *Node
		if commitData, err = idx.GetNodeByIndex(origIndex); err != nil {
			return nil, err
		}

		if _, err = e.Write(commitData.TreeHash[:]); err != nil {
			return nil, err
		}

		// NOTE(review): a parent hash that is absent from hashToIndex
		// resolves to position 0 (the map's zero value); confirm that all
		// parents are guaranteed to be part of the index.
		parents := commitData.ParentHashes
		var parent1, parent2 uint32 = parentNone, parentNone
		switch n := len(parents); {
		case n == 1:
			parent1 = hashToIndex[parents[0]]
		case n == 2:
			parent1 = hashToIndex[parents[0]]
			parent2 = hashToIndex[parents[1]]
		case n > 2:
			parent1 = hashToIndex[parents[0]]
			parent2 = uint32(len(largeEdges)) | parentOctopusUsed
			for _, parentHash := range parents[1:] {
				largeEdges = append(largeEdges, hashToIndex[parentHash])
			}
			largeEdges[len(largeEdges)-1] |= parentLast
		}

		if err = binary.WriteUint32(e, parent1); err != nil {
			return nil, err
		}
		if err = binary.WriteUint32(e, parent2); err != nil {
			return nil, err
		}

		packed := uint64(commitData.When.Unix())&commitTimeMask | uint64(commitData.Generation)<<34
		if err = binary.WriteUint64(e, packed); err != nil {
			return nil, err
		}
	}
	return largeEdges, nil
}

// encodeLargeEdges writes every entry of largeEdges as a 32-bit value, in
// order, stopping at the first write error. An empty list writes nothing.
func (e *Encoder) encodeLargeEdges(largeEdges []uint32) (err error) {
	for i := range largeEdges {
		if err = binary.WriteUint32(e, largeEdges[i]); err != nil {
			return err
		}
	}
	return err
}

// encodeChecksum writes the first 20 bytes of the digest accumulated over
// everything written so far.
func (e *Encoder) encodeChecksum() error {
	digest := e.hash.Sum(nil)
	_, err := e.Write(digest[:20])
	return err
}
Loading