diff --git a/_examples/common_test.go b/_examples/common_test.go index aa7c9b446..47463a1cd 100644 --- a/_examples/common_test.go +++ b/_examples/common_test.go @@ -28,6 +28,7 @@ var args = map[string][]string{ "showcase": {defaultURL, tempFolder()}, "tag": {cloneRepository(defaultURL, tempFolder())}, "pull": {createRepositoryWithRemote(tempFolder(), defaultURL)}, + "ls": {cloneRepository(defaultURL, tempFolder()), "HEAD", "vendor"}, } var ignored = map[string]bool{} diff --git a/_examples/ls/main.go b/_examples/ls/main.go new file mode 100644 index 000000000..bb686f1e6 --- /dev/null +++ b/_examples/ls/main.go @@ -0,0 +1,272 @@ +package main + +import ( + "fmt" + "io" + "os" + "path" + "strings" + + "github.com/emirpasic/gods/trees/binaryheap" + "gopkg.in/src-d/go-git.v4" + . "gopkg.in/src-d/go-git.v4/_examples" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + commitgraph_fmt "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" + "gopkg.in/src-d/go-git.v4/storage/filesystem" + + "gopkg.in/src-d/go-billy.v4" + "gopkg.in/src-d/go-billy.v4/osfs" +) + +// Example how to resolve a revision into its commit counterpart +func main() { + CheckArgs("", "", "") + + path := os.Args[1] + revision := os.Args[2] + treePath := os.Args[3] + + // We instantiate a new repository targeting the given path (the .git folder) + fs := osfs.New(path) + if _, err := fs.Stat(git.GitDirName); err == nil { + fs, err = fs.Chroot(git.GitDirName) + CheckIfError(err) + } + + s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{KeepDescriptors: true}) + r, err := git.Open(s, fs) + CheckIfError(err) + defer s.Close() + + // Resolve revision into a sha1 commit, only some revisions are resolved + // look at the doc to get more details + Info("git rev-parse %s", revision) + + h, err := r.ResolveRevision(plumbing.Revision(revision)) + CheckIfError(err) + + commit, err := r.CommitObject(*h) + CheckIfError(err) + + tree, err := commit.Tree() + CheckIfError(err) + if treePath != "" { + tree, err = tree.Tree(treePath) + CheckIfError(err) + } + + var paths []string + for _, entry := range tree.Entries { + paths = append(paths, entry.Name) + } + + commitNodeIndex, file := getCommitNodeIndex(r, fs) + if file != nil { + defer file.Close() + } + + commitNode, err := commitNodeIndex.Get(*h) + CheckIfError(err) + + revs, err := getLastCommitForPaths(commitNode, treePath, paths) + CheckIfError(err) + for path, rev := range revs { + // Print one line per file (name hash message) + hash := rev.Hash.String() + line := strings.Split(rev.Message, "\n") + fmt.Println(path, hash[:7], line[0]) + } +} + +func getCommitNodeIndex(r *git.Repository, fs billy.Filesystem) (commitgraph.CommitNodeIndex, io.ReadCloser) { + file, err := fs.Open(path.Join("objects", "info", "commit-graph")) + if err == nil { + index, err := commitgraph_fmt.OpenFileIndex(file) + if err == nil { + return commitgraph.NewGraphCommitNodeIndex(index, r.Storer), file + } + file.Close() + } + + return commitgraph.NewObjectCommitNodeIndex(r.Storer), nil +} + +type commitAndPaths struct { + commit commitgraph.CommitNode + // Paths that are still on the branch represented by commit + paths []string + // Set of hashes for the paths + hashes map[string]plumbing.Hash +} + +func getCommitTree(c commitgraph.CommitNode, treePath string) (*object.Tree, error) { + tree, err := c.Tree() + if err != nil { + return nil, err + } + + // Optimize deep traversals by focusing only on the specific tree + if treePath != "" { + tree, err = tree.Tree(treePath) + if err != nil { + return nil, err + } + } + + return tree, nil +} + +func getFullPath(treePath, path string) string { + if treePath != "" { + if path != "" { + return treePath + "/" + path + } + return treePath + } + return path +} + +func getFileHashes(c commitgraph.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) { + tree, err := getCommitTree(c, treePath) + if err == object.ErrDirectoryNotFound { + // The whole tree didn't exist, so return empty map + return make(map[string]plumbing.Hash), nil + } + if err != nil { + return nil, err + } + + hashes := make(map[string]plumbing.Hash) + for _, path := range paths { + if path != "" { + entry, err := tree.FindEntry(path) + if err == nil { + hashes[path] = entry.Hash + } + } else { + hashes[path] = tree.Hash + } + } + + return hashes, nil +} + +func getLastCommitForPaths(c commitgraph.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { + // We do a tree traversal with nodes sorted by commit time + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) { + return 1 + } + return -1 + }) + + resultNodes := make(map[string]commitgraph.CommitNode) + initialHashes, err := getFileHashes(c, treePath, paths) + if err != nil { + return nil, err + } + + // Start search from the root commit and with full set of paths + heap.Push(&commitAndPaths{c, paths, initialHashes}) + + for { + cIn, ok := heap.Pop() + if !ok { + break + } + current := cIn.(*commitAndPaths) + + // Load the parent commits for the one we are currently examining + numParents := current.commit.NumParents() + var parents []commitgraph.CommitNode + for i := 0; i < numParents; i++ { + parent, err := current.commit.ParentNode(i) + if err != nil { + break + } + parents = append(parents, parent) + } + + // Examine the current commit and set of interesting paths + pathUnchanged := make([]bool, len(current.paths)) + parentHashes := make([]map[string]plumbing.Hash, len(parents)) + for j, parent := range parents { + parentHashes[j], err = getFileHashes(parent, treePath, current.paths) + if err != nil { + break + } + + for i, path := range current.paths { + if parentHashes[j][path] == current.hashes[path] { + pathUnchanged[i] = true + } + } + } + + var remainingPaths []string + for i, path := range current.paths { + // The results could already contain some newer change for the same path, + // so don't override that and bail out on the file early. + if resultNodes[path] == nil { + if pathUnchanged[i] { + // The path existed with the same hash in at least one parent so it could + // not have been changed in this commit directly. + remainingPaths = append(remainingPaths, path) + } else { + // There are few possible cases how can we get here: + // - The path didn't exist in any parent, so it must have been created by + // this commit. + // - The path did exist in the parent commit, but the hash of the file has + // changed. + // - We are looking at a merge commit and the hash of the file doesn't + // match any of the hashes being merged. This is more common for directories, + // but it can also happen if a file is changed through conflict resolution. + resultNodes[path] = current.commit + } + } + } + + if len(remainingPaths) > 0 { + // Add the parent nodes along with remaining paths to the heap for further + // processing. + for j, parent := range parents { + // Combine remainingPath with paths available on the parent branch + // and make union of them + remainingPathsForParent := make([]string, 0, len(remainingPaths)) + newRemainingPaths := make([]string, 0, len(remainingPaths)) + for _, path := range remainingPaths { + if parentHashes[j][path] == current.hashes[path] { + remainingPathsForParent = append(remainingPathsForParent, path) + } else { + newRemainingPaths = append(newRemainingPaths, path) + } + } + + if remainingPathsForParent != nil { + heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]}) + } + + if len(newRemainingPaths) == 0 { + break + } else { + remainingPaths = newRemainingPaths + } + } + } + } + + // Post-processing + result := make(map[string]*object.Commit) + for path, commitNode := range resultNodes { + var err error + result[path], err = commitNode.Commit() + if err != nil { + return nil, err + } + } + + return result, nil +} diff --git a/plumbing/format/commitgraph/commitgraph_test.go b/plumbing/format/commitgraph/commitgraph_test.go index 0e3870726..0214f49fd 100644 --- a/plumbing/format/commitgraph/commitgraph_test.go +++ b/plumbing/format/commitgraph/commitgraph_test.go @@ -6,10 +6,8 @@ import ( "path" "testing" - "golang.org/x/exp/mmap" - . "gopkg.in/check.v1" - "gopkg.in/src-d/go-git-fixtures.v3" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" ) @@ -23,7 +21,7 @@ type CommitgraphSuite struct { var _ = Suite(&CommitgraphSuite{}) func testDecodeHelper(c *C, path string) { - reader, err := mmap.Open(path) + reader, err := os.Open(path) c.Assert(err, IsNil) defer reader.Close() index, err := commitgraph.OpenFileIndex(reader) @@ -85,7 +83,7 @@ func (s *CommitgraphSuite) TestReencode(c *C) { fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { dotgit := f.DotGit() - reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + reader, err := os.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) c.Assert(err, IsNil) defer reader.Close() index, err := commitgraph.OpenFileIndex(reader) @@ -108,7 +106,7 @@ func (s *CommitgraphSuite) TestReencodeInMemory(c *C) { fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { dotgit := f.DotGit() - reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + reader, err := os.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) c.Assert(err, IsNil) index, err := commitgraph.OpenFileIndex(reader) c.Assert(err, IsNil) diff --git a/plumbing/format/commitgraph/doc.go b/plumbing/format/commitgraph/doc.go new file mode 100644 index 000000000..41cd8b1e3 --- /dev/null +++ b/plumbing/format/commitgraph/doc.go @@ -0,0 +1,103 @@ +// Package commitgraph implements encoding and decoding of commit-graph files. +// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. +// +// == Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x7000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. +// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package commitgraph diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go index 648153f05..a06871cb7 100644 --- a/plumbing/format/commitgraph/encoder.go +++ b/plumbing/format/commitgraph/encoder.go @@ -22,6 +22,7 @@ func NewEncoder(w io.Writer) *Encoder { return &Encoder{mw, h} } +// Encode writes an index into the commit-graph file func (e *Encoder) Encode(idx Index) error { var err error diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go index f084b85cb..a4a96e961 100644 --- a/plumbing/format/commitgraph/memory.go +++ b/plumbing/format/commitgraph/memory.go @@ -4,6 +4,8 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" ) +// MemoryIndex provides a way to build the commit-graph in memory +// for later encoding to file. type MemoryIndex struct { commitData []*CommitData indexMap map[plumbing.Hash]int diff --git a/plumbing/object/commitgraph/commitnode.go b/plumbing/object/commitgraph/commitnode.go new file mode 100644 index 000000000..e218d3210 --- /dev/null +++ b/plumbing/object/commitgraph/commitnode.go @@ -0,0 +1,98 @@ +package commitgraph + +import ( + "io" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// CommitNode is generic interface encapsulating a lightweight commit object retrieved +// from CommitNodeIndex +type CommitNode interface { + // ID returns the Commit object id referenced by the commit graph node. + ID() plumbing.Hash + // Tree returns the Tree referenced by the commit graph node. + Tree() (*object.Tree, error) + // CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. + CommitTime() time.Time + // NumParents returns the number of parents in a commit. + NumParents() int + // ParentNodes return a CommitNodeIter for parents of specified node. + ParentNodes() CommitNodeIter + // ParentNode returns the ith parent of a commit. + ParentNode(i int) (CommitNode, error) + // ParentHashes returns hashes of the parent commits for a specified node + ParentHashes() []plumbing.Hash + // Generation returns the generation of the commit for reachability analysis. + // Objects with newer generation are not reachable from objects of older generation. + Generation() uint64 + // Commit returns the full commit object from the node + Commit() (*object.Commit, error) +} + +// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects +type CommitNodeIndex interface { + // Get returns a commit node from a commit hash + Get(hash plumbing.Hash) (CommitNode, error) +} + +// CommitNodeIter is a generic closable interface for iterating over commit nodes. +type CommitNodeIter interface { + Next() (CommitNode, error) + ForEach(func(CommitNode) error) error + Close() +} + +// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. +type parentCommitNodeIter struct { + node CommitNode + i int +} + +func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { + return &parentCommitNodeIter{node, 0} +} + +// Next moves the iterator to the next commit and returns a pointer to it. If +// there are no more commits, it returns io.EOF. +func (iter *parentCommitNodeIter) Next() (CommitNode, error) { + obj, err := iter.node.ParentNode(iter.i) + if err == object.ErrParentNotFound { + return nil, io.EOF + } + if err == nil { + iter.i++ + } + + return obj, err +} + +// ForEach call the cb function for each commit contained on this iter until +// an error appends or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. +func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { + for { + obj, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if err := cb(obj); err != nil { + if err == storer.ErrStop { + return nil + } + + return err + } + } +} + +func (iter *parentCommitNodeIter) Close() { +} diff --git a/plumbing/object/commitgraph/commitnode_graph.go b/plumbing/object/commitgraph/commitnode_graph.go new file mode 100644 index 000000000..bd54e1888 --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_graph.go @@ -0,0 +1,131 @@ +package commitgraph + +import ( + "fmt" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// graphCommitNode is a reduced representation of Commit as presented in the commit +// graph file (commitgraph.Node). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index int + + commitData *commitgraph.CommitData + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store. +// +// graphCommitNodeIndex implements the CommitNodeIndex interface +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph +// files as backing storage and falls back to object storage when necessary +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + // Check the commit graph first + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + commitData: parent, + gci: gci, + }, nil + } + + // Fallback to loading full commit object + commit, err := object.GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: gci, + commit: commit, + }, nil +} + +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +func (c *graphCommitNode) Tree() (*object.Tree, error) { + return object.GetTree(c.gci.s, c.commitData.TreeHash) +} + +func (c *graphCommitNode) CommitTime() time.Time { + return c.commitData.When +} + +func (c *graphCommitNode) NumParents() int { + return len(c.commitData.ParentIndexes) +} + +func (c *graphCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commitData.ParentIndexes) { + return nil, object.ErrParentNotFound + } + + parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: c.commitData.ParentHashes[i], + index: c.commitData.ParentIndexes[i], + commitData: parent, + gci: c.gci, + }, nil +} + +func (c *graphCommitNode) ParentHashes() []plumbing.Hash { + return c.commitData.ParentHashes +} + +func (c *graphCommitNode) Generation() uint64 { + // If the commit-graph file was generated with older Git version that + // set the generation to zero for every commit the generation assumption + // is still valid. It is just less useful. + return uint64(c.commitData.Generation) +} + +func (c *graphCommitNode) Commit() (*object.Commit, error) { + return object.GetCommit(c.gci.s, c.hash) +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(object.DateFormat), + ) +} diff --git a/plumbing/object/commitgraph/commitnode_object.go b/plumbing/object/commitgraph/commitnode_object.go new file mode 100644 index 000000000..2779a54bc --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_object.go @@ -0,0 +1,90 @@ +package commitgraph + +import ( + "math" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// objectCommitNode is a representation of Commit as presented in the GIT object format. +// +// objectCommitNode implements the CommitNode interface. +type objectCommitNode struct { + nodeIndex CommitNodeIndex + commit *object.Commit +} + +// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses +// only object storage to load the nodes +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := object.GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: oci, + commit: commit, + }, nil +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. +// +// objectCommitNodeIndex implements the CommitNodeIndex interface +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +func (c *objectCommitNode) Tree() (*object.Tree, error) { + return c.commit.Tree() +} + +func (c *objectCommitNode) NumParents() int { + return c.commit.NumParents() +} + +func (c *objectCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commit.ParentHashes) { + return nil, object.ErrParentNotFound + } + + // Note: It's necessary to go through CommitNodeIndex here to ensure + // that if the commit-graph file covers only part of the history we + // start using it when that part is reached. + return c.nodeIndex.Get(c.commit.ParentHashes[i]) +} + +func (c *objectCommitNode) ParentHashes() []plumbing.Hash { + return c.commit.ParentHashes +} + +func (c *objectCommitNode) Generation() uint64 { + // Commit nodes representing objects outside of the commit graph can never + // be reached by objects from the commit-graph thus we return the highest + // possible value. + return math.MaxUint64 +} + +func (c *objectCommitNode) Commit() (*object.Commit, error) { + return c.commit, nil +} diff --git a/plumbing/object/commitgraph/commitnode_test.go b/plumbing/object/commitgraph/commitnode_test.go new file mode 100644 index 000000000..954f873ab --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_test.go @@ -0,0 +1,147 @@ +package commitgraph + +import ( + "path" + "testing" + + . "gopkg.in/check.v1" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/storage/filesystem" +) + +func Test(t *testing.T) { TestingT(t) } + +type CommitNodeSuite struct { + fixtures.Suite +} + +var _ = Suite(&CommitNodeSuite{}) + +func unpackRepositry(f *fixtures.Fixture) *filesystem.Storage { + storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) + p := f.Packfile() + defer p.Close() + packfile.UpdateObjectStorage(storer, p) + return storer +} + +func testWalker(c *C, nodeIndex CommitNodeIndex) { + head, err := nodeIndex.Get(plumbing.NewHash("b9d69064b190e7aedccf84731ca1d917871f8a1c")) + c.Assert(err, IsNil) + + iter := NewCommitNodeIterCTime( + head, + nil, + nil, + ) + + var commits []CommitNode + iter.ForEach(func(c CommitNode) error { + commits = append(commits, c) + return nil + }) + + c.Assert(commits, HasLen, 9) + + expected := []string{ + "b9d69064b190e7aedccf84731ca1d917871f8a1c", + "6f6c5d2be7852c782be1dd13e36496dd7ad39560", + "a45273fe2d63300e1962a9e26a6b15c276cd7082", + "c0edf780dd0da6a65a7a49a86032fcf8a0c2d467", + "bb13916df33ed23004c3ce9ed3b8487528e655c1", + "03d2c021ff68954cf3ef0a36825e194a4b98f981", + "ce275064ad67d51e99f026084e20827901a8361c", + "e713b52d7e13807e87a002e812041f248db3f643", + "347c91919944a68e9413581a1bc15519550a3afe", + } + for i, commit := range commits { + c.Assert(commit.ID().String(), Equals, expected[i]) + } +} + +func testParents(c *C, nodeIndex CommitNodeIndex) { + merge3, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + + var parents []CommitNode + merge3.ParentNodes().ForEach(func(c CommitNode) error { + parents = append(parents, c) + return nil + }) + + c.Assert(parents, HasLen, 3) + + expected := []string{ + "ce275064ad67d51e99f026084e20827901a8361c", + "bb13916df33ed23004c3ce9ed3b8487528e655c1", + "a45273fe2d63300e1962a9e26a6b15c276cd7082", + } + for i, parent := range parents { + c.Assert(parent.ID().String(), Equals, expected[i]) + } +} + +func testCommitAndTree(c *C, nodeIndex CommitNodeIndex) { + merge3node, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + merge3commit, err := merge3node.Commit() + c.Assert(err, IsNil) + c.Assert(merge3node.ID().String(), Equals, merge3commit.ID().String()) + tree, err := merge3node.Tree() + c.Assert(err, IsNil) + c.Assert(tree.ID().String(), Equals, merge3commit.TreeHash.String()) +} + +func (s *CommitNodeSuite) TestObjectGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := unpackRepositry(f) + + nodeIndex := NewObjectCommitNodeIndex(storer) + testWalker(c, nodeIndex) + testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) +} + +func (s *CommitNodeSuite) TestCommitGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := unpackRepositry(f) + reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + + nodeIndex := NewGraphCommitNodeIndex(index, storer) + testWalker(c, nodeIndex) + testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) +} + +func (s *CommitNodeSuite) TestMixedGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := unpackRepositry(f) + + // Take the commit-graph file and copy it to memory index without the last commit + reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + fileIndex, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + memoryIndex := commitgraph.NewMemoryIndex() + for i, hash := range fileIndex.Hashes() { + if hash.String() != "b9d69064b190e7aedccf84731ca1d917871f8a1c" { + node, err := fileIndex.GetCommitDataByIndex(i) + c.Assert(err, IsNil) + memoryIndex.Add(hash, node) + } + } + + nodeIndex := NewGraphCommitNodeIndex(memoryIndex, storer) + testWalker(c, nodeIndex) + testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) +} diff --git a/plumbing/object/commitgraph/commitnode_walker_ctime.go b/plumbing/object/commitgraph/commitnode_walker_ctime.go new file mode 100644 index 000000000..f6a1b6a4e --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_walker_ctime.go @@ -0,0 +1,105 @@ +package commitgraph + +import ( + "io" + + "github.com/emirpasic/gods/trees/binaryheap" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +type commitNodeIteratorByCTime struct { + heap *binaryheap.Heap + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool +} + +// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents while preserving Committer Time order. +// this appears to be the closest order to `git log` +// The given callback will be called for each visited commit. Each commit will +// be visited only once. If the callback returns an error, walking will stop +// and will return the error. Other errors might be returned if the history +// cannot be traversed (e.g. missing objects). Ignore allows to skip some +// commits from being iterated. +func NewCommitNodeIterCTime( + c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]bool) + for _, h := range ignore { + seen[h] = true + } + + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { + return 1 + } + return -1 + }) + + heap.Push(c) + + return &commitNodeIteratorByCTime{ + heap: heap, + seenExternal: seenExternal, + seen: seen, + } +} + +func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { + var c CommitNode + for { + cIn, ok := w.heap.Pop() + if !ok { + return nil, io.EOF + } + c = cIn.(CommitNode) + cID := c.ID() + + if w.seen[cID] || w.seenExternal[cID] { + continue + } + + w.seen[cID] = true + + for i, h := range c.ParentHashes() { + if w.seen[h] || w.seenExternal[h] { + continue + } + pc, err := c.ParentNode(i) + if err != nil { + return nil, err + } + w.heap.Push(pc) + } + + return c, nil + } +} + +func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { + for { + c, err := w.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + err = cb(c) + if err == storer.ErrStop { + break + } + if err != nil { + return err + } + } + + return nil +} + +func (w *commitNodeIteratorByCTime) Close() {} diff --git a/plumbing/object/commitgraph/doc.go b/plumbing/object/commitgraph/doc.go new file mode 100644 index 000000000..0a55ad5b0 --- /dev/null +++ b/plumbing/object/commitgraph/doc.go @@ -0,0 +1,7 @@ +// Package commitgraph provides an interface for efficient traversal over Git +// commit graph either through the regular object storage, or optionally with +// the index stored in commit-graph file (Git 2.18+). +// +// The API and functionality of this package are considered EXPERIMENTAL and is +// not considered stable nor production ready. +package commitgraph