DirCacheTree.java
/*
* Copyright (C) 2008-2009, Google Inc.
* Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.dircache;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.eclipse.jgit.lib.FileMode.TREE;
import static org.eclipse.jgit.lib.TreeFormatter.entrySize;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
import org.eclipse.jgit.errors.UnmergedPathException;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.TreeFormatter;
import org.eclipse.jgit.util.MutableInteger;
import org.eclipse.jgit.util.RawParseUtils;
/**
* Single tree record from the 'TREE' {@link org.eclipse.jgit.dircache.DirCache}
* extension.
* <p>
* A valid cache tree record contains the object id of a tree object and the
* total number of {@link org.eclipse.jgit.dircache.DirCacheEntry} instances
* (counted recursively) from the DirCache contained within the tree. This
* information facilitates faster traversal of the index and quicker generation
* of tree objects prior to creating a new commit.
* <p>
* An invalid cache tree record indicates a known subtree whose file entries
* have changed in ways that cause the tree to no longer have a known object id.
* Invalid cache tree records must be revalidated prior to use.
*/
public class DirCacheTree {
private static final byte[] NO_NAME = {};
private static final DirCacheTree[] NO_CHILDREN = {};
private static final Comparator<DirCacheTree> TREE_CMP = (DirCacheTree o1,
DirCacheTree o2) -> {
final byte[] a = o1.encodedName;
final byte[] b = o2.encodedName;
final int aLen = a.length;
final int bLen = b.length;
int cPos;
for (cPos = 0; cPos < aLen && cPos < bLen; cPos++) {
final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
if (cmp != 0) {
return cmp;
}
}
if (aLen == bLen) {
return 0;
}
if (aLen < bLen) {
return '/' - (b[cPos] & 0xff);
}
return (a[cPos] & 0xff) - '/';
};
/** Tree this tree resides in; null if we are the root. */
private DirCacheTree parent;
/** Name of this tree within its parent. */
byte[] encodedName;
/** Number of {@link DirCacheEntry} records that belong to this tree. */
private int entrySpan;
/** Unique SHA-1 of this tree; null if invalid. */
private ObjectId id;
/** Child trees, if any, sorted by {@link #encodedName}. */
private DirCacheTree[] children;
/** Number of valid children in {@link #children}. */
private int childCnt;
DirCacheTree() {
encodedName = NO_NAME;
children = NO_CHILDREN;
childCnt = 0;
entrySpan = -1;
}
private DirCacheTree(final DirCacheTree myParent, final byte[] path,
final int pathOff, final int pathLen) {
parent = myParent;
encodedName = new byte[pathLen];
System.arraycopy(path, pathOff, encodedName, 0, pathLen);
children = NO_CHILDREN;
childCnt = 0;
entrySpan = -1;
}
DirCacheTree(final byte[] in, final MutableInteger off,
final DirCacheTree myParent) {
parent = myParent;
int ptr = RawParseUtils.next(in, off.value, '\0');
final int nameLen = ptr - off.value - 1;
if (nameLen > 0) {
encodedName = new byte[nameLen];
System.arraycopy(in, off.value, encodedName, 0, nameLen);
} else
encodedName = NO_NAME;
entrySpan = RawParseUtils.parseBase10(in, ptr, off);
final int subcnt = RawParseUtils.parseBase10(in, off.value, off);
off.value = RawParseUtils.next(in, off.value, '\n');
if (entrySpan >= 0) {
// Valid trees have a positive entry count and an id of a
// tree object that should exist in the object database.
//
id = ObjectId.fromRaw(in, off.value);
off.value += Constants.OBJECT_ID_LENGTH;
}
if (subcnt > 0) {
boolean alreadySorted = true;
children = new DirCacheTree[subcnt];
for (int i = 0; i < subcnt; i++) {
children[i] = new DirCacheTree(in, off, this);
// C Git's ordering differs from our own; it prefers to
// sort by length first. This sometimes produces a sort
// we do not desire. On the other hand it may have been
// created by us, and be sorted the way we want.
//
if (alreadySorted && i > 0
&& TREE_CMP.compare(children[i - 1], children[i]) > 0)
alreadySorted = false;
}
if (!alreadySorted)
Arrays.sort(children, 0, subcnt, TREE_CMP);
} else {
// Leaf level trees have no children, only (file) entries.
//
children = NO_CHILDREN;
}
childCnt = subcnt;
}
void write(byte[] tmp, OutputStream os) throws IOException {
int ptr = tmp.length;
tmp[--ptr] = '\n';
ptr = RawParseUtils.formatBase10(tmp, ptr, childCnt);
tmp[--ptr] = ' ';
ptr = RawParseUtils.formatBase10(tmp, ptr, isValid() ? entrySpan : -1);
tmp[--ptr] = 0;
os.write(encodedName);
os.write(tmp, ptr, tmp.length - ptr);
if (isValid()) {
id.copyRawTo(tmp, 0);
os.write(tmp, 0, Constants.OBJECT_ID_LENGTH);
}
for (int i = 0; i < childCnt; i++)
children[i].write(tmp, os);
}
/**
* Determine if this cache is currently valid.
* <p>
* A valid cache tree knows how many
* {@link org.eclipse.jgit.dircache.DirCacheEntry} instances from the parent
* {@link org.eclipse.jgit.dircache.DirCache} reside within this tree
* (recursively enumerated). It also knows the object id of the tree, as the
* tree should be readily available from the repository's object database.
*
* @return true if this tree is knows key details about itself; false if the
* tree needs to be regenerated.
*/
public boolean isValid() {
return id != null;
}
/**
* Get the number of entries this tree spans within the DirCache.
* <p>
* If this tree is not valid (see {@link #isValid()}) this method's return
* value is always strictly negative (less than 0) but is otherwise an
* undefined result.
*
* @return total number of entries (recursively) contained within this tree.
*/
public int getEntrySpan() {
return entrySpan;
}
/**
* Get the number of cached subtrees contained within this tree.
*
* @return number of child trees available through this tree.
*/
public int getChildCount() {
return childCnt;
}
/**
* Get the i-th child cache tree.
*
* @param i
* index of the child to obtain.
* @return the child tree.
*/
public DirCacheTree getChild(int i) {
return children[i];
}
/**
* Get the tree's ObjectId.
* <p>
* If {@link #isValid()} returns false this method will return null.
*
* @return ObjectId of this tree or null.
* @since 4.3
*/
public ObjectId getObjectId() {
return id;
}
/**
* Get the tree's name within its parent.
* <p>
* This method is not very efficient and is primarily meant for debugging
* and final output generation. Applications should try to avoid calling it,
* and if invoked do so only once per interesting entry, where the name is
* absolutely required for correct function.
*
* @return name of the tree. This does not contain any '/' characters.
*/
public String getNameString() {
final ByteBuffer bb = ByteBuffer.wrap(encodedName);
return UTF_8.decode(bb).toString();
}
/**
* Get the tree's path within the repository.
* <p>
* This method is not very efficient and is primarily meant for debugging
* and final output generation. Applications should try to avoid calling it,
* and if invoked do so only once per interesting entry, where the name is
* absolutely required for correct function.
*
* @return path of the tree, relative to the repository root. If this is not
* the root tree the path ends with '/'. The root tree's path string
* is the empty string ("").
*/
public String getPathString() {
final StringBuilder r = new StringBuilder();
appendName(r);
return r.toString();
}
/**
* Write (if necessary) this tree to the object store.
*
* @param cache
* the complete cache from DirCache.
* @param cIdx
* first position of <code>cache</code> that is a member of this
* tree. The path of <code>cache[cacheIdx].path</code> for the
* range <code>[0,pathOff-1)</code> matches the complete path of
* this tree, from the root of the repository.
* @param pathOffset
* number of bytes of <code>cache[cacheIdx].path</code> that
* matches this tree's path. The value at array position
* <code>cache[cacheIdx].path[pathOff-1]</code> is always '/' if
* <code>pathOff</code> is > 0.
* @param ow
* the writer to use when serializing to the store.
* @return identity of this tree.
* @throws UnmergedPathException
* one or more paths contain higher-order stages (stage > 0),
* which cannot be stored in a tree object.
* @throws IOException
* an unexpected error occurred writing to the object store.
*/
ObjectId writeTree(final DirCacheEntry[] cache, int cIdx,
final int pathOffset, final ObjectInserter ow)
throws UnmergedPathException, IOException {
if (id == null) {
final int endIdx = cIdx + entrySpan;
final TreeFormatter fmt = new TreeFormatter(computeSize(cache,
cIdx, pathOffset, ow));
int childIdx = 0;
int entryIdx = cIdx;
while (entryIdx < endIdx) {
final DirCacheEntry e = cache[entryIdx];
final byte[] ep = e.path;
if (childIdx < childCnt) {
final DirCacheTree st = children[childIdx];
if (st.contains(ep, pathOffset, ep.length)) {
fmt.append(st.encodedName, TREE, st.id);
entryIdx += st.entrySpan;
childIdx++;
continue;
}
}
fmt.append(ep, pathOffset, ep.length - pathOffset, e
.getFileMode(), e.idBuffer(), e.idOffset());
entryIdx++;
}
id = ow.insert(fmt);
}
return id;
}
private int computeSize(final DirCacheEntry[] cache, int cIdx,
final int pathOffset, final ObjectInserter ow)
throws UnmergedPathException, IOException {
final int endIdx = cIdx + entrySpan;
int childIdx = 0;
int entryIdx = cIdx;
int size = 0;
while (entryIdx < endIdx) {
final DirCacheEntry e = cache[entryIdx];
if (e.getStage() != 0)
throw new UnmergedPathException(e);
final byte[] ep = e.path;
if (childIdx < childCnt) {
final DirCacheTree st = children[childIdx];
if (st.contains(ep, pathOffset, ep.length)) {
final int stOffset = pathOffset + st.nameLength() + 1;
st.writeTree(cache, entryIdx, stOffset, ow);
size += entrySize(TREE, st.nameLength());
entryIdx += st.entrySpan;
childIdx++;
continue;
}
}
size += entrySize(e.getFileMode(), ep.length - pathOffset);
entryIdx++;
}
return size;
}
private void appendName(StringBuilder r) {
if (parent != null) {
parent.appendName(r);
r.append(getNameString());
r.append('/');
} else if (nameLength() > 0) {
r.append(getNameString());
r.append('/');
}
}
final int nameLength() {
return encodedName.length;
}
final boolean contains(byte[] a, int aOff, int aLen) {
final byte[] e = encodedName;
final int eLen = e.length;
for (int eOff = 0; eOff < eLen && aOff < aLen; eOff++, aOff++)
if (e[eOff] != a[aOff])
return false;
if (aOff >= aLen)
return false;
return a[aOff] == '/';
}
/**
* Update (if necessary) this tree's entrySpan.
*
* @param cache
* the complete cache from DirCache.
* @param cCnt
* number of entries in <code>cache</code> that are valid for
* iteration.
* @param cIdx
* first position of <code>cache</code> that is a member of this
* tree. The path of <code>cache[cacheIdx].path</code> for the
* range <code>[0,pathOff-1)</code> matches the complete path of
* this tree, from the root of the repository.
* @param pathOff
* number of bytes of <code>cache[cacheIdx].path</code> that
* matches this tree's path. The value at array position
* <code>cache[cacheIdx].path[pathOff-1]</code> is always '/' if
* <code>pathOff</code> is > 0.
*/
void validate(final DirCacheEntry[] cache, final int cCnt, int cIdx,
final int pathOff) {
if (entrySpan >= 0 && cIdx + entrySpan <= cCnt) {
// If we are valid, our children are also valid.
// We have no need to validate them.
//
return;
}
entrySpan = 0;
if (cCnt == 0) {
// Special case of an empty index, and we are the root tree.
//
return;
}
final byte[] firstPath = cache[cIdx].path;
int stIdx = 0;
while (cIdx < cCnt) {
final byte[] currPath = cache[cIdx].path;
if (pathOff > 0 && !peq(firstPath, currPath, pathOff)) {
// The current entry is no longer in this tree. Our
// span is updated and the remainder goes elsewhere.
//
break;
}
DirCacheTree st = stIdx < childCnt ? children[stIdx] : null;
final int cc = namecmp(currPath, pathOff, st);
if (cc > 0) {
// This subtree is now empty.
//
removeChild(stIdx);
continue;
}
if (cc < 0) {
final int p = slash(currPath, pathOff);
if (p < 0) {
// The entry has no '/' and thus is directly in this
// tree. Count it as one of our own.
//
cIdx++;
entrySpan++;
continue;
}
// Build a new subtree for this entry.
//
st = new DirCacheTree(this, currPath, pathOff, p - pathOff);
insertChild(stIdx, st);
}
// The entry is contained in this subtree.
//
assert(st != null);
st.validate(cache, cCnt, cIdx, pathOff + st.nameLength() + 1);
cIdx += st.entrySpan;
entrySpan += st.entrySpan;
stIdx++;
}
// None of our remaining children can be in this tree
// as the current cache entry is after our own name.
//
while (stIdx < childCnt)
removeChild(childCnt - 1);
}
private void insertChild(int stIdx, DirCacheTree st) {
final DirCacheTree[] c = children;
if (childCnt + 1 <= c.length) {
if (stIdx < childCnt)
System.arraycopy(c, stIdx, c, stIdx + 1, childCnt - stIdx);
c[stIdx] = st;
childCnt++;
return;
}
final int n = c.length;
final DirCacheTree[] a = new DirCacheTree[n + 1];
if (stIdx > 0)
System.arraycopy(c, 0, a, 0, stIdx);
a[stIdx] = st;
if (stIdx < n)
System.arraycopy(c, stIdx, a, stIdx + 1, n - stIdx);
children = a;
childCnt++;
}
private void removeChild(int stIdx) {
final int n = --childCnt;
if (stIdx < n)
System.arraycopy(children, stIdx + 1, children, stIdx, n - stIdx);
children[n] = null;
}
static boolean peq(byte[] a, byte[] b, int aLen) {
if (b.length < aLen)
return false;
for (aLen--; aLen >= 0; aLen--)
if (a[aLen] != b[aLen])
return false;
return true;
}
private static int namecmp(byte[] a, int aPos, DirCacheTree ct) {
if (ct == null)
return -1;
final byte[] b = ct.encodedName;
final int aLen = a.length;
final int bLen = b.length;
int bPos = 0;
for (; aPos < aLen && bPos < bLen; aPos++, bPos++) {
final int cmp = (a[aPos] & 0xff) - (b[bPos] & 0xff);
if (cmp != 0)
return cmp;
}
if (bPos == bLen)
return a[aPos] == '/' ? 0 : -1;
return aLen - bLen;
}
private static int slash(byte[] a, int aPos) {
final int aLen = a.length;
for (; aPos < aLen; aPos++)
if (a[aPos] == '/')
return aPos;
return -1;
}
/** {@inheritDoc} */
@Override
public String toString() {
return getNameString();
}
}