/*
* Copyright (C) 2008-2010, Google Inc.
* Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
*   notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
*   copyright notice, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided
*   with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
*   names of its contributors may be used to endorse or promote
*   products derived from this software without specific prior
*   written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.storage.pack;

import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_DELTA;
import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_WHOLE;

import java.io.IOException;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;

import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.AsyncObjectSizeQueue;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSubclassMap;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.ThreadSafeProgressMonitor;
import org.eclipse.jgit.revwalk.AsyncRevObjectQueue;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevFlagSet;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.storage.file.PackIndexWriter;
import org.eclipse.jgit.util.TemporaryBuffer;

/**
 * <p>
 * PackWriter is responsible for generating pack files from a specified set of
 * objects in a repository. This implementation produces pack files in format
 * version 2.
 * </p>
 * <p>
 * The source of objects may be specified in two ways:
 * <ul>
 * <li>(usually) by providing sets of interesting and uninteresting objects in
 * the repository - all interesting objects and their ancestors, except
 * uninteresting objects and their ancestors, will be included in the pack,
 * or</li>
 * <li>by providing an iterator of {@link RevObject} specifying the exact list
 * and order of objects in the pack.</li>
 * </ul>
 * Typical usage consists of creating an instance intended for some pack,
 * configuring options, preparing the list of objects by calling
 * {@link #preparePack(Iterator)} or
 * {@link #preparePack(ProgressMonitor, Collection, Collection)}, and finally
 * producing the stream with
 * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
 * </p>
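 * <p>
 * A minimal usage sketch ({@code monitor}, {@code wantIds}, {@code haveIds}
 * and {@code packOut} are caller-supplied placeholders; error handling
 * omitted):
 *
 * <pre>
 * PackWriter writer = new PackWriter(repo);
 * try {
 *   writer.preparePack(monitor, wantIds, haveIds);
 *   writer.writePack(monitor, monitor, packOut);
 * } finally {
 *   writer.release();
 * }
 * </pre>
 * </p>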
 * <p>
 * The class provides a set of configurable options and {@link ProgressMonitor}
 * support, as operations may take a long time for big repositories. When delta
 * compression is enabled the writer searches for new deltas; it also reuses
 * existing deltas and whole objects from the source repository when possible.
 * </p>
 * <p>
 * This class is not thread safe. It is intended to be used in one thread, with
 * one instance per created pack. Subsequent calls to writePack result in
 * undefined behavior.
 * </p>
 */
public class PackWriter {
  private static final int PACK_VERSION_GENERATED = 2;

  // Objects to pack, binned by git object type code; index 0 holds an
  // immutable empty list so that unsupported type codes fail fast when
  // addObject() attempts to append to it.
  @SuppressWarnings("unchecked")
  private final List<ObjectToPack> objectsLists[] = new List[Constants.OBJ_TAG + 1];
  {
    objectsLists[0] = Collections.<ObjectToPack> emptyList();
    objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
    objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
    objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
    objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
  }

  private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();

  // edge objects for thin packs
  private List<ObjectToPack> edgeObjects = new ArrayList<ObjectToPack>();

  private List<CachedPack> cachedPacks = new ArrayList<CachedPack>(2);

  private Deflater myDeflater;

  private final ObjectReader reader;

  /** {@link #reader} recast to the reuse interface, if it supports it. */
  private final ObjectReuseAsIs reuseSupport;

  private final PackConfig config;

  private final Statistics stats;

  private List<ObjectToPack> sortedByName;

  private byte packcsum[];

  private boolean deltaBaseAsOffset;

  private boolean reuseDeltas;

  private boolean thin;

  private boolean useCachedPacks;

  private boolean ignoreMissingUninteresting = true;

  /**
   * Create writer for specified repository.
   * <p>
   * Objects for packing are specified in {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param repo
   *            repository where objects are stored.
   */
  public PackWriter(final Repository repo) {
    this(repo, repo.newObjectReader());
  }

  /**
   * Create a writer to load objects from the specified reader.
   * <p>
   * Objects for packing are specified in {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param reader
   *            reader to read from the repository with.
   */
  public PackWriter(final ObjectReader reader) {
    this(new PackConfig(), reader);
  }

  /**
   * Create writer for specified repository.
   * <p>
   * Objects for packing are specified in {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param repo
   *            repository where objects are stored.
   * @param reader
   *            reader to read from the repository with.
   */
  public PackWriter(final Repository repo, final ObjectReader reader) {
    this(new PackConfig(repo), reader);
  }

  /**
   * Create writer with a specified configuration.
   * <p>
   * Objects for packing are specified in {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
   *
   * @param config
   *            configuration for the pack writer.
   * @param reader
   *            reader to read from the repository with.
   */
  public PackWriter(final PackConfig config, final ObjectReader reader) {
    this.config = config;
    this.reader = reader;
    if (reader instanceof ObjectReuseAsIs)
      reuseSupport = ((ObjectReuseAsIs) reader);
    else
      reuseSupport = null;

    deltaBaseAsOffset = config.isDeltaBaseAsOffset();
    reuseDeltas = config.isReuseDeltas();
    stats = new Statistics();
  }
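
  /*
   * A minimal sketch of constructing a writer with an explicit
   * configuration (illustrative tuning only; the PackConfig setter shown
   * is assumed to mirror the getters this class relies on):
   *
   *   PackConfig pc = new PackConfig(repo);
   *   pc.setDeltaSearchWindowSize(20);
   *   PackWriter writer = new PackWriter(pc, repo.newObjectReader());
   */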

  /**
   * Check whether the writer can store the delta base as an offset (new
   * style, reducing pack size) or should store it as an object id (legacy
   * style, compatible with old readers).
   *
   * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET}
   *
   * @return true if the delta base is stored as an offset; false if it is
   *         stored as an object id.
   */
  public boolean isDeltaBaseAsOffset() {
    return deltaBaseAsOffset;
  }

  /**
   * Set the writer delta base format. The delta base can be written as an
   * offset in a pack file (new approach, reducing file size) or as an object
   * id (legacy approach, compatible with old readers).
   *
   * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET}
   *
   * @param deltaBaseAsOffset
   *            boolean indicating whether the delta base can be stored as an
   *            offset.
   */
  public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) {
    this.deltaBaseAsOffset = deltaBaseAsOffset;
  }

  /** @return true if this writer is producing a thin pack. */
  public boolean isThin() {
    return thin;
  }

  /**
   * @param packthin
   *            a boolean indicating whether the writer may pack objects whose
   *            delta base is not within the set of objects to pack, but
   *            belongs to the receiving party's repository (an uninteresting
   *            or boundary object) as determined by the set; this kind of
   *            pack is used only for transport; true to produce a thin pack,
   *            false otherwise.
   */
  public void setThin(final boolean packthin) {
    thin = packthin;
  }

  /** @return true to reuse cached packs. If true, index creation isn't available. */
  public boolean isUseCachedPacks() {
    return useCachedPacks;
  }

  /**
   * @param useCached
   *            if set to true and a cached pack is present, it will be
   *            appended onto the end of a thin pack, reducing the amount of
   *            working set space and CPU used by PackWriter. Enabling this
   *            feature prevents PackWriter from creating an index for the
   *            newly created pack, so it is only suitable for writing to a
   *            network client, where the client will build the index.
   */
  public void setUseCachedPacks(boolean useCached) {
    useCachedPacks = useCached;
  }

  /**
   * @return true to ignore objects that are uninteresting and also not found
   *         on local disk; false to throw a {@link MissingObjectException}
   *         out of {@link #preparePack(ProgressMonitor, Collection, Collection)} if an
   *         uninteresting object is not in the source repository. The default
   *         is true, permitting uninteresting objects to be gracefully
   *         ignored.
   */
  public boolean isIgnoreMissingUninteresting() {
    return ignoreMissingUninteresting;
  }

  /**
   * @param ignore
   *            true if the writer should ignore non-existing uninteresting
   *            objects while constructing the set of objects to pack; false
   *            otherwise - non-existing uninteresting objects may cause a
   *            {@link MissingObjectException}
   */
  public void setIgnoreMissingUninteresting(final boolean ignore) {
    ignoreMissingUninteresting = ignore;
  }

  /**
   * Returns the number of objects in the pack file that was created by this
   * writer.
   *
   * @return number of objects in the pack.
   */
  public long getObjectsNumber() {
    return stats.totalObjects;
  }

  /**
   * Prepare the list of objects to be written to the pack stream.
   * <p>
   * The iterator <b>exactly</b> determines which objects are included in the
   * pack, and the order in which they appear (except that ordering by type
   * is not required at input). This order should conform to the general
   * rules of ordering objects in git - by recency and path (type and
   * delta-base first ordering is ensured internally) - and the
   * responsibility for guaranteeing this order lies with the caller. The
   * iterator must return each object id to be written exactly once.
   * </p>
   *
   * @param objectsSource
   *            iterator of objects to store in the pack; the order of
   *            objects within each type is important, ordering by type is
   *            not needed; allowed types for objects are
   *            {@link Constants#OBJ_COMMIT}, {@link Constants#OBJ_TREE},
   *            {@link Constants#OBJ_BLOB} and {@link Constants#OBJ_TAG};
   *            objects returned by the iterator may later be reused by the
   *            caller, as object id and type are internally copied in each
   *            iteration.
   * @throws IOException
   *             when some I/O problem occurs while reading objects.
   */
  public void preparePack(final Iterator<RevObject> objectsSource)
      throws IOException {
    while (objectsSource.hasNext()) {
      addObject(objectsSource.next());
    }
  }
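
  /*
   * A minimal sketch of driving this iterator form with a caller-ordered
   * list (commitId and treeId are hypothetical placeholders; the ordering
   * rules described above remain the caller's responsibility):
   *
   *   RevWalk rw = new RevWalk(reader);
   *   List<RevObject> objs = new ArrayList<RevObject>();
   *   objs.add(rw.parseCommit(commitId));
   *   objs.add(rw.parseTree(treeId));
   *   writer.preparePack(objs.iterator());
   */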

  /**
   * Prepare the list of objects to be written to the pack stream.
   * <p>
   * Based on these two sets, the set of objects to put in the pack file is
   * computed: it consists of all objects reachable from the interesting
   * objects (their ancestors), except the uninteresting objects and their
   * ancestors. This method uses the {@link ObjectWalk} class extensively to
   * find the appropriate set of output objects and their optimal order in
   * the output pack. The order is consistent with the general git in-pack
   * rules: sort by object type, recency, path and delta-base first.
   * </p>
   *
   * @param countingMonitor
   *            progress during object enumeration.
   * @param interestingObjects
   *            collection of objects to be marked as interesting (start
   *            points of graph traversal).
   * @param uninterestingObjects
   *            collection of objects to be marked as uninteresting (end
   *            points of graph traversal).
   * @throws IOException
   *             when some I/O problem occurs while reading objects.
   */
  public void preparePack(ProgressMonitor countingMonitor,
      final Collection<? extends ObjectId> interestingObjects,
      final Collection<? extends ObjectId> uninterestingObjects)
      throws IOException {
    if (countingMonitor == null)
      countingMonitor = NullProgressMonitor.INSTANCE;
    findObjectsToPack(countingMonitor, interestingObjects,
        uninterestingObjects);
  }

  /**
   * Determine if the pack file will contain the requested object.
   *
   * @param id
   *            the object to test the existence of.
   * @return true if the object will appear in the output pack file.
   * @throws IOException
   *             a cached pack cannot be examined.
   */
  public boolean willInclude(final AnyObjectId id) throws IOException {
    ObjectToPack obj = objectsMap.get(id);
    if (obj != null && !obj.isEdge())
      return true;

    Set<ObjectId> toFind = Collections.singleton(id.toObjectId());
    for (CachedPack pack : cachedPacks) {
      if (pack.hasObject(toFind).contains(id))
        return true;
    }

    return false;
  }

  /**
   * Lookup the ObjectToPack object for a given ObjectId.
   *
   * @param id
   *            the object to find in the pack.
   * @return the object we are packing, or null.
   */
  public ObjectToPack get(AnyObjectId id) {
    ObjectToPack obj = objectsMap.get(id);
    return obj != null && !obj.isEdge() ? obj : null;
  }

  /**
   * Computes the SHA-1 of the lexicographically sorted object ids written in
   * this pack, as used to name a pack file in the repository.
   *
   * @return ObjectId representing the SHA-1 name of the pack that was
   *         created.
   */
  public ObjectId computeName() {
    final byte[] buf = new byte[Constants.OBJECT_ID_LENGTH];
    final MessageDigest md = Constants.newMessageDigest();
    for (ObjectToPack otp : sortByName()) {
      otp.copyRawTo(buf, 0);
      md.update(buf, 0, Constants.OBJECT_ID_LENGTH);
    }
    return ObjectId.fromRaw(md.digest());
  }
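
  /*
   * A minimal sketch of using the computed name to place the pack under
   * git's conventional "pack-<name>" naming scheme (packDir is a
   * hypothetical directory):
   *
   *   String base = "pack-" + writer.computeName().name();
   *   File packFile = new File(packDir, base + ".pack");
   *   File idxFile = new File(packDir, base + ".idx");
   */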

  /**
   * Create an index file to match the pack file just written.
   * <p>
   * This method can only be invoked after {@link #preparePack(Iterator)} or
   * {@link #preparePack(ProgressMonitor, Collection, Collection)} has been
   * invoked and completed successfully. Writing a corresponding index is an
   * optional feature that not all pack users may require.
   *
   * @param indexStream
   *            output for the index data. Caller is responsible for closing
   *            this stream.
   * @throws IOException
   *             the index data could not be written to the supplied stream.
   */
  public void writeIndex(final OutputStream indexStream) throws IOException {
    if (!cachedPacks.isEmpty())
      throw new IOException(JGitText.get().cachedPacksPreventsIndexCreation);

    final List<ObjectToPack> list = sortByName();
    final PackIndexWriter iw;
    int indexVersion = config.getIndexVersion();
    if (indexVersion <= 0)
      iw = PackIndexWriter.createOldestPossible(indexStream, list);
    else
      iw = PackIndexWriter.createVersion(indexStream, indexVersion);
    iw.write(list, packcsum);
  }
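
  /*
   * Note that writeIndex can only run after writePack: the index trailer
   * records the pack checksum (packcsum), which is captured only when the
   * pack stream is finished. A minimal ordering sketch (streams are
   * hypothetical placeholders):
   *
   *   writer.writePack(monitor, monitor, packOut);
   *   writer.writeIndex(idxOut);
   */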

  private List<ObjectToPack> sortByName() {
    if (sortedByName == null) {
      int cnt = 0;
      for (List<ObjectToPack> list : objectsLists)
        cnt += list.size();
      sortedByName = new ArrayList<ObjectToPack>(cnt);
      for (List<ObjectToPack> list : objectsLists) {
        for (ObjectToPack otp : list)
          sortedByName.add(otp);
      }
      Collections.sort(sortedByName);
    }
    return sortedByName;
  }

  /**
   * Write the prepared pack to the supplied stream.
   * <p>
   * First, this method collects and sorts the objects to pack, then delta
   * search is performed if configured, and finally the pack stream is
   * written.
   * </p>
   * <p>
   * The checksums (Adler32/CRC32) of all reused object data are computed and
   * validated against the existing checksums.
   * </p>
   *
   * @param compressMonitor
   *            progress monitor to report object compression work.
   * @param writeMonitor
   *            progress monitor to report the number of objects written.
   * @param packStream
   *            output stream of pack data. The stream should be buffered by
   *            the caller. The caller is responsible for closing the stream.
   * @throws IOException
   *             an error occurred reading a local object's data to include in
   *             the pack, or writing compressed object data to the output
   *             stream.
   */
  public void writePack(ProgressMonitor compressMonitor,
      ProgressMonitor writeMonitor, OutputStream packStream)
      throws IOException {
    if (compressMonitor == null)
      compressMonitor = NullProgressMonitor.INSTANCE;
    if (writeMonitor == null)
      writeMonitor = NullProgressMonitor.INSTANCE;

    if ((reuseDeltas || config.isReuseObjects()) && reuseSupport != null)
      searchForReuse(compressMonitor);
    if (config.isDeltaCompress())
      searchForDeltas(compressMonitor);

    final PackOutputStream out = new PackOutputStream(writeMonitor,
        packStream, this);

    long objCnt = 0;
    for (List<ObjectToPack> list : objectsLists)
      objCnt += list.size();
    for (CachedPack pack : cachedPacks)
      objCnt += pack.getObjectCount();
    stats.totalObjects = objCnt;

    writeMonitor.beginTask(JGitText.get().writingObjects, (int) objCnt);
    out.writeFileHeader(PACK_VERSION_GENERATED, objCnt);
    out.flush();
    writeObjects(out);
    for (CachedPack pack : cachedPacks) {
      stats.reusedObjects += pack.getObjectCount();
      reuseSupport.copyPackAsIs(out, pack);
    }
    writeChecksum(out);

    reader.release();
    writeMonitor.endTask();
  }
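
  /*
   * A minimal sketch of streaming the pack to a file (hypothetical path;
   * per the javadoc above, the caller buffers and closes the stream):
   *
   *   OutputStream os = new BufferedOutputStream(
   *       new FileOutputStream("/tmp/out.pack"));
   *   try {
   *     writer.writePack(monitor, monitor, os);
   *   } finally {
   *     os.close();
   *   }
   */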

  /**
   * @return description of what this PackWriter did in order to create the
   *         final pack stream. The object is only available to callers after
   *         {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}
   *         has completed.
   */
  public Statistics getStatistics() {
    return stats;
  }
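
  /*
   * A minimal sketch of reporting what the writer did once writePack has
   * completed (output target is the caller's choice):
   *
   *   PackWriter.Statistics s = writer.getStatistics();
   *   System.out.println(s.getMessage());
   */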

  /** Release all resources used by this writer. */
  public void release() {
    reader.release();
    if (myDeflater != null) {
      myDeflater.end();
      myDeflater = null;
    }
  }

  private void searchForReuse(ProgressMonitor monitor) throws IOException {
    int cnt = 0;
    for (List<ObjectToPack> list : objectsLists)
      cnt += list.size();
    monitor.beginTask(JGitText.get().searchForReuse, cnt);
    for (List<ObjectToPack> list : objectsLists)
      reuseSupport.selectObjectRepresentation(this, monitor, list);
    monitor.endTask();
  }

  private void searchForDeltas(ProgressMonitor monitor)
      throws MissingObjectException, IncorrectObjectTypeException,
      IOException {
    // Commits and annotated tags tend to have too many differences to
    // really benefit from delta compression. Consequently just don't
    // bother examining those types here.
    //
    ObjectToPack[] list = new ObjectToPack[
          objectsLists[Constants.OBJ_TREE].size()
        + objectsLists[Constants.OBJ_BLOB].size()
        + edgeObjects.size()];
    int cnt = 0;
    cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_TREE);
    cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_BLOB);
    if (cnt == 0)
      return;
    int nonEdgeCnt = cnt;

    // Queue up any edge objects that we might delta against.  We won't
    // be sending these as we assume the other side has them, but we need
    // them in the search phase below.
    //
    for (ObjectToPack eo : edgeObjects) {
      eo.setWeight(0);
      list[cnt++] = eo;
    }

    // Compute the sizes of the objects so we can do a proper sort.
    // We let the reader skip missing objects if it chooses. For
    // some readers this can be a huge win. We detect missing objects
    // by having set the weights above to 0 and allowing the delta
    // search code to discover the missing object and skip over it, or
    // abort with an exception if we actually had to have it.
    //
    monitor.beginTask(JGitText.get().searchForSizes, cnt);
    AsyncObjectSizeQueue<ObjectToPack> sizeQueue = reader.getObjectSize(
        Arrays.<ObjectToPack> asList(list).subList(0, cnt), false);
    try {
      final long limit = config.getBigFileThreshold();
      for (;;) {
        monitor.update(1);

        try {
          if (!sizeQueue.next())
            break;
        } catch (MissingObjectException notFound) {
          if (ignoreMissingUninteresting) {
            ObjectToPack otp = sizeQueue.getCurrent();
            if (otp != null && otp.isEdge()) {
              otp.setDoNotDelta(true);
              continue;
            }

            otp = objectsMap.get(notFound.getObjectId());
            if (otp != null && otp.isEdge()) {
              otp.setDoNotDelta(true);
              continue;
            }
          }
          throw notFound;
        }

        ObjectToPack otp = sizeQueue.getCurrent();
        if (otp == null)
          otp = objectsMap.get(sizeQueue.getObjectId());

        long sz = sizeQueue.getSize();
        if (limit <= sz || Integer.MAX_VALUE <= sz)
          otp.setDoNotDelta(true); // too big, avoid costly files

        else if (sz <= DeltaIndex.BLKSZ)
          otp.setDoNotDelta(true); // too small, won't work

        else
          otp.setWeight((int) sz);
      }
    } finally {
      sizeQueue.release();
    }
    monitor.endTask();

    // Sort the objects by path hash so like files are near each other,
    // and then by size descending so that bigger files are first. This
    // applies "Linus' Law" which states that newer files tend to be the
    // bigger ones, because source files grow and hardly ever shrink.
    //
    Arrays.sort(list, 0, cnt, new Comparator<ObjectToPack>() {
      public int compare(ObjectToPack a, ObjectToPack b) {
        int cmp = (a.isDoNotDelta() ? 1 : 0)
            - (b.isDoNotDelta() ? 1 : 0);
        if (cmp != 0)
          return cmp;

        cmp = a.getType() - b.getType();
        if (cmp != 0)
          return cmp;

        cmp = (a.getPathHash() >>> 1) - (b.getPathHash() >>> 1);
        if (cmp != 0)
          return cmp;

        cmp = (a.getPathHash() & 1) - (b.getPathHash() & 1);
        if (cmp != 0)
          return cmp;

        cmp = (a.isEdge() ? 0 : 1) - (b.isEdge() ? 0 : 1);
        if (cmp != 0)
          return cmp;

        return b.getWeight() - a.getWeight();
      }
    });

    // Above we stored the objects we cannot delta onto the end.
    // Remove them from the list so we don't waste time on them.
    while (0 < cnt && list[cnt - 1].isDoNotDelta()) {
      if (!list[cnt - 1].isEdge())
        nonEdgeCnt--;
      cnt--;
    }
    if (cnt == 0)
      return;

    monitor.beginTask(JGitText.get().compressingObjects, nonEdgeCnt);
    searchForDeltas(monitor, list, cnt);
    monitor.endTask();
  }

  private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type) {
    for (ObjectToPack otp : objectsLists[type]) {
      if (otp.isDoNotDelta()) // delta is disabled for this path
        continue;
      if (otp.isDeltaRepresentation()) // already reusing a delta
        continue;
      otp.setWeight(0);
      list[cnt++] = otp;
    }
    return cnt;
  }

  private void searchForDeltas(final ProgressMonitor monitor,
      final ObjectToPack[] list, final int cnt)
      throws MissingObjectException, IncorrectObjectTypeException,
      LargeObjectException, IOException {
    int threads = config.getThreads();
    if (threads == 0)
      threads = Runtime.getRuntime().availableProcessors();

    if (threads <= 1 || cnt <= 2 * config.getDeltaSearchWindowSize()) {
      DeltaCache dc = new DeltaCache(config);
      DeltaWindow dw = new DeltaWindow(config, dc, reader);
      dw.search(monitor, list, 0, cnt);
      return;
    }

    final DeltaCache dc = new ThreadSafeDeltaCache(config);
    final ThreadSafeProgressMonitor pm = new ThreadSafeProgressMonitor(monitor);

    // Guess at the size of batch we want. Because we don't really
    // have a way for a thread to steal work from another thread if
    // it ends early, we over partition slightly so the work units
    // are a bit smaller.
    //
    int estSize = cnt / (threads * 2);
    if (estSize < 2 * config.getDeltaSearchWindowSize())
      estSize = 2 * config.getDeltaSearchWindowSize();
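
    // Worked example (illustrative numbers only): with cnt = 100,000
    // objects and threads = 4, estSize = 100,000 / 8 = 12,500. With the
    // default delta search window of 10, the floor of 2 * 10 = 20 does
    // not apply, so roughly eight batches of ~12,500 objects each are
    // scheduled below.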

    final List<DeltaTask> myTasks = new ArrayList<DeltaTask>(threads * 2);
    for (int i = 0; i < cnt;) {
      final int start = i;
      final int batchSize;

      if (cnt - i < estSize) {
        // If we don't have enough to fill the remaining block,
        // schedule what is left over as a single block.
        //
        batchSize = cnt - i;
      } else {
        // Try to split the block at the end of a path.
        //
        int end = start + estSize;
        while (end < cnt) {
          ObjectToPack a = list[end - 1];
          ObjectToPack b = list[end];
          if (a.getPathHash() == b.getPathHash())
            end++;
          else
            break;
        }
        batchSize = end - start;
      }
      i += batchSize;
      myTasks.add(new DeltaTask(config, reader, dc, pm, batchSize, start, list));
    }
    pm.startWorkers(myTasks.size());

    final Executor executor = config.getExecutor();
    final List<Throwable> errors = Collections
        .synchronizedList(new ArrayList<Throwable>());
    if (executor instanceof ExecutorService) {
      // Caller supplied us a service, use it directly.
      //
      runTasks((ExecutorService) executor, pm, myTasks, errors);

    } else if (executor == null) {
      // Caller didn't give us a way to run the tasks, spawn up a
      // temporary thread pool and make sure it tears down cleanly.
      //
      ExecutorService pool = Executors.newFixedThreadPool(threads);
      try {
        runTasks(pool, pm, myTasks, errors);
      } finally {
        pool.shutdown();
        for (;;) {
          try {
            if (pool.awaitTermination(60, TimeUnit.SECONDS))
              break;
          } catch (InterruptedException e) {
            throw new IOException(
                JGitText.get().packingCancelledDuringObjectsWriting);
          }
        }
      }
    } else {
      // The caller gave us an executor, but it might not do
      // asynchronous execution.  Wrap everything and hope it
      // can schedule these for us.
      //
      for (final DeltaTask task : myTasks) {
        executor.execute(new Runnable() {
          public void run() {
            try {
              task.call();
            } catch (Throwable failure) {
              errors.add(failure);
            }
          }
        });
      }
      try {
        pm.waitForCompletion();
      } catch (InterruptedException ie) {
        // We can't abort the other tasks as we have no handle.
        // Cross our fingers and just break out anyway.
        //
        throw new IOException(
            JGitText.get().packingCancelledDuringObjectsWriting);
      }
    }

    // If any task threw an error, try to report it back as
    // though we weren't using a threaded search algorithm.
    //
    if (!errors.isEmpty()) {
      Throwable err = errors.get(0);
      if (err instanceof Error)
        throw (Error) err;
      if (err instanceof RuntimeException)
        throw (RuntimeException) err;
      if (err instanceof IOException)
        throw (IOException) err;

      IOException fail = new IOException(err.getMessage());
      fail.initCause(err);
      throw fail;
    }
  }
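
  /*
   * A minimal sketch of supplying a caller-managed thread pool, assuming
   * PackConfig exposes a setter matching the getExecutor() call used above
   * (a hypothetical but conventional pairing):
   *
   *   ExecutorService pool = Executors.newFixedThreadPool(4);
   *   packConfig.setExecutor(pool);
   *   // ... construct PackWriter(packConfig, reader) and write packs ...
   *   pool.shutdown();
   */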

  private void runTasks(ExecutorService pool, ThreadSafeProgressMonitor pm,
      List<DeltaTask> tasks, List<Throwable> errors) throws IOException {
    List<Future<?>> futures = new ArrayList<Future<?>>(tasks.size());
    for (DeltaTask task : tasks)
      futures.add(pool.submit(task));

    try {
      pm.waitForCompletion();
      for (Future<?> f : futures) {
        try {
          f.get();
        } catch (ExecutionException failed) {
          errors.add(failed.getCause());
        }
      }
    } catch (InterruptedException ie) {
      for (Future<?> f : futures)
        f.cancel(true);
      throw new IOException(
          JGitText.get().packingCancelledDuringObjectsWriting);
    }
  }

  private void writeObjects(PackOutputStream out) throws IOException {
    if (reuseSupport != null) {
      for (List<ObjectToPack> list : objectsLists)
        reuseSupport.writeObjects(out, list);
    } else {
      for (List<ObjectToPack> list : objectsLists) {
        for (ObjectToPack otp : list)
          out.writeObject(otp);
      }
    }
  }

  void writeObject(PackOutputStream out, ObjectToPack otp) throws IOException {
    if (otp.isWritten())
      return; // We shouldn't be here.

    otp.markWantWrite();
    if (otp.isDeltaRepresentation())
      writeBaseFirst(out, otp);

    out.resetCRC32();
    otp.setOffset(out.length());

    while (otp.isReuseAsIs()) {
      try {
        reuseSupport.copyObjectAsIs(out, otp);
        out.endObject();
        otp.setCRC(out.getCRC32());
        stats.reusedObjects++;
        if (otp.isDeltaRepresentation())
          stats.reusedDeltas++;
        return;
      } catch (StoredObjectRepresentationNotAvailableException gone) {
        if (otp.getOffset() == out.length()) {
          redoSearchForReuse(otp);
          continue;
        } else {
          // Object writing already started, we cannot recover.
          //
          CorruptObjectException coe;
          coe = new CorruptObjectException(otp, "");
          coe.initCause(gone);
          throw coe;
        }
      }
    }

    // If we reached here, reuse wasn't possible.
    //
    if (otp.isDeltaRepresentation())
      writeDeltaObjectDeflate(out, otp);
    else
      writeWholeObjectDeflate(out, otp);
    out.endObject();
    otp.setCRC(out.getCRC32());
  }

  private void writeBaseFirst(PackOutputStream out, final ObjectToPack otp)
      throws IOException {
    ObjectToPack baseInPack = otp.getDeltaBase();
    if (baseInPack != null) {
      if (!baseInPack.isWritten()) {
        if (baseInPack.wantWrite()) {
          // There is a cycle. Our caller is trying to write the
          // object we want as a base, and called us. Turn off
          // delta reuse so we can find another form.
          //
          reuseDeltas = false;
          redoSearchForReuse(otp);
          reuseDeltas = true;
        } else {
          writeObject(out, baseInPack);
        }
      }
    } else if (!thin) {
      // This should never occur: the base isn't in the pack and
      // the pack isn't allowed to reference bases outside of it.
      // Write the object in whole form, even if that is slow.
      //
      otp.clearDeltaBase();
      otp.clearReuseAsIs();
    }
  }

  private void redoSearchForReuse(final ObjectToPack otp) throws IOException,
      MissingObjectException {
    otp.clearDeltaBase();
    otp.clearReuseAsIs();
    reuseSupport.selectObjectRepresentation(this,
        NullProgressMonitor.INSTANCE, Collections.singleton(otp));
  }

  private void writeWholeObjectDeflate(PackOutputStream out,
      final ObjectToPack otp) throws IOException {
    final Deflater deflater = deflater();
    final ObjectLoader ldr = reader.open(otp, otp.getType());

    out.writeHeader(otp, ldr.getSize());

    deflater.reset();
    DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater);
    ldr.copyTo(dst);
    dst.finish();
  }

  private void writeDeltaObjectDeflate(PackOutputStream out,
      final ObjectToPack otp) throws IOException {
    DeltaCache.Ref ref = otp.popCachedDelta();
    if (ref != null) {
      byte[] zbuf = ref.get();
      if (zbuf != null) {
        out.writeHeader(otp, otp.getCachedSize());
        out.write(zbuf);
        return;
      }
    }

    TemporaryBuffer.Heap delta = delta(otp);
    out.writeHeader(otp, delta.length());

    Deflater deflater = deflater();
    deflater.reset();
    DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater);
    delta.writeTo(dst, null);
    dst.finish();
    stats.totalDeltas++;
  }

  private TemporaryBuffer.Heap delta(final ObjectToPack otp)
      throws IOException {
    DeltaIndex index = new DeltaIndex(buffer(otp.getDeltaBaseId()));
    byte[] res = buffer(otp);

    // We never would have proposed this pair if the delta were
    // larger than the unpacked version of the object. So using its
    // size as our buffer limit is valid: we will never reach it.
    //
    TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(res.length);
    index.encode(delta, res);
    return delta;
  }

  private byte[] buffer(AnyObjectId objId) throws IOException {
    return buffer(config, reader, objId);
  }

  static byte[] buffer(PackConfig config, ObjectReader or, AnyObjectId objId)
      throws IOException {
    // PackWriter should have already pruned objects that
    // are above the big file threshold, so our chances of
    // the object being below it are very good. We really
    // shouldn't be here, unless the implementation is odd.

    return or.open(objId).getCachedBytes(config.getBigFileThreshold());
  }

  private Deflater deflater() {
    if (myDeflater == null)
      myDeflater = new Deflater(config.getCompressionLevel());
    return myDeflater;
  }

  private void writeChecksum(PackOutputStream out) throws IOException {
    packcsum = out.getDigest();
    out.write(packcsum);
  }

  private void findObjectsToPack(final ProgressMonitor countingMonitor,
      final Collection<? extends ObjectId> want,
      Collection<? extends ObjectId> have)
      throws MissingObjectException, IOException,
      IncorrectObjectTypeException {
    countingMonitor.beginTask(JGitText.get().countingObjects,
        ProgressMonitor.UNKNOWN);

    if (have == null)
      have = Collections.emptySet();

    List<ObjectId> all = new ArrayList<ObjectId>(want.size() + have.size());
    all.addAll(want);
    all.addAll(have);

    final Map<ObjectId, CachedPack> tipToPack = new HashMap<ObjectId, CachedPack>();
    final ObjectWalk walker = new ObjectWalk(reader);
    final RevFlag inCachedPack = walker.newFlag("inCachedPack");
    final RevFlag include = walker.newFlag("include");

    final RevFlagSet keepOnRestart = new RevFlagSet();
    keepOnRestart.add(inCachedPack);

    walker.setRetainBody(false);
    walker.carry(include);

    int haveEst = have.size();
    if (have.isEmpty()) {
      walker.sort(RevSort.COMMIT_TIME_DESC);
      if (useCachedPacks && reuseSupport != null) {
        for (CachedPack pack : reuseSupport.getCachedPacks()) {
          for (ObjectId id : pack.getTips()) {
            tipToPack.put(id, pack);
            all.add(id);
          }
        }
        haveEst += tipToPack.size();
      }
    } else {
      walker.sort(RevSort.TOPO);
      if (thin)
        walker.sort(RevSort.BOUNDARY, true);
    }

    List<RevObject> wantObjs = new ArrayList<RevObject>(want.size());
    List<RevObject> haveObjs = new ArrayList<RevObject>(haveEst);

    AsyncRevObjectQueue q = walker.parseAny(all, true);
    try {
      for (;;) {
        try {
          RevObject o = q.next();
          if (o == null)
            break;

          if (tipToPack.containsKey(o))
            o.add(inCachedPack);

          if (have.contains(o)) {
            haveObjs.add(o);
            walker.markUninteresting(o);
          } else if (want.contains(o)) {
            o.add(include);
            wantObjs.add(o);
            walker.markStart(o);
          }
        } catch (MissingObjectException e) {
          if (ignoreMissingUninteresting
              && have.contains(e.getObjectId()))
            continue;
          throw e;
        }
      }
    } finally {
      q.release();
    }

    int typesToPrune = 0;
    final int maxBases = config.getDeltaSearchWindowSize();
    Set<RevTree> baseTrees = new HashSet<RevTree>();
    RevObject o;
    while ((o = walker.next()) != null) {
      if (o.has(inCachedPack)) {
        CachedPack pack = tipToPack.get(o);
        if (includesAllTips(pack, include, walker)) {
          useCachedPack(walker, keepOnRestart, //
              wantObjs, haveObjs, pack);

          countingMonitor.endTask();
          countingMonitor.beginTask(JGitText.get().countingObjects,
              ProgressMonitor.UNKNOWN);
          continue;
        }
      }

      if (o.has(RevFlag.UNINTERESTING)) {
        if (baseTrees.size() <= maxBases)
          baseTrees.add(((RevCommit) o).getTree());
        continue;
      }

      addObject(o, 0);
      countingMonitor.update(1);
    }

    for (CachedPack p : cachedPacks) {
      for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_COMMIT])) {
        if (baseTrees.size() <= maxBases)
          baseTrees.add(walker.lookupCommit(d).getTree());
        objectsMap.get(d).setEdge();
        typesToPrune |= 1 << Constants.OBJ_COMMIT;
      }
    }

    BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, //
        objectsMap, edgeObjects, reader);
    while ((o = walker.nextObject()) != null) {
      if (o.has(RevFlag.UNINTERESTING))
        continue;

      int pathHash = walker.getPathHashCode();
      byte[] pathBuf = walker.getPathBuffer();
      int pathLen = walker.getPathLength();

      bases.addBase(o.getType(), pathBuf, pathLen, pathHash);
      addObject(o, pathHash);
      countingMonitor.update(1);
    }

    for (CachedPack p : cachedPacks) {
      for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TREE])) {
        objectsMap.get(d).setEdge();
        typesToPrune |= 1 << Constants.OBJ_TREE;
      }
      for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_BLOB])) {
        objectsMap.get(d).setEdge();
        typesToPrune |= 1 << Constants.OBJ_BLOB;
      }
      for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TAG])) {
        objectsMap.get(d).setEdge();
        typesToPrune |= 1 << Constants.OBJ_TAG;
      }
    }

    if (typesToPrune != 0) {
      pruneObjectList(typesToPrune, Constants.OBJ_COMMIT);
      pruneObjectList(typesToPrune, Constants.OBJ_TREE);
      pruneObjectList(typesToPrune, Constants.OBJ_BLOB);
      pruneObjectList(typesToPrune, Constants.OBJ_TAG);
    }

    for (CachedPack pack : cachedPacks)
      countingMonitor.update((int) pack.getObjectCount());
    countingMonitor.endTask();
  }

  private void pruneObjectList(int typesToPrune, int typeCode) {
    if ((typesToPrune & (1 << typeCode)) == 0)
      return;

    final List<ObjectToPack> list = objectsLists[typeCode];
    final int size = list.size();
    int src = 0;
    int dst = 0;

    for (; src < size; src++) {
      ObjectToPack obj = list.get(src);
      if (obj.isEdge())
        continue;
      if (dst != src)
        list.set(dst, obj);
      dst++;
    }

    while (dst < list.size())
      list.remove(dst);
  }

  private void useCachedPack(ObjectWalk walker, RevFlagSet keepOnRestart,
      List<RevObject> wantObj, List<RevObject> baseObj, CachedPack pack)
      throws MissingObjectException, IncorrectObjectTypeException,
      IOException {
    cachedPacks.add(pack);
    for (ObjectId id : pack.getTips())
      baseObj.add(walker.lookupOrNull(id));

    objectsMap.clear();
    objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();

    setThin(true);
    walker.resetRetain(keepOnRestart);
    walker.sort(RevSort.TOPO);
    walker.sort(RevSort.BOUNDARY, true);

    for (RevObject id : wantObj)
      walker.markStart(id);
    for (RevObject id : baseObj)
      walker.markUninteresting(id);
  }

  private static boolean includesAllTips(CachedPack pack, RevFlag include,
      ObjectWalk walker) {
    for (ObjectId id : pack.getTips()) {
      if (!walker.lookupOrNull(id).has(include))
        return false;
    }
    return true;
  }

  /**
   * Include one object in the output file.
   * <p>
   * Objects are written in the order they are added. If the same object is
   * added twice, it may be written twice, creating a larger than necessary
   * file.
   *
   * @param object
   *            the object to add.
   * @throws IncorrectObjectTypeException
   *             the object is an unsupported type.
   */
  public void addObject(final RevObject object)
      throws IncorrectObjectTypeException {
    addObject(object, 0);
  }

  private void addObject(final RevObject object, final int pathHashCode)
      throws IncorrectObjectTypeException {
    final ObjectToPack otp;
    if (reuseSupport != null)
      otp = reuseSupport.newObjectToPack(object);
    else
      otp = new ObjectToPack(object);
    otp.setPathHash(pathHashCode);

    try {
      objectsLists[object.getType()].add(otp);
    } catch (ArrayIndexOutOfBoundsException x) {
      throw new IncorrectObjectTypeException(object,
          JGitText.get().incorrectObjectType_COMMITnorTREEnorBLOBnorTAG);
    } catch (UnsupportedOperationException x) {
      // index pointing to "dummy" empty list
      throw new IncorrectObjectTypeException(object,
          JGitText.get().incorrectObjectType_COMMITnorTREEnorBLOBnorTAG);
    }
    objectsMap.add(otp);
  }

  /**
   * Select an object representation for this writer.
   * <p>
   * An {@link ObjectReader} implementation should invoke this method once for
   * each representation available for an object, to allow the writer to find
   * the most suitable one for the output.
   *
   * @param otp
   *            the object being packed.
   * @param next
   *            the next available representation from the repository.
   */
  public void select(ObjectToPack otp, StoredObjectRepresentation next) {
    int nFmt = next.getFormat();
    int nWeight;
    if (otp.isReuseAsIs()) {
      // We've already chosen to reuse a packed form, if next
      // cannot beat that break out early.
      //
      if (PACK_WHOLE < nFmt)
        return; // next isn't packed
      else if (PACK_DELTA < nFmt && otp.isDeltaRepresentation())
        return; // next isn't a delta, but we are

      nWeight = next.getWeight();
      if (otp.getWeight() <= nWeight)
        return; // next would be bigger
    } else
      nWeight = next.getWeight();

    if (nFmt == PACK_DELTA && reuseDeltas) {
      ObjectId baseId = next.getDeltaBase();
      ObjectToPack ptr = objectsMap.get(baseId);
      if (ptr != null && !ptr.isEdge()) {
        otp.setDeltaBase(ptr);
        otp.setReuseAsIs();
        otp.setWeight(nWeight);
      } else if (thin && ptr != null && ptr.isEdge()) {
        otp.setDeltaBase(baseId);
        otp.setReuseAsIs();
        otp.setWeight(nWeight);
      } else {
        otp.clearDeltaBase();
        otp.clearReuseAsIs();
      }
    } else if (nFmt == PACK_WHOLE && config.isReuseObjects()) {
      otp.clearDeltaBase();
      otp.setReuseAsIs();
      otp.setWeight(nWeight);
    } else {
      otp.clearDeltaBase();
      otp.clearReuseAsIs();
    }

    otp.select(next);
  }

  /** Summary of how PackWriter created the pack. */
  public static class Statistics {
    long totalObjects;

    long totalDeltas;

    long reusedObjects;

    long reusedDeltas;

    /** @return formatted message string for display to clients. */
    public String getMessage() {
      return MessageFormat.format(JGitText.get().packWriterStatistics, //
          totalObjects, totalDeltas, //
          reusedObjects, reusedDeltas);
    }
  }
}