Package io.fathom.cloud.storage

Source Code of io.fathom.cloud.storage.CompactOperation

package io.fathom.cloud.storage;

import io.fathom.cloud.blobs.BlobData;
import io.fathom.cloud.blobs.BlobStore;
import io.fathom.cloud.blobs.TempFile;
import io.fathom.cloud.io.HashingOutputStream;
import io.fathom.cloud.protobuf.FileModel.FileRange;
import io.fathom.cloud.protobuf.FileModel.FileData.Builder;
import io.fathom.cloud.server.model.Project;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.protobuf.ByteString;

/**
* We create a CompactOperation so that we can (in future) remember that we've
* combined blocks
*
*/
public class CompactOperation {

    private static final Logger log = LoggerFactory.getLogger(CompactOperation.class);

    private static final boolean PARANOID = true;

    final Project project;
    final String bucketName;
    final String name;

    public CompactOperation(Project project, String bucketName, String name) {
        super();
        this.project = project;
        this.bucketName = bucketName;
        this.name = name;
    }

    public Project getProject() {
        return project;
    }

    public String getBucketName() {
        return bucketName;
    }

    public String getName() {
        return name;
    }

    public boolean compact(FileServiceInternal fs, Builder file) throws IOException {
        int n = file.getRangesCount();

        // Quick sanity check
        if (n < 2) {
            return false;
        }

        int bestStart = 0;
        int bestEnd = -1;
        float bestScore = Float.MAX_VALUE;

        // Try to bring it down to a small number of segments; we don't want to
        // immediately re-compact
        int minLength = Math.max(n - FileServiceImpl.COMPACTION_THRESHOLD / 2, 2);
        log.debug("Chose minimum merge length of {}", minLength);

        for (int start = 0; start < n; start++) {
            long maxSize = 0;
            long minSize = Long.MAX_VALUE;
            long totalSize = 0;

            for (int end = start; end < n; end++) {
                FileRange range = file.getRanges(end);

                long size = range.getEnd() - range.getStart();
                maxSize = Math.max(maxSize, size);
                minSize = Math.min(minSize, size);
                totalSize += size;
                int len = end - start;

                if (len >= minLength) {
                    // This is loosely/directly based on Lucene's tiered merge
                    // policy

                    // final float skew = ((float) maxSize) / ((float) minSize);
                    final float skew = ((float) maxSize) / ((float) totalSize);

                    // Strongly favor merges with less skew (smaller
                    // mergeScore is better):
                    float mergeScore = skew;

                    // Gently favor smaller merges over bigger ones. We
                    // don't want to make this exponent too large else we
                    // can end up doing poor merges of small segments in
                    // order to avoid the large merges:
                    mergeScore *= Math.pow(totalSize, 0.05);

                    if (mergeScore < bestScore) {
                        bestScore = mergeScore;
                        bestStart = start;
                        bestEnd = end;
                    }

                    // log.debug("{} - {} => {}", new Object[] { start, end,
                    // mergeScore });
                }

            }
        }

        if (bestEnd == -1) {
            log.warn("Unable to find any merges!");
            return false;
        }

        for (int i = 0; i < n; i++) {
            FileRange range = file.getRanges(i);
            long len = range.getEnd() - range.getStart();
            log.info("{} {}", i, len);
        }
        log.info("Chose merge {}-{}", bestStart, bestEnd);

        BlobStore blobStore = fs.getBlobStore(project);
        List<FileRange> newRanges = Lists.newArrayList();

        for (int i = 0; i < bestStart; i++) {
            newRanges.add(file.getRanges(i));
        }

        try (TempFile tempFile = TempFile.create()) {
            FileRange.Builder c = FileRange.newBuilder();

            Hasher md5 = Hashing.md5().newHasher();
            try (OutputStream fos = new HashingOutputStream(new FileOutputStream(tempFile.getFile()), md5)) {
                for (int i = bestStart; i < bestEnd; i++) {
                    FileRange range = file.getRanges(i);
                    if (i == bestStart) {
                        c.setStart(range.getStart());
                    }
                    if (i == (bestEnd - 1)) {
                        c.setEnd(range.getEnd());
                    }

                    final BlobData blob = blobStore.find(range.getContentKey());
                    if (blob == null) {
                        throw new IOException("Unable to open storage for range: " + range);
                    }

                    blob.copyTo(fos);
                }
            }

            if (!c.hasStart() || !c.hasEnd()) {
                throw new IllegalStateException();
            }

            ByteString hash = ByteString.copyFrom(md5.hash().asBytes());
            BlobData blobData = new BlobData(tempFile.getFile(), hash);
            blobStore.put(blobData);

            if (PARANOID) {
                BlobData blob = blobStore.find(blobData.getHash());
                if (blob == null) {
                    throw new IllegalStateException();
                }
                ByteString checkHash = ByteString.copyFrom(blob.hash(Hashing.md5()).asBytes());
                if (!blobData.getHash().equals(checkHash)) {
                    log.warn("Hash mismatch: {} vs {}", blobData.getHash(), checkHash);
                    throw new IllegalStateException();
                }
            }
            c.setContentKey(blobData.getHash());

            newRanges.add(c.build());
        }

        for (int i = bestEnd; i < file.getRangesCount(); i++) {
            newRanges.add(file.getRanges(i));
        }

        file.clearRanges();
        file.addAllRanges(newRanges);

        if (PARANOID) {
            FileServiceImpl.sanityCheck(blobStore, file);
            log.debug("File before modification OK");
        }

        if (PARANOID) {
            FileServiceImpl.sanityCheck(blobStore, file);
            log.debug("File after modification OK");
        }

        // log.info("New file: {}", file.clone().build());

        return true;
    }

    public String getDebugPath() {
        String path = project.getId() + "/" + bucketName + "/" + name;
        return path;
    }

    @Override
    public String toString() {
        return "CompactOperation [path=" + getDebugPath() + "]";
    }

}
TOP

Related Classes of io.fathom.cloud.storage.CompactOperation

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.