Package com.mapr.synth.samplers

Source Code of com.mapr.synth.samplers.FileSampler

package com.mapr.synth.samplers;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.io.*;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;

/**
* Samples lines from a file
*
* Thread safe for sampling
*/
public class FileSampler extends FieldSampler {
    private JsonNode data;
    private IntegerSampler index;
    private int skew = Integer.MAX_VALUE;

    public FileSampler() {
    }

    public void setFile(String lookup) throws IOException {
        if (lookup.matches(".*\\.json")) {
            readJsonData(Files.newInputStreamSupplier(new File(lookup)));
        } else {
            List<String> lines = Files.readLines(new File(lookup), Charsets.UTF_8);
            readDelimitedData(lookup, lines);
        }

        setupIndex();
    }

    private void setupIndex() {
        index = new IntegerSampler();
        index.setMin(0);
        index.setMax(data.size());
        if (skew != Integer.MAX_VALUE) {
            index.setSkew(skew);
        }
    }

    @SuppressWarnings({"UnusedDeclaration"})
    public void setResource(String lookup) throws IOException {
        if (lookup.matches(".*\\.json")) {
            readJsonData(Resources.newInputStreamSupplier(Resources.getResource(lookup)));
        } else {
            List<String> lines = Resources.readLines(Resources.getResource(lookup), Charsets.UTF_8);
            readDelimitedData(lookup, lines);
        }

        setupIndex();
    }

    private void readDelimitedData(String lookup, List<String> lines) {
        Splitter splitter;
        if (lookup.matches(".*\\.csv")) {
            splitter = Splitter.on(",");
        } else if (lookup.matches(".*\\.tsv")) {
            splitter = Splitter.on("\t");
        } else {
            throw new IllegalArgumentException("Must have file with .csv, .tsv or .json suffix");
        }

        List<String> names = Lists.newArrayList(splitter.split(lines.get(0)));
        JsonNodeFactory nf = JsonNodeFactory.withExactBigDecimals(false);
        ArrayNode localData = nf.arrayNode();
        for (String line : lines.subList(1, lines.size())) {
            ObjectNode r = nf.objectNode();
            List<String> fields = Lists.newArrayList(splitter.split(line));
            Preconditions.checkState(names.size() == fields.size(), "Wrong number of fields, expected ", names.size(), fields.size());
            Iterator<String> ix = names.iterator();
            for (String field : fields) {
                r.put(ix.next(), field);
            }
            localData.add(r);
        }
        data = localData;
    }

    private void readJsonData(InputSupplier<? extends InputStream> input) throws IOException {
        ObjectMapper om = new ObjectMapper();
        try (InputStream in = input.getInput()) {
            data = om.readTree(in);
        }
    }

    /**
     * Sets the amount of skew.  Skew is added by taking the min of several samples.
     * Setting power = 0 gives uniform distribution, setting it to 5 gives a very
     * heavily skewed distribution.
     * <p/>
     * If you set power to a negative number, the skew is reversed so large values
     * are preferred.
     *
     * @param skew Controls how skewed the distribution is.
     */
    @SuppressWarnings({"UnusedDeclaration"})
    public void setSkew(int skew) {
        if (index != null) {
            index.setSkew(skew);
        } else {
            this.skew = skew;
        }
    }

    @Override
    public JsonNode sample() {
      synchronized (this) {
        return data.get(index.sample().asInt());
      }
    }
}
TOP

Related Classes of com.mapr.synth.samplers.FileSampler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.