Package com.digitalpebble.classification

Examples of com.digitalpebble.classification.FileTrainingCorpus


    // rewrite a raw file so that only a subset of the fields are kept

    public static void filterFields(File trainingCorpus, File newRawFile,
            int[] fieldsToKeep) throws IOException {
        FileTrainingCorpus ftc = new FileTrainingCorpus(trainingCorpus);
        Writer writer = new BufferedWriter(new FileWriter(newRawFile));
        Iterator<Document> iterator = ftc.iterator();
        while (iterator.hasNext()) {
            MultiFieldDocument doc = (MultiFieldDocument) iterator.next();
            String representation = doc.getStringSerialization(fieldsToKeep);
            writer.write(representation);
        }
View Full Code Here


    public static void dumpBestAttributes(String raw, String lexiconF)
            throws IOException {
        // load the corpus + the lexicon
        // load the lexicon and the raw file
        Lexicon lexicon = new Lexicon(lexiconF);
        FileTrainingCorpus corpus = new FileTrainingCorpus(new File(raw));
        AttributeScorer scorer = logLikelihoodAttributeScorer.getScorer(corpus,
                lexicon);
    }
View Full Code Here

        WeightingMethod method = WeightingMethod
                .methodFromString(weightingScheme);
        lexicon.setMethod(method);

        // get the raw file
        FileTrainingCorpus ftc = new FileTrainingCorpus(new File(raw));

        int keepNBestAttributes = Integer.parseInt(props.getProperty(
                "keepNBestAttributes", "-1"));

        if (keepNBestAttributes != -1) {
View Full Code Here

TOP

Related Classes of com.digitalpebble.classification.FileTrainingCorpus

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.