Package cc.mallet.pipe

Examples of cc.mallet.pipe.TokenSequence2FeatureSequence


                       double featureVectorSizePoissonLambda,
                       double classInstanceCountPoissonLambda,
                       String[] classNames)
  {
    this (new SerialPipes (new Pipe[]  {
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector (),
        new Target2Label()}));
    //classCentroidDistribution.print();
    Iterator<Instance> iter = new RandomTokenSequenceIterator (
        r, classCentroidDistribution,
View Full Code Here


    pipeList.add( new TokenSequenceRemoveStopwords(new File("stoplist/en.txt"), "UTF-8", false, false, false) );
    //add bigram words
    //pipeList.add(new TokenSequenceNGrams(new int[] {2} ));
       
    //convert to feature
    pipeList.add( new TokenSequence2FeatureSequence() );

    InstanceList instances = new InstanceList (new SerialPipes(pipeList));
    InstanceList testInstances = new InstanceList (instances.getPipe());
       
    Reader insfileReader = new InputStreamReader(new FileInputStream(new File(inputFileName)), "UTF-8");
View Full Code Here

        ArrayList pipeList = new ArrayList();

        pipeList.add(new CharSequence2TokenSequence(tokenPattern));
        pipeList.add(new TokenSequenceRemoveStopwords(false, false)); // we should use a real stop word list
        pipeList.add(new TokenSequenceNGramsDelim(sizes, " "));
        pipeList.add(new TokenSequence2FeatureSequence());
        return new SerialPipes(pipeList);
    }
View Full Code Here

        //  options: [case sensitive] [mark deletions]
        pipeList.add(new TokenSequenceRemoveStopwords(false, false));

        // Rather than storing tokens as strings, convert
        //  them to integers by looking them up in an alphabet.
        pipeList.add(new TokenSequence2FeatureSequence());

        // Do the same thing for the "target" field:
        //  convert a class label string to a Label object,
        //  which has an index in a Label alphabet.
        //pipeList.add(new Target2Label());
View Full Code Here

        //  options: [case sensitive] [mark deletions]
        pipeList.add(new TokenSequenceRemoveStopwords(false, false));

        // Rather than storing tokens as strings, convert
        //  them to integers by looking them up in an alphabet.
        pipeList.add(new TokenSequence2FeatureSequence());

        // Do the same thing for the "target" field:
        //  convert a class label string to a Label object,
        //  which has an index in a Label alphabet.
        //pipeList.add(new Target2Label());
View Full Code Here

TOP

Related Classes of cc.mallet.pipe.TokenSequence2FeatureSequence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.