Package ch.akuhn.hapax.index

Examples of ch.akuhn.hapax.index.TermDocumentMatrix.toLowerCase()


    }


    public TermDocumentMatrix makeTDM() {
        TermDocumentMatrix tdm = corpus;
        if (ignoreCase) tdm = tdm.toLowerCase();
        if (rejectRareTerms) tdm = tdm.rejectHapaxes();
        if (rejectStopwords) tdm = tdm.rejectStopwords();
        // TODO if (rejectCommonTerms) tdm = tdm.rejectCommonTerms();
        return tdm.weight(local, global);
    }
View Full Code Here


   
    @Test
    @Given("#makeTermDocumentMatrix")
    public TermDocumentMatrix rejectStopWords(final TermDocumentMatrix matrix) {
        TermDocumentMatrix tdm = matrix;
        tdm = tdm.toLowerCase();
        assertEquals(9, tdm.documentCount());
        assertEquals(42, tdm.termCount());
        tdm = tdm.rejectHapaxes();
        assertEquals(9, tdm.documentCount());
        assertEquals(16, tdm.termCount());
View Full Code Here

        assertEquals(9, tdm.documentCount());
        assertEquals(42, tdm.termCount());
        tdm = tdm.rejectHapaxes();
        assertEquals(9, tdm.documentCount());
        assertEquals(16, tdm.termCount());
        tdm = tdm.toLowerCase().rejectStopwords();
        assertEquals(9, tdm.documentCount());
        assertEquals(12, tdm.termCount());
        assertEquals(SORTED, Get.sorted(tdm.terms().elementSet()).toString());
        return tdm;
    }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.