Examples of BayesClassifier


Examples of com.flaptor.hounder.classifier.bayes.BayesClassifier

       
    }
   
    public boolean reloadProbabilities(){
        for (String cat: config.getCategoryList()){
            classifiers.put(cat, new BayesClassifier(config.getBaseDir(), cat));
        }
        return true;
    }
View Full Code Here

Examples of com.flaptor.hounder.classifier.bayes.BayesClassifier

    }
   
    public Map<String,Map<String, Double>> verify(String catName, boolean loadMaps)
    throws UnsupportedEncodingException{
        if (loadMaps) loadIncludedNotIncludedUrls(catName);
        BayesClassifier classifier= new BayesClassifier(config.getBaseDir(), catName);
        if (classifier.isProbabilitiesFileEmpty()){
            return null;
        }

        Map<String,Map<String, Double>> mp= new HashMap<String,Map<String, Double>>();       
        mp.put("uinc_cinc", new HashMap<String, Double>());
        mp.put("unot_cnot", new HashMap<String, Double>());
        mp.put("uinc_cnot", new HashMap<String, Double>());
        mp.put("unot_cinc", new HashMap<String, Double>());
       
        // traverse the list of included urls and check what the classifier say
        for (String url: includedUrlsList){
            String item=cache.getItem(url);
            if (null==item){
                LOGGER.warn("Page " + url + "is in included for " + catName + " but not in cache");
                continue;
            }
            double classifierScore = classifier.classify(DocumentParser.parse(item, classifier.getMaxTuple()));
            boolean classifierIncluded = (classifierScore > 0.5);
            addToIncNotIncMap(mp, url, true, classifierIncluded, classifierScore);           
        }
        // traverse the list of not included urls and check what the classifier say
        for (String url: notIncludedUrlsList){
            String item=cache.getItem(url);
            if (null==item){
                LOGGER.warn("Page " + url + "is in notIncluded for " + catName + " but not in cache");
                continue;
            }
            double classifierScore = classifier.classify(DocumentParser.parse(item, classifier.getMaxTuple()));
            boolean classifierIncluded = (classifierScore > 0.5);
            addToIncNotIncMap(mp, url, false, classifierIncluded, classifierScore);           
        }
        return mp;
    }
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.BayesClassifier

      log.info("Testing Complementary Bayes Classifier");
      model = new CBayesModel();
    } else {
      throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
    }
    Classifier classifier = new BayesClassifier();

    SequenceFileModelReader.loadModel(model, fs, modelPaths, conf);

    log.info("Done loading model: # labels: {}", model.getLabels().size());

    log.info("Done generating Model");


    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
      String className = (String) cmdLine.getValue(analyzerOpt);
      analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
    }
    if (analyzer == null) {
      analyzer = new StandardAnalyzer();
    }

    int gramSize = 1;
    if (cmdLine.hasOption(gramSizeOpt)) {
      gramSize = Integer.parseInt((String) cmdLine
          .getValue(gramSizeOpt));

    }

    log.info("Converting input document to proper format");
    String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), Charset.forName(encoding)));
    StringBuilder line = new StringBuilder();
    for(String token : document)
    {
      line.append(token).append(' ');
    }
    List<String> doc = Model.generateNGramsWithoutLabel(line.toString(), gramSize) ;
    log.info("Done converting");
    log.info("Classifying document: {}", docPath);
    ClassifierResult category = classifier.classify(model, doc.toArray(new String[doc.size()]), defaultCat);
    log.info("Category for {} is {}", docPath, category);

  }
View Full Code Here

Examples of org.fnlp.ml.classifier.bayes.BayesClassifier

    System.out.print("..Spliting complete!\n");

    System.out.print("Training...\n");
    af.setStopIncrement(true);
    BayesTrainer trainer=new BayesTrainer();
    BayesClassifier classifier= (BayesClassifier) trainer.train(trainset);
    System.out.print("..Training complete!\n");
    System.out.print("Saving model...\n");
    classifier.saveTo(bayesModelFile)
    classifier = null;
    System.out.print("..Saving model complete!\n");
    /**
     * 测试
     */
    System.out.print("Loading model...\n");
    BayesClassifier bayes;
    bayes =BayesClassifier.loadFrom(bayesModelFile);
    System.out.print("..Loading model complete!\n");
   
    System.out.println("Testing Bayes...");
    int flag=0;
    float[] percents_cs=new float[]{1.0f,0.9f,0.8f,0.7f,0.5f,0.3f,0.2f,0.1f};
    int[] counts_cs=new int[10];
    for(int test=0;test<percents_cs.length;test++){
      System.out.println("Testing Bayes"+percents_cs[test]+"...");
      if(test!=0)
        bayes.fS_CS(percents_cs[test]);
      int count=0;
      for(int i=0;i<testset.size();i++){
        Instance data = testset.getInstance(i);
        Integer gold = (Integer) data.getTarget();
        Predict<String> pres=bayes.classify(data, Type.STRING, 3);
        String pred_label=pres.getLabel();
        String gold_label = bayes.getLabel(gold);
       
        if(pred_label.equals(gold_label)){
          count++;
        }
        else{
          flag=i;
//          System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//          for(int j=0;j<3;j++)
//            System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
        }
      }
      counts_cs[test]=count;
      System.out.println("Bayes Precision("+percents_cs[test]+"):"
      +((float)count/testset.size())+"("+count+"/"+testset.size()+")");
    }
    bayes.noFeatureSelection();
    float[] percents_csmax=new float[]{1.0f,0.9f,0.8f,0.7f,0.5f,0.3f,0.2f,0.1f};
    int[] counts_csmax=new int[10];
    for(int test=0;test<percents_csmax.length;test++){
      System.out.println("Testing Bayes"+percents_csmax[test]+"...");
      if(test!=0)
        bayes.fS_CS_Max(percents_csmax[test]);
      int count=0;
      for(int i=0;i<testset.size();i++){
        Instance data = testset.getInstance(i);
        Integer gold = (Integer) data.getTarget();
        Predict<String> pres=bayes.classify(data, Type.STRING, 3);
        String pred_label=pres.getLabel();
        String gold_label = bayes.getLabel(gold);
       
        if(pred_label.equals(gold_label)){
          count++;
        }
        else{
//          System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//          for(int j=0;j<3;j++)
//            System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
        }
      }
      counts_csmax[test]=count;
      System.out.println("Bayes Precision("+percents_csmax[test]+"):"
      +((float)count/testset.size())+"("+count+"/"+testset.size()+")");
    }
    bayes.noFeatureSelection();
    float[] percents_ig=new float[]{1.0f,0.9f,0.8f,0.7f,0.5f,0.3f,0.2f,0.1f};
    int[] counts_ig=new int[10];
    for(int test=0;test<percents_ig.length;test++){
      System.out.println("Testing Bayes"+percents_ig[test]+"...");
      if(test!=0)
        bayes.fS_IG(percents_ig[test]);
      int count=0;
      for(int i=0;i<testset.size();i++){
        Instance data = testset.getInstance(i);
        Integer gold = (Integer) data.getTarget();
        Predict<String> pres=bayes.classify(data, Type.STRING, 3);
        String pred_label=pres.getLabel();
        String gold_label = bayes.getLabel(gold);
       
        if(pred_label.equals(gold_label)){
          count++;
        }
        else{
View Full Code Here

Examples of org.fnlp.ml.classifier.bayes.BayesClassifier

    InstanceSet testset = splitsets[1]
    System.out.print("..Spliting complete!\n");

    System.out.print("Training...\n");
    BayesTrainer trainer=new BayesTrainer();
    BayesClassifier classifier= (BayesClassifier) trainer.train(trainset);
    pp.removeTargetPipe();
    classifier.setPipe(pp);
    af.setStopIncrement(true);
    System.out.print("..Training complete!\n");
    System.out.print("Saving model...\n");
    classifier.saveTo(bayesModelFile)
    classifier = null;
    System.out.print("..Saving model complete!\n");
    /**
     * 测试
     */
    System.out.print("Loading model...\n");
    BayesClassifier bayes;
    bayes =BayesClassifier.loadFrom(bayesModelFile);
//    bayes =classifier;
    System.out.print("..Loading model complete!\n");
   
    System.out.println("Testing Bayes...");
    int count=0;
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
      Integer gold = (Integer) data.getTarget();
      Predict<String> pres=bayes.classify(data, Type.STRING, 3);
      String pred_label=pres.getLabel();
//      String pred_label = bayes.getStringLabel(data);
      String gold_label = bayes.getLabel(gold);
     
      if(pred_label.equals(gold_label)){
        //System.out.println(pred_label+" : "+testsetbayes.getInstance(i).getTempData());
        count++;
      }
View Full Code Here

Examples of org.fnlp.ml.classifier.bayes.BayesClassifier

   */
  private static String modelFile = "../example-data/text-classification/modelBayes2.gz";

  public static void main(String[] args) throws Exception {
   
    BayesClassifier bayes;
    bayes =BayesClassifier.loadFrom(modelFile);
   
    /**
     * 分类器使用
     */
    String str = "韦德:不拿冠军就是失败 詹皇:没拿也不意味失败";
    System.out.println("============\n分类:"+ str);
    Pipe p = bayes.getPipe();
    Instance inst = new Instance(str);
    try {
      //特征转换
      p.addThruPipe(inst);
    } catch (Exception e) {
      e.printStackTrace();
    }
    String res = bayes.getStringLabel(inst);
    System.out.println("xxx")
    System.out.println("类别:"+ res)
    //建立字典管理器
    AlphabetFactory af = AlphabetFactory.buildFactory();
   
    //使用n元特征
    Pipe ngrampp = new NGram(new int[] {1,2});
    //分词
//    CWSTagger tag = new CWSTagger("../models/seg.m");
//    Pipe segpp=new CNPipe(tag);
    //将字符特征转换成字典索引
    Pipe indexpp = new StringArray2IndexArray(af)
    Pipe sparsepp=new StringArray2SV(af);
    //将目标值对应的索引号作为类别
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet())
    //建立pipe组合
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp,targetpp,sparsepp});
   
    InstanceSet instset = new InstanceSet(pp,af);
   
    //用不同的Reader读取相应格式的文件
    Reader reader = new FileReader(trainDataPath,"UTF-8",".data");
   
    //读入数据,并进行数据处理
    instset.loadThruStagePipes(reader);
    //将数据集分为训练是和测试集
    float percent = 0.8f;
    InstanceSet[] splitsets = instset.split(percent);
   
    InstanceSet trainset = splitsets[0];
    InstanceSet testset = splitsets[1]

    /**
     * 测试
     */
    System.out.println("类别 : 文本内容");
    System.out.println("===================");
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
     
      Integer gold = (Integer) data.getTarget();
      Predict<String> pres=bayes.classify(data, Type.STRING, 3);
      String pred_label=pres.getLabel();
//      String pred_label = bayes.getStringLabel(data);
      String gold_label = bayes.getLabel(gold);
     
      if(pred_label.equals(gold_label))
        System.out.println(pred_label+" : "+testset.getInstance(i).getSource());
      else
        System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getSource());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.