Examples of FNLPCorpus


Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

    String trainfile = datapath + "/FNLPDATA/train.dep";
    MyFiles.delete(testfile);
    MyFiles.delete(trainfile);
    MyFiles.delete(allfile);

    FNLPCorpus corpus = new FNLPCorpus();
    //读FNLP数据
    corpus.read(datapath + "/FNLPDATA/ctb7.dat", null);
    corpus.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-train.dat", null);
    //读自有数据
    corpus.readOurCorpus(datapath + "/ourdata",".txt","UTF8");

    corpus.writeOne(trainfile);

    TreeSet<String> allRelSet = new TreeSet<String>();

    TreeSet<String> set1 = corpus.getAllRelations();
    allRelSet.addAll(set1);

    //处理测试数据
    FNLPCorpus corpust = new FNLPCorpus();
    //读自有数据
    corpust.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-test.dat", null)
    corpust.writeOne(testfile);

    TreeSet<String> set2 = corpus.getAllRelations();
    allRelSet.addAll(set2);

    System.out.println(allRelSet);
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

    //清理
    MyFiles.delete(dictfile);
    MyFiles.delete(segfile);
   

    FNLPCorpus corpus = new FNLPCorpus();
    //读自有数据
    corpus.readOurCorpus(datapath + "/ourdata",".txt","UTF8");
    //读分词文件
    corpus.readCWS(datapath + "/FNLPDATA/seg",".txt","UTF8")
    //读分词+词性文件
    corpus.readPOS(datapath + "/FNLPDATA/pos",".txt","UTF8")
    //读FNLP数据
    corpus.read(datapath + "/FNLPDATA/ctb7.dat", null);     
    corpus.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-train.dat", null);



    FNLP2BMES.w2BMES(corpus,segfile);   
    //FNLP2BMES.w2BMES(corpus,segfile_w); //?


    //词典转BMES
    //搜狗词典
    DICT dict = new DICT();
    String sougou = datapath + "/FNLPDATA/dict/SogouLabDic.dic.raw";

//    dict.readSougou(sougou,2,3,"sougou");
    //互动词典
    String hudong = datapath + "/FNLPDATA/dict/hudong.dic.all";
//    dict.readSougou(hudong,2,3,"");
    //添加其他词典
    dict.readDictionary(datapath + "/FNLPDATA/dict",".dic");

    //添加其他词典
//    dict.readDictionaryWithFrequency(datapath + "/FNLPDATA/dict",".dic.freq");




    //添加词性字典
    dict.readPOSDICT(datapath + "/FNLPDATA/词性字典", ".txt");
    dict.readPOSDICT(datapath + "/FNLPDATA/dict-sogou-input/txt", ".txt");

    dict.toBMES(dictfile,3);
    new File(dictfile).deleteOnExit();

    //合并训练文件
   
   
   
    List<File> files = MyFiles.getAllFiles(datapath + "/FNLPDATA/", ".seg");
    MyFiles.combine(trainfile,files.toArray(new File[files.size()]));
   
    //生成新字典   
    String dicfile = datapath + "/FNLPDATA/train.dict";
    DICT.BMES2DICT(trainfile,dicfile);

    //处理测试数据
    FNLPCorpus corpust = new FNLPCorpus();
    //读自有数据
    corpust.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-test.dat", null)
   
    FNLP2BMES.w2BMES(corpust,testfile);   
   
   
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

  /**
   * @param args
   * @throws IOException
   */
  public static void main(String[] args) throws IOException {
    FNLPCorpus corpus = new FNLPCorpus();
    corpus.read("./tmpdata/FNLPDATA",".dat")
    String file = "./tmpdata/FNLPDATA/data-cws.txt";

    w2BMES(corpus, file);   
    System.out.println(new Date().toString());
    System.out.println("Done!");
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

    charset = Charset.forName(charsetName);
    tc = new TagCorrect();
  }

  public void read() throws IOException {
    corpus = new FNLPCorpus();
    List<String> carrier = new ArrayList<String>();
    Iterator<File> it = files.iterator();
    while(it.hasNext()){
      BufferedReader bfr =null;
      File file = it.next();
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

public class TrainTestSplit {

  public static void main(String[] args) throws Exception {

    String datapath = "../data";
    FNLPCorpus corpus = new FNLPCorpus();
    corpus.read(datapath + "/FNLPDATA/WeiboFTB(v1.0).dat", null);
   
    System.out.println(corpus.getDocumenNum());
    System.out.println(corpus.getSentenceNum());
    System.out.println(corpus.getAllPOS());
   
    FNLPDoc doc = corpus.docs.get(0);
    List<FNLPSent> train = doc.sentences.subList(0, 3000);
    List<FNLPSent> test = doc.sentences.subList(3000,doc.sentences.size());
   
    doc.sentences =  new LinkedList<FNLPSent>();
    doc.sentences.addAll(train);
    corpus.writeOne(datapath + "/FNLPDATA/WeiboFTB(v1.0)-train.dat");
    System.out.println(corpus.getSentenceNum());
    System.out.println(corpus.getAllPOS().size());
   
   
    doc.sentences =  new LinkedList<FNLPSent>();
    doc.sentences.addAll(test);
    corpus.writeOne(datapath + "/FNLPDATA/WeiboFTB(v1.0)-test.dat");
    System.out.println(corpus.getSentenceNum());
    System.out.println(corpus.getAllPOS().size());
  }
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

    System.out.println("Done!");

  }
  static void trans(String path, String toPath) throws IOException,
  UnsupportedEncodingException, FileNotFoundException {
    FNLPCorpus corpus = new FNLPCorpus();
    corpus.read(path,".dat");   
    trans(corpus,toPath);
  }
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

    String testfile = datapath + "/FNLPDATA/test.pos";
    (new File(testfile)).delete();
    String dictfile = datapath + "/FNLPDATA/dict.pos";
    (new File(dictfile)).delete();

    FNLPCorpus corpus = new FNLPCorpus();
    //读FNLP数据
    corpus.read(datapath + "/FNLPDATA/ctb7.dat", null);
    corpus.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-train.dat", null);

    //读分词+词性文件
    corpus.readPOS(datapath + "/FNLPDATA/pos",".txt","UTF8")
    //读自有数据
    corpus.readOurCorpus(datapath + "/ourdata",".txt","UTF8");

    FNLP2POS.trans(corpus,ctbfile);

    allpostSet.addAll(corpus.getAllPOS());

   


    //读字典
    DictPOS dp = new DictPOS();
    String out = datapath + "/FNLPDATA/dict.pos";
//    dp.loadPath(datapath + "/FNLPDATA/词性字典",".txt");
//    dp.loadPath(datapath + "/FNLPDATA/dict-sogou-input/txt", ".txt");
    dp.save(out);
   
    allpostSet.addAll(dp.getPosSet());


    //合并

    List<File> files = MyFiles.getAllFiles(datapath + "/FNLPDATA/", ".pos");

    MyFiles.combine(trainfile,files.toArray(new File[files.size()]))


    //处理测试数据
    FNLPCorpus corpust = new FNLPCorpus();
    //读自有数据
    corpust.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-test.dat", null)
   
    allpostSet.addAll(corpust.getAllPOS());
   
   
    System.out.println(allpostSet);

    FNLP2POS.trans(corpust,testfile);
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

  /**
   * @param args
   * @throws IOException
   */
  public static void main(String[] args) throws IOException {
    FNLPCorpus corpus = new FNLPCorpus();
    corpus.read("./data/FNLPDATA/ctb7.dat",null);
    String pattern  = "北京";
    Pattern p = Pattern.compile(pattern);
    ArrayList<FNLPSent> sents = new ArrayList<FNLPSent> ();
   
   
View Full Code Here

Examples of org.fnlp.nlp.corpus.fnlp.FNLPCorpus

  private int curSentNo;
  private int curDocNo;
  private FNLPDoc curDoc;

  public FNLPReader(String filepath) throws IOException {
    corpus = new FNLPCorpus();
    corpus.read(filepath, null);
    size = corpus.getDocumenNum();
    curDocNo = 0;
    curSentNo = 0;
   
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.