Package opennlp.ccg.parse.tagger.util

Examples of opennlp.ccg.parse.tagger.util.ResultSink


      ht.betas = new double[1];
      ht.betas[0] = beta;
    }
    resBetas = new ArrayList<ResultSink>(ht.betas.length);
    for(int i = 0; i < ht.betas.length; i++) {
      ResultSink r = new ResultSink();
      resBetas.add(r);
    }
    ArrayList<BufferedWriter> errFiles = new ArrayList<BufferedWriter>();
    for(int i = 0; i < ht.betas.length; i++) {
      BufferedWriter b = new BufferedWriter(new FileWriter(new File("tagdict.err.out." + i)));
View Full Code Here


                if (args[i].equals("-c")) { configFile = args[++i]; continue; }
                if (args[i].equals("-beta")) { beta = Double.parseDouble(args[++i]); continue; }
                System.out.println("Unrecognized option: " + args[i]);
            }

            ResultSink rs = new ResultSink(ResultSink.ResultSinkType.SUPERTAG);
            try {               
                in = new SRILMFactoredBundleCorpusIterator(
                        (inputCorp.equals("<stdin>")) ?
                            new BufferedReader(new InputStreamReader(System.in)) :
                            new BufferedReader(new FileReader(new File(inputCorp))));               
            } catch (FileNotFoundException ex) {
                System.err.print("Input corpus " + inputCorp + " not found.  Exiting...");
                Logger.getLogger(WordAndPOSDictionaryLabellingStrategy.class.getName()).log(Level.SEVERE, null, ex);
                System.exit(-1);
            }

            try {
                out = (output.equals("<stdout>")) ? new BufferedWriter(new OutputStreamWriter(System.out)) : new BufferedWriter(new FileWriter(new File(output)));
            } catch (IOException ex) {
                System.err.print("Output file " + output + " not found.  Exiting...");
                Logger.getLogger(WordAndPOSDictionaryLabellingStrategy.class.getName()).log(Level.SEVERE, null, ex);
                System.exit(-1);
            }

            WordAndPOSDictionaryLabellingStrategy stgger = WordAndPOSDictionaryLabellingStrategy.supertaggerFactory(configFile);
           
            // for each sentence, print out:
            // <s>
            // w1   <numPOSTags>    <posTag1>   ... <posTagK>   <numSupertags>  <supertag1> ... <supertagL>
            // ...
            // wN   <numPOSTags>    <posTag1>   ... <posTagM>   <numSupertags>  <supertag1> ... <supertagU>
            // </s>
            for (List<Word> inLine : in) {
               
                List<List<Pair<Double,String>>> taggedSent = stgger.multitag(inLine, beta);
                if(test) { rs.addSent(taggedSent, inLine); }
                // beginning of sentence...
                out.write("<s>" + System.getProperty("line.separator"));               
                List<TaggedWord> posTagging = stgger.getCurrentTagging();
                int cursor = -1;
                while(++cursor < taggedSent.size()) {
                    Word wdIn = inLine.get(cursor);
                    // word form...
                    out.write(wdIn.getForm());
                    TaggedWord posT = posTagging.get(cursor);
                    // print out number of POS tags, followed by tab-separated probabilized POS tagging.
                    out.write("\t" + posT.getPOSTagging().size());
                    for(Pair<Double,String> pt : posT.getPOSTagging()) {
                        out.write("\t" + pt.b + "\t" + pt.a);
                    }
                    // now print out number of and list of tab-separated, probabilized supertags.
                    out.write("\t" + taggedSent.get(cursor).size());
                    for(Pair<Double,String> stg : taggedSent.get(cursor)) {
                        out.write("\t" + stg.b + "\t" + stg.a);
                    }
                    out.write(System.getProperty("line.separator"));
                }
                out.write("</s>" + System.getProperty("line.separator"));
            }
            out.flush();

            if(test) { System.err.println(rs.report()); }
        } catch (IOException ex) {
            Logger.getLogger(WordAndPOSDictionaryLabellingStrategy.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            try {
                out.close();
View Full Code Here

             
              BufferedWriter outf = new BufferedWriter(new FileWriter(options.valueOf(outputspec)));
             
              boolean test = options.has("test");
             
              ResultSink results = new ResultSink();
              int sentCnt = 0;
             
              tagger.setBetas(new double[] {beta});
             
              while(corpus.hasNext()) {
                  sentCnt++;
                  List<Word> sent = corpus.next();
                 
                  List<List<Pair<Double,String>>> taggings = tagger.multitag(sent, beta);
                 
                  if(test) {
                      List<Word> goldsent = goldCorpus.next();
                      results.addSent(taggings, goldsent);
                  }                 
                 
                  Iterator<Word> sentiter = sent.iterator();
                  // output file format = word goldtag tag1 ... tagK                 
                  outf.write("<s>"+System.getProperty("line.separator"));
                  for(List<Pair<Double,String>> tagging : taggings) {                     
                      Word nextw = sentiter.next();
                      outf.write(nextw.getForm() + "\t1\t" + nextw.getPOS() + "\t1.0\t" + tagging.size() + "\t");// + nextw.getSupertag() + " ");
                      //outf.write(nextw.getForm() + "|||"+ nextw.getStem() + "|||" + nextw.getPOS() + "|||");
                      String tags = "";
                      for(Pair<Double,String> tg : tagging) {
                          //tags+="^"+tg.b+":"+tg.a;
                          tags+= "\t" + tg.b + "\t"+tg.a;
                      }
                      // write out the multitagging, minus the initial space (tab).
                      outf.write(tags.substring(1) + System.getProperty("line.separator"));
                     
                      //// write out the multitagging, minus the initial ^.
                      //outf.write(tags.substring(1) + " ");
                  }                 
                               
                  outf.write("</s>"+System.getProperty("line.separator"));
                  if(sentCnt % 10 == 0) {
                      outf.flush();
                  }
              }
              outf.flush();
              outf.close();
              if(test) {
                  System.err.println(results.report());
              }
              long end = System.currentTimeMillis();
              System.err.println("Time to tag: " + ((end - start + 0.0)/1000) + " seconds.");
             
            } else if (options.has("tagdictextract")) {
View Full Code Here

                if (args[i].equals("-e")) { test = true; continue; }
                if (args[i].equals("-c")) { configFile = args[++i]; continue; }
                System.out.println("Unrecognized option: " + args[i]);
            }

            ResultSink rs = new ResultSink(ResultSink.ResultSinkType.POSTAG);
           
            try {                       
                inp = new SRILMFactoredBundleCorpusIterator(
                        (inputCorp.equals("<stdin>")) ?
                            new BufferedReader(new InputStreamReader(System.in)) :
                            new BufferedReader(new FileReader(new File(inputCorp))));               
            } catch (FileNotFoundException ex) {
                System.err.print("Input corpus " + inputCorp + " not found.  Exiting...");
                Logger.getLogger(POSPriorModel.class.getName()).log(Level.SEVERE, null, ex);
                System.exit(-1);
           

            try {
                out = (output.equals("<stdout>")) ? new BufferedWriter(new OutputStreamWriter(System.out)) : new BufferedWriter(new FileWriter(new File(output)));
            } catch (IOException ex) {
                System.err.print("Output file " + output + " not found.  Exiting...");
                Logger.getLogger(POSPriorModel.class.getName()).log(Level.SEVERE, null, ex);
                System.exit(-1);
            }

            POSTagger post = POSTagger.posTaggerFactory(configFile);
           
            for (List<Word> inLine : inp) {
                List<TaggedWord> taggedSent = post.tagSentence(inLine);
                List<List<Pair<Double,String>>> sentTagging = new ArrayList<List<Pair<Double,String>>>(taggedSent.size());
                for(TaggedWord tw : taggedSent) { sentTagging.add(tw.getPOSTagging()); }
                if(test) { rs.addSent(sentTagging, inLine); }
                out.write("<s>" + System.getProperty("line.separator"));
                for(TaggedWord tw : taggedSent) {
                    out.write(tw.getForm());
                    for(Pair<Double,String> tg : tw.getPOSTagging()) {
                        out.write("\t" + tg.b + "\t" + tg.a);
                    }
                    out.write(System.getProperty("line.separator"));
                }
                out.write("</s>" + System.getProperty("line.separator"));
            }
            out.flush();

            if(test) { System.err.println(rs.report()); }
        } catch(Throwable t) {
            t.printStackTrace();
        } finally {
            try {               
                inp.close();
View Full Code Here

        if (args[i].equals("-m")) { modelFile = args[++i]; continue; }
        if (args[i].equals("-b")) { beta = Double.parseDouble(args[++i]); continue; }
        System.out.println("Unrecognized option: " + args[i]);
      }

      ResultSink rs = new ResultSink(ResultSink.ResultSinkType.POSTAG);

      try {                       
        inp = new BufferedReader(new FileReader(new File(inputCorp)));               
      } catch (FileNotFoundException ex) {
        System.err.print("Input corpus " + inputCorp + " not found.  Exiting...");
View Full Code Here

TOP

Related Classes of opennlp.ccg.parse.tagger.util.ResultSink

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.