Package org.ictclas4j.bean

Examples of org.ictclas4j.bean.SegAtom


              midResult = outputResult(adjResult);
            }
            break;
          }
        } else {
          SegAtom atom = new SegAtom(sen.getContent());
          SegAtom[] atoms = new SegAtom[1];
          atoms[0] = atom;
          midResult = new SegResult();
          midResult.setRawContent(sen.getContent());
          midResult.setAtoms(atoms);
View Full Code Here


      ArrayList<SegAtom> saList = new ArrayList<SegAtom>();
      for (int i = 0; i < wrList.size(); i++) {

        SegNode sn = wrList.get(i);
        if (sn.getPos() != POSTag.SEN_BEGIN && sn.getPos() != POSTag.SEN_END) {
          SegAtom sa =sn.toSegAtom();
          saList.add(sa);
        }
      }

      SegAtom[] atoms = new SegAtom[saList.size() - 1];
View Full Code Here

            flag = true;
            j++;
          }
        }

        SegAtom sa = null;
        String word = words.toString();
        int gbkID = dictLib.getGBKID(word);
        int wordMaxLen = dict.getWordMaxLen(word, gbkID);
        for (; j <= atoms.size() && word.length() < wordMaxLen; j++) {
          word = words.toString();
          sa = dict.getSegAtom(word, gbkID);
          if (sa != null) {
            // 1���ڣ�1999��ĩ
            // if (word.length() == 2 && segGraph.getSize() > 0) {
            // SegNode g2 = segGraph.getLast();
            // if (Utility.isAllNum(g2.getWord()) ||
            // Utility.isAllChinese(g2.getWord())
            // && (g2.getWord().indexOf("��") == 0 ||
            // g2.getWord().indexOf("��") == 0)) {
            // if ("ĩ���е�ǰ���".indexOf(words.substring(1)) != -1)
            // break;
            // }
            // }
            // When the atom has exactly one part of speech, use that tag for the node; otherwise use tag 0.
            SegNode sg = null;
            if (sa.getPosCount() == 1) {
              Pos pos = sa.getPos(0);
              sg = new SegNode(i, j, pos.getTag(), sa.getTotalFreq(), word);
            } else
              sg = new SegNode(i, j, 0, sa.getTotalFreq(), word);
            sg.setSrcWord(word);
            sg.setGbkID(gbkID);
            segGraph.insert(sg, true);
          }
View Full Code Here

      int wordMaxLen=0;
      if(wis0!=null){
        System.out.println("size:"+i+","+wis0.size()+","+size);
        HashMap<String, SegAtom> wordMap = new HashMap<String, SegAtom>();
        for(int j=0;j<wis0.size();j++){
          SegAtom sa=new SegAtom();
          WordItem0 wi=wis0.get(j);
          sa.setWord(wi.getWord());
          sa.addPos(new Pos(wi.getHandle(),wi.getFreq(),false));
          count++;
          size+=8+wi.getWord().getBytes().length;
         
          // Consecutive entries with the same word are merged into one SegAtom (their Pos entries are added to it).
          while(j<wis0.size()-1 && wis0.get(j).getWord()!=null && wis0.get(j).getWord().equals(wis0.get(j+1).getWord())){
            wi=wis0.get(j+1);
            sa.addPos(new Pos(wi.getHandle(),wi.getFreq(),false));
            j++;
            size+=8;
          }
         
          wordMap.put(sa.getWord(), sa);
          if(sa.getWord().length()>wordMaxLen)
            wordMaxLen=sa.getWord().length();
         
        }
       
        WordTable wt=new WordTable();
        wt.setWordCount(count);
View Full Code Here

          if (sn.getPos() < 0) {
            AdjoiningPos pos = new AdjoiningPos( 0 , 0);
            sn.addPos(pos);
          } else {
            // Look up the current word in unknownDict and add an AdjoiningPos for each of its Pos entries.
            SegAtom sa = unknownDict.getSegAtom(curWord, gbkID);
            for (int j = 0; sa != null && j < sa.getPosCount(); j++) {
              Pos pos = sa.getPos(j);
              double value = -Math.log((1 + pos.getFreq()));
              value += Math.log((context.getFreq(pos.getTag()) + sa.getPosCount() + 1));
              AdjoiningPos apos = new AdjoiningPos(pos , value);
              sn.addPos(apos);
            }

            if (Utility.SENTENCE_BEGIN.equals(curWord))
              sn.addPos(new AdjoiningPos( 100 , 0));

            else if (Utility.SENTENCE_END.equals(curWord))
              sn.addPos(new AdjoiningPos( 101 , 0));
            else {
              int freq = 0;
              sa = coreDict.getSegAtom(curWord, gbkID);
              if (sa != null) {
                double value = -Math.log((double) (1 + freq));
                value += Math.log((double) (context.getFreq(0) + sa.getPosCount()));
                sn.addPos(new AdjoiningPos( 0 , value));

              }
            }
          }
        } else {
          if (sn.getPos() > 0) {
            int tag = sn.getPos();
            double value = -Math.log(1 + sn.getFreq());
            value += Math.log(1 + context.getFreq(tag));
            if (value < 0)
              value = 0;
            sn.addPos(new AdjoiningPos( tag,  value));
          } else {
            if (sn.getPos() < 0) {
              sn.setPos(-sn.getPos());
              sn.addPos(new AdjoiningPos( -sn.getPos(),  sn.getFreq()));
            }
            SegAtom sa = coreDict.getSegAtom(curWord, gbkID);
            if (sa != null) {
              for (int j = 0; j < sa.getPosCount(); j++) {
                Pos pos = sa.getPos(j);
                double value = -Math.log(1 + pos.getFreq());
                value += Math.log(context.getFreq(pos.getTag()) + sa.getPosCount());
                sn.addPos(new AdjoiningPos(pos , value));
              }
            }
          }
        }
View Full Code Here

TOP

Related Classes of org.ictclas4j.bean.SegAtom

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.