Package org.ictclas4j.bean

Examples of org.ictclas4j.bean.SegNode


            else
              break;
          }
        }
        if (end > start) {
          SegNode newsn = new SegNode();
          newsn.setRow(sns.get(start).getRow());
          newsn.setCol(sns.get(end - 1).getCol());
          newsn.setPos(pos);
          newsn.setWord(unknownFlags);
          newsn.setSrcWord(srcWord);
          double value = computePossibility(start, end - start + 1, sns);
          newsn.setValue(value);
          segGraph.insert(newsn, true);
        }
      }
    }
  }
View Full Code Here


    }
  }

  private int getBestTag(ArrayList<SegNode> sns, int index) {
    if (sns != null && index >= 0 && index < sns.size()) {
      SegNode sn = sns.get(index);
      return getBestTag(sn);

    }

    return -1;
View Full Code Here

  private double computePossibility(int startPos, int length, ArrayList<SegNode> sns) {
    double retValue = 0, posPoss;

    if (sns != null && unknownDict != null && context != null) {
      for (int i = startPos; i < startPos + length && sns != null; i++) {
        SegNode sn = sns.get(i);
        int bestTag = getBestTag(sn);
        if (bestTag != -1) {
          int freq = unknownDict.getFreq(sn.getSrcWord(), bestTag);
          posPoss = Math.log((double) (context.getFreq(0, sn.getPos()) + 1));
          posPoss += -Math.log((double) (freq + 1));
          retValue += posPoss;
        }
      }
    }
View Full Code Here

    String temp=null;
    char[] pos = new char[2];
    if (wrList != null && wrList.size() > 0) {
      result = "";
      for (int i = 0; i < wrList.size(); i++) {
        SegNode sn = wrList.get(i);
        if (sn.getPos() != POSTag.SEN_BEGIN && sn.getPos() != POSTag.SEN_END) {
          int tag = Math.abs(sn.getPos());
          pos[0] = (char) (tag / 256);
          pos[1] = (char) (tag % 256);
          temp=""+pos[0];
          if(pos[1]>0)
            temp+=""+pos[1];
          result += sn.getSrcWord() + "/" + temp + " ";
        }
      }
    }

    return result;
View Full Code Here

    if (sns != null && coreDict != null && unknownDict != null && context != null) {
      int i = 0;
      String curWord = null;

      for (; i < sns.size(); i++) {
        SegNode sn = sns.get(i);
        sn.setAllPos(null);
        curWord = sn.getSrcWord();
        // if (tagType == Utility.TAG_TYPE.TT_NORMAL ||
        // !unknownDict.isExist(sn.getWord(), 44)) {
        //
        // }

        if (tagType != Utility.TAG_TYPE.TT_NORMAL) {

          // Convert between full-width and half-width forms of "." and "-" when the previous word is Chinese.
          if (tagType == Utility.TAG_TYPE.TT_TRANS_PERSON && i > 0) {
            String prevWord = sns.get(i - 1).getSrcWord();
            if (Utility.charType(prevWord) == Utility.CT_CHINESE) {
              if (".".equals(curWord))
                curWord = "��";
              else if ("-".equals(curWord))
                curWord = "��";
            }
          }

          // Fetch every part-of-speech entry for the current word from the unknownDict dictionary.
          ArrayList<WordItem> wis = unknownDict.getHandle(curWord);
          for (int j = 0; wis != null && j < wis.size(); j++) {
            WordItem wi = wis.get(j);
            int tag = wi.getHandle();
            double freq = -Math.log((1 + wi.getFreq()));
            freq += Math.log((context.getFreq(0, wi.getHandle()) + wis.size() + 1));
            POS pos = new POS(tag, freq);
            sn.addPos(pos);
          }

          if (Utility.SENTENCE_BEGIN.equals(curWord))
            sn.addPos(new POS(100, 0));

          else if (Utility.SENTENCE_END.equals(curWord))
            sn.addPos(new POS(101, 0));
          else {
            int nFreq = 0;
            wis = coreDict.getHandle(curWord);
            if (wis != null) {
              for (WordItem wi : wis)
                nFreq += wi.getFreq();

              if (wis.size() > 0) {
                double freq = -Math.log((double) (1 + nFreq));
                freq += Math.log((double) (context.getFreq(0, 0) + wis.size()));
                sn.addPos(new POS(0, freq));
              }
            }
          }
        } else {
          if (sn.getPos() > 0) {
            int tag = sn.getPos();
            double value = -Math.log(sn.getValue());
            value += Math.log(context.getFreq(0, tag));
            if (value < 0)
              value = 0;
            sn.addPos(new POS(tag, value));
          } else {
            if (sn.getPos() < 0) {
              sn.setPos(-sn.getPos());
              sn.addPos(new POS(-sn.getPos(), sn.getValue()));
            }
            ArrayList<WordItem> wis = coreDict.getHandle(curWord);
            if (wis != null) {
              for (WordItem wi : wis) {
                int tag = wi.getHandle();
                double value = -Math.log(1 + wi.getFreq());
                value += Math.log(context.getFreq(0, tag) + wis.size());
                sn.addPos(new POS(tag, value));
              }
            }
          }
        }

        if (sn.getAllPos() == null)
          guessPos(tagType, sn);
       
        // If a word node's allPos is null, the node cannot stand alone as a word,
        // so its part of speech follows the part of speech of the next word.
        if (i - 1 >= 0 && sns.get(i - 1).getPosSize() == -1) {
          if (sn.getPosSize() > 0) {
            POS pos = new POS(sn.getAllPos().get(0).getTag(), 0);
            sns.get(i - 1).addPos(pos);
          }
        }
      }

      // Append the final end-marker node.
      SegNode last = sns.get(i - 1);
      if (last != null) {
        SegNode sn = new SegNode();
        int tag = 0;
        if (tagType != Utility.TAG_TYPE.TT_NORMAL)
          tag = 101;
        else
          tag = 1;
        POS pos = new POS(tag, 0);
        sn.addPos(pos);
        sns.add(sn);
      }
    }
  }
View Full Code Here

    if (sns != null && coreDict != null && unknownDict != null && context != null) {
      int i = 0;
      String curWord = null;

      for (; i < sns.size(); i++) {
        SegNode sn = sns.get(i);
        sn.setAllPos(null);
        curWord = sn.getSrcWord();
        int gbkID = sn.getGbkID();// dictLib.getGBKID(curWord);
        // if (tagType == Utility.TAG_TYPE.TT_NORMAL ||
        // !unknownDict.isExist(sn.getWord(), 44)) {
        //
        // }

        if (tagType != Utility.TAG_TYPE.TT_NORMAL) {

          // Convert between full-width and half-width forms of "." and "-" when the previous word is Chinese.
          if (tagType == Utility.TAG_TYPE.TT_TRANS_PERSON && i > 0) {
            String prevWord = sns.get(i - 1).getSrcWord();
            if (Utility.charType(prevWord) == Utility.CT_CHINESE) {
              if (".".equals(curWord))
                curWord = "��";
              else if ("-".equals(curWord))
                curWord = "��";
            }
          }

          if (sn.getPos() < 0) {
            AdjoiningPos pos = new AdjoiningPos( 0 , 0);
            sn.addPos(pos);
          } else {
            // Fetch every part-of-speech entry for the current word from the unknownDict dictionary.
            SegAtom sa = unknownDict.getSegAtom(curWord, gbkID);
            for (int j = 0; sa != null && j < sa.getPosCount(); j++) {
              Pos pos = sa.getPos(j);
              double value = -Math.log((1 + pos.getFreq()));
              value += Math.log((context.getFreq(pos.getTag()) + sa.getPosCount() + 1));
              AdjoiningPos apos = new AdjoiningPos(pos , value);
              sn.addPos(apos);
            }

            if (Utility.SENTENCE_BEGIN.equals(curWord))
              sn.addPos(new AdjoiningPos( 100 , 0));

            else if (Utility.SENTENCE_END.equals(curWord))
              sn.addPos(new AdjoiningPos( 101 , 0));
            else {
              int freq = 0;
              sa = coreDict.getSegAtom(curWord, gbkID);
              if (sa != null) {
                double value = -Math.log((double) (1 + freq));
                value += Math.log((double) (context.getFreq(0) + sa.getPosCount()));
                sn.addPos(new AdjoiningPos( 0 , value));

              }
            }
          }
        } else {
          if (sn.getPos() > 0) {
            int tag = sn.getPos();
            double value = -Math.log(1 + sn.getFreq());
            value += Math.log(1 + context.getFreq(tag));
            if (value < 0)
              value = 0;
            sn.addPos(new AdjoiningPos( tag,  value));
          } else {
            if (sn.getPos() < 0) {
              sn.setPos(-sn.getPos());
              sn.addPos(new AdjoiningPos( -sn.getPos(),  sn.getFreq()));
            }
            SegAtom sa = coreDict.getSegAtom(curWord, gbkID);
            if (sa != null) {
              for (int j = 0; j < sa.getPosCount(); j++) {
                Pos pos = sa.getPos(j);
                double value = -Math.log(1 + pos.getFreq());
                value += Math.log(context.getFreq(pos.getTag()) + sa.getPosCount());
                sn.addPos(new AdjoiningPos(pos , value));
              }
            }
          }
        }

        if (sn.getAllPos() == null)
          guessPos(tagType, sn);

        // If a word node's allPos is null, the node cannot stand alone as a word,
        // so its part of speech follows that of the next word, except when the next word is the sentence-end marker.
        if (i - 1 >= 0 && sns.get(i - 1).getPosSize() == -1) {
          if (sn.getPosSize() > 0) {
            Pos pos = sn.getAllPos().get(0).getPos();
            int ipos = pos.getTag() == POSTag.SEN_END ? POSTag.UNKNOWN : pos.getTag();
            AdjoiningPos apos = new AdjoiningPos( ipos , 0);
            sns.get(i - 1).addPos(apos);
          }
        }
      }

      // Append the final end-marker node.
      SegNode last = sns.get(i - 1);
      if (last != null) {
        SegNode sn = new SegNode();
        int tag = 0;
        if (tagType != Utility.TAG_TYPE.TT_NORMAL)
          tag = 101;
        else
          tag = 1;
        AdjoiningPos pos = new AdjoiningPos( tag, 0);
        sn.addPos(pos);
        sns.add(sn);
      }
    }
  }
View Full Code Here

    if (sns != null && coreDict != null && unknownDict != null && context != null) {
      int i = 0;
      String curWord = null;

      for (; i < sns.size(); i++) {
        SegNode sn = sns.get(i);
        sn.setAllPos(null);
        curWord = sn.getSrcWord();
        // if (tagType == Utility.TAG_TYPE.TT_NORMAL ||
        // !unknownDict.isExist(sn.getWord(), 44)) {
        //
        // }

        if (tagType != Utility.TAG_TYPE.TT_NORMAL) {

          // Convert between full-width and half-width forms of "." and "-" when the previous word is Chinese.
          if (tagType == Utility.TAG_TYPE.TT_TRANS_PERSON && i > 0) {
            String prevWord = sns.get(i - 1).getSrcWord();
            if (Utility.charType(prevWord) == Utility.CT_CHINESE) {
              if (".".equals(curWord))
                curWord = "��";
              else if ("-".equals(curWord))
                curWord = "��";
            }
          }

          // Fetch every part-of-speech entry for the current word from the unknownDict dictionary.
          ArrayList<WordItem> wis = unknownDict.getHandle(curWord);
          for (int j = 0; wis != null && j < wis.size(); j++) {
            WordItem wi = wis.get(j);
            int tag = wi.getHandle();
            double freq = -Math.log((1 + wi.getFreq()));
            freq += Math.log((context.getFreq(0, wi.getHandle()) + wis.size() + 1));
            POS pos = new POS(tag, freq);
            sn.addPos(pos);
          }

          if (Utility.SENTENCE_BEGIN.equals(curWord))
            sn.addPos(new POS(100, 0));

          else if (Utility.SENTENCE_END.equals(curWord))
            sn.addPos(new POS(101, 0));
          else {
            int nFreq = 0;
            wis = coreDict.getHandle(curWord);
            if (wis != null) {
              for (WordItem wi : wis)
                nFreq += wi.getFreq();

              if (wis.size() > 0) {
                double freq = -Math.log((double) (1 + nFreq));
                freq += Math.log((double) (context.getFreq(0, 0) + wis.size()));
                sn.addPos(new POS(0, freq));
              }
            }
          }
        } else {
          if (sn.getPos() > 0) {
            int tag = sn.getPos();
            double value = -Math.log(sn.getValue());
            value += Math.log(context.getFreq(0, tag));
            if (value < 0)
              value = 0;
            sn.addPos(new POS(tag, value));
          } else {
            if (sn.getPos() < 0) {
              sn.setPos(-sn.getPos());
              sn.addPos(new POS(-sn.getPos(), sn.getValue()));
            }
            ArrayList<WordItem> wis = coreDict.getHandle(curWord);
            if (wis != null) {
              for (WordItem wi : wis) {
                int tag = wi.getHandle();
                double value = -Math.log(1 + wi.getFreq());
                value += Math.log(context.getFreq(0, tag) + wis.size());
                sn.addPos(new POS(tag, value));
              }
            }
          }
        }

        if (sn.getAllPos() == null)
          guessPos(tagType, sn);
       
        // If a word node's allPos is null, the node cannot stand alone as a word,
        // so its part of speech follows the part of speech of the next word.
        if (i - 1 >= 0 && sns.get(i - 1).getPosSize() == -1) {
          if (sn.getPosSize() > 0) {
            POS pos = new POS(sn.getAllPos().get(0).getTag(), 0);
            sns.get(i - 1).addPos(pos);
          }
        }
      }

      // Append the final end-marker node.
      SegNode last = sns.get(i - 1);
      if (last != null) {
        SegNode sn = new SegNode();
        int tag = 0;
        if (tagType != Utility.TAG_TYPE.TT_NORMAL)
          tag = 101;
        else
          tag = 1;
        POS pos = new POS(tag, 0);
        sn.addPos(pos);
        sns.add(sn);
      }
    }
  }
View Full Code Here

            nPos = j;
            personName = "";
            // Get the possible person name
            while (nPos < j + patterns[k].length()) {
              SegNode sn = sns.get(nPos);
              if (sn.getPos() < 4
                  && unknownDict.getFreq(sn.getWord(), sn.getPos()) < Utility.LITTLE_FREQUENCY)
                personName += sn.getWord();
              nPos += 1;
            }
            if ("CDCD".equals(patterns[k])) {
              if (GetForeignCharCount(personName) > 0)
                j += patterns[k].length() - 1;
              continue;
            }

            SegNode usn = new SegNode();
            usn.setRow(sns.get(j).getRow());
            usn.setCol(sns.get(j + patterns[k].length() - 1).getCol());
            usn.setWord(unknownFlags);
            usn.setSrcWord(personName);
            double value = -Math.log(factor[k]) + computePossibility(j, patterns[k].length(), sns);
            usn.setPos(pos);
            usn.setValue(value);
            segGraph.insert(usn, true);

            j += patterns[k].length();
            bMatched = true;
          }
View Full Code Here

            else
              break;
          }
        }
        if (end > start) {
          SegNode newsn = new SegNode();
          newsn.setRow(sns.get(start).getRow());
          newsn.setCol(sns.get(end - 1).getCol());
          newsn.setPos(pos);
          newsn.setWord(unknownFlags);
          newsn.setSrcWord(srcWord);
          double value = computePossibility(start, end - start + 1, sns);
          newsn.setValue(value);
          segGraph.insert(newsn, true);
        }
      }
    }
  }
View Full Code Here

    }
  }

  private int getBestTag(ArrayList<SegNode> sns, int index) {
    if (sns != null && index >= 0 && index < sns.size()) {
      SegNode sn = sns.get(index);
      return getBestTag(sn);

    }

    return -1;
View Full Code Here

TOP

Related Classes of org.ictclas4j.bean.SegNode

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.