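// Roughly 5639 characters have words, so 0x2fff (= 12287) is used as the size:
// 12287 > 2^13 (8192) > 8000, and 8000 * 0.75 = 6000 > 5639.
// NOTE(review): the original note equated 0x2fff with 2^13, but 2^13 - 1 is 0x1fff — confirm which was intended.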
vocabularyDictionary = new HashBinaryDictionary(
getVocabularyWords(), 0x2fff, 0.75f);
Dictionary noiseWordsDic = getNoiseWordsDictionary();
for (int i = 0; i < noiseWordsDic.size(); i++) {
Hit hit = vocabularyDictionary.search(noiseWordsDic.get(i), 0, noiseWordsDic.get(i).length());
if (hit.isHit()) {
hit.getWord().setNoiseWord();
}
}
Dictionary noiseCharactorsDic = getNoiseCharactorsDictionary();
for (int i = 0; i < noiseCharactorsDic.size(); i++) {
Hit hit = vocabularyDictionary.search(noiseCharactorsDic.get(i), 0, noiseCharactorsDic.get(i).length());
if (hit.isHit()) {
hit.getWord().setNoiseCharactor();
}
}
}
return vocabularyDictionary;