Package ivory.core.exception

Examples of ivory.core.exception.RetrievalException


      float[] z_scores = new float[accumulated_scores.length];
      for (int i = 0; i < z_scores.length; i++) {
        z_scores[i] = (accumulated_scores[i] - avgScores) / stddev;
      }
    } else {
      throw new RetrievalException("PruningFunction " + pruner + " is not supported!");
    }

    if (retainSize < mK) {
      if (mDocSet.length >= mK) {
        retainSize = mK;
View Full Code Here


          featureID = c.getParamID().trim(); // termWt, orderedWt, unorderedWt
          scoringFunction = c.getScoringFunction();

          mSize = c.getWindowSize(); // window width
          if (mSize == -1 && !(featureID.equals("termWt"))) {
            throw new RetrievalException("Only term features don't support getWindowSize()! " + featureID);
          }
          concepts_this_stage[cntConcepts] = c.getSingleTerms();
          clique_wgts[cntConcepts] = c.getWeight();

          cntConcepts++;
          subTotal_cascadeCost += c.cost;
        }

        // for use in pruning

        // score-based
        float max_score = results[0].score;
        float min_score = results[results.length - 1].score;
        float score_threshold = (max_score - min_score) * pruningParameter + min_score;
        float mean_max_score_threshold = pruningParameter * max_score + (1.0f - pruningParameter) * meanScore;

        // rank-based
        int retainSize = (int) ((1.0 - pruningParameter) * ((double) (results.length)));
        int size = 0;

        // Clear priority queue.
        mSortedAccumulators.clear();

        float[] termCollectionFreqs = new float[cntConcepts];
        float[] termDFs = new float[cntConcepts];
        int[][] termIndexes = new int[cntConcepts][];

        float sumScore = 0;

        for (int j = 0; j < cntConcepts; j++) {
          String[] singleTerms = concepts_this_stage[j];

          int termIndex1 = termToCliqueNumber.get(singleTerms[0]);

          if (featureID.indexOf("termWt") != -1) {
            float termCollectionFreq = cf.get(singleTerms[0]);
            termCollectionFreqs[j] = termCollectionFreq;

            float termDF = df.get(singleTerms[0]);
            termDFs[j] = termDF;

            termIndexes[j] = new int[1];
            termIndexes[j][0] = termIndex1;

            if (singleTerms.length != 1) {
              System.out.println("Should have length 1 " + singleTerms.length);
              System.exit(-1);
            }
          } else {
            int termIndex2 = termToCliqueNumber.get(singleTerms[1]);

            termIndexes[j] = new int[2];
            termIndexes[j][0] = termIndex1;
            termIndexes[j][1] = termIndex2;

            if (singleTerms.length != 2) {
              System.out.println("Should have length 2 " + singleTerms.length);
              System.exit(-1);
            }
          }
        }

        // iterate over results documents, which are sorted in scores
        for (int i = 0; i < results.length; i++) {
          // pruning, if okay, scoring, update pruning stats for next cascade stage

          boolean passedPruning = false;
          if (pruningFunction.equals("rank")) {
            if (i < retainSize) {
              passedPruning = true;
            } else {
              if (size < mK && mK != defaultNumDocs) {
                passedPruning = true;
              } else {
                break;
              }
            }
          } else if (pruningFunction.equals("score")) {
            if (results[i].score > score_threshold) {
              passedPruning = true;
            } else {
              if (size < mK && mK != defaultNumDocs) {
                passedPruning = true;
              } else {
                break;
              }
            }
          } else if (pruningFunction.equals("mean-max")) {
            if (results[i].score > mean_max_score_threshold) {
              passedPruning = true;
            } else {
              if (size < mK && mK != defaultNumDocs) {
                passedPruning = true;
              } else {
                break;
              }
            }
          } else {
            throw new RetrievalException("Not supported pruner! "+pruningFunction);
          }

          if (passedPruning) {
            size++;

            int docIndex = results[i].index_into_keptDocs;
            int docLen = keptDocLengths[docIndex];
            float docScore_cascade = 0;

            for (int j = 0; j < cntConcepts; j++) {
              if (featureID.equals("termWt")) {
                int termIndex1 = termIndexes[j][0];
                int[] positions1 = keptDocs[docIndex][termIndex1];

                int tf = 0;
                if (positions1 != null) {
                  tf = positions1.length;
                }

                docScore_cascade += clique_wgts[j] * scoringFunction.getScore(tf, docLen);

              } else { // term proximity

                // merge into a single stream and compute matches. Assume there are only two
                // terms!!!

                int termIndex1 = termIndexes[j][0];
                int termIndex2 = termIndexes[j][1];

                int[] positions1 = keptDocs[docIndex][termIndex1];
                int[] positions2 = keptDocs[docIndex][termIndex2];

                int matches = 0;

                if (positions1 != null && positions2 != null) { // both query terms are in the doc

                  termMatches++;
                  int[] ids = new int[positions1.length];
                  Arrays.fill(ids, 0);
                  int length = positions1.length;

                  int length2 = positions2.length;

                  int[] newPositions = new int[length + length2];
                  int[] newIds = new int[length + length2];

                  int posA = 0;
                  int posB = 0;

                  int ii = 0;
                  while (ii < length + length2) {
                    if (posB == length2 || posA < length && positions1[posA] <= positions2[posB]) {
                      newPositions[ii] = positions1[posA];
                      newIds[ii] = ids[posA];
                      posA++;
                    } else {
                      newPositions[ii] = positions2[posB];
                      newIds[ii] = 1;
                      posB++;
                    }
                    ii++;
                  }

                  int[] positions = newPositions;
                  ids = newIds;

                  BitSet mMatchedIds = new BitSet(2); // Assume there are only two terms!!!

                  if (featureID.equals("orderedWt")) {

                    for (ii = 0; ii < positions.length; ii++) {
                      mMatchedIds.clear();
                      int maxGap = 0;
                      boolean ordered = true;
                      mMatchedIds.set(ids[ii]);
                      int matchedIDCounts = 1;
                      int lastMatchedID = ids[ii];
                      int lastMatchedPos = positions[ii];

                      for (int jj = ii + 1; jj < positions.length; jj++) {
                        int curID = ids[jj];
                        int curPos = positions[jj];
                        if (!mMatchedIds.get(curID)) {
                          mMatchedIds.set(curID);
                          matchedIDCounts++;
                          if (curID < lastMatchedID) {
                            ordered = false;
                          }
                          if (curPos - lastMatchedPos > maxGap) {
                            maxGap = curPos - lastMatchedPos;
                          }
                        }
                        // stop looking if the maximum gap is too large
                        // or the terms appear out of order
                        if (maxGap > mSize || !ordered) {
                          break;
                        }
                        // did we match all the terms, and in order?
                        if (matchedIDCounts == 2 && ordered) {
                          matches++;
                          break;
                        }
                      }
                    }
                  } else if (featureID.equals("unorderedWt")) {

                    for (ii = 0; ii < positions.length; ii++) {
                      mMatchedIds.clear();

                      mMatchedIds.set(ids[ii]);
                      int matchedIDCounts = 1;
                      int startPos = positions[ii];

                      for (int jj = ii + 1; jj < positions.length; jj++) {
                        int curID = ids[jj];
                        int curPos = positions[jj];
                        int windowSize = curPos - startPos + 1;

                        if (!mMatchedIds.get(curID)) {
                          mMatchedIds.set(curID);
                          matchedIDCounts++;
                        }
                        // stop looking if we've exceeded the maximum window size
                        if (windowSize > mSize) {
                          break;
                        }
                        // did we match all the terms?
                        if (matchedIDCounts == 2) {
                          matches++;
                          break;
                        }
                      }
                    }
                  } else {
                    System.out.println("Invalid featureID " + featureID);
                    System.exit(-1);
                  }
                } // end if this is a match, i.e., both query terms are in the doc

//                float s = getScore(matches, docLen, RetrievalEnvironment.defaultCf,
//                    (float) RetrievalEnvironment.defaultDf, scoringFunctionName);
//                docScore_cascade += clique_wgts[j] * s;
               
                GlobalTermEvidence termEvidence = scoringFunction.getGlobalTermEvidence();
                termEvidence.cf = RetrievalEnvironment.defaultCf;
                termEvidence.df = RetrievalEnvironment.defaultDf;

                scoringFunction.initialize(termEvidence, scoringFunction.getGlobalEvidence());
                docScore_cascade += clique_wgts[j] * scoringFunction.getScore(matches, docLen);

              } // end else it's proximity feature
            } // end for (each concept)

            // accumulate doc score in results[i] across cascade stages
            results[i].score += docScore_cascade;

            mSortedAccumulators.add(results[i]);

            sumScore += results[i].score;

          } // end if passed pruning
        } // end iterating over docs

        // order based on new scores in results[], put into priority queue
        if (size != mSortedAccumulators.size()) {
          throw new RetrievalException("They should be equal right here " + size + " "
              + mSortedAccumulators.size());
        }

        CascadeAccumulator[] results_tmp = new CascadeAccumulator[size];

View Full Code Here

    // Cliques associated with the MRF.
    List<Clique> cliques = mMRF.getCliques();

    if (cliques.size() == 0) {
      throw new RetrievalException("Shouldn't have size 0!");
    }

    // Current accumulator.
    CascadeAccumulator a = mAccumulators[0];
View Full Code Here

      if ("feature".equals(child.getNodeName())) {
        // Get the feature id.
        String featureID = XMLTools.getAttributeValue(child, "id", "");
        if (featureID.equals("")) {
          throw new RetrievalException("Each feature must specify an id attribute!");
        }

        // Get feature weight (default = 1.0).
        float weight = XMLTools.getAttributeValue(child, "weight", 1.0f);

        // Concept importance model (optional).
        ConceptImportanceModel importanceModel = null;

        // Get concept importance source (if applicable).
        String importanceSource = XMLTools.getAttributeValue(child, "importance", "");
        if (!importanceSource.equals("")) {
          importanceModel = env.getImportanceModel(importanceSource);
          if (importanceModel == null) {
            throw new RetrievalException("ImportanceModel " + importanceSource + " not found!");
          }
        }

        // Get CliqueSet type.
        String cliqueSetType = XMLTools.getAttributeValue(child, "cliqueSet", "");

        // Get Cascade stage (if any)
        int cascadeStage = XMLTools.getAttributeValue(child, "cascadeStage", -1);

        String pruner_and_params = XMLTools.getAttributeValue(child, "prune", "null");
        String thePruner = (pruner_and_params.trim().split("\\s+"))[0];
        String conceptBinType = XMLTools.getAttributeValue(child, "conceptBinType", "");
        String conceptBinParams = XMLTools.getAttributeValue(child, "conceptBinParams", "");
        String scoreFunction = XMLTools.getAttributeValue(child, "scoreFunction", null);

        int width = XMLTools.getAttributeValue(child, "width", -1);

        if (cascadeStage != -1) {
          RetrievalEnvironment.setIsNew(true);
        } else {
          RetrievalEnvironment.setIsNew(false);
        }

        if (cascadeStage != -1) {
          if (!conceptBinType.equals("") || !conceptBinParams.equals("")) {
            if (conceptBinType.equals("") || conceptBinParams.equals("")) {
              throw new RetrievalException("Most specify conceptBinType || conceptBinParams");
            }
            importanceModel = env.getImportanceModel("wsd");

            if (importanceModel == null) {
              throw new RetrievalException("ImportanceModel " + importanceSource + " not found!");
            }
          }
        }

        cascade_stage_proper = cascadeStage;
View Full Code Here

      // [0]: # bins; [1]: which bin for this feature
      String[] tokens = conceptBinParams.split("\\s+");

      if (tokens.length != 2) {
        throw new RetrievalException(
            "For impact binning, should specify # bins(as a fraction of # total cliques) and which bin for this feature");
      }

      // K
      double numBins = Math.floor(Double.parseDouble(tokens[0]));

      // 1-indexed!!!!
      int whichBin = Integer.parseInt(tokens[1]);

      if (sanityCheck.containsKey(conceptBinType + " " + numBins + " " + whichBin + " "
          + all_concepts + " " + featureID + " " + thePruner + " " + width + " " + scoreFunction)) {
        throw new RetrievalException("Bin " + whichBin
            + " has been used by this concept type before " + conceptBinType + " " + numBins + " "
            + all_concepts + " " + featureID + " " + thePruner + " " + width + " " + scoreFunction);
      } else {
        sanityCheck.put(conceptBinType + " " + numBins + " " + whichBin + " " + all_concepts + " "
            + featureID + " " + thePruner + " " + width + " " + scoreFunction, "1");
      }

      if (conceptBinType.equals("default")) {
        // concept importance in descending order
        int[] order_descending = new int[order.length];
        for (int i = 0; i < order_descending.length; i++) {
          order_descending[i] = order[order.length - i - 1];
        }

        int[] cascadeCliques = null;

        // if there are 5 bigram concepts, if there are 3 bins, the last bin will take concepts 3,
        // 4, 5
        if (numBins == whichBin && order_descending.length > numBins) {
          cascadeCliques = new int[order_descending.length - (int) numBins + 1];
          for (int j = whichBin - 1; j < order_descending.length; j++) { // 0-indexed
            cascadeCliques[j - whichBin + 1] = order_descending[j];
          }
        } else {
          cascadeCliques = new int[1];

          if ((whichBin - 1) < order_descending.length) {
            cascadeCliques[0] = order_descending[whichBin - 1];
          } else {
            return new int[0];
          }
        }

        // sort by clique numbers
        double[] cascadeCliques_sorted_by_clique_number = new double[cascadeCliques.length];
        int[] order1 = new int[cascadeCliques.length];
        for (int j = 0; j < order1.length; j++) {
          order1[j] = j;
          cascadeCliques_sorted_by_clique_number[j] = cascadeCliques[j];
        }
        ivory.smrf.model.constrained.ConstraintModel.Quicksort(
            cascadeCliques_sorted_by_clique_number, order1, 0, order1.length - 1);

        for (int j = 0; j < cascadeCliques_sorted_by_clique_number.length; j++) {
          cascadeCliques[j] = (int) cascadeCliques_sorted_by_clique_number[j];
        }
        return cascadeCliques;
      }

      else if (conceptBinType.equals("impact")) {

        double totalCliques = (double) (conceptWeights.length);
        double base = Math.pow((totalCliques + 1), (1 / numBins));

        double firstBinSize = base - 1;
        if (firstBinSize < 1) {
          firstBinSize = 1;
        }

        int start = 0;
        int end = (int) (Math.round(firstBinSize));
        double residual = firstBinSize - end;

        for (int i = 2; i <= whichBin; i++) {
          start = end;
          double v = firstBinSize * Math.pow(base, (i - 1));
          double v_plus_residual = v + residual;
          double v_round = Math.round(v_plus_residual);
          residual = v_plus_residual - v_round;
          end += (int) v_round;
        }

        if (start >= totalCliques) {
          return new int[0];
        }

        if (end > totalCliques) {
          end = (int) totalCliques;
        }

        int[] cascadeCliques = new int[end - start];

        // concept importance in descending order
        int[] order_descending = new int[order.length];
        for (int i = 0; i < order_descending.length; i++) {
          order_descending[i] = order[order.length - i - 1];
        }

        for (int i = start; i < end; i++) {
          cascadeCliques[i - start] = order_descending[i];
        }

        // sort by clique numbers
        double[] cascadeCliques_sorted_by_clique_number = new double[cascadeCliques.length];
        int[] order1 = new int[cascadeCliques.length];
        for (int j = 0; j < order1.length; j++) {
          cascadeCliques_sorted_by_clique_number[j] = cascadeCliques[j];
          order1[j] = j;
        }
        ivory.smrf.model.constrained.ConstraintModel.Quicksort(
            cascadeCliques_sorted_by_clique_number, order1, 0, order1.length - 1);

        for (int j = 0; j < cascadeCliques_sorted_by_clique_number.length; j++) {
          cascadeCliques[j] = (int) cascadeCliques_sorted_by_clique_number[j];
        }
        return cascadeCliques;
      }
    } else {
      throw new RetrievalException("Not yet supported " + conceptBinType);
    }

    return null;
  }
View Full Code Here

        }

        try {
          metafeatureValues.put(mf, readDataStats(file));
        } catch (IOException e) {
          throw new RetrievalException("Error: " + e);
        }

        float defaultValue = XMLTools.getAttributeValue(child, "default", 0.0f);
        defaultValues.put(mf.getName(), defaultValue);
      }
View Full Code Here

        // Get concept importance source (if applicable).
        String importanceSource = XMLTools.getAttributeValue(child, "importance", "");
        if (!importanceSource.equals("")) {
          importanceModel = env.getImportanceModel(importanceSource);
          if (importanceModel == null) {
            throw new RetrievalException("ImportanceModel " + importanceSource + " not found!");
          }
        }

        // Get CliqueSet type.
        String cliqueSetType = XMLTools.getAttributeValue(child, "cliqueSet", "");
View Full Code Here

        builder = new CascadeFeatureBasedMRFBuilder(env, model);
      } else {
        throw new ConfigurationException("Unrecognized model type: " + modelType);
      }
    } catch (IOException e) {
      throw new RetrievalException("Error getting MRFBuilder: " + e);
    }

    return builder;
  }
View Full Code Here

    super(env, model);

    // model type
    modelType = XMLTools.getAttributeValue(model, "style", null);
    if (modelType == null || (!"Indep".equals(modelType) && !"Joint".equals(modelType))) {
      throw new RetrievalException(
          "Error: GreedyConstrainedMRFBuilder requires a model type attribute of Indep or Joint!");
    }

    // query likelihood
    qlMultiple = XMLTools.getAttributeValue(model, "qlMultiple", -1.0f);

    // unigram and bigram basic thresholds
    unigramAddThreshold = XMLTools.getAttributeValue(model, "unigramAddThreshold", -1.0f);
    bigramAddThreshold = XMLTools.getAttributeValue(model, "bigramAddThreshold", -1.0f);

    // unigram and bigram redundancy thresholds
    unigramRedundThreshold = XMLTools.getAttributeValue(model, "unigramRedundThreshold", -1.0f);
    bigramRedundThreshold = XMLTools.getAttributeValue(model, "bigramRedundThreshold", -1.0f);

    // beta value
    beta = XMLTools.getAttributeValue(model, "beta", -1.0f);

    if ("Indep".equals(modelType) && (qlMultiple == -1 || unigramAddThreshold == -1)) {
      throw new RetrievalException(
          "Error: Indep model must specify valid qlMultiple, unigramAddThreshold, and bigramAddThreshold attributes!");
    }

    if ("Joint".equals(modelType) &&
         (qlMultiple == -1 || unigramAddThreshold == -1 || bigramAddThreshold == -1
            || unigramRedundThreshold == -1 || bigramRedundThreshold == -1 || beta == -1)) {
      throw new RetrievalException(
          "Error: Joint model must specify valid qlMultiple, unigramAddThreshold, bigramAddThreshold, unigramRedundThreshold, bigramRedundThreshold, and beta attributes!");
    }

    String file = XMLTools.getAttributeValue(model, "file", null);
    if (file == null) {
      throw new RetrievalException(
          "Error: GreedyConstrainedMRFBuilder requires a file attribute specifying the location of the document frequencies!");
    }

    // Read document frequencies.
    dfs = LinearImportanceModel.readDataStats(file);
View Full Code Here

        Arrays.sort(conceptResults, new Accumulator.DocnoComparator());

        float score = 0.0f;
        for (int i = 0; i < conceptResults.length; i++) {
          if (fbResults[i].docno != conceptResults[i].docno) {
            throw new RetrievalException("Error: Mismatch occured in getExpandedMRF!");
          }
          score += Math.exp(fbResults[i].score + conceptResults[i].score);
        }

        int size = sortedConcepts.size();
View Full Code Here

TOP

Related Classes of ivory.core.exception.RetrievalException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., which is owned by Oracle, Inc. Contact coftware#gmail.com.