Package ivory.smrf.model.score

Examples of ivory.smrf.model.score.ScoringFunction


  public Accumulator[] rank(String qid, JSONObject query, int queryLength) {
    GlobalEvidence globalEvidence = new GlobalEvidence(env.getDocumentCount(), env.getCollectionSize(), queryLength);

    PostingsReaderWrapper structureReader;
    ScoringFunction scoringFunction = new BM25ScoringFunction();
    try {
      structureReader = new PostingsReaderWrapper(query, env, scoringFunction, globalEvidence);
    } catch (JSONException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }
    // ... remainder of the method elided in the original excerpt

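The excerpt above wires a hard-coded BM25ScoringFunction into the query evaluation pipeline. As a rough sketch of the kind of computation a ScoringFunction.getScore(tf, docLen) call performs, here is a minimal, self-contained BM25 term scorer; the class and parameter names below are illustrative, not Ivory's actual API:

// Minimal BM25 sketch (hypothetical helper, not Ivory's BM25ScoringFunction).
public class Bm25Sketch {
  private static final float K1 = 1.2f;  // term-frequency saturation
  private static final float B = 0.75f;  // document-length normalization

  private final float avgDocLen;
  private final float idf;

  // In Ivory, docCount and df would arrive via GlobalEvidence / GlobalTermEvidence.
  public Bm25Sketch(long docCount, long df, float avgDocLen) {
    this.avgDocLen = avgDocLen;
    this.idf = (float) Math.log((docCount - df + 0.5) / (df + 0.5));
  }

  // Mirrors the getScore(tf, docLen) shape used throughout these examples.
  public float getScore(int tf, int docLen) {
    float norm = K1 * (1.0f - B + B * docLen / avgDocLen);
    return idf * (tf * (K1 + 1.0f)) / (tf + norm);
  }
}

In the library itself the statistics are supplied through initialize(GlobalTermEvidence, GlobalEvidence), as the later excerpts show. The next excerpt comes from a cascade ranking loop, which prunes and rescores the result list stage by stage: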

        results = executeInitialStage();

        cascadeStage++;
      } else {
        String featureID = null;
        ScoringFunction scoringFunction = null;

        int mSize = -1;
        String[][] concepts_this_stage = new String[totalCnt][];
        float[] clique_wgts = new float[concepts_this_stage.length];

        int cntConcepts = 0;

        for (CascadeClique c : cascadeStages.get(cascadeStage)) {
          cnt++;
          pruningFunction = c.getPruningFunction();
          pruningParameter = c.getPruningParameter();

          featureID = c.getParamID().trim(); // termWt, orderedWt, unorderedWt
          scoringFunction = c.getScoringFunction();

          mSize = c.getWindowSize(); // window width
          if (mSize == -1 && !(featureID.equals("termWt"))) {
            throw new RetrievalException("Only term features don't support getWindowSize()! " + featureID);
          }
          concepts_this_stage[cntConcepts] = c.getSingleTerms();
          clique_wgts[cntConcepts] = c.getWeight();

          cntConcepts++;
          subTotal_cascadeCost += c.cost;
        }

        // Precompute pruning thresholds for this stage.

        // score-based
        float max_score = results[0].score;
        float min_score = results[results.length - 1].score;
        float score_threshold = (max_score - min_score) * pruningParameter + min_score;
        float mean_max_score_threshold = pruningParameter * max_score + (1.0f - pruningParameter) * meanScore;

        // rank-based
        int retainSize = (int) ((1.0 - pruningParameter) * ((double) (results.length)));
        int size = 0;

        // Clear priority queue.
        mSortedAccumulators.clear();

        float[] termCollectionFreqs = new float[cntConcepts];
        float[] termDFs = new float[cntConcepts];
        int[][] termIndexes = new int[cntConcepts][];

        float sumScore = 0;

        for (int j = 0; j < cntConcepts; j++) {
          String[] singleTerms = concepts_this_stage[j];

          int termIndex1 = termToCliqueNumber.get(singleTerms[0]);

          if (featureID.indexOf("termWt") != -1) {
            float termCollectionFreq = cf.get(singleTerms[0]);
            termCollectionFreqs[j] = termCollectionFreq;

            float termDF = df.get(singleTerms[0]);
            termDFs[j] = termDF;

            termIndexes[j] = new int[1];
            termIndexes[j][0] = termIndex1;

            if (singleTerms.length != 1) {
              System.out.println("Should have length 1 " + singleTerms.length);
              System.exit(-1);
            }
          } else {
            int termIndex2 = termToCliqueNumber.get(singleTerms[1]);

            termIndexes[j] = new int[2];
            termIndexes[j][0] = termIndex1;
            termIndexes[j][1] = termIndex2;

            if (singleTerms.length != 2) {
              System.out.println("Should have length 2 " + singleTerms.length);
              System.exit(-1);
            }
          }
        }

        // Iterate over the result documents, which are sorted by score.
        for (int i = 0; i < results.length; i++) {
          // Prune; if the document survives, score it and update pruning statistics for the next cascade stage.

          boolean passedPruning = false;
          if (pruningFunction.equals("rank")) {
            if (i < retainSize) {
              passedPruning = true;
            } else {
              if (size < mK && mK != defaultNumDocs) {
                passedPruning = true;
              } else {
                break;
              }
            }
          } else if (pruningFunction.equals("score")) {
            if (results[i].score > score_threshold) {
              passedPruning = true;
            } else {
              if (size < mK && mK != defaultNumDocs) {
                passedPruning = true;
              } else {
                break;
              }
            }
          } else if (pruningFunction.equals("mean-max")) {
            if (results[i].score > mean_max_score_threshold) {
              passedPruning = true;
            } else {
              if (size < mK && mK != defaultNumDocs) {
                passedPruning = true;
              } else {
                break;
              }
            }
          } else {
            throw new RetrievalException("Not supported pruner! "+pruningFunction);
          }

          if (passedPruning) {
            size++;

            int docIndex = results[i].index_into_keptDocs;
            int docLen = keptDocLengths[docIndex];
            float docScore_cascade = 0;

            for (int j = 0; j < cntConcepts; j++) {
              if (featureID.equals("termWt")) {
                int termIndex1 = termIndexes[j][0];
                int[] positions1 = keptDocs[docIndex][termIndex1];

                int tf = 0;
                if (positions1 != null) {
                  tf = positions1.length;
                }

                docScore_cascade += clique_wgts[j] * scoringFunction.getScore(tf, docLen);

              } else { // term proximity

                // Merge the two position lists into a single ordered stream and
                // count window matches. This code assumes exactly two terms.

                int termIndex1 = termIndexes[j][0];
                int termIndex2 = termIndexes[j][1];

                int[] positions1 = keptDocs[docIndex][termIndex1];
                int[] positions2 = keptDocs[docIndex][termIndex2];

                int matches = 0;

                if (positions1 != null && positions2 != null) { // both query terms are in the doc

                  termMatches++;
                  int[] ids = new int[positions1.length];
                  Arrays.fill(ids, 0);
                  int length = positions1.length;

                  int length2 = positions2.length;

                  int[] newPositions = new int[length + length2];
                  int[] newIds = new int[length + length2];

                  int posA = 0;
                  int posB = 0;

                  int ii = 0;
                  while (ii < length + length2) {
                    if (posB == length2 || posA < length && positions1[posA] <= positions2[posB]) {
                      newPositions[ii] = positions1[posA];
                      newIds[ii] = ids[posA];
                      posA++;
                    } else {
                      newPositions[ii] = positions2[posB];
                      newIds[ii] = 1;
                      posB++;
                    }
                    ii++;
                  }

                  int[] positions = newPositions;
                  ids = newIds;

                  BitSet mMatchedIds = new BitSet(2); // Assumes exactly two terms.

                  if (featureID.equals("orderedWt")) {

                    for (ii = 0; ii < positions.length; ii++) {
                      mMatchedIds.clear();
                      int maxGap = 0;
                      boolean ordered = true;
                      mMatchedIds.set(ids[ii]);
                      int matchedIDCounts = 1;
                      int lastMatchedID = ids[ii];
                      int lastMatchedPos = positions[ii];

                      for (int jj = ii + 1; jj < positions.length; jj++) {
                        int curID = ids[jj];
                        int curPos = positions[jj];
                        if (!mMatchedIds.get(curID)) {
                          mMatchedIds.set(curID);
                          matchedIDCounts++;
                          if (curID < lastMatchedID) {
                            ordered = false;
                          }
                          if (curPos - lastMatchedPos > maxGap) {
                            maxGap = curPos - lastMatchedPos;
                          }
                        }
                        // stop looking if the maximum gap is too large
                        // or the terms appear out of order
                        if (maxGap > mSize || !ordered) {
                          break;
                        }
                        // did we match all the terms, and in order?
                        if (matchedIDCounts == 2 && ordered) {
                          matches++;
                          break;
                        }
                      }
                    }
                  } else if (featureID.equals("unorderedWt")) {

                    for (ii = 0; ii < positions.length; ii++) {
                      mMatchedIds.clear();

                      mMatchedIds.set(ids[ii]);
                      int matchedIDCounts = 1;
                      int startPos = positions[ii];

                      for (int jj = ii + 1; jj < positions.length; jj++) {
                        int curID = ids[jj];
                        int curPos = positions[jj];
                        int windowSize = curPos - startPos + 1;

                        if (!mMatchedIds.get(curID)) {
                          mMatchedIds.set(curID);
                          matchedIDCounts++;
                        }
                        // stop looking if we've exceeded the maximum window size
                        if (windowSize > mSize) {
                          break;
                        }
                        // did we match all the terms?
                        if (matchedIDCounts == 2) {
                          matches++;
                          break;
                        }
                      }
                    }
                  } else {
                    System.out.println("Invalid featureID " + featureID);
                    System.exit(-1);
                  }
                } // end if this is a match, i.e., both query terms are in the doc

                // Proximity matches are scored with collection-wide default
                // statistics (defaultCf/defaultDf) rather than per-term evidence.
                GlobalTermEvidence termEvidence = scoringFunction.getGlobalTermEvidence();
                termEvidence.cf = RetrievalEnvironment.defaultCf;
                termEvidence.df = RetrievalEnvironment.defaultDf;

                scoringFunction.initialize(termEvidence, scoringFunction.getGlobalEvidence());
                docScore_cascade += clique_wgts[j] * scoringFunction.getScore(matches, docLen);

              } // end else it's proximity feature
            } // end for (each concept)

            // accumulate doc score in results[i] across cascade stages
            // ... remainder of the cascade stage elided in the original excerpt
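Before any scoring, the stage prunes the ranked list with one of three strategies, selected by pruningFunction. A compact sketch of just the three survival tests, assuming results are sorted by descending score (all names below are illustrative):

// Hypothetical distillation of the pruning tests above. scores must be
// sorted in descending order; alpha is the pruning parameter in [0, 1].
final class PruningSketch {
  static boolean survives(String pruner, int rank, float score,
      float[] scores, float alpha, float meanScore) {
    float max = scores[0];
    float min = scores[scores.length - 1];
    switch (pruner) {
      case "rank":     // keep the top (1 - alpha) fraction by rank
        return rank < (int) ((1.0 - alpha) * scores.length);
      case "score":    // keep scores above a min/max interpolated threshold
        return score > (max - min) * alpha + min;
      case "mean-max": // threshold interpolates between max and mean score
        return score > alpha * max + (1.0f - alpha) * meanScore;
      default:
        throw new IllegalArgumentException("Unsupported pruning function: " + pruner);
    }
  }
}

The original additionally lets up to mK documents through even when they fail the test, so each stage retains a minimum number of documents to work with.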

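For proximity features, the excerpt merges the two terms' position lists into one ordered stream and counts qualifying windows. A standalone sketch of the ordered-window count for the two-term case, under the same assumptions (positions sorted ascending, ids marking which term produced each position, names illustrative):

// Hypothetical two-term version of the orderedWt counting above: count
// occurrences of term 0 followed by term 1 within a gap of at most mSize.
final class OrderedWindowSketch {
  static int countOrderedMatches(int[] positions, int[] ids, int mSize) {
    int matches = 0;
    for (int i = 0; i < positions.length; i++) {
      if (ids[i] != 0) {
        continue; // an ordered match must start with the first term
      }
      for (int j = i + 1; j < positions.length; j++) {
        if (positions[j] - positions[i] > mSize) {
          break; // positions are sorted, so the gap only grows from here
        }
        if (ids[j] == 1) {
          matches++; // first in-order occurrence of the second term
          break;
        }
      }
    }
    return matches;
  }
}

The unorderedWt variant instead checks the window width curPos - startPos + 1 against mSize and accepts the two terms in either order. The next excerpt appears to come from a feedback/query-expansion model builder; it begins mid-loop, inside the per-document scoring of a candidate expansion concept: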
          float weight = parameters.get(j).getWeight();
          ConceptImportanceModel importanceModel = importanceModels.get(j);
          if (importanceModel != null) {
            weight *= importanceModel.getConceptWeight(concept);
          }
          ScoringFunction fn = scoringFunctions[j];
          fn.initialize(termEvidence, globalEvidence);

          Short tf = tfs[i].get(vocab[conceptID].getKey());
          if (tf == null) {
            tf = 0;
          }
          float s = fn.getScore(tf, doclens[i]);

          docScore += weight * s;
        }
        score += Math.exp(fbResults[i].score + docScore);
      }

      int size = sortedConcepts.size();
      if (size < numFeedbackTerms || sortedConcepts.peek().score < score) {
        if (size == numFeedbackTerms) {
          sortedConcepts.poll(); // Remove worst concept.
        }
        sortedConcepts.add(new Accumulator(conceptID, score));
      }
    }

    // Compute the weights of the expanded terms.
    int numTerms = Math.min(numFeedbackTerms, sortedConcepts.size());
    float totalWt = 0.0f;
    Accumulator[] bestConcepts = new Accumulator[numTerms];
    for (int i = 0; i < numTerms; i++) {
      Accumulator a = sortedConcepts.poll();
      bestConcepts[i] = a;
      totalWt += a.score;
    }

    // Document node (shared across all expansion cliques).
    DocumentNode docNode = new DocumentNode();

    // Expression generator (shared across all expansion cliques).
    ExpressionGenerator generator = new TermExpressionGenerator();

    // Add cliques corresponding to best expansion concepts.
    for (int i = 0; i < numTerms; i++) {
      Accumulator a = bestConcepts[i];

      // Construct the MRF corresponding to this concept.
      String concept = vocab[a.docno].getKey();

      for (int j = 0; j < scoringFunctionNodes.size(); j++) {
        Node functionNode = scoringFunctionNodes.get(j);
        String functionType = XMLTools.getAttributeValue(functionNode, "scoreFunction", null);
        ScoringFunction fn = ScoringFunction.create(functionType, functionNode);

        Parameter parameter = parameters.get(j);
        ConceptImportanceModel importanceModel = importanceModels.get(j);

        List<GraphNode> cliqueNodes = Lists.newArrayList();
        // ... clique construction elided in the original excerpt
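The sortedConcepts priority queue above implements a bounded top-k selection: a min-heap keeps the numFeedbackTerms best concepts and evicts the current worst whenever a better one arrives. A minimal sketch of the same pattern (class and field names are illustrative stand-ins for Ivory's Accumulator):

import java.util.PriorityQueue;

// Hypothetical sketch of the bounded top-k selection used above.
final class ConceptScore {
  final int conceptId;
  final float score;
  ConceptScore(int conceptId, float score) {
    this.conceptId = conceptId;
    this.score = score;
  }
}

final class TopKConcepts {
  private final int k;
  // Min-heap on score, so peek() is the worst concept currently retained.
  private final PriorityQueue<ConceptScore> heap =
      new PriorityQueue<>((a, b) -> Float.compare(a.score, b.score));

  TopKConcepts(int k) {
    this.k = k;
  }

  void offer(int conceptId, float score) {
    if (heap.size() < k || heap.peek().score < score) {
      if (heap.size() == k) {
        heap.poll(); // evict the worst before inserting
      }
      heap.add(new ConceptScore(conceptId, score));
    }
  }
}

This mirrors the size < numFeedbackTerms || sortedConcepts.peek().score < score test at the end of the excerpt.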
