Package org.apache.solr.common.util

Examples of org.apache.solr.common.util.NamedList
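The snippets below all build their responses out of NamedList (and its subclass SimpleOrderedMap, which tells response writers to render map-like output). As a quick orientation, here is a minimal sketch of the class's behavior: an ordered list of name/value pairs that, unlike a Map, preserves insertion order and allows repeated and null names. The names and counts are made up for illustration.

import org.apache.solr.common.util.NamedList;

public class NamedListBasics {
  public static void main(String[] args) {
    NamedList<Integer> counts = new NamedList<Integer>();
    counts.add("solr", 42);   // insertion order is preserved
    counts.add("lucene", 17);
    counts.add("solr", 7);    // duplicate names are allowed
    counts.add(null, 3);      // so are null names (used for "missing" counts below)

    System.out.println(counts.get("solr"));  // 42 -- get() returns the first match
    System.out.println(counts.getName(1));   // lucene
    System.out.println(counts.getVal(1));    // 17
    System.out.println(counts.size());       // 4
  }
}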


  /**
   * Returns a list of value constraints and the associated facet counts
   * for each facet field specified in the params.
   *
   * @see FacetParams#FACET_FIELD
   * @see #getFacetTermEnumCounts
   */
  public NamedList getFacetFieldCounts()
          throws IOException, ParseException {

    NamedList res = new SimpleOrderedMap();
    String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
    if (null != facetFs) {
      for (String f : facetFs) {
        try {
          parseParams(FacetParams.FACET_FIELD, f);
          String termList = localParams == null ? null : localParams.get(CommonParams.TERMS);
          if (termList != null) {
            res.add(key, getListedTermCounts(facetValue, termList));
          } else {
            res.add(key, getTermCounts(facetValue));
          }
        } catch (Exception e) {
          String msg = "Exception during facet.field of " + f;
          SolrException.logOnce(SolrCore.log, msg, e);
          addException(msg, e);
        }
      }
    }
    return res;
  }



  private NamedList getListedTermCounts(String field, String termList) throws IOException {
    FieldType ft = searcher.getSchema().getFieldType(field);
    List<String> terms = StrUtils.splitSmart(termList, ",", true);
    NamedList res = new NamedList();
    Term t = new Term(field);
    for (String term : terms) {
      String internal = ft.toInternal(term);
      int count = searcher.numDocs(new TermQuery(t.createTerm(internal)), base);
      res.add(term, count);
    }
    return res;   
  }
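One detail worth noting in getListedTermCounts: StrUtils.splitSmart honors backslash escapes, so a requested facet term may itself contain the separator. A small sketch of the assumed behavior (input values invented):

import java.util.List;
import org.apache.solr.common.util.StrUtils;

public class SplitSmartDemo {
  public static void main(String[] args) {
    // the backslash escapes the first comma, so "doc,one" survives as one term
    List<String> terms = StrUtils.splitSmart("doc\\,one,docTwo", ",", true);
    System.out.println(terms); // expected: [doc,one, docTwo]
  }
}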

  // Excerpt from SimpleFacets; the method header was cut off by the source page
  // and is reconstructed here from the variables the body uses:
  public static NamedList getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs,
      String fieldName, int offset, int limit, int mincount, boolean missing,
      String sort, String prefix) throws IOException {

    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    NamedList res = new NamedList();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix!=null && prefix.length()==0) prefix=null;

    int startTermIndex, endTermIndex;
    if (prefix!=null) {
      startTermIndex = Arrays.binarySearch(terms,prefix,nullStrComparator);
      if (startTermIndex<0) startTermIndex=-startTermIndex-1;
      // find the end term.  \uffff isn't a legal unicode char, but only compareTo
      // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
      endTermIndex = Arrays.binarySearch(terms,prefix+"\uffff\uffff\uffff\uffff",nullStrComparator);
      endTermIndex = -endTermIndex-1;
    } else {
      startTermIndex=1;
      endTermIndex=terms.length;
    }

    final int nTerms=endTermIndex-startTermIndex;

    if (nTerms>0 && docs.size() >= mincount) {

      // count collection array only needs to be as big as the number of terms we are
      // going to collect counts for.
      final int[] counts = new int[nTerms];

      DocIterator iter = docs.iterator();
      while (iter.hasNext()) {
        int term = termNum[iter.nextDoc()];
        int arrIdx = term-startTermIndex;
        if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
      }

      // IDEA: we could also maintain a count of "other"... everything that fell outside
      // of the top 'N'

      int off=offset;
      int lim=limit>=0 ? limit : Integer.MAX_VALUE;

      if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
        maxsize = Math.min(maxsize, nTerms);
        final BoundedTreeSet<CountPair<String,Integer>> queue = new BoundedTreeSet<CountPair<String,Integer>>(maxsize);
        int min=mincount-1;  // the smallest value in the top 'N' values
        for (int i=0; i<nTerms; i++) {
          int c = counts[i];
          if (c>min) {
            // NOTE: we use c>min rather than c>=min as an optimization because we are going in
            // index order, so we already know that the keys are ordered.  This can be very
            // important if a lot of the counts are repeated (like zero counts would be).
            queue.add(new CountPair<String,Integer>(terms[startTermIndex+i], c));
            if (queue.size()>=maxsize) min=queue.last().val;
          }
        }
        // now select the right page from the results
        for (CountPair<String,Integer> p : queue) {
          if (--off>=0) continue;
          if (--lim<0) break;
          res.add(ft.indexedToReadable(p.key), p.val);
        }
      } else {
        // add results in index order
        int i=0;
        if (mincount<=0) {
          // if mincount<=0, then we won't discard any terms and we know exactly
          // where to start.
          i=off;
          off=0;
        }

        for (; i<nTerms; i++) {         
          int c = counts[i];
          if (c<mincount || --off>=0) continue;
          if (--lim<0) break;
          res.add(ft.indexedToReadable(terms[startTermIndex+i]), c);
        }
      }
    }

    if (missing) {
      res.add(null, getFieldMissingCount(searcher,docs,fieldName));
    }
   
    return res;
  }
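The c>min test plus BoundedTreeSet above is a classic bounded top-N selection: keep at most offset+limit candidates, and once the set is full, use its smallest member as the admission threshold. BoundedTreeSet is Solr's own utility; the following self-contained sketch shows the same idea with a plain TreeSet (class name, comparator, and sample counts are mine, not Solr's):

import java.util.TreeSet;

public class TopNCounts {
  public static void main(String[] args) {
    final int maxSize = 3;
    // order by descending count, then by term number, so equal counts stay distinct
    TreeSet<long[]> queue = new TreeSet<long[]>((a, b) ->
        a[0] != b[0] ? Long.compare(b[0], a[0]) : Long.compare(a[1], b[1]));

    int[] counts = {5, 0, 9, 0, 2, 7};
    long min = -1; // smallest count currently admitted to the top N
    for (int term = 0; term < counts.length; term++) {
      int c = counts[term];
      if (c > min) {                 // strict '>' suffices because terms arrive in index order
        queue.add(new long[]{c, term});
        if (queue.size() > maxSize) queue.pollLast();
        if (queue.size() == maxSize) min = queue.last()[0];
      }
    }
    for (long[] p : queue) System.out.println("term#" + p[1] + " -> " + p[0]);
  }
}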

  // Excerpt from SimpleFacets; the method header was cut off by the source page
  // and is reconstructed here from the variables the body uses:
  public NamedList getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs,
      String field, int offset, int limit, int mincount, boolean missing,
      String sort, String prefix) throws IOException {

    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

    IndexReader r = searcher.getReader();
    FieldType ft = schema.getFieldType(field);

    final int maxsize = limit>=0 ? offset+limit : Integer.MAX_VALUE-1;   
    final BoundedTreeSet<CountPair<String,Integer>> queue = (sort.equals("count") || sort.equals("true")) ? new BoundedTreeSet<CountPair<String,Integer>>(maxsize) : null;
    final NamedList res = new NamedList();

    int min=mincount-1;  // the smallest value in the top 'N' values
    int off=offset;
    int lim=limit>=0 ? limit : Integer.MAX_VALUE;

    String startTerm = prefix==null ? "" : ft.toInternal(prefix);
    TermEnum te = r.terms(new Term(field,startTerm));
    TermDocs td = r.termDocs();
    SolrIndexSearcher.TermDocsState tdState = new SolrIndexSearcher.TermDocsState();
    tdState.tenum = te;
    tdState.tdocs = td;

    if (docs.size() >= mincount) {
    do {
      Term t = te.term();

      if (null == t || ! t.field().equals(field))
        break;

      if (prefix!=null && !t.text().startsWith(prefix)) break;

      int df = te.docFreq();

      // If we are sorting, we can use df>min (rather than >=) since we
      // are going in index order.  For certain term distributions this can
      // make a large difference (for example, many terms with df=1).
      if (df>0 && df>min) {
        int c;

        if (df >= minDfFilterCache) {
          // use the filter cache
          c = docs.intersectionSize( searcher.getPositiveDocSet(new TermQuery(t), tdState) );
        } else {
          // iterate over TermDocs to calculate the intersection
          td.seek(te);
          c=0;
          while (td.next()) {
            if (docs.exists(td.doc())) c++;
          }
        }

        if (sort.equals("count") || sort.equals("true")) {
          if (c>min) {
            queue.add(new CountPair<String,Integer>(t.text(), c));
            if (queue.size()>=maxsize) min=queue.last().val;
          }
        } else {
          if (c >= mincount && --off<0) {
            if (--lim<0) break;
            res.add(ft.indexedToReadable(t.text()), c);
          }
        }
      }
    } while (te.next());
    }

    if (sort.equals("count") || sort.equals("true")) {
      for (CountPair<String,Integer> p : queue) {
        if (--off>=0) continue;
        if (--lim<0) break;
        res.add(ft.indexedToReadable(p.key), p.val);
      }
    }

    if (missing) {
      res.add(null, getFieldMissingCount(searcher,docs,field));
    }

    te.close();
    td.close();

    return res;
  }
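The df >= minDfFilterCache branch above encodes a cost tradeoff: for frequent terms, intersecting cached bitsets costs roughly maxDoc/64 word operations regardless of the term, while for rare terms walking the postings list costs on the order of df membership tests. A toy illustration of the two counting strategies, using java.util.BitSet rather than Solr's DocSet (all values invented):

import java.util.BitSet;

public class IntersectionStrategies {
  // strategy 1: bitset intersection -- cost ~ maxDoc/64, independent of df
  static int viaBitsets(BitSet base, BitSet termDocs) {
    BitSet tmp = (BitSet) termDocs.clone();
    tmp.and(base);
    return tmp.cardinality();
  }

  // strategy 2: walk the term's postings -- cost ~ df, cheap for rare terms
  static int viaPostings(BitSet base, int[] postings) {
    int c = 0;
    for (int doc : postings) if (base.get(doc)) c++;
    return c;
  }

  public static void main(String[] args) {
    BitSet base = new BitSet();
    base.set(1); base.set(3); base.set(7);
    int[] postings = {0, 3, 7, 9};       // doc ids for one term
    BitSet termDocs = new BitSet();
    for (int d : postings) termDocs.set(d);
    System.out.println(viaBitsets(base, termDocs));  // 2
    System.out.println(viaPostings(base, postings)); // 2
  }
}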

  /**
   * Returns a list of value constraints and the associated facet counts
   * for each facet date field specified in the params.
   *
   * @see FacetParams#FACET_DATE
   * @deprecated Use getFacetRangeCounts which is more generalized
   */
  @Deprecated
  public NamedList getFacetDateCounts()
    throws IOException, ParseException {

    final NamedList resOuter = new SimpleOrderedMap();
    final String[] fields = params.getParams(FacetParams.FACET_DATE);

    if (null == fields || 0 == fields.length) return resOuter;

    for (String f : fields) {
      getFacetDateCounts(f, resOuter);
    }

    return resOuter;
  }

  // Per-field worker for getFacetDateCounts(); the header was cut off by the
  // source page and is reconstructed here:
  @Deprecated
  public void getFacetDateCounts(String dateFacet, NamedList resOuter)
      throws IOException, ParseException {

    parseParams(FacetParams.FACET_DATE, dateFacet);
    String f = facetValue;


    final NamedList resInner = new SimpleOrderedMap();
    resOuter.add(key, resInner);
    final SchemaField sf = schema.getField(f);
    if (! (sf.getType() instanceof DateField)) {
      throw new SolrException
          (SolrException.ErrorCode.BAD_REQUEST,
              "Can not date facet on a field which is not a DateField: " + f);
    }
    final DateField ft = (DateField) sf.getType();
    final String startS
        = required.getFieldParam(f,FacetParams.FACET_DATE_START);
    final Date start;
    try {
      start = ft.parseMath(NOW, startS);
    } catch (SolrException e) {
      throw new SolrException
          (SolrException.ErrorCode.BAD_REQUEST,
              "date facet 'start' is not a valid Date string: " + startS, e);
    }
    final String endS
        = required.getFieldParam(f,FacetParams.FACET_DATE_END);
    Date end; // not final, hardend may change this
    try {
      end = ft.parseMath(NOW, endS);
    } catch (SolrException e) {
      throw new SolrException
          (SolrException.ErrorCode.BAD_REQUEST,
              "date facet 'end' is not a valid Date string: " + endS, e);
    }

    if (end.before(start)) {
      throw new SolrException
          (SolrException.ErrorCode.BAD_REQUEST,
              "date facet 'end' comes before 'start': "+endS+" < "+startS);
    }

    final String gap = required.getFieldParam(f,FacetParams.FACET_DATE_GAP);
    final DateMathParser dmp = new DateMathParser(ft.UTC, Locale.US);
    dmp.setNow(NOW);

    final int minCount = params.getFieldInt(f,FacetParams.FACET_MINCOUNT, 0);

    String[] iStrs = params.getFieldParams(f,FacetParams.FACET_DATE_INCLUDE);
    // Legacy support for default of [lower,upper,edge] for date faceting
    // this is not handled by FacetRangeInclude.parseParam because
    // range faceting has different defaults
    final EnumSet<FacetRangeInclude> include =
      (null == iStrs || 0 == iStrs.length ) ?
      EnumSet.of(FacetRangeInclude.LOWER,
                 FacetRangeInclude.UPPER,
                 FacetRangeInclude.EDGE)
      : FacetRangeInclude.parseParam(iStrs);

    try {
      Date low = start;
      while (low.before(end)) {
        dmp.setNow(low);
        String label = ft.toExternal(low);

        Date high = dmp.parseMath(gap);
        if (end.before(high)) {
          if (params.getFieldBool(f,FacetParams.FACET_DATE_HARD_END,false)) {
            high = end;
          } else {
            end = high;
          }
        }
        if (high.before(low)) {
          throw new SolrException
              (SolrException.ErrorCode.BAD_REQUEST,
                  "date facet infinite loop (is gap negative?)");
        }
        final boolean includeLower =
            (include.contains(FacetRangeInclude.LOWER) ||
                (include.contains(FacetRangeInclude.EDGE) && low.equals(start)));
        final boolean includeUpper =
            (include.contains(FacetRangeInclude.UPPER) ||
                (include.contains(FacetRangeInclude.EDGE) && high.equals(end)));

        final int count = rangeCount(sf,low,high,includeLower,includeUpper);
        if (count >= minCount) {
          resInner.add(label, count);
        }
        low = high;
      }
    } catch (java.text.ParseException e) {
      throw new SolrException
          (SolrException.ErrorCode.BAD_REQUEST,
              "date facet 'gap' is not a valid Date Math string: " + gap, e);
    }

    // explicitly return the gap and end so all the counts
    // (including before/after/between) are meaningful - even if mincount
    // has removed the neighboring ranges
    resInner.add("gap", gap);
    resInner.add("start", start);
    resInner.add("end", end);

    final String[] othersP =
        params.getFieldParams(f,FacetParams.FACET_DATE_OTHER);
    if (null != othersP && 0 < othersP.length ) {
      final Set<FacetRangeOther> others = EnumSet.noneOf(FacetRangeOther.class);

      for (final String o : othersP) {
        others.add(FacetRangeOther.get(o));
      }

      // no matter what other values are listed, we don't do
      // anything if "none" is specified.
      if (! others.contains(FacetRangeOther.NONE) ) {
        boolean all = others.contains(FacetRangeOther.ALL);

        if (all || others.contains(FacetRangeOther.BEFORE)) {
          // include upper bound if "outer" or if first gap doesn't already include it
          resInner.add(FacetRangeOther.BEFORE.toString(),
              rangeCount(sf,null,start,
                  false,
                  (include.contains(FacetRangeInclude.OUTER) ||
                      (! (include.contains(FacetRangeInclude.LOWER) ||
                          include.contains(FacetRangeInclude.EDGE))))));
        }
        if (all || others.contains(FacetRangeOther.AFTER)) {
          // include lower bound if "outer" or if last gap doesn't already include it
          resInner.add(FacetRangeOther.AFTER.toString(),
              rangeCount(sf,end,null,
                  (include.contains(FacetRangeInclude.OUTER) ||
                      (! (include.contains(FacetRangeInclude.UPPER) ||
                          include.contains(FacetRangeInclude.EDGE)))),
                  false));
        }
        if (all || others.contains(FacetRangeOther.BETWEEN)) {
          resInner.add(FacetRangeOther.BETWEEN.toString(),
              rangeCount(sf,start,end,
                  (include.contains(FacetRangeInclude.LOWER) ||
                      include.contains(FacetRangeInclude.EDGE)),
                  (include.contains(FacetRangeInclude.UPPER) ||
                      include.contains(FacetRangeInclude.EDGE))));
        }
      }
    }
  }
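The loop above is the heart of date faceting: step low forward by gap, and when the last bucket overshoots, either clamp it to end (facet.date.hardend=true) or stretch end to a whole gap (the default). A minimal restatement of that control flow with java.time, substituting a fixed Duration for DateMathParser (dates and gap invented):

import java.time.Duration;
import java.time.Instant;

public class GapWalk {
  public static void main(String[] args) {
    Instant start = Instant.parse("2018-01-01T00:00:00Z");
    Instant end   = Instant.parse("2018-01-10T00:00:00Z");
    Duration gap  = Duration.ofDays(4);
    boolean hardEnd = false; // facet.date.hardend semantics

    Instant low = start;
    while (low.isBefore(end)) {
      Instant high = low.plus(gap);
      if (end.isBefore(high)) {
        if (hardEnd) high = end; // clamp the final bucket short...
        else end = high;         // ...or extend 'end' to a whole gap
      }
      // guard against a gap that fails to advance (the "infinite loop" check above)
      if (!low.isBefore(high)) throw new IllegalStateException("gap must move forward");
      System.out.println(low + " .. " + high);
      low = high;
    }
  }
}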

  /**
   * Returns a list of value constraints and the associated facet counts
   * for each facet range specified in the params.
   *
   * @see FacetParams#FACET_RANGE
   */

  public NamedList getFacetRangeCounts() {
    final NamedList resOuter = new SimpleOrderedMap();
    final String[] fields = params.getParams(FacetParams.FACET_RANGE);

    if (null == fields || 0 == fields.length) return resOuter;

    for (String f : fields) {
      getFacetRangeCounts(f, resOuter);
    }

    return resOuter;
  }

  private <T extends Comparable<T>> NamedList getFacetRangeCounts
    (final SchemaField sf,
     final RangeEndpointCalculator<T> calc) throws IOException {
   
    final String f = sf.getName();
    final NamedList res = new SimpleOrderedMap();
    final NamedList counts = new NamedList();
    res.add("counts", counts);

    final T start = calc.getValue(required.getFieldParam(f,FacetParams.FACET_RANGE_START));
    // not final, hardend may change this
    T end = calc.getValue(required.getFieldParam(f,FacetParams.FACET_RANGE_END));
    if (end.compareTo(start) < 0) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         "range facet 'end' comes before 'start': "+end+" < "+start);
    }
   
    final String gap = required.getFieldParam(f, FacetParams.FACET_RANGE_GAP);
    // explicitly return the gap.  compute this early so we are more
    // likely to catch parse errors before attempting math
    res.add("gap", calc.getGap(gap));
   
    final int minCount = params.getFieldInt(f,FacetParams.FACET_MINCOUNT, 0);
   
    final EnumSet<FacetRangeInclude> include = FacetRangeInclude.parseParam
      (params.getFieldParams(f,FacetParams.FACET_RANGE_INCLUDE));
   
    T low = start;
   
    while (low.compareTo(end) < 0) {
      T high = calc.addGap(low, gap);
      if (end.compareTo(high) < 0) {
        if (params.getFieldBool(f,FacetParams.FACET_RANGE_HARD_END,false)) {
          high = end;
        } else {
          end = high;
        }
      }
      if (high.compareTo(low) < 0) {
        throw new SolrException
          (SolrException.ErrorCode.BAD_REQUEST,
           "range facet infinite loop (is gap negative? did the math overflow?)");
      }
     
      final boolean includeLower =
        (include.contains(FacetRangeInclude.LOWER) ||
         (include.contains(FacetRangeInclude.EDGE) &&
          0 == low.compareTo(start)));
      final boolean includeUpper =
        (include.contains(FacetRangeInclude.UPPER) ||
         (include.contains(FacetRangeInclude.EDGE) &&
          0 == high.compareTo(end)));
     
      final String lowS = calc.formatValue(low);
      final String highS = calc.formatValue(high);

      final int count = rangeCount(sf, lowS, highS,
                                   includeLower,includeUpper);
      if (count >= minCount) {
        counts.add(lowS, count);
      }
     
      low = high;
    }

    // ... remainder of the method (echoing start/end and the facet.range.other
    // before/after/between counts) is omitted in this excerpt ...
    return res;
  }
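Compared with the date-specific version above, the gain here is the RangeEndpointCalculator abstraction: one generic loop over Comparable endpoints, with parsing, gap arithmetic, and formatting delegated per field type. A stripped-down sketch of that design (interface and class names are mine, not Solr's):

interface EndpointCalc<T extends Comparable<T>> {
  T getValue(String rawValue);
  T addGap(T value, String gap);
  String formatValue(T value);
}

final class IntCalc implements EndpointCalc<Integer> {
  public Integer getValue(String rawValue) { return Integer.valueOf(rawValue); }
  public Integer addGap(Integer value, String gap) { return value + Integer.parseInt(gap); }
  public String formatValue(Integer value) { return value.toString(); }
}

public class RangeDemo {
  public static void main(String[] args) {
    EndpointCalc<Integer> calc = new IntCalc();
    Integer low = calc.getValue("0");
    Integer end = calc.getValue("10");
    while (low.compareTo(end) < 0) {
      Integer high = calc.addGap(low, "3");
      if (end.compareTo(high) < 0) end = high; // soft end, as with hardend=false
      System.out.println(calc.formatValue(low) + " .. " + calc.formatValue(high));
      low = high;
    }
  }
}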

  // Excerpt from TermVectorComponent; the method header was cut off by the source page:
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }

    NamedList termVectors = new NamedList();
    rb.rsp.add(TERM_VECTORS, termVectors);
    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    //short cut to all values.
    boolean all = params.getBool(TermVectorParams.ALL, false);
    if (all == true) {
      allFields.termFreq = true;
      allFields.positions = true;
      allFields.offsets = true;
      allFields.docFreq = true;
      allFields.tfIdf = true;
    }

    String fldLst = params.get(TermVectorParams.FIELDS);
    if (fldLst == null) {
      fldLst = params.get(CommonParams.FL);
    }

    //use this to validate our fields
    IndexSchema schema = rb.req.getSchema();
    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList warnings = new NamedList();
    List<String>  noTV = new ArrayList<String>();
    List<String>  noPos = new ArrayList<String>();
    List<String>  noOff = new ArrayList<String>();

    //we have specific fields to retrieve
    if (fldLst != null) {
      String [] fields = SolrPluginUtils.split(fldLst);
      for (String field : fields) {
        SchemaField sf = schema.getFieldOrNull(field);
        if (sf != null) {
          if (sf.storeTermVector()) {
            FieldOptions option = fieldOptions.get(field);
            if (option == null) {
              option = new FieldOptions();
              option.fieldName = field;
              fieldOptions.put(field, option);
            }
            //get the per field mappings
            option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
            option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
            option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
            //Validate these are even an option
            option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
            if (option.positions == true && sf.storeTermPositions() == false){
              noPos.add(field);
            }
            option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
            if (option.offsets == true && sf.storeTermOffsets() == false){
              noOff.add(field);
            }
          } else {//field doesn't have term vectors
            noTV.add(field);
          }
        } else {
          //field doesn't exist
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
        }
      }
    } //else, deal with all fields
    boolean hasWarnings = false;
    if (noTV.isEmpty() == false) {
      warnings.add("noTermVectors", noTV);
      hasWarnings = true;
    }
    if (noPos.isEmpty() == false) {
      warnings.add("noPositions", noPos);
      hasWarnings = true;
    }
    if (noOff.isEmpty() == false) {
      warnings.add("noOffsets", noOff);
      hasWarnings = true;
    }
    if (hasWarnings == true) {
      termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && docIds.isEmpty() == false) {
      iter = docIds.iterator();
    } else {
      DocList list = listAndSet.docList;
      iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getReader();
    //the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
      uniqFieldName = keyField.getName();
    }
    //Only load the id field to get the uniqueKey of that field
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.singleton(uniqFieldName), Collections.<String>emptySet());
    TVMapper mapper = new TVMapper(reader);
    mapper.fieldOptions = allFields; //this will only stay set if fieldOptions.isEmpty() (in other words, only if the user didn't set any fields)
    while (iter.hasNext()) {
      Integer docId = iter.next();
      NamedList docNL = new NamedList();
      mapper.docNL = docNL;
      termVectors.add("doc-" + docId, docNL);

      if (keyField != null) {
        Document document = reader.document(docId, fieldSelector);
        Fieldable uniqId = document.getFieldable(uniqFieldName);
        String uniqVal = null;
        if (uniqId != null) {
          uniqVal = keyField.getType().storedToReadable(uniqId);         
        }
        if (uniqVal != null) {
          docNL.add("uniqueKey", uniqVal);
          termVectors.add("uniqueKeyFieldName", uniqFieldName);
        }
      }
      if (fieldOptions.isEmpty() == false) {
        for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
          // ... per-field getTermFreqVector calls omitted in this excerpt ...
        }
      }
    }
  }
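Everything this component reads comes from request parameters (tv, tv.tf, tv.positions, tv.offsets, tv.df, tv.tf_idf, tv.all, tv.fl, tv.docIds). A hedged SolrJ sketch of a request exercising a few of them, assuming a handler with the component registered, a server at the usual example URL, and a hypothetical "features" field that stores term vectors:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;

public class TermVectorRequest {
  public static void main(String[] args) throws Exception {
    SolrServer server = new CommonsHttpSolrServer("http://localhost:8983/solr");
    SolrQuery q = new SolrQuery("*:*");
    q.set("tv", true);           // enable the TermVectorComponent
    q.set("tv.tf", true);        // per-document term frequencies
    q.set("tv.positions", true); // needs the field to store positions
    q.set("tv.offsets", true);   // needs the field to store offsets
    q.set("tv.fl", "features");  // restrict to specific fields (hypothetical field)
    QueryResponse rsp = server.query(q);
    System.out.println(rsp.getResponse().get("termVectors"));
  }
}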

  public NamedList getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
    use.incrementAndGet();

    FieldType ft = searcher.getSchema().getFieldType(field);

    NamedList res = new NamedList();  // order is important

    DocSet docs = baseDocs;
    int baseSize = docs.size();
    int maxDoc = searcher.maxDoc();

    if (baseSize >= mincount) {

      final int[] index = this.index;
      final int[] counts = new int[numTermsInField];

      //
      // If there is a prefix, find its start and end term numbers
      //
      int startTerm = 0;
      int endTerm = numTermsInField;  // one past the end

      NumberedTermEnum te = ti.getEnumerator(searcher.getReader());
      if (prefix != null && prefix.length() > 0) {
        te.skipTo(prefix);
        startTerm = te.getTermNumber();
        te.skipTo(prefix + "\uffff\uffff\uffff\uffff");
        endTerm = te.getTermNumber();
      }

      /***********
      // Alternative 2: get the docSet of the prefix (could take a while) and
      // then do the intersection with the baseDocSet first.
      if (prefix != null && prefix.length() > 0) {
        docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
        // The issue with this method are problems of returning 0 counts for terms w/o
        // the prefix.  We can't just filter out those terms later because it may
        // mean that we didn't collect enough terms in the queue (in the sorted case).
      }
      ***********/

      boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0
              && startTerm==0 && endTerm==numTermsInField
              && docs instanceof BitDocSet;

      if (doNegative) {
        OpenBitSet bs = (OpenBitSet)((BitDocSet)docs).getBits().clone();
        bs.flip(0, maxDoc);
        // TODO: when iterator across negative elements is available, use that
        // instead of creating a new bitset and inverting.
        docs = new BitDocSet(bs, maxDoc - baseSize);
        // simply negating will mean that we have deleted docs in the set.
        // that should be OK, as their entries in our table should be empty.
      }

      // For the biggest terms, do straight set intersections
      for (TopTerm tt : bigTerms.values()) {
        // TODO: counts could be deferred if sorted==false
        if (tt.termNum >= startTerm && tt.termNum < endTerm) {
          counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs);
        }
      }

      // TODO: we could short-circuit counting altogether for sorted faceting
      // where we already have enough terms from the bigTerms

      // TODO: we could shrink the size of the collection array, and
      // additionally break when the termNumber got above endTerm, but
      // it would require two extra conditionals in the inner loop (although
      // they would be predictable for the non-prefix case).
      // Perhaps a different copy of the code would be warranted.

      if (termInstances > 0) {
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
          int doc = iter.nextDoc();
          int code = index[doc];

          if ((code & 0xff)==1) {
            int pos = code>>>8;
            int whichArray = (doc >>> 16) & 0xff;
            byte[] arr = tnums[whichArray];
            int tnum = 0;
            for(;;) {
              int delta = 0;
              for(;;) {
                byte b = arr[pos++];
                delta = (delta << 7) | (b & 0x7f);
                if ((b & 0x80) == 0) break;
              }
              if (delta == 0) break;
              tnum += delta - TNUM_OFFSET;
              counts[tnum]++;
            }
          } else {
            int tnum = 0;
            int delta = 0;
            for (;;) {
              delta = (delta << 7) | (code & 0x7f);
              if ((code & 0x80)==0) {
                if (delta==0) break;
                tnum += delta - TNUM_OFFSET;
                counts[tnum]++;
                delta = 0;
              }
              code >>>= 8;
            }
          }
        }
      }

      int off=offset;
      int lim=limit>=0 ? limit : Integer.MAX_VALUE;

      if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
        maxsize = Math.min(maxsize, numTermsInField);
        final BoundedTreeSet<Long> queue = new BoundedTreeSet<Long>(maxsize);
        int min=mincount-1;  // the smallest value in the top 'N' values
        for (int i=startTerm; i<endTerm; i++) {
          int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
          if (c>min) {
            // NOTE: we use c>min rather than c>=min as an optimization because we are going in
            // index order, so we already know that the keys are ordered.  This can be very
            // important if a lot of the counts are repeated (like zero counts would be).

            // minimize object creation and speed comparison by creating a long that
            // encompasses both count and term number.
            // Since smaller values are kept in the TreeSet, make higher counts smaller.
            //
            //   for equal counts, lower term numbers
            // should come first and hence be "greater"

            //long pair = (((long)c)<<32) | (0x7fffffff-i) ;   // use if priority queue
            long pair = (((long)-c)<<32) | i;
            queue.add(new Long(pair));
            if (queue.size()>=maxsize) min=-(int)(queue.last().longValue() >>> 32);
          }
        }

        final int[] tnums = new int[Math.min(Math.max(0, queue.size()-off), lim)];
        final int[] indirect = counts;  // reuse the counts array for the index into the tnums array
        assert indirect.length >= tnums.length;
       
        int tnumCount = 0;

        for (Long p : queue) {
          if (--off>=0) continue;
          if (--lim<0) break;
          int c = -(int)(p.longValue() >>> 32);
          //int tnum = 0x7fffffff - (int)p.longValue();  // use if priority queue
          int tnum = (int)p.longValue();
          indirect[tnumCount] = tnumCount;
          tnums[tnumCount++] = tnum;
          // String label = ft.indexedToReadable(getTermText(te, tnum));
          // add a null label for now... we'll fill it in later.
          res.add(null, c);
        }

        // now sort the indexes by the term numbers
        PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() {
          @Override
          public int compare(int a, int b) {
            return tnums[a] - tnums[b];
          }
        });

        // convert the term numbers to term values and set as the label
        for (int i=0; i<tnumCount; i++) {
          int idx = indirect[i];
          int tnum = tnums[idx];
          String label = ft.indexedToReadable(getTermText(te, tnum));         
          res.setName(idx, label);
        }

      } else {
        // add results in index order
        int i=startTerm;
        if (mincount<=0) {
          // if mincount<=0, then we won't discard any terms and we know exactly
          // where to start.
          i=startTerm+off;
          off=0;
        }

        for (; i<endTerm; i++) {
          int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
          if (c<mincount || --off>=0) continue;
          if (--lim<0) break;

          String label = ft.indexedToReadable(getTermText(te, i));
          res.add(label, c);
        }
      }

      te.close();
    }


    if (missing) {
      // TODO: a faster solution for this?
      res.add(null, SimpleFacets.getFieldMissingCount(searcher, baseDocs, field));
    }

    return res;
  }
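The two inner loops near the top of getCounts decode the same structure from two places (a packed int for short lists, a byte array for long ones): term-number deltas written as variable-length integers, seven payload bits per byte with the high bit as a continuation flag, biased by TNUM_OFFSET so a zero byte can terminate the list. A standalone decoder for the byte-array case, assuming TNUM_OFFSET = 2 as in UnInvertedField:

import java.util.ArrayList;
import java.util.List;

public class TermDeltaDecoder {
  static final int TNUM_OFFSET = 2; // assumed to match UnInvertedField's constant

  // decode vInt-style deltas (high bit = continuation) until a zero delta terminates
  static List<Integer> decode(byte[] arr, int pos) {
    List<Integer> termNums = new ArrayList<Integer>();
    int tnum = 0;
    for (;;) {
      int delta = 0;
      for (;;) {
        byte b = arr[pos++];
        delta = (delta << 7) | (b & 0x7f);
        if ((b & 0x80) == 0) break;
      }
      if (delta == 0) break;       // zero marks the end of this document's term list
      tnum += delta - TNUM_OFFSET; // stored deltas are biased by TNUM_OFFSET
      termNums.add(tnum);
    }
    return termNums;
  }

  public static void main(String[] args) {
    // encodes term numbers {3, 4, 10}: biased deltas are 5, 3, 8, then terminator 0
    byte[] arr = {5, 3, 8, 0};
    System.out.println(decode(arr, 0)); // [3, 4, 10]
  }
}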
