Examples of ProgressLogger

Examples of htsjdk.samtools.util.ProgressLogger

        // Write the records to the output file in the specified sort order.
        header.setSortOrder(this.sortOrder);
        final boolean presorted = this.sortOrder == SortOrder.coordinate;
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, this.targetBamFile);
        writer.setProgressLogger(
                new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));
        final ProgressLogger finalProgress = new ProgressLogger(log, 10000000, "Written in coordinate order to output", "records");

        for (final SAMRecord rec : sorted) {
            if (!rec.getReadUnmappedFlag()) {
                if (refSeq != null) {
                    final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
                    rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));

                    if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
                        rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
                    }
                }
            }
            writer.addAlignment(rec);
            finalProgress.record(rec);
        }
        writer.close();
        sorted.cleanup();

        log.info("Wrote " + aligned + " alignment records and " + (alignedReadsOnly ? 0 : unmapped) + " unmapped reads.");
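
Stripped of the alignment-specific work, the htsjdk pattern above is just a constructor plus one record() call per record. Below is a minimal sketch of that core pattern; the class name and the empty record list are hypothetical stand-ins, while the ProgressLogger constructor, record(SAMRecord), and getCount() match the usage above.

import java.util.Collections;
import java.util.List;

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;

// Minimal sketch; ProgressLoggerSketch and the "records" list are hypothetical.
public class ProgressLoggerSketch {
    private static final Log log = Log.getInstance(ProgressLoggerSketch.class);

    public static void main(final String[] args) {
        final List<SAMRecord> records = Collections.emptyList(); // stand-in input
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Processed", "records");
        for (final SAMRecord rec : records) {
            // ... per-record work would go here ...
            progress.record(rec); // logs every n-th record with rate and current position
        }
        log.info("Processed " + progress.getCount() + " records.");
    }
}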

Examples of it.cnr.isti.hpc.log.ProgressLogger

  @SuppressWarnings("unchecked")
  public static void main(String[] args) {

    GetTitlesCLI cli = new GetTitlesCLI(args);
    ProgressLogger pl = new ProgressLogger("dumped {} titles", 10000);
    cli.openOutput();
    RecordReader<Article> reader = new RecordReader<Article>(
        cli.getInput(), new JsonRecordParser<Article>(Article.class));

    reader.filter(ShortTitleFilter.FEWER_THAN_THREE,
        TypeFilter.MAIN_CATEGORY_TEMPLATE,
        RedirectFilter.FILTER_OUT_REDIRECTS);

    for (Article a : reader) {
      pl.up();
      cli.writeLineInOutput(a.getTitleInWikistyle());
    }
    cli.closeOutput();
  }
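
This and the following it.cnr.isti.hpc.log examples all follow the same two-call protocol: construct the logger with an SLF4J-style message template and a step, then call up() once per item. A minimal self-contained sketch under those assumptions (the title list is a hypothetical stand-in; the constructor and up() match the usage above):

import java.util.Arrays;
import java.util.List;

import it.cnr.isti.hpc.log.ProgressLogger;

// Minimal sketch; the titles list is a hypothetical stand-in.
public class TitleDumpSketch {
  public static void main(String[] args) {
    List<String> titles = Arrays.asList("Title_1", "Title_2");
    ProgressLogger pl = new ProgressLogger("dumped {} titles", 10000);
    for (String title : titles) {
      pl.up(); // increments the counter and logs the template every 10000 items
      // ... write the title to the output ...
    }
  }
}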

Examples of it.cnr.isti.hpc.log.ProgressLogger

  }

  public static void main(String[] args) {
    JsonToLineCLI cli = new JsonToLineCLI(args);
    String format = cli.getParam("format");
    ProgressLogger pl = new ProgressLogger("dumped {} articles ", 100000);
    RecordReader<Article> reader = new RecordReader<Article>(
        cli.getInput(), new JsonRecordParser<Article>(Article.class));
    cli.openOutput();

    for (Article a : reader) {
      pl.up();
      for (int i = 0; i < format.length(); i++) {
        char c = format.charAt(i);
        if (c == 'w') {
          cli.writeInOutput(a.getWikiTitle());
          continue;

Examples of it.cnr.isti.hpc.log.ProgressLogger

  }

  public static void main(String[] args) {

    GetDumpSummaryCLI cli = new GetDumpSummaryCLI(args);
    ProgressLogger pl = new ProgressLogger("dumped {} titles", 10000);
    cli.openOutput();
    RecordReader<Article> reader = new RecordReader<Article>(
        cli.getInput(), new JsonRecordParser<Article>(Article.class));
    ArticleSummarizer summarizer = new ArticleSummarizer();

    for (Article a : reader) {
      pl.up();
      cli.writeInOutput(a.getTypeName());
      cli.writeInOutput(TAB);
      cli.writeInOutput(String.valueOf(a.getWikiId()));
      cli.writeInOutput(TAB);
      cli.writeInOutput(a.getWikiTitle());

Examples of it.cnr.isti.hpc.log.ProgressLogger

    cli.openOutput();
   
    RecordReader<Article> reader = new RecordReader<Article>(cli.getInput(),
        new JsonRecordParser<Article>(Article.class));

    ProgressLogger pl = new ProgressLogger("processed {} articles",100000);

    for (Article a : reader) {
      pl.up();
      if (a.getTitleInWikistyle() == null) {
        logger.warn("current title is null, ignoring ",
            a.getTitleInWikistyle());
        continue;
      }

Examples of it.unimi.dsi.logging.ProgressLogger

  protected abstract int combine( int numUsedIndices ) throws IOException;
 
 
  public void run() throws ConfigurationException, IOException {
    final Logger logger = Util.getLogger( this.getClass() );
    final ProgressLogger pl = new ProgressLogger( logger, logInterval );
    pl.displayFreeMemory = true;

    final int maxDocSize;

    if ( writeSizes ) {
      logger.info( "Combining sizes..." );
      final OutputBitStream sizesOutputBitStream = new OutputBitStream( outputBasename + DiskBasedIndex.SIZES_EXTENSION, bufferSize );
      maxDocSize = combineSizes( sizesOutputBitStream );
      sizesOutputBitStream.close();
      logger.info( "Sizes combined." );
    }
    else maxDocSize = -1;
   
    // To write the global count of each term
    final OutputBitStream outputGlobCounts = writeGlobCounts ? new OutputBitStream( outputBasename + DiskBasedIndex.GLOBCOUNTS_EXTENSION ) : null;
    // To write the frequency of each term
    final OutputBitStream frequencies = new OutputBitStream( outputBasename + DiskBasedIndex.FREQUENCIES_EXTENSION );
    // To write the new term list
    final PrintWriter termFile = new PrintWriter( new BufferedWriter( new OutputStreamWriter( new FileOutputStream( outputBasename + DiskBasedIndex.TERMS_EXTENSION ), "UTF-8" ), bufferSize ) );
   
    // The current term
    MutableString currTerm;
   
    // Total number of pointers
    long numPointers = 0;
   
    pl.expectedUpdates = writeGlobCounts ? numberOfOccurrences : -1;
    pl.itemsName = writeGlobCounts ? "occurrences" : "terms";
    pl.logInterval = logInterval;
    pl.start( "Combining lists..." );

    int totalFrequency, numTerms = 0, numUsedIndices, k;
    long totalGlobCount = 0;
    predictedSize = -1;
    predictedLengthNumBits = -1;
   
    // Discard first zero from offsets
    if ( p != 0 ) for( InputBitStream ibs: offsets ) ibs.readGamma();
   
    // TODO: use the front of the queue?
    while( ! termQueue.isEmpty() ) {
      numUsedIndices = 0;
      // We read a new word from the queue, copy it and write it to the term file
      currTerm = term[ k = usedIndex[ numUsedIndices++ ] = termQueue.first() ].copy();
     
      if ( DEBUG ) System.err.println( "Merging term " + currTerm );
     
      currTerm.println( termFile );
      if ( termReader[ k ].readLine( term[ k ] ) == null ) termQueue.dequeue();
      else termQueue.changed();
     
      // Then, we extract all equal words from the queue, accumulating the set of indices in inIndex and currIndex
      while( ! termQueue.isEmpty() && term[ termQueue.first() ].equals( currTerm ) ) {
        k = usedIndex[ numUsedIndices++ ] = termQueue.first();
        if ( termReader[ k ].readLine( term[ k ] ) == null ) termQueue.dequeue();
        else termQueue.changed();
      }
     
      if ( numUsedIndices > 1 ) Arrays.sort( usedIndex, 0, numUsedIndices );

      // Load index iterators
      for( int i = numUsedIndices; i-- != 0; ) indexIterator[ usedIndex[ i ] ] = indexReader[ usedIndex[ i ] ].nextIterator();

      numTerms++;

      if ( writeGlobCounts ) {
        // Compute and write the total global count. This works for all kinds of indices.
        totalGlobCount = 0;
        for( int i = 0; i < numUsedIndices; i++ ) totalGlobCount += globCounts[ usedIndex[ i ] ].readLongGamma();
        outputGlobCounts.writeLongGamma( totalGlobCount );
      }
           
      if ( p != 0 ) {
        predictedSize = 0;
        predictedLengthNumBits = 0;

        for( int i = numUsedIndices; i-- != 0; ) {

          if ( index[ usedIndex[ i ] ] instanceof BitStreamHPIndex ) {
            predictedSize += offsets[ usedIndex[ i ] ].readLongGamma();
            if ( hasPositions ) predictedLengthNumBits += posNumBits[ usedIndex[ i ] ].readLongGamma();
          }
          else {
            // Interleaved index: we must subtract the number of bits used for positions from the length of the overall inverted list
            final long t = hasPositions ? posNumBits[ usedIndex[ i ] ].readLongGamma() : 0;
            predictedSize += offsets[ usedIndex[ i ] ].readLongGamma() - t;
            predictedLengthNumBits += t;
          }
        }
      }
           
      totalFrequency = combine( numUsedIndices );
      frequencies.writeGamma( totalFrequency );
      numPointers += totalFrequency;

      /* A trick to get a correct prediction. */
      if ( writeGlobCounts ) pl.count += totalGlobCount - 1;
      pl.update();
    }
    pl.done();
   
    if ( writeGlobCounts ) outputGlobCounts.close();
    termFile.close();
    frequencies.close();

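
The it.unimi.dsi.logging.ProgressLogger lifecycle used above is: construct with a logger and a log interval, optionally set expectedUpdates and itemsName, then start(), update() once per item, and done(). When one update() stands for several logical items, count can be adjusted directly, which is exactly the globCounts trick above. A minimal sketch under those assumptions (the per-term counts are hypothetical; the snippets here use the older log4j-based API, while recent dsiutils releases take an SLF4J logger):

import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Minimal sketch; the occurrence counts are hypothetical stand-ins.
public class CombineProgressSketch {
  private static final Logger LOGGER = LoggerFactory.getLogger( CombineProgressSketch.class );

  public static void main( String[] args ) {
    final long[] occurrencesPerTerm = { 5, 12, 3, 40 }; // stand-in merge input
    long expected = 0;
    for ( final long o : occurrencesPerTerm ) expected += o;

    final ProgressLogger pl = new ProgressLogger( LOGGER, 10000 ); // interval in ms, as above
    pl.expectedUpdates = expected; // enables completion-time predictions
    pl.itemsName = "occurrences";
    pl.start( "Combining lists..." );
    for ( final long occurrences : occurrencesPerTerm ) {
      // ... per-term merge work would go here ...
      pl.count += occurrences - 1; // one update() stands for `occurrences` items
      pl.update();
    }
    pl.done();
  }
}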

Examples of it.unimi.dsi.logging.ProgressLogger

    localBasename = new String[ numIndices ];
    for( int i = 0; i < numIndices; i++ ) localBasename[ i ] = outputBasename + "-" + i;
  }
 
  public void runTermsOnly() throws IOException {
    final ProgressLogger pl = new ProgressLogger( LOGGER, logInterval );
   
    final PrintWriter[] localTerms = new PrintWriter[ numIndices ];
    final int numTerms[] = new int[ numIndices ];
    final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( new FileInputStream( inputBasename + DiskBasedIndex.TERMS_EXTENSION ), "UTF-8" ) );
   
    for( int i = 0; i < numIndices; i++ ) localTerms[ i ] = new PrintWriter( new OutputStreamWriter( new FastBufferedOutputStream( new FileOutputStream( localBasename[ i ] + DiskBasedIndex.TERMS_EXTENSION ) ), "UTF-8" ) );

    // The current term
    final MutableString currTerm = new MutableString();
   
    pl.itemsName = "terms";
    pl.logInterval = logInterval;
    pl.start( "Partitioning index terms..." );

    int termNumber = 0, k;
   
    while( terms.readLine( currTerm ) != null ) {
      k = strategy.localIndex( termNumber ); // The local index for this term
      if ( numTerms[ k ] != strategy.localNumber( termNumber ) ) throw new IllegalStateException();
      numTerms[ k ]++;
      currTerm.println( localTerms[ k ] );
      pl.update();
      termNumber++;
    }

    terms.close();
    for( int i = 0; i < numIndices; i++ ) localTerms[ i ].close();

    pl.done();
  }

Examples of it.unimi.dsi.logging.ProgressLogger

    pl.done();
  }
 
  public void run() throws ConfigurationException, IOException, ClassNotFoundException {
    final ProgressLogger pl = new ProgressLogger( LOGGER, logInterval );
    final byte[] buffer = new byte[ bufferSize ];
   
    final OutputBitStream[] localIndexStream = new OutputBitStream[ numIndices ];
    final OutputBitStream[] localPositionsStream = new OutputBitStream[ numIndices ];
    final OutputBitStream[] localOffsets = new OutputBitStream[ numIndices ];
    final OutputBitStream[] localPosNumBits = new OutputBitStream[ numIndices ];
    final OutputBitStream[] localFrequencies = new OutputBitStream[ numIndices ];
    final OutputBitStream[] localGlobCounts = new OutputBitStream[ numIndices ];
    final PrintWriter[] localTerms = new PrintWriter[ numIndices ];
    final int numTerms[] = new int[ numIndices ];
    final long numberOfOccurrences[] = new long[ numIndices ];
    final long numberOfPostings[] = new long[ numIndices ];
   
    final boolean isHighPerformance = BitStreamHPIndex.class.isAssignableFrom( Class.forName( new Properties( inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION ).getString( Index.PropertyKeys.INDEXCLASS ) ) );
   
    final InputBitStream globalIndex = new InputBitStream( inputBasename + DiskBasedIndex.INDEX_EXTENSION, bufferSize );
    final long globalPositionsLength = new File( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION ).length();
    final InputBitStream globalPositions = isHighPerformance ? new InputBitStream( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize ) : null;
    final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( new FileInputStream( inputBasename + DiskBasedIndex.TERMS_EXTENSION ), "UTF-8" ) );
    final InputBitStream offsets = new InputBitStream( inputBasename + DiskBasedIndex.OFFSETS_EXTENSION );
   
    final File posNumBitsFile = new File( inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION );
    final InputBitStream posNumBits = posNumBitsFile.exists() ? new InputBitStream( inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION ) : null;
    final InputBitStream frequencies = new InputBitStream( inputBasename + DiskBasedIndex.FREQUENCIES_EXTENSION );
    final InputBitStream globCounts = new InputBitStream( inputBasename + DiskBasedIndex.GLOBCOUNTS_EXTENSION );
    offsets.readGamma();
   
    for( int i = 0; i < numIndices; i++ ) {
      localIndexStream[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.INDEX_EXTENSION, bufferSize );
      if ( isHighPerformance ) localPositionsStream[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize );
      localFrequencies[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.FREQUENCIES_EXTENSION );
      localGlobCounts[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.GLOBCOUNTS_EXTENSION );
      localTerms[ i ] = new PrintWriter( new OutputStreamWriter( new FastBufferedOutputStream( new FileOutputStream( localBasename[ i ] + DiskBasedIndex.TERMS_EXTENSION ) ), "UTF-8" ) );
      localOffsets[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.OFFSETS_EXTENSION );
      if ( posNumBits != null ) localPosNumBits[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION );
      localOffsets[ i ].writeGamma( 0 );
    }

    // The current term
    final MutableString currTerm = new MutableString();
   
    pl.expectedUpdates = ( new File( inputBasename + DiskBasedIndex.INDEX_EXTENSION ).length() + ( isHighPerformance ? new File( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION ).length() : 0 ) ) * 8;
    pl.itemsName = "bits";
    pl.logInterval = logInterval;
    pl.start( "Partitioning index..." );

    int termNumber = 0, k, prevK = -1, previousHeaderLength = 0, newHeaderLength = 0;
    long length, count, positionsOffset = 0;
    int res, frequency;
   
    while( terms.readLine( currTerm ) != null ) {
      k = strategy.localIndex( termNumber ); // The local index for this term
      if ( numTerms[ k ] != strategy.localNumber( termNumber ) ) throw new IllegalStateException();
      numTerms[ k ]++;
     
      if ( isHighPerformance ) {
        final long temp = globalIndex.readBits();
        positionsOffset = globalIndex.readLongDelta();
        previousHeaderLength = (int)( globalIndex.readBits() - temp );
        if ( prevK != -1 ) {
          length = positionsOffset - globalPositions.readBits();
          pl.count += length;
          while( length > 0 ) {
            res = (int)Math.min( bufferSize * 8, length );
            globalPositions.read( buffer, res );
            localPositionsStream[ prevK ].write( buffer, res );
            length -= res;
          }
        }
        newHeaderLength = localIndexStream[ k ].writeLongDelta( localPositionsStream[ k ].writtenBits() );
      }
     
     
      frequency = frequencies.readGamma();
      localFrequencies[ k ].writeGamma( frequency );
      numberOfPostings[ k ] += frequency;

      if ( posNumBits != null ) localPosNumBits[ k ].writeGamma( posNumBits.readGamma() );
     
      count = globCounts.readLongGamma();
      numberOfOccurrences[ k ] += count;
      localGlobCounts[ k ].writeLongGamma( count );
     
      currTerm.println( localTerms[ k ] );
     
      length = offsets.readLongGamma() - previousHeaderLength;
      localOffsets[ k ].writeLongGamma( length + newHeaderLength );
      pl.count += length + previousHeaderLength - 1;
     
      while( length > 0 ) {
        res = (int)Math.min( bufferSize * 8, length );
        globalIndex.read( buffer, res );
        localIndexStream[ k ].write( buffer, res );
        length -= res;
      }
     
      pl.update();
      prevK = k;
      termNumber++;
    }

    // We pour the last piece of positions
    if ( isHighPerformance ) {
      if ( prevK != -1 ) {
        length = globalPositionsLength * 8 - globalPositions.readBits();
        while( length > 0 ) {
          res = (int)Math.min( bufferSize * 8, length );
          globalPositions.read( buffer, res );
          localPositionsStream[ prevK ].write( buffer, res );
          length -= res;
        }
      }
    }

    pl.done();

    terms.close();
    offsets.close();
    frequencies.close();
    globCounts.close();
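
One design choice worth noting in this example: progress is measured in bits rather than terms. expectedUpdates is derived from the on-disk length of the index (file length times eight), and count is advanced by the number of bits copied for each inverted list before update() is called, so the predicted completion time tracks the actual I/O volume instead of the term count.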

Examples of it.unimi.dsi.logging.ProgressLogger

    final Scan[] scan = new Scan[ numberOfIndexedFields ]; // To scan textual content
    final PayloadAccumulator[] accumulator = new PayloadAccumulator[ numberOfIndexedFields ]; // To accumulate document data

    final ProgressLogger pl = new ProgressLogger( LOGGER, logInterval, "documents" );
    if ( documentSequence instanceof DocumentCollection ) pl.expectedUpdates = ( (DocumentCollection)documentSequence ).size();
 
   
    for ( int i = 0; i < numberOfIndexedFields; i++ ) {
      final String fieldName = factory.fieldName( indexedField[ i ] );
      switch ( factory.fieldType( indexedField[ i ] ) ) {
      case TEXT:
        scan[ i ] = new Scan( basename + '-' + fieldName, fieldName, completeness, termProcessor, map != null ? IndexingType.REMAPPED
            : IndexingType.STANDARD, 0, 0, bufferSize, builder, tempDir );
        break;
      case VIRTUAL:
        scan[ i ] = new Scan( basename + '-' + fieldName, fieldName, completeness, termProcessor, IndexingType.VIRTUAL,
            virtualDocumentResolver[ i ].numberOfDocuments(), virtualGap[ i ], bufferSize, builder, tempDir );
        break;

      case DATE:
        accumulator[ i ] = new PayloadAccumulator( basename + '-' + fieldName, new DatePayload(), fieldName,
            map != null ? IndexingType.REMAPPED : IndexingType.STANDARD, documentsPerBatch, tempDir );
        break;
      case INT:
        accumulator[ i ] = new PayloadAccumulator( basename + '-' + fieldName, new IntegerPayload(), fieldName,
            map != null ? IndexingType.REMAPPED : IndexingType.STANDARD, documentsPerBatch, tempDir );
        break;
      default:

      }
    }

    if ( building ) builder.open( "@0" ); // First batch
   
    pl.displayFreeMemory = true;
    pl.start( "Indexing documents..." );

    DocumentIterator iterator = documentSequence.iterator();
    Reader reader;
    WordReader wordReader;
    ObjectList<VirtualDocumentFragment> fragments;
    Document document;

    int documentPointer = 0, documentsInBatch = 0;
    long batchStartTime = System.currentTimeMillis();
    boolean outOfMemoryError = false;

    while ( ( document = iterator.nextDocument() ) != null ) {
     
      long overallTerms = 0;
      if ( building ) builder.startDocument( document.title(), document.uri() );
      for ( int i = 0; i < numberOfIndexedFields; i++ ) {
        switch ( factory.fieldType( indexedField[ i ] ) ) {
        case TEXT:
          reader = (Reader)document.content( indexedField[ i ] );
          wordReader = document.wordReader( indexedField[ i ] );
          wordReader.setReader( reader );
          if ( building ) builder.startTextField();
          scan[ i ].processDocument( map != null ? map[ documentPointer ] : documentPointer, wordReader );
          if ( building ) builder.endTextField();
          overallTerms += scan[ i ].numTerms;
          break;
        case VIRTUAL:
          fragments = (ObjectList<VirtualDocumentFragment>)document.content( indexedField[ i ] );
          wordReader = document.wordReader( indexedField[ i ] );
          virtualDocumentResolver[ i ].context( document );
          for( VirtualDocumentFragment fragment: fragments ) {
            int virtualDocumentPointer = virtualDocumentResolver[ i ].resolve( fragment.documentSpecifier() );
            if ( virtualDocumentPointer < 0 ) continue;
            if ( map != null ) virtualDocumentPointer = map[ virtualDocumentPointer ];
            wordReader.setReader( new FastBufferedReader( fragment.text() ) );
            scan[ i ].processDocument( virtualDocumentPointer, wordReader );
          }
          if ( building ) builder.virtualField( fragments );
          overallTerms += scan[ i ].numTerms;
          break;
        default:
          Object o = document.content( indexedField[ i ] );
          accumulator[ i ].processData( map != null ? map[ documentPointer ] : documentPointer, o );
          if ( building ) builder.nonTextField( o );
          break;
        }

        if ( scan[ i ] != null && scan[ i ].outOfMemoryError ) outOfMemoryError = true;
      }
      if ( building ) builder.endDocument();
      documentPointer++;
      documentsInBatch++;
      document.close();
      pl.update();

      // We try compaction if we detect less than PERC_AVAILABLE_MEMORY_CHECK percent of memory available
      long percAvailableMemory = Util.percAvailableMemory();
      boolean compacted = false;
      if ( ! outOfMemoryError && percAvailableMemory < PERC_AVAILABLE_MEMORY_CHECK ) {
        LOGGER.info( "Starting compaction... (" + percAvailableMemory + "% available)" );
        compacted = true;
        Util.compactMemory();
        percAvailableMemory = Util.percAvailableMemory();
        LOGGER.info( "Compaction completed (" + percAvailableMemory + "% available)" );
      }
     
      if ( outOfMemoryError || overallTerms >= maxTerms || documentsInBatch == documentsPerBatch || ( compacted && percAvailableMemory < PERC_AVAILABLE_MEMORY_DUMP ) ) {
        if ( outOfMemoryError ) LOGGER.warn( "OutOfMemoryError during buffer reallocation: writing a batch of " + documentsInBatch + " documents" );
        else if ( overallTerms >= maxTerms ) LOGGER.warn( "Too many terms (" + overallTerms + "): writing a batch of " + documentsInBatch + " documents" );
        else if ( compacted && percAvailableMemory < PERC_AVAILABLE_MEMORY_DUMP ) LOGGER.warn( "Available memory below " + PERC_AVAILABLE_MEMORY_DUMP + "%: writing a batch of " + documentsInBatch + " documents" );

        long occurrences = 0;
        for ( int i = 0; i < numberOfIndexedFields; i++ ) {
          switch ( factory.fieldType( indexedField[ i ] ) ) {
          case TEXT:
          case VIRTUAL:
            occurrences += scan[ i ].dumpBatch();
            scan[ i ].openSizeBitStream();
            break;
          default:
            accumulator[ i ].writeData();
          }
        }
       
        if ( building ) {
          builder.close();
          builder.open( "@" + scan[ 0 ].batch );
        }

        LOGGER.info( "Last set of batches indexed at " + Util.format( ( 1000. * occurrences ) / ( System.currentTimeMillis() - batchStartTime ) ) + " occurrences/s" );
        batchStartTime = System.currentTimeMillis();
        documentsInBatch = 0;
        outOfMemoryError = false;
      }
    }

    iterator.close();
    if ( builder != null ) builder.close();

    for ( int i = 0; i < numberOfIndexedFields; i++ ) {
      switch ( factory.fieldType( indexedField[ i ] ) ) {
      case TEXT:
      case VIRTUAL:
        scan[ i ].close();
        break;
      default:
        accumulator[ i ].close();
        break;
      }

    }

    documentSequence.close();
   
    pl.done();

    if ( building ) {
      final String name = new File( builder.basename() ).getName();
      final String[] collectionName = new String[ scan[ 0 ].batch ];
      for ( int i = scan[ 0 ].batch; i-- != 0; ) collectionName[ i ] = name + "@" + i + DocumentCollection.DEFAULT_EXTENSION;
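
This indexing loop uses the three-argument constructor seen above, which also sets itemsName, and enables displayFreeMemory so every progress line reports heap headroom as well. A minimal sketch of that setup (the document list is a hypothetical stand-in):

import java.util.Arrays;
import java.util.List;

import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Minimal sketch; the documents are hypothetical stand-ins.
public class IndexingProgressSketch {
  private static final Logger LOGGER = LoggerFactory.getLogger( IndexingProgressSketch.class );

  public static void main( String[] args ) {
    final List<String> documents = Arrays.asList( "doc0", "doc1", "doc2" );
    final ProgressLogger pl = new ProgressLogger( LOGGER, 10000, "documents" );
    pl.displayFreeMemory = true; // each log line also reports free/total memory
    pl.expectedUpdates = documents.size(); // size known up front enables ETA estimates
    pl.start( "Indexing documents..." );
    for ( final String document : documents ) {
      // ... index the document ...
      pl.update();
    }
    pl.done();
  }
}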

Examples of it.unimi.dsi.logging.ProgressLogger

    int words[] = new int[ 1024 ];
    final FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, "UTF-8" ) );
   
    int lineNumber = 0;
    int numberOfPartialQueries = queries;
    ProgressLogger pl = new ProgressLogger( LOGGER );
    pl.itemsName = "Klines";
    pl.expectedUpdates = maxDoc / 1000;
    pl.start( "Generating queries..." );
    MutableString line = new MutableString();
    while( reader.readLine( line ) != null && numberOfPartialQueries > 0 ) {
      if ( used[ lineNumber ] ) {
        for ( q = 0; q < queries; q++ )
          if ( coveredForQuery[ q ] < docperquery && docForQuery[ q ][ coveredForQuery[ q ] ] == lineNumber ) {
            split = line.toString().split( " " );
            int nw = split.length;
            words = IntArrays.ensureCapacity( words, nw + 1 );
            for ( i = 0; i < nw; i++ ) words[ i ] = i;
            for ( i = 0; i < Math.min( wordsperdoc, nw ); i++ ) {
              j = i + (int)( ( nw - i ) * Math.random() );
              t = words[ i ]; words[ i ] = words[ j ]; words[ j ] = t;
              query[ q ][ coveredForQuery[ q ] ][ i ] = split[ words[ i ] ];
            }
            coveredForQuery[ q ]++;
            if ( coveredForQuery[ q ] == docperquery ) numberOfPartialQueries--;
          }
      }
      lineNumber++;
      if ( lineNumber % 1000 == 0 ) pl.update();
    }
    pl.done();

    MutableString p[] = new MutableString[ Math.max( queries, wordsperdoc ) ], s = new MutableString();
    for( i = 0; i < p.length; i++ ) p[ i ] = new MutableString();

    for ( q = 0; q < queries; q++ ) {
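
Two details distinguish this last example: the single-argument constructor (the default log interval applies) and batched updates, with itemsName set to "Klines" and update() called once per thousand input lines, which is why expectedUpdates is maxDoc / 1000. A minimal sketch of the batched-update idiom (the line count is a hypothetical stand-in):

import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Minimal sketch; the total line count is a hypothetical stand-in.
public class BatchedProgressSketch {
  private static final Logger LOGGER = LoggerFactory.getLogger( BatchedProgressSketch.class );

  public static void main( String[] args ) {
    final int totalLines = 250000;
    final ProgressLogger pl = new ProgressLogger( LOGGER ); // default log interval
    pl.itemsName = "Klines";
    pl.expectedUpdates = totalLines / 1000;
    pl.start( "Generating queries..." );
    for ( int lineNumber = 0; lineNumber < totalLines; lineNumber++ ) {
      // ... process one input line ...
      if ( lineNumber % 1000 == 0 ) pl.update(); // one update per thousand lines
    }
    pl.done();
  }
}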