Package it.unimi.dsi.mg4j.query

Source Code of it.unimi.dsi.mg4j.query.MarkingMutableString$NullEscape

package it.unimi.dsi.mg4j.query;


/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2005-2010 Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;

import java.io.IOException;

// ALERT: this class desperately needs to be documented.

/** A mutable string with a special method to append text that should be marked.
*
* <p>A marking mutable string can mark several <em>fields</em> (which will often correspond to indexed fields).
* Each time you {@linkplain #startField(SelectedInterval[]) start a field}, you pass some intervals to be marked. Then,
* you call {@link #appendAndMark(WordReader)}, which will add words and nonwords coming from the provided
* {@link it.unimi.dsi.io.WordReader}, marking as suggested by the interval set. The number of words
* around each interval can be set in the constructor. When a field is finished, you must call {@link #endField()}.
*/
public class MarkingMutableString extends MutableString {
  private static final long serialVersionUID = 1L;

  /** The default number of words before and after each interval. */
  public final static int DEFAULT_INTERVAL_SURROUND = 8;
 
  public boolean resume = true;
  public boolean marking;
  /** The current set of intervals for marking. */
  private SelectedInterval[] interval;
  private int count;
  private int currMarkingInterval, currResumeInterval;
  private boolean skipping;
  private boolean oneCharOut;

  private final Marker marker;
  private final EscapeStrategy escapeStrategy;

  /** An escaping strategy. Such a strategy is used by a {@link MarkingMutableString} to escape
   * strings passed to the {@link MarkingMutableString#appendAndMark(WordReader)} method. */

  public interface EscapeStrategy {
    public MutableString escape( MutableString s );
  };
 
  private static final char[] HTML_ESCAPE_CHAR = new char[] { '<', '&' };
  private static final String[] HTML_ESCAPE_STRING = new String[] { "&lt;", "&amp;" };
 
  /** A singleton for the strategy that escapes HTML. */

  private static final class HtmlEscape implements EscapeStrategy {
    private HtmlEscape() {}
    public MutableString escape( final MutableString s ) {
      return s.replace( HTML_ESCAPE_CHAR, HTML_ESCAPE_STRING );
    }
  }
 
  /** A singleton for the null escape strategy (which does nothing). */
 
  public static final EscapeStrategy NULL_ESCAPE = new NullEscape();
 
  private static final class NullEscape implements EscapeStrategy {
    private NullEscape() {}
    public MutableString escape( final MutableString s ) {
      return s;
    }
  }
 
  /** A singleton for the HTML escape strategy. */
 
  public static final EscapeStrategy HTML_ESCAPE = new HtmlEscape();
  /** The number of surrounding word around each interval. */
  private final int intervalSurround;
 
  /** Creates a new loose empty marking mutable string.
   *
   * @param marker a marker that will decide how to highlight intervals.
   * @param escapeStrategy the escape strategy for strings passed to {@link #appendAndMark(WordReader)}, or <code>null</code>.
   * @param intervalSurround the number of words printed before and after each interval.
   */
  public MarkingMutableString( final Marker marker, final EscapeStrategy escapeStrategy, final int intervalSurround ) {
    this.marker = marker;
    this.escapeStrategy = escapeStrategy;
    this.intervalSurround = intervalSurround;
  }
 
  /** Creates a new loose empty marking mutable string default interval surround.
   *
   * @param marker a marker that will decide how to highlight intervals.
   * @param escapeStrategy the escape strategy for strings passed to {@link #appendAndMark(WordReader)}, or <code>null</code>.
   */
  public MarkingMutableString( final Marker marker, final EscapeStrategy escapeStrategy ) {
    this( marker, escapeStrategy, DEFAULT_INTERVAL_SURROUND );
  }

  /** Creates a new loose empty marking mutable string with default interval surround,
   * no escaping strategy and no term processor.
   *
   * @param marker a marker that will decide how to highlight intervals.
   */
  public MarkingMutableString( final Marker marker ) {
    this( marker, NULL_ESCAPE );
  }
 
  /** Prepares this marking mutable string for a new field. We append
   * {@link TextMarker#startOfField()},
   * the interval marking state is reset and the intervals for marking are set to <code>interval</code>.
   *
   * @param interval the new selected-interval array for marking.
   */
 
  public MarkingMutableString startField( final SelectedInterval[] interval ) {
    if ( interval == null ) throw new IllegalArgumentException();
    count = -1;
    currResumeInterval = currMarkingInterval = 0;
    skipping = oneCharOut = marking = false;
    this.interval = interval;
    append( marker.startOfField() );
    return this;
  }
 
  /** Closes the current field. The value of {@link TextMarker#startOfField()} is appended to the string.
   */
  public MarkingMutableString endField() {
    append( marker.endOfField() );
    return this;
  }
 
  private int leftRadius( int currResumeInterval ) {
    switch( interval[ currResumeInterval].type ) {
    case WHOLE: return intervalSurround;
    case PREFIX: return intervalSurround;
    case SUFFIX: return 0;
    default: throw new IllegalArgumentException();
    }
  }
 
  private int rightRadius( int currResumeInterval ) {
    switch( interval[ currResumeInterval].type ) {
    case WHOLE: return intervalSurround;
    case PREFIX: return 0;
    case SUFFIX: return intervalSurround;
    default: throw new IllegalArgumentException();
    }
  }
 
  public MarkingMutableString appendAndMark( final String s ) {
    return appendAndMark( new MutableString( s ) );
  }
 
  public MarkingMutableString appendAndMark( final MutableString s ) {
    return appendAndMark( new FastBufferedReader( s ) );
  }
 
  public MarkingMutableString appendAndMark( final WordReader wordReader ) {
    //System.err.println( interval[ currInterval ] + "|" + new String( array, offset, length ) );
   
    MutableString word = new MutableString(), nonWord = new MutableString();
    try {
      while( wordReader.next( word, nonWord ) ) {
        if ( word.length() != 0 ) count++;
       
        if ( resume ) {
          while( currResumeInterval < interval.length && interval[ currResumeInterval ].interval.compareTo( count, leftRadius( currResumeInterval), rightRadius( currResumeInterval ) ) > 0 ) currResumeInterval++;
          if ( currResumeInterval == interval.length || ! interval[ currResumeInterval ].interval.contains( count, leftRadius( currResumeInterval), rightRadius( currResumeInterval ) ) ) {
            if ( ! skipping && oneCharOut ) append( marker.endOfBlock() );
            // There's nothing else we can do...
            if ( resume && currResumeInterval == interval.length ) return this;
            // Otherwise, we continue, but skipping.
            skipping = true;
            continue;
          }
         
          if ( skipping ) append( marker.startOfBlock() );
          skipping = false;
        }
       
        if ( word.length() !=0 ) {
          if ( ! marking && currMarkingInterval < interval.length && interval[ currMarkingInterval ].interval.contains( count ) ) {
            append( marker.startOfMark() );
            marking = true;
          }
         
          append( word );

          if ( marking && ( currMarkingInterval == interval.length || ! interval[ currMarkingInterval ].interval.contains( count + 1 ) ) ) {
            append( marker.endOfMark() );
            marking = false;
          }

          oneCharOut = true;
          if ( currMarkingInterval < interval.length && interval[ currMarkingInterval ].interval.compareTo( count + 1 ) > 0 ) currMarkingInterval++;
        }
       
        if ( nonWord.length() > 0 ) {
          oneCharOut = true;
          nonWord.squeezeWhitespace();
          append( escapeStrategy.escape( nonWord ) );
        }
      }
     
      if ( marking ) append( marker.endOfMark() );
    } catch ( IOException e ) {
      throw new RuntimeException( e );
    }
   
    return this;
  }
}
TOP

Related Classes of it.unimi.dsi.mg4j.query.MarkingMutableString$NullEscape

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.