// Source code of org.eclipse.jface.text.FindReplaceDocumentAdapter

/*******************************************************************************
 * Copyright (c) 2000, 2012 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Cagatay Calli <ccalli@gmail.com> - [find/replace] retain caps when replacing - https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949
 *     Cagatay Calli <ccalli@gmail.com> - [find/replace] define & fix behavior of retain caps with other escapes and text before \C - https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061
 *******************************************************************************/
package org.eclipse.jface.text;


import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;


import org.eclipse.core.runtime.Assert;




/**
 * Provides search and replace operations on
 * {@link org.eclipse.jface.text.IDocument}.
 * <p>
 * Replaces
 * {@link org.eclipse.jface.text.IDocument#search(int, String, boolean, boolean, boolean)}.
 *
 * @since 3.0
 */
public class FindReplaceDocumentAdapter implements CharSequence {


  /**
   * Internal marker type for find/replace operation codes.
   * <p>
   * The class declares no members. The operation code constants of the
   * enclosing class are distinct instances of this type and are told apart
   * by reference comparison (<code>==</code>).
   */
  private static class FindReplaceOperationCode {
  }


  // Find/replace operation codes. Each code is a unique instance; findReplace compares them with ==.
  /** Operation code: start a new search with the given find string and start offset. */
  private static final FindReplaceOperationCode FIND_FIRST= new FindReplaceOperationCode();
  /** Operation code: continue the current search with the existing matcher. */
  private static final FindReplaceOperationCode FIND_NEXT= new FindReplaceOperationCode();
  /** Operation code: replace the current match and return the replaced region. */
  private static final FindReplaceOperationCode REPLACE= new FindReplaceOperationCode();
  /** Operation code: replace the current match, then search for the next one. */
  private static final FindReplaceOperationCode REPLACE_FIND_NEXT= new FindReplaceOperationCode();


  /**
   * Retain case mode: characters are appended unchanged.
   * <p>
   * The four <code>RC_*</code> constants are the possible values of <code>fRetainCaseMode</code>.
   * @since 3.4
   */
  private static final int RC_MIXED= 0;
  /** Retain case mode: every character is converted to upper case. */
  private static final int RC_UPPER= 1;
  /** Retain case mode: every character is converted to lower case. */
  private static final int RC_LOWER= 2;
  /** Retain case mode: the next character is converted to upper case, then the mode falls back to {@link #RC_MIXED}. */
  private static final int RC_FIRSTUPPER= 3;




  /**
   * The adapted document. Assigned once in the constructor; all searches and
   * replacements operate on it.
   */
  private IDocument fDocument;


  /**
   * State for findReplace: the operation code recorded by the most recent
   * findReplace call that got past its validation, or <code>null</code> if there
   * was none yet. A replace operation is only accepted while this is
   * <code>FIND_FIRST</code> or <code>FIND_NEXT</code>.
   */
  private FindReplaceOperationCode fFindReplaceState= null;


  /**
   * The matcher used in findReplace. It matches against this adapter (as a
   * <code>CharSequence</code>) and is kept between calls; a FIND_FIRST operation
   * reuses it when the pattern string and flags are unchanged.
   */
  private Matcher fFindReplaceMatcher;


  /**
   * The match offset from the last findReplace call: set to the start offset by a
   * FIND_FIRST operation and to the offset of the found match (or -1) by a backward
   * search, where it bounds the end of acceptable matches.
   */
  private int fFindReplaceMatchOffset;


  /**
   * Retain case mode; one of the <code>RC_*</code> constants. Reset to
   * <code>RC_MIXED</code> whenever a replace pattern is interpreted and changed
   * by the <code>\C</code> escape.
   */
  private int fRetainCaseMode;


  /**
   * Constructs a new find replace document adapter.
   * <p>
   * The adapter keeps a reference to the document; it does not copy its content.
   *
   * @param document the adapted document, must not be <code>null</code>
   *       (checked with <code>Assert.isNotNull</code>)
   */
  public FindReplaceDocumentAdapter(IDocument document) {
    Assert.isNotNull(document);
    fDocument= document;
  }


  /**
   * Returns the location of a given string in this adapter's document based on a set of search criteria.
   *
   * @param startOffset document offset at which search starts
   * @param findString the string to find
   * @param forwardSearch the search direction
   * @param caseSensitive indicates whether lower and upper case should be distinguished
   * @param wholeWord indicates whether the findString should be limited by white spaces as
   *       defined by Character.isWhiteSpace. Must not be used in combination with <code>regExSearch</code>.
   * @param regExSearch if <code>true</code> findString represents a regular expression
   *       Must not be used in combination with <code>wholeWord</code>.
   * @return the find or replace region or <code>null</code> if there was no match
   * @throws BadLocationException if startOffset is an invalid document offset
   * @throws PatternSyntaxException if a regular expression has invalid syntax
   */
  public IRegion find(int startOffset, String findString, boolean forwardSearch, boolean caseSensitive, boolean wholeWord, boolean regExSearch) throws BadLocationException {
    Assert.isTrue(!(regExSearch && wholeWord));


    // Adjust offset to special meaning of -1
    if (startOffset == -1 && forwardSearch)
      startOffset= 0;
    if (startOffset == -1 && !forwardSearch)
      startOffset= length() - 1;


    return findReplace(FIND_FIRST, startOffset, findString, null, forwardSearch, caseSensitive, wholeWord, regExSearch);
  }


  /**
   * Stateful findReplace executes a FIND, REPLACE, REPLACE_FIND or FIND_FIRST operation.
   * In case of REPLACE and REPLACE_FIND it sends a <code>DocumentEvent</code> to all
   * registered <code>IDocumentListener</code>.
   *
   * @param startOffset document offset at which search starts
   *       this value is only used in the FIND_FIRST operation and otherwise ignored
   * @param findString the string to find
   *       this value is only used in the FIND_FIRST operation and otherwise ignored
   * @param replaceText the string to replace the current match
   *       this value is only used in the REPLACE and REPLACE_FIND operations and otherwise ignored
   * @param forwardSearch the search direction
   * @param caseSensitive indicates whether lower and upper case should be distinguished
   * @param wholeWord indicates whether the findString should be limited by white spaces as
   *       defined by Character.isWhiteSpace. Must not be used in combination with <code>regExSearch</code>.
   * @param regExSearch if <code>true</code> this operation represents a regular expression
   *       Must not be used in combination with <code>wholeWord</code>.
   * @param operationCode specifies what kind of operation is executed
   * @return the find or replace region or <code>null</code> if there was no match
   * @throws BadLocationException if startOffset is an invalid document offset
   * @throws IllegalStateException if a REPLACE or REPLACE_FIND operation is not preceded by a successful FIND operation
   * @throws PatternSyntaxException if a regular expression has invalid syntax
   */
  private IRegion findReplace(final FindReplaceOperationCode operationCode, int startOffset, String findString, String replaceText, boolean forwardSearch, boolean caseSensitive, boolean wholeWord, boolean regExSearch) throws BadLocationException {


    // Validate option combinations
    Assert.isTrue(!(regExSearch && wholeWord));


    // Validate state
    if ((operationCode == REPLACE || operationCode == REPLACE_FIND_NEXT) && (fFindReplaceState != FIND_FIRST && fFindReplaceState != FIND_NEXT))
      throw new IllegalStateException("illegal findReplace state: cannot replace without preceding find"); //$NON-NLS-1$


    if (operationCode == FIND_FIRST) {
      // Reset


      if (findString == null || findString.length() == 0)
        return null;


      // Validate start offset
      if (startOffset < 0 || startOffset > length())
        throw new BadLocationException();


      int patternFlags= 0;


      if (regExSearch) {
        patternFlags |= Pattern.MULTILINE;
        findString= substituteLinebreak(findString);
      }


      if (!caseSensitive)
        patternFlags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;


      if (wholeWord)
        findString= "\\b" + findString + "\\b"; //$NON-NLS-1$ //$NON-NLS-2$


      if (!regExSearch && !wholeWord)
        findString= asRegPattern(findString);


      fFindReplaceMatchOffset= startOffset;
      if (fFindReplaceMatcher != null && fFindReplaceMatcher.pattern().pattern().equals(findString) && fFindReplaceMatcher.pattern().flags() == patternFlags) {
        /*
         * Commented out for optimization:
         * The call is not needed since FIND_FIRST uses find(int) which resets the matcher
         */
        // fFindReplaceMatcher.reset();
      } else {
        Pattern pattern= Pattern.compile(findString, patternFlags);
        fFindReplaceMatcher= pattern.matcher(this);
      }
    }


    // Set state
    fFindReplaceState= operationCode;


    if (operationCode == REPLACE || operationCode == REPLACE_FIND_NEXT) {
      if (regExSearch) {
        Pattern pattern= fFindReplaceMatcher.pattern();
        String prevMatch= fFindReplaceMatcher.group();
        try {
          replaceText= interpretReplaceEscapes(replaceText, prevMatch);
          Matcher replaceTextMatcher= pattern.matcher(prevMatch);
          replaceText= replaceTextMatcher.replaceFirst(replaceText);
        } catch (IndexOutOfBoundsException ex) {
          throw new PatternSyntaxException(ex.getLocalizedMessage(), replaceText, -1);
        }
      }


      int offset= fFindReplaceMatcher.start();
      int length= fFindReplaceMatcher.group().length();


      if (fDocument instanceof IRepairableDocumentExtension
          && ((IRepairableDocumentExtension)fDocument).isLineInformationRepairNeeded(offset, length, replaceText)) {
        String message= TextMessages.getString("FindReplaceDocumentAdapter.incompatibleLineDelimiter"); //$NON-NLS-1$
        throw new PatternSyntaxException(message, replaceText, offset);
      }


      fDocument.replace(offset, length, replaceText);


      if (operationCode == REPLACE) {
        return new Region(offset, replaceText.length());
      }
    }


    if (operationCode != REPLACE) {
      try {
        if (forwardSearch) {


          boolean found= false;
          if (operationCode == FIND_FIRST)
            found= fFindReplaceMatcher.find(startOffset);
          else
            found= fFindReplaceMatcher.find();


          if (operationCode == REPLACE_FIND_NEXT)
            fFindReplaceState= FIND_NEXT;


          if (found && fFindReplaceMatcher.group().length() > 0)
            return new Region(fFindReplaceMatcher.start(), fFindReplaceMatcher.group().length());
          return null;
        }
        // backward search
        boolean found= fFindReplaceMatcher.find(0);
        int index= -1;
        int length= -1;
        while (found && fFindReplaceMatcher.start() + fFindReplaceMatcher.group().length() <= fFindReplaceMatchOffset + 1) {
          index= fFindReplaceMatcher.start();
          length= fFindReplaceMatcher.group().length();
          found= fFindReplaceMatcher.find(index + 1);
        }
        fFindReplaceMatchOffset= index;
        if (index > -1) {
          // must set matcher to correct position
          fFindReplaceMatcher.find(index);
          return new Region(index, length);
        }
        return null;
      } catch (StackOverflowError e) {
        String message= TextMessages.getString("FindReplaceDocumentAdapter.patternTooComplex"); //$NON-NLS-1$
        throw new PatternSyntaxException(message, findString, -1);
      }
    }


    return null;
  }


  /**
   * Substitutes \R in a regex find pattern with (?>\r\n?|\n)
   *
   * @param findString the original find pattern
   * @return the transformed find pattern
   * @throws PatternSyntaxException if \R is added at an illegal position (e.g. in a character set)
   * @since 3.4
   */
  private String substituteLinebreak(String findString) throws PatternSyntaxException {
    int length= findString.length();
    StringBuffer buf= new StringBuffer(length);


    int inCharGroup= 0;
    int inBraces= 0;
    boolean inQuote= false;
    for (int i= 0; i < length; i++) {
      char ch= findString.charAt(i);
      switch (ch) {
        case '[':
          buf.append(ch);
          if (! inQuote)
            inCharGroup++;
          break;


        case ']':
          buf.append(ch);
          if (! inQuote)
            inCharGroup--;
          break;


        case '{':
          buf.append(ch);
          if (! inQuote && inCharGroup == 0)
            inBraces++;
          break;


        case '}':
          buf.append(ch);
          if (! inQuote && inCharGroup == 0)
            inBraces--;
          break;


        case '\\':
          if (i + 1 < length) {
            char ch1= findString.charAt(i + 1);
            if (inQuote) {
              if (ch1 == 'E')
                inQuote= false;
              buf.append(ch).append(ch1);
              i++;


            } else if (ch1 == 'R') {
              if (inCharGroup > 0 || inBraces > 0) {
                String msg= TextMessages.getString("FindReplaceDocumentAdapter.illegalLinebreak"); //$NON-NLS-1$
                throw new PatternSyntaxException(msg, findString, i);
              }
              buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$
              i++;


            } else {
              if (ch1 == 'Q') {
                inQuote= true;
              }
              buf.append(ch).append(ch1);
              i++;
            }
          } else {
            buf.append(ch);
          }
          break;


        default:
          buf.append(ch);
          break;
      }


    }
    return buf.toString();
  }


  /**
   * Interprets current Retain Case mode (all upper-case,all lower-case,capitalized or mixed)
   * and appends the character <code>ch</code> to <code>buf</code> after processing.
   *
   * @param buf the output buffer
   * @param ch the character to process
   * @since 3.4
   */
  private void interpretRetainCase(StringBuffer buf, char ch) {
    if (fRetainCaseMode == RC_UPPER)
      buf.append(Character.toUpperCase(ch));
    else if (fRetainCaseMode == RC_LOWER)
      buf.append(Character.toLowerCase(ch));
    else if (fRetainCaseMode == RC_FIRSTUPPER) {
      buf.append(Character.toUpperCase(ch));
      fRetainCaseMode= RC_MIXED;
    } else
      buf.append(ch);
  }


  /**
   * Interprets escaped characters in the given replace pattern.
   *
   * @param replaceText the replace pattern
   * @param foundText the found pattern to be replaced
   * @return a replace pattern with escaped characters substituted by the respective characters
   * @since 3.4
   */
  private String interpretReplaceEscapes(String replaceText, String foundText) {
    int length= replaceText.length();
    boolean inEscape= false;
    StringBuffer buf= new StringBuffer(length);


    /* every string we did not check looks mixed at first
     * so initialize retain case mode with RC_MIXED
     */
    fRetainCaseMode= RC_MIXED;


    for (int i= 0; i < length; i++) {
      final char ch= replaceText.charAt(i);
      if (inEscape) {
        i= interpretReplaceEscape(ch, i, buf, replaceText, foundText);
        inEscape= false;


      } else if (ch == '\\') {
        inEscape= true;


      } else if (ch == '$') {
        buf.append(ch);


        /*
         * Feature in java.util.regex.Matcher#replaceFirst(String):
         * $00, $000, etc. are interpreted as $0 and
         * $01, $001, etc. are interpreted as $1, etc. .
         * If we support \0 as replacement pattern for capturing group 0,
         * it would not be possible any more to write a replacement pattern
         * that appends 0 to a capturing group (like $0\0).
         * The fix is to interpret \00 and $00 as $0\0, and
         * \01 and $01 as $0\1, etc.
         */
        if (i + 2 < length) {
          char ch1= replaceText.charAt(i + 1);
          char ch2= replaceText.charAt(i + 2);
          if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') {
            buf.append("0\\"); //$NON-NLS-1$
            i++; // consume the 0
          }
        }
      } else {
        interpretRetainCase(buf, ch);
      }
    }


    if (inEscape) {
      // '\' as last character is invalid, but we still add it to get an error message
      buf.append('\\');
    }
    return buf.toString();
  }


  /**
   * Interprets the escaped character <code>ch</code> at offset <code>i</code>
   * of the <code>replaceText</code> and appends the interpretation to <code>buf</code>.
   *
   * @param ch the escaped character
   * @param i the offset
   * @param buf the output buffer
   * @param replaceText the original replace pattern
   * @param foundText the found pattern to be replaced
   * @return the new offset
   * @since 3.4
   */
  private int interpretReplaceEscape(final char ch, int i, StringBuffer buf, String replaceText, String foundText) {
    int length= replaceText.length();
    switch (ch) {
      case 'r':
        buf.append('\r');
        break;
      case 'n':
        buf.append('\n');
        break;
      case 't':
        buf.append('\t');
        break;
      case 'f':
        buf.append('\f');
        break;
      case 'a':
        buf.append('\u0007');
        break;
      case 'e':
        buf.append('\u001B');
        break;
      case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries
        buf.append(TextUtilities.getDefaultLineDelimiter(fDocument));
        break;
      /*
       * \0 for octal is not supported in replace string, since it
       * would conflict with capturing group \0, etc.
       */
      case '0':
        buf.append('$').append(ch);
        /*
         * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)"
         * in interpretReplaceEscape(String) above.
         */
        if (i + 1 < length) {
          char ch1= replaceText.charAt(i + 1);
          if ('0' <= ch1 && ch1 <= '9') {
            buf.append('\\');
          }
        }
        break;


      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        buf.append('$').append(ch);
        break;


      case 'c':
        if (i + 1 < length) {
          char ch1= replaceText.charAt(i + 1);
          interpretRetainCase(buf, (char)(ch1 ^ 64));
          i++;
        } else {
          String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalControlEscape", "\\c"); //$NON-NLS-1$ //$NON-NLS-2$
          throw new PatternSyntaxException(msg, replaceText, i);
        }
        break;


      case 'x':
        if (i + 2 < length) {
          int parsedInt;
          try {
            parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 3), 16);
            if (parsedInt < 0)
              throw new NumberFormatException();
          } catch (NumberFormatException e) {
            String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, i + 3)); //$NON-NLS-1$
            throw new PatternSyntaxException(msg, replaceText, i);
          }
          interpretRetainCase(buf, (char) parsedInt);
          i+= 2;
        } else {
          String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
          throw new PatternSyntaxException(msg, replaceText, i);
        }
        break;


      case 'u':
        if (i + 4 < length) {
          int parsedInt;
          try {
            parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 5), 16);
            if (parsedInt < 0)
              throw new NumberFormatException();
          } catch (NumberFormatException e) {
            String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, i + 5)); //$NON-NLS-1$
            throw new PatternSyntaxException(msg, replaceText, i);
          }
          interpretRetainCase(buf, (char) parsedInt);
          i+= 4;
        } else {
          String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
          throw new PatternSyntaxException(msg, replaceText, i);
        }
        break;


      case 'C':
        if(foundText.toUpperCase().equals(foundText)) // is whole match upper-case?
          fRetainCaseMode= RC_UPPER;
        else if (foundText.toLowerCase().equals(foundText)) // is whole match lower-case?
          fRetainCaseMode= RC_LOWER;
        else if(Character.isUpperCase(foundText.charAt(0))) // is first character upper-case?
          fRetainCaseMode= RC_FIRSTUPPER;
        else
          fRetainCaseMode= RC_MIXED;
        break;


      default:
        // unknown escape k: append uninterpreted \k
        buf.append('\\').append(ch);
        break;
    }
    return i;
  }


  /**
   * Converts a non-regex string to a pattern
   * that can be used with the regex search engine.
   *
   * @param string the non-regex pattern
   * @return the string converted to a regex pattern
   */
  private String asRegPattern(String string) {
    StringBuffer out= new StringBuffer(string.length());
    boolean quoting= false;


    for (int i= 0, length= string.length(); i < length; i++) {
      char ch= string.charAt(i);
      if (ch == '\\') {
        if (quoting) {
          out.append("\\E"); //$NON-NLS-1$
          quoting= false;
        }
        out.append("\\\\"); //$NON-NLS-1$
        continue;
      }
      if (!quoting) {
        out.append("\\Q"); //$NON-NLS-1$
        quoting= true;
      }
      out.append(ch);
    }
    if (quoting)
      out.append("\\E"); //$NON-NLS-1$


    return out.toString();
  }


  /**
   * Substitutes the previous match with the given text.
   * Sends a <code>DocumentEvent</code> to all registered <code>IDocumentListener</code>.
   * <p>
   * Delegates to <code>findReplace</code> with the REPLACE operation code; the match to
   * replace is the one held by the matcher of the preceding find operation.
   *
   * @param text the substitution text
   * @param regExReplace if <code>true</code> <code>text</code> is treated as a regular expression
   *       replace pattern, i.e. its escapes and capturing group references are interpreted
   * @return the replace region: the offset of the replaced match and the length of the inserted text
   * @throws BadLocationException if the replacement cannot be applied to the document
   * @throws IllegalStateException if this call is not preceded by a find operation
   * @throws PatternSyntaxException if a regular expression has invalid syntax
   *
   * @see DocumentEvent
   * @see IDocumentListener
   */
  public IRegion replace(String text, boolean regExReplace) throws BadLocationException {
    return findReplace(REPLACE, -1, null, text, false, false, false, regExReplace);
  }


  // ---------- CharSequence implementation ----------


  /*
   * Returns the length of the adapted document.
   *
   * @see java.lang.CharSequence#length()
   */
  public int length() {
    return fDocument.getLength();
  }


  /*
   * @see java.lang.CharSequence#charAt(int)
   */
  public char charAt(int index) {
    try {
      return fDocument.getChar(index);
    } catch (BadLocationException e) {
      throw new IndexOutOfBoundsException();
    }
  }


  /*
   * @see java.lang.CharSequence#subSequence(int, int)
   */
  public CharSequence subSequence(int start, int end) {
    try {
      return fDocument.get(start, end - start);
    } catch (BadLocationException e) {
      throw new IndexOutOfBoundsException();
    }
  }


  /*
   * Returns the complete content of the adapted document.
   *
   * @see java.lang.Object#toString()
   */
  public String toString() {
    return fDocument.get();
  }


  /**
   * Escapes special characters in the string, such that the resulting pattern
   * matches the given string.
   *
   * @param string the string to escape
   * @return a regex pattern that matches the given string
   * @since 3.5
   */
  public static String escapeForRegExPattern(String string) {
    //implements https://bugs.eclipse.org/bugs/show_bug.cgi?id=44422
  
    StringBuffer pattern= new StringBuffer(string.length() + 16);
    int length= string.length();
    for (int i= 0; i < length; i++) {
      char ch= string.charAt(i);
      switch (ch) {
        case '\\':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '.':
        case '?':
        case '*':
        case '+':
        case '|':
        case '^':
        case '$':
          pattern.append('\\').append(ch);
          break;
  
        case '\r':
          if (i + 1 < length && string.charAt(i + 1) == '\n')
            i++;
          //$FALL-THROUGH$
        case '\n':
          pattern.append("\\R"); //$NON-NLS-1$
          break;
        case '\t':
          pattern.append("\\t"); //$NON-NLS-1$
          break;
        case '\f':
          pattern.append("\\f"); //$NON-NLS-1$
          break;
        case 0x07:
          pattern.append("\\a"); //$NON-NLS-1$
          break;
        case 0x1B:
          pattern.append("\\e"); //$NON-NLS-1$
          break;
  
        default:
          if (0 <= ch && ch < 0x20) {
            pattern.append("\\x"); //$NON-NLS-1$
            String hexString= Integer.toHexString(ch).toUpperCase();
            if (hexString.length() == 1)
              pattern.append('0');
            pattern.append(hexString);
          } else {
            pattern.append(ch);
          }
      }
    }
    return pattern.toString();
  }
}
// Source code of org.eclipse.jface.text.FindReplaceDocumentAdapter
//
// Related classes of org.eclipse.jface.text.FindReplaceDocumentAdapter