Package org.apache.regexp

Examples of org.apache.regexp.RE


    //       overridden by the normal field. This way we can be sure that the
    //       normal fields have the value we expect.
    AuxiliaryField[] auxiliaryFieldArr = mConfig.getAuxiliaryFieldList();
    if (auxiliaryFieldArr != null) {
      for (int i = 0; i < auxiliaryFieldArr.length; i++) {
        RE regex = auxiliaryFieldArr[i].getUrlRegex();
        if (regex.match(url)) {
          String fieldName = auxiliaryFieldArr[i].getFieldName();

          String value = auxiliaryFieldArr[i].getValue();
          if (value == null) {
            // We have no value set -> Extract the value from the regex
            value = regex.getParen(auxiliaryFieldArr[i].getUrlRegexGroup());
          }

          if (value != null) {
            if (auxiliaryFieldArr[i].getToLowerCase()) {
              value = value.toLowerCase();
View Full Code Here


        "parse=true|false index=true|false >http://someULR</prefix|regex> instead.");
      mHtmlParserPatternReArr = new RE[mHtmlParserUrlPatternArr.length];
      for (int i = 0; i < mHtmlParserPatternReArr.length; i++) {
        String regex = mHtmlParserUrlPatternArr[i].getRegexPattern();
        try {
          mHtmlParserPatternReArr[i] = new RE(regex);
        }
        catch (RESyntaxException exc) {
          throw new RegainException("Regular exception of HTML parser pattern #"
            + (i + 1) + " has a wrong syntax: '" + regex + "'", exc);
        }
View Full Code Here

   * @param rawDocument Das zu durchsuchende Dokument.
   * @throws RegainException Wenn das Dokument nicht gelesen werden konnte.
   */
  private void parseHtmlDocument(RawDocument rawDocument) throws RegainException {
    for (int i = 0; i < mHtmlParserPatternReArr.length; i++) {
      RE re = mHtmlParserPatternReArr[i];
      int urlGroup = mHtmlParserUrlPatternArr[i].getRegexUrlGroup();
      boolean shouldBeParsed = mHtmlParserUrlPatternArr[i].getShouldBeParsed();
      boolean shouldBeIndexed = mHtmlParserUrlPatternArr[i].getShouldBeIndexed();

      int offset = 0;
      String contentAsString = rawDocument.getContentAsString();
      try {
        while (re.match(contentAsString, offset)) {
          offset = re.getParenEnd(0);

          String parentUrl = rawDocument.getUrl();
          String url = re.getParen(urlGroup);

          if (url != null) {
            // Convert the URL to an absolute URL
            url = CrawlerToolkit.toAbsoluteUrl(url, parentUrl);

View Full Code Here

      return null;
    }

    String regex = "\\." + extention + "$";
    try {
      return new RE(regex, RE.MATCH_CASEINDEPENDENT);
    } catch (RESyntaxException exc) {
      throw new RegainException("Creating accept regex for preparator failed: "
              + regex, exc);
    }
  }
View Full Code Here

    }
    buffer.append(")$");

    String urlRegex = buffer.toString();
    try {
      return new RE(urlRegex, RE.MATCH_CASEINDEPENDENT);
    } catch (RESyntaxException exc) {
      throw new RegainException("Creating accept regex for preparator failed: "
              + urlRegex, exc);
    }
  }
View Full Code Here

      return null;
    }

    if (mValueRegex == null) {
      try {
        mValueRegex = new RE("^\\s+(.*)\\s+REG_SZ\\s+(.*)$");
      } catch (RESyntaxException exc) {
        throw new RegainException("Creating registry value regex failed", exc);
      }
    }

View Full Code Here

      }

      String openInNewWindowRegex = indexConfigs[0].getOpenInNewWindowRegex();
      if (openInNewWindowRegex != null) {
        try {
          mOpenInNewWindowRegex = new RE(openInNewWindowRegex);
        } catch (RESyntaxException exc) {
          throw new RegainException("Syntax error in openInNewWindowRegex: '" + openInNewWindowRegex + "'", exc);
        }
      }
View Full Code Here

   {
     Hashtable myParameters = new Hashtable();

try {
  String contentType = this.myRequest.getContentType();
  RE r = new RE("multipart/form-data");
  if ( r.match (" " + contentType) ) {
    // We are dealing with a multipart form
    MultipartRequest formHandler = new MultipartRequest
          (this.myRequest, tmpDir);
    Enumeration paramList = formHandler.getParameterNames();
    for (; paramList.hasMoreElements() ;) {
View Full Code Here

    throws RegainException
  {
    super(prefix, pathStartRegex, pathEndRegex);

    try {
      mPathNodeRE = new RE(pathNodeRegex, RE.MATCH_CASEINDEPENDENT);
    }
    catch (RESyntaxException exc) {
      throw new RegainException("Syntax error in regular expression", exc);
    }
View Full Code Here

  {
    super(prefix, contentStartRegex, contentEndRegex);

    try {
      if ((headlineRegex != null) && (headlineRegex.length() != 0)) {
        mHeadlineRE = new RE(headlineRegex, RE.MATCH_CASEINDEPENDENT | RE.MATCH_MULTILINE);
        mHeadlineRegexGroup = headlineRegexGroup;
      }
    }
    catch (RESyntaxException exc) {
      throw new RegainException("Syntax error in regular expression", exc);
View Full Code Here

TOP

Related Classes of org.apache.regexp.RE

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.