Package com.linkedin.databus2.producers.db

Source Code of com.linkedin.databus2.producers.db.GGXMLTrailTransactionFinder$DatabusTrailFileParseException

/*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package com.linkedin.databus2.producers.db;

import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.log4j.Logger;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import com.linkedin.databus.core.DatabusRuntimeException;
import com.linkedin.databus.core.ScnTxnPos;
import com.linkedin.databus.core.TrailFilePositionSetter;
import com.linkedin.databus.core.TrailFilePositionSetter.TransactionSCNFinderCallback;
import com.linkedin.databus.core.util.RateMonitor;
import com.linkedin.databus2.core.DatabusException;

/**
*
* Transaction callback that reads XML trail files, locating transaction boundaries and extracting SCN.
*
*/
public class GGXMLTrailTransactionFinder implements TransactionSCNFinderCallback
{
  public static final String MODULE = GGXMLTrailTransactionFinder.class.getName();
  public static final Logger LOG = Logger.getLogger(MODULE);

  /**
   * The patterns for detecting SCN and transaction are selected in such a way that they won't be ambiguous.
   */
  public static final String TRANSACTION_BEGIN_PREFIX  = "<transaction"; // Uniquely identifies the transaction begin.
  public static final String TRANSACTION_END_PREFIX  = "</transaction"// Uniquely identifies the transaction end

  public static final String TRANSACTION_END_STR  = "</transaction>";

  // X-Path expression for extracting SCN
  public static final String SCN_XPATH_STR = "//transaction/dbupdate/tokens/token[@name=\"TK-CSN\"]/text()";
  public static final String SCN_REGEX_STR = "(<token\\s+name=\"TK-CSN\"\\s*>([0-9]+)\\s*</token>)";
  public boolean _enableRegex = true;

  // Current Txn Position and SCN
  private ScnTxnPos _txnPos;

  // Prev Txn Position and SCN
  private ScnTxnPos _prevTxnPos;

  // For Tracking current cursor position
  private String _currFile;

  /** Byte offset within the file where the current cursor is */
  private long _currFileByteOffset;

  /** Line number of the cursor */
  private long _currLineNumber;

  /** The string buffer containing the current txn */
  private StringBuilder _currTxnStr = new StringBuilder();

  /** TargetSCN to be located */
  private long _targetScn = TrailFilePositionSetter.USE_LATEST_SCN;

  /** Flag to indicate if end of txn is seen */
  private boolean _txnEndSeen = false;

  /** Number of valid txns completely seen by this instance */
  private long _numTxnsSeen = 0;

  /** Number of invalid txns (i.e., containing no valid SCNs) seen by this instance */
  private long _numInvalidTxnsSeen = 0;

  /** Flag to indicate if at least one txn is completely seen */
  private boolean _firstTxnSeen = false;

  private final XPathExpression _expr;
  private final Pattern _rexpr;

  /** Member variables used in intermediate computations in regexQuery() / xpathQuery() */
  private long _minScn, _maxScn;

  private  RateMonitor _queryRateMonitor = new RateMonitor("Query_GGTransactionFinder");

  private  RateMonitor _rateMonitor = new RateMonitor("GGTransactionFinder");

  private boolean _beginTxnSeen = false;

  public GGXMLTrailTransactionFinder(boolean enableRegex) throws Exception
  {
    reset();
    XPathFactory xpathFactory = XPathFactory.newInstance();
    XPath xpath = xpathFactory.newXPath();
    _expr = xpath.compile(SCN_XPATH_STR);
    _rexpr = Pattern.compile(SCN_REGEX_STR);
    _enableRegex = enableRegex;
    _minScn = Long.MAX_VALUE;
    _maxScn = Long.MIN_VALUE;
  }

  public GGXMLTrailTransactionFinder() throws Exception
  {
    this(true);
  }

  @Override
  public void beginFileProcessing(String file)
  {
    if(LOG.isDebugEnabled())
     LOG.debug("Switching to file :" + file);
    _currFile = file;
    _currLineNumber = 0;
    _currFileByteOffset = 0;
  }

  @Override
  public boolean processLine(String line, int newLineCharLen) throws DatabusException
  {
    try
    {
      _rateMonitor.resume();
      _rateMonitor.ticks(line.length());
      String l = line;
      int totalOffset = 0; // tracks the byteOffset within the line on transaction beginning.
      boolean ret = false;

      /**
       * The general XML syntax allow for newlines to be optional between XML elements. Even though GG is empirically
       * shown to be inserting newlines between XML element tags, this assumption is not made here. A single line
       * can contain zero or more complete transactions.
       */
      int beginOffset = l.indexOf(TRANSACTION_BEGIN_PREFIX);
      int endOffset = l.indexOf(TRANSACTION_END_PREFIX);
      if ((beginOffset >= 0) || (endOffset >= 0))
      {
        /**
         * A transaction can be contained in a single or multiple lines. No assumptions should be made about its placement.
         */
        while (true)
        {
          _txnEndSeen = false;

          // Start and end of txns can be in a single line. Moreover, many such transactions be on a line.
          beginOffset = l.indexOf(TRANSACTION_BEGIN_PREFIX);
          endOffset = l.indexOf(TRANSACTION_END_PREFIX);

          if ( beginOffset >= 0)
            totalOffset += beginOffset;

          // no more endpoints (begin or end of transactions)
          if ( (endOffset == -1) && (beginOffset == -1))
            break;

          // Case where only beginning of transaction tag or a complete transaction is present
          if ( (endOffset == -1) ||
              ((beginOffset >= 0 ) && (beginOffset < endOffset)))
          {
            _currTxnStr.setLength(0);
            processBegin(totalOffset);

            if (endOffset == -1)
            {
              _currTxnStr.append(l);
              break;
            } else {
              _currTxnStr.append(l.subSequence(beginOffset, endOffset));
              _currTxnStr.append(TRANSACTION_END_STR);
              processEnd();
              totalOffset += endOffset + TRANSACTION_END_STR.length();
              l = l.substring(endOffset + TRANSACTION_END_STR.length());
            }
          } else if ( (beginOffset == -1) || (beginOffset > endOffset)) {
            // Case where only endTag is seen or a transaction completes and another starts in the same line
            if (beginOffset == -1)
            {
              _currTxnStr.append(l);
              processEnd();
              if (isDone())
                ret = true;
              break// nothing left to process on this line => must break unconditionally
            } else {
              _currTxnStr.append(l.subSequence(0, beginOffset));
              l = l.substring(beginOffset);
              processEnd();
            }
          }

          if (isDone())
          {
            ret = true;
            break;
          }
        }
      } else {
        _currTxnStr.append(l)// continue accumulating "middle stuff" (between begin- and end-transaction tags)
      }
      _currFileByteOffset += line.length();

      if ( newLineCharLen > 0)
        _currFileByteOffset += newLineCharLen;

      _currLineNumber++;
      return ret;
    } finally {
      _rateMonitor.suspend();
    }
  }

  private boolean isDone()
  {
    // last condition works only because code elsewhere has checked for _targetScn >= min SCN ?
    if (_txnEndSeen &&
        ((_targetScn == TrailFilePositionSetter.USE_EARLIEST_SCN)
           || (( _targetScn != TrailFilePositionSetter.USE_LATEST_SCN) && (_txnPos.getMaxScn() >= _targetScn))))
    {
      return true;
    }

    return false;
  }

  /**
   * When transaction begin is seen, this should be called to save the positions
   * @param byteLineOffset
   */
  private void processBegin(int byteLineOffset)
  {
    _prevTxnPos.copyFrom(_txnPos);
    _txnPos.setFile(_currFile);
    _txnPos.setFileOffset(_currFileByteOffset + byteLineOffset);
    _txnPos.setLineNumber(_currLineNumber+1);
    _txnPos.setLineOffset(byteLineOffset);
    _txnPos.setMinScn(-1);
    _txnPos.setMaxScn(-1);
    _txnPos.setTxnRank(_numTxnsSeen); // Rank = Number of transactions before this transactions.
    _beginTxnSeen = true;
  }

  private void xpathQuery() throws DatabusTrailFileParseException
  {
    try
    {
      //Set SCN
      InputSource source = new InputSource(new StringReader(_currTxnStr.toString()));

      _queryRateMonitor.resume()// count time consumed by XML parsing
      Object result = _expr.evaluate(source, XPathConstants.NODESET);
      _queryRateMonitor.ticks(_currTxnStr.length());
      _queryRateMonitor.suspend();

      NodeList nodes = (NodeList) result;
      for (int i = 0; i < nodes.getLength(); i++)
      {
        long newScn = Long.parseLong((nodes.item(i).getNodeValue().trim()));
        _minScn = Math.min(_minScn, newScn);
        _maxScn = Math.max(_maxScn, newScn);
      }
    }
    catch (XPathExpressionException xpxe)
    {
      throw new DatabusTrailFileParseException("Got XPath exception for trail-file entry: " + _currTxnStr, xpxe);
    }
    catch (NumberFormatException nfe)
    {
      throw new DatabusTrailFileParseException("Got parseLong() exception for trail-file entry: " + _currTxnStr, nfe);
    }
  }

  private void regexQuery() throws DatabusTrailFileParseException
  {
    String source = _currTxnStr.toString();

    _queryRateMonitor.resume()// count time consumed by regex parsing
    Matcher result = _rexpr.matcher(source);
    boolean foundScn = result.find();
    _queryRateMonitor.ticks(source.length());
    _queryRateMonitor.suspend();

    if (!foundScn)
    {
      throw new DatabusTrailFileParseException("Could not find TK-SCN with regex; " +
                                               "likely error in trail-file entry: " + _currTxnStr);
    }

    // Loop through all SCNs in the transaction and save max/min ones.
    while (foundScn)
    {
      String m = result.group(2);
      long newScn = Long.parseLong(m)// TODO:  try/catch?  regex will catch most errors, but NumberFormatException still ~possible
      _minScn = Math.min(_minScn, newScn);
      _maxScn = Math.max(_maxScn, newScn);
      _queryRateMonitor.resume()// also count time consumed by regex find() calls
      foundScn = result.find();
      _queryRateMonitor.suspend();
    }
  }

  /**
   * When the transaction end is seen, this should be called to save SCN
   * @throws DatabusException
   */
  private void processEnd() throws DatabusException
  {
    if (! _beginTxnSeen)
    {
      _currTxnStr.setLength(0);
      return;
    }

    _maxScn = Long.valueOf(-1);
    _minScn = Long.MAX_VALUE;

    try
    {
      if (!_enableRegex)
      {
        xpathQuery();
      }
      else
      {
        regexQuery();
      }
    }
    catch (DatabusTrailFileParseException ex)
    {
      LOG.warn("empty/corrupted txn (" + ex.getMessage() + "); resetting invalid _txnPos (" + _txnPos +
               ") to _prevTxnPos (" + _prevTxnPos + ")");
      _txnPos.copyFrom(_prevTxnPos);
      ++_numInvalidTxnsSeen;  // TODO:  wire into metrics/monitoring (need accessor plus whatever lies on caller's end)
      return;
    }

    _txnPos.setMaxScn(_maxScn);
    _txnPos.setMinScn(_minScn);
    _txnEndSeen = true;
    _numTxnsSeen++;

    if (! _firstTxnSeen )
    {
      if ((_targetScn >= 0) && (_targetScn < _minScn))  // common case:  need to try previous trail file instead
        throw new DatabusException("SinceSCN is less than MinScn available in trail file. Requested SinceSCN is :"
            + _targetScn + " but found only : " + _minScn
            + " in Location " + _txnPos);
    }
    _firstTxnSeen = true;
    _beginTxnSeen = false;
    if (LOG.isDebugEnabled())
    {
      LOG.debug("Seen Txn : " + _txnPos);
    }
  }

  @Override
  public void endFileProcessing(String file)
  {
  }

  @Override
  public ScnTxnPos getTxnPos()
  {
    if (_txnPos.isEmpty() && _prevTxnPos.isEmpty())
      return null;

    if (_txnPos.isEmpty())
      return _prevTxnPos;

    return _txnPos;
  }

  @Override
  public void reset()
  {
    _txnPos = new ScnTxnPos();
    _prevTxnPos = new ScnTxnPos();
    _currFile = null;
    _currFileByteOffset = 0;
    _currLineNumber = 0;
    _numTxnsSeen = 0;
    _numInvalidTxnsSeen = 0;
    _txnEndSeen = false;
    _beginTxnSeen = false;
    _firstTxnSeen = false;
    _currTxnStr.setLength(0);
    _queryRateMonitor = new RateMonitor("XPath_GGTransactionFinder");
    _queryRateMonitor.start();
    _queryRateMonitor.suspend();

    _rateMonitor = new RateMonitor("GGTransactionFinder");
    _rateMonitor.start();
    _rateMonitor.suspend();
  }

  @Override
  public void begin(long targetScn)
  {
    _targetScn = targetScn;
  }

  @Override
  public long getNumTxnsSeen()
  {
    return _numTxnsSeen;
  }

  @Override
  public long getCurrentFileOffset()
  {
    return _currFileByteOffset;
  }

  public RateMonitor getQueryRateMonitor()
  {
    return _queryRateMonitor;
  }

  public RateMonitor getRateMonitor()
  {
      return _queryRateMonitor;
  }

  @Override
  public String getPerfStats()
  {
    String overallRateMonitor = _rateMonitor.toString();
    String queryRateMonitor  = _queryRateMonitor.toString();

    // TODO Auto-generated method stub
    return "queryRM : " + queryRateMonitor +
           ", OverallRM : " + overallRateMonitor;
  }

  /** Special-purpose exception used only by processEnd() and the xpath and regex parsers. */
  private class DatabusTrailFileParseException extends Exception
  {
    public DatabusTrailFileParseException()
    {
      super();
    }

    public DatabusTrailFileParseException(String message, Throwable cause)
    {
      super(message, cause);
    }

    public DatabusTrailFileParseException(String message)
    {
      super(message);
    }

    public DatabusTrailFileParseException(Throwable cause)
    {
      super(cause);
    }
  }
}
TOP

Related Classes of com.linkedin.databus2.producers.db.GGXMLTrailTransactionFinder$DatabusTrailFileParseException

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.