Package eu.stratosphere.compiler.dag

Source Code of eu.stratosphere.compiler.dag.DataSourceNode

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.compiler.dag;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import eu.stratosphere.api.common.io.FileInputFormat;
import eu.stratosphere.api.common.io.InputFormat;
import eu.stratosphere.api.common.io.NonParallelInput;
import eu.stratosphere.api.common.io.statistics.BaseStatistics;
import eu.stratosphere.api.common.operators.base.GenericDataSourceBase;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.compiler.DataStatistics;
import eu.stratosphere.compiler.PactCompiler;
import eu.stratosphere.compiler.costs.CostEstimator;
import eu.stratosphere.compiler.costs.Costs;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.compiler.plan.SourcePlanNode;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.util.Visitor;

/**
* The optimizer's internal representation of a data source.
*/
public class DataSourceNode extends OptimizerNode {
 
  private final boolean sequentialInput;

  /**
   * Creates a new DataSourceNode for the given contract.
   *
   * @param pactContract
   *        The data source contract object.
   */
  public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) {
    super(pactContract);
   
    if (pactContract.getUserCodeWrapper().getUserCodeClass() == null) {
      throw new IllegalArgumentException("Input format has not been set.");
    }
   
    if (NonParallelInput.class.isAssignableFrom(pactContract.getUserCodeWrapper().getUserCodeClass())) {
      setDegreeOfParallelism(1);
      setSubtasksPerInstance(1);
      this.sequentialInput = true;
    } else {
      this.sequentialInput = false;
    }
  }

  /**
   * Gets the contract object for this data source node.
   *
   * @return The contract.
   */
  @Override
  public GenericDataSourceBase<?, ?> getPactContract() {
    return (GenericDataSourceBase<?, ?>) super.getPactContract();
  }

  @Override
  public String getName() {
    return "Data Source";
  }

  @Override
  public boolean isMemoryConsumer() {
    return false;
  }
 

  @Override
  public void setDegreeOfParallelism(int degreeOfParallelism) {
    // if unsplittable, DOP remains at 1
    if (!this.sequentialInput) {
      super.setDegreeOfParallelism(degreeOfParallelism);
    }
  }
 

  @Override
  public void setSubtasksPerInstance(int instancesPerMachine) {
    // if unsplittable, DOP remains at 1
    if (!this.sequentialInput) {
      super.setSubtasksPerInstance(instancesPerMachine);
    }
  }

  @Override
  public List<PactConnection> getIncomingConnections() {
    return Collections.<PactConnection>emptyList();
  }

  @Override
  public void setInput(Map<Operator<?>, OptimizerNode> contractToNode) {}

  @Override
  protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
    // see, if we have a statistics object that can tell us a bit about the file
    if (statistics != null) {
      // instantiate the input format, as this is needed by the statistics
      InputFormat<?, ?> format = null;
      String inFormatDescription = "<unknown>";
     
      try {
        format = getPactContract().getFormatWrapper().getUserCodeObject();
        Configuration config = getPactContract().getParameters();
        config.setClassLoader(format.getClass().getClassLoader());
        format.configure(config);
      }
      catch (Throwable t) {
        if (PactCompiler.LOG.isWarnEnabled()) {
          PactCompiler.LOG.warn("Could not instantiate InputFormat to obtain statistics."
            + " Limited statistics will be available.", t);
        }
        return;
      }
      try {
        inFormatDescription = format.toString();
      }
      catch (Throwable t) {}
     
      // first of all, get the statistics from the cache
      final String statisticsKey = getPactContract().getStatisticsKey();
      final BaseStatistics cachedStatistics = statistics.getBaseStatistics(statisticsKey);
     
      BaseStatistics bs = null;
      try {
        bs = format.getStatistics(cachedStatistics);
      }
      catch (Throwable t) {
        if (PactCompiler.LOG.isWarnEnabled()) {
          PactCompiler.LOG.warn("Error obtaining statistics from input format: " + t.getMessage(), t);
        }
      }
     
      if (bs != null) {
        final long len = bs.getTotalInputSize();
        if (len == BaseStatistics.SIZE_UNKNOWN) {
          if (PactCompiler.LOG.isInfoEnabled()) {
            PactCompiler.LOG.info("Compiler could not determine the size of input '" + inFormatDescription + "'. Using default estimates.");
          }
        }
        else if (len >= 0) {
          this.estimatedOutputSize = len;
        }
       
        final long card = bs.getNumberOfRecords();
        if (card != BaseStatistics.NUM_RECORDS_UNKNOWN) {
          this.estimatedNumRecords = card;
        }
      }
    }
  }

  @Override
  public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
    // no children, so nothing to compute
  }

  @Override
  public void computeUnclosedBranchStack() {
    // because there are no inputs, there are no unclosed branches.
    this.openBranches = Collections.emptyList();
  }

  @Override
  public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    if (this.cachedPlans != null) {
      return this.cachedPlans;
    }
   
    SourcePlanNode candidate = new SourcePlanNode(this, "DataSource("+this.getPactContract().getName()+")");
    candidate.updatePropertiesWithUniqueSets(getUniqueFields());
   
    final Costs costs = new Costs();
    if (FileInputFormat.class.isAssignableFrom(getPactContract().getFormatWrapper().getUserCodeClass()) &&
        this.estimatedOutputSize >= 0)
    {
      estimator.addFileInputCost(this.estimatedOutputSize, costs);
    }
    candidate.setCosts(costs);

    // since there is only a single plan for the data-source, return a list with that element only
    List<PlanNode> plans = new ArrayList<PlanNode>(1);
    plans.add(candidate);

    this.cachedPlans = plans;
    return plans;
  }

  @Override
  public boolean isFieldConstant(int input, int fieldNumber) {
    return false;
  }
 
  @Override
  public void accept(Visitor<OptimizerNode> visitor) {
    if (visitor.preVisit(this)) {
      visitor.postVisit(this);
    }
  }
}
TOP

Related Classes of eu.stratosphere.compiler.dag.DataSourceNode

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.