Package org.apache.falcon.workflow

Source Code of org.apache.falcon.workflow.OozieProcessWorkflowBuilder

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.falcon.workflow;

import org.apache.commons.lang.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.CatalogStorage;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.ProcessHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.Frequency.TimeUnit;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.process.EngineType;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.entity.v0.process.Property;
import org.apache.falcon.entity.v0.process.Workflow;
import org.apache.falcon.expression.ExpressionHelper;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
import org.apache.falcon.oozie.coordinator.CONTROLS;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.falcon.oozie.coordinator.DATAIN;
import org.apache.falcon.oozie.coordinator.DATAOUT;
import org.apache.falcon.oozie.coordinator.DATASETS;
import org.apache.falcon.oozie.coordinator.INPUTEVENTS;
import org.apache.falcon.oozie.coordinator.OUTPUTEVENTS;
import org.apache.falcon.oozie.coordinator.SYNCDATASET;
import org.apache.falcon.oozie.coordinator.WORKFLOW;
import org.apache.falcon.oozie.workflow.ACTION;
import org.apache.falcon.oozie.workflow.DELETE;
import org.apache.falcon.oozie.workflow.PIG;
import org.apache.falcon.oozie.workflow.PREPARE;
import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
import org.apache.falcon.security.CurrentUser;
import org.apache.falcon.update.UpdateHelper;
import org.apache.falcon.util.OozieUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.Logger;
import org.apache.oozie.client.CoordinatorJob.Timeunit;
import org.apache.oozie.client.OozieClient;

import javax.xml.bind.JAXBElement;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

/**
* Oozie workflow builder for Falcon process entities.
*/
public class OozieProcessWorkflowBuilder extends OozieWorkflowBuilder<Process> {
    private static final Logger LOG = Logger.getLogger(OozieProcessWorkflowBuilder.class);

    private static final Set<String> FALCON_PROCESS_HIVE_ACTIONS = new HashSet<String>(
            Arrays.asList(new String[]{"recordsize", "user-oozie-workflow", "user-pig-job", "user-hive-job", }));

    public OozieProcessWorkflowBuilder(Process entity) {
        super(entity);
    }

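    /**
     * Builds the Oozie job properties needed to schedule this process on each of the
     * given clusters. For every cluster, the bundle is mapped to a new staging path,
     * an optional oozie.libpath is added, and metadata for optional inputs is exposed
     * as coordinator properties. The returned map is keyed by cluster name; clusters
     * with an empty validity window are not scheduled.
     */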
    @Override
    public Map<String, Properties> newWorkflowSchedule(String... clusters) throws FalconException {
        Map<String, Properties> propertiesMap = new HashMap<String, Properties>();

        for (String clusterName : clusters) {
            org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(entity, clusterName);
            if (processCluster.getValidity().getStart().compareTo(processCluster.getValidity().getEnd()) >= 0) {
                LOG.info("process validity start <= end for cluster " + clusterName + ". Skipping schedule");
                break;
            }

            Cluster cluster = CONFIG_STORE.get(EntityType.CLUSTER, processCluster.getName());
            Path bundlePath = EntityUtil.getNewStagingPath(cluster, entity);
            map(cluster, bundlePath);
            Properties properties = createAppProperties(clusterName, bundlePath, CurrentUser.getUser());

            //Add libpath
            String libPath = entity.getWorkflow().getLib();
            if (!StringUtils.isEmpty(libPath)) {
                String path = libPath.replace("${nameNode}", "");
                properties.put(OozieClient.LIBPATH, "${nameNode}" + path);
            }

            if (entity.getInputs() != null) {
                for (Input in : entity.getInputs().getInputs()) {
                    if (in.isOptional()) {
                        addOptionalInputProperties(properties, in, clusterName);
                    }
                }
            }
            propertiesMap.put(clusterName, properties);
        }
        return propertiesMap;
    }

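    /**
     * Exposes the feed metadata (frequency, timezone, initial instance, URI template,
     * start/end instance) of an optional input as job properties, so the coordinator
     * can resolve the optional dataset at runtime.
     */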
    private void addOptionalInputProperties(Properties properties, Input in, String clusterName)
        throws FalconException {
        Feed feed = EntityUtil.getEntity(EntityType.FEED, in.getFeed());
        org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(feed, clusterName);
        String inName = in.getName();
        properties.put(inName + ".frequency", String.valueOf(feed.getFrequency().getFrequency()));
        properties.put(inName + ".freq_timeunit", mapToCoordTimeUnit(feed.getFrequency().getTimeUnit()).name());
        properties.put(inName + ".timezone", feed.getTimezone().getID());
        properties.put(inName + ".end_of_duration", Timeunit.NONE.name());
        properties.put(inName + ".initial-instance", SchemaHelper.formatDateUTC(cluster.getValidity().getStart()));
        properties.put(inName + ".done-flag", "notused");

        String locPath = FeedHelper.createStorage(clusterName, feed)
                .getUriTemplate(LocationType.DATA).replace('$', '%');
        properties.put(inName + ".uri-template", locPath);

        properties.put(inName + ".start-instance", in.getStart());
        properties.put(inName + ".end-instance", in.getEnd());
    }

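    /**
     * Maps a Falcon frequency time unit to the equivalent Oozie coordinator Timeunit.
     */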
    private Timeunit mapToCoordTimeUnit(TimeUnit tu) {
        switch (tu) {
        case days:
            return Timeunit.DAY;

        case hours:
            return Timeunit.HOUR;

        case minutes:
            return Timeunit.MINUTE;

        case months:
            return Timeunit.MONTH;

        default:
            throw new IllegalArgumentException("Unhandled time unit " + tu);
        }
    }

    @Override
    public Date getNextStartTime(Process process, String cluster, Date now) throws FalconException {
        org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(process, cluster);
        return EntityUtil.getNextStartTime(processCluster.getValidity().getStart(),
                process.getFrequency(), process.getTimezone(), now);
    }

    @Override
    public String[] getWorkflowNames() {
        return new String[]{EntityUtil.getWorkflowName(Tag.DEFAULT, entity).toString()};
    }

    private static final String DEFAULT_WF_TEMPLATE = "/config/workflow/process-parent-workflow.xml";
    private static final int THIRTY_MINUTES = 30 * 60 * 1000;

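    /**
     * Copies the user workflow (and its lib directory, if any) into the bundle staging
     * directory, records their checksums for later update detection, and returns the
     * default coordinator for the cluster.
     */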
    @Override
    public List<COORDINATORAPP> getCoordinators(Cluster cluster, Path bundlePath) throws FalconException {
        try {
            FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));

            //Copy user workflow and lib to staging dir
            Map<String, String> checksums = UpdateHelper.checksumAndCopy(fs, new Path(entity.getWorkflow().getPath()),
                new Path(bundlePath, EntityUtil.PROCESS_USER_DIR));
            if (entity.getWorkflow().getLib() != null && fs.exists(new Path(entity.getWorkflow().getLib()))) {
                checksums.putAll(UpdateHelper.checksumAndCopy(fs, new Path(entity.getWorkflow().getLib()),
                    new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR)));
            }

            writeChecksums(fs, new Path(bundlePath, EntityUtil.PROCESS_CHECKSUM_FILE), checksums);
        } catch (IOException e) {
            throw new FalconException("Failed to copy user workflow/lib", e);
        }

        List<COORDINATORAPP> apps = new ArrayList<COORDINATORAPP>();
        apps.add(createDefaultCoordinator(cluster, bundlePath));

        return apps;
    }

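    /**
     * Persists the file-to-checksum map as simple key=value lines at the given path.
     */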
    private void writeChecksums(FileSystem fs, Path path, Map<String, String> checksums) throws FalconException {
        try {
            FSDataOutputStream stream = fs.create(path);
            try {
                for (Map.Entry<String, String> entry : checksums.entrySet()) {
                    stream.write((entry.getKey() + "=" + entry.getValue() + "\n").getBytes());
                }
            } finally {
                stream.close();
            }
        } catch (IOException e) {
            throw new FalconException("Failed to copy user workflow/lib", e);
        }
    }

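    /**
     * Resolves the staged location of the user workflow: a single workflow file is staged
     * under the user dir with its file name, while a directory maps to the user dir itself.
     */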
    private Path getUserWorkflowPath(Cluster cluster, Path bundlePath) throws FalconException {
        try {
            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
            Path wfPath = new Path(entity.getWorkflow().getPath());
            if (fs.isFile(wfPath)) {
                return new Path(bundlePath, EntityUtil.PROCESS_USER_DIR + "/" + wfPath.getName());
            } else {
                return new Path(bundlePath, EntityUtil.PROCESS_USER_DIR);
            }
        } catch(IOException e) {
            throw new FalconException("Failed to get workflow path", e);
        }
    }

    private Path getUserLibPath(Cluster cluster, Path bundlePath) throws FalconException {
        try {
            if (entity.getWorkflow().getLib() == null) {
                return null;
            }
            Path libPath = new Path(entity.getWorkflow().getLib());

            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
            if (fs.isFile(libPath)) {
                return new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR + "/" + libPath.getName());
            } else {
                return new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR);
            }
        } catch(IOException e) {
            throw new FalconException("Failed to get user lib path", e);
        }
    }

    /**
     * Creates default oozie coordinator.
     *
     * @param cluster    - Cluster for which the coordinator app need to be created
     * @param bundlePath - bundle path
     * @return COORDINATORAPP
     * @throws FalconException on Error
     */
    public COORDINATORAPP createDefaultCoordinator(Cluster cluster, Path bundlePath) throws FalconException {
        if (entity == null) {
            return null;
        }

        COORDINATORAPP coord = new COORDINATORAPP();
        String coordName = EntityUtil.getWorkflowName(Tag.DEFAULT, entity).toString();
        Path coordPath = getCoordPath(bundlePath, coordName);

        // coord attributes
        initializeCoordAttributes(cluster, entity, coord, coordName);

        CONTROLS controls = initializeControls(entity); // controls
        coord.setControls(controls);

        // Configuration
        Map<String, String> props = createCoordDefaultConfiguration(cluster, coordPath, coordName);

        initializeInputPaths(cluster, entity, coord, props); // inputs
        initializeOutputPaths(cluster, entity, coord, props); // outputs

        Workflow processWorkflow = entity.getWorkflow();
        propagateUserWorkflowProperties(processWorkflow, props, entity.getName());

        // create parent wf
        createWorkflow(cluster, entity, processWorkflow, coordName, coordPath);

        WORKFLOW wf = new WORKFLOW();
        wf.setAppPath(getStoragePath(coordPath.toString()));
        wf.setConfiguration(getCoordConfig(props));

        // set coord action to parent wf
        org.apache.falcon.oozie.coordinator.ACTION action = new org.apache.falcon.oozie.coordinator.ACTION();
        action.setWorkflow(wf);
        coord.setAction(action);

        return coord;
    }

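    /**
     * Sets the coordinator name, start/end times (from the process cluster validity,
     * formatted as UTC), timezone and frequency.
     */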
    private void initializeCoordAttributes(Cluster cluster, Process process, COORDINATORAPP coord, String coordName) {
        coord.setName(coordName);
        org.apache.falcon.entity.v0.process.Cluster processCluster =
            ProcessHelper.getCluster(process, cluster.getName());
        coord.setStart(SchemaHelper.formatDateUTC(processCluster.getValidity().getStart()));
        coord.setEnd(SchemaHelper.formatDateUTC(processCluster.getValidity().getEnd()));
        coord.setTimezone(process.getTimezone().getID());
        coord.setFrequency("${coord:" + process.getFrequency().toString() + "}");
    }

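    /**
     * Builds the coordinator controls: concurrency and execution order from the process,
     * a timeout that defaults to six times the frequency (with a 30 minute floor) when
     * not set explicitly, and a throttle derived from the timeout/frequency ratio.
     */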
    private CONTROLS initializeControls(Process process)
        throws FalconException {
        CONTROLS controls = new CONTROLS();
        controls.setConcurrency(String.valueOf(process.getParallel()));
        controls.setExecution(process.getOrder().name());

        Frequency timeout = process.getTimeout();
        long frequencyInMillis = ExpressionHelper.get().evaluate(process.getFrequency().toString(), Long.class);
        long timeoutInMillis;
        if (timeout != null) {
            timeoutInMillis = ExpressionHelper.get().
                evaluate(process.getTimeout().toString(), Long.class);
        } else {
            timeoutInMillis = frequencyInMillis * 6;
            if (timeoutInMillis < THIRTY_MINUTES) {
                timeoutInMillis = THIRTY_MINUTES;
            }
        }
        controls.setTimeout(String.valueOf(timeoutInMillis / (1000 * 60)));

        if (timeoutInMillis / frequencyInMillis * 2 > 0) {
            controls.setThrottle(String.valueOf(timeoutInMillis / frequencyInMillis * 2));
        }

        return controls;
    }

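    /**
     * Registers a dataset and data-in event for every mandatory input, records the EL
     * expression for each input path, and propagates the late-data properties. When the
     * process has no inputs, placeholder values are set instead.
     */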
    private void initializeInputPaths(Cluster cluster, Process process, COORDINATORAPP coord,
        Map<String, String> props) throws FalconException {
        if (process.getInputs() == null) {
            props.put("falconInputFeeds", "NONE");
            props.put("falconInPaths", IGNORE);
            return;
        }

        List<String> inputFeeds = new ArrayList<String>();
        List<String> inputPaths = new ArrayList<String>();
        List<String> inputFeedStorageTypes = new ArrayList<String>();
        for (Input input : process.getInputs().getInputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            if (!input.isOptional()) {
                if (coord.getDatasets() == null) {
                    coord.setDatasets(new DATASETS());
                }
                if (coord.getInputEvents() == null) {
                    coord.setInputEvents(new INPUTEVENTS());
                }

                SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, input.getName(), LocationType.DATA);
                coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

                DATAIN datain = createDataIn(input);
                coord.getInputEvents().getDataIn().add(datain);
            }

            String inputExpr = null;
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                inputExpr = getELExpression("dataIn('" + input.getName() + "', '" + input.getPartition() + "')");
                props.put(input.getName(), inputExpr);
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                inputExpr = "${coord:dataIn('" + input.getName() + "')}";
                propagateCatalogTableProperties(input, (CatalogStorage) storage, props);
            }

            inputFeeds.add(feed.getName());
            inputPaths.add(inputExpr);
            inputFeedStorageTypes.add(storage.getType().name());
        }

        propagateLateDataProperties(inputFeeds, inputPaths, inputFeedStorageTypes, props);
    }

    private void propagateLateDataProperties(List<String> inputFeeds, List<String> inputPaths,
                                             List<String> inputFeedStorageTypes, Map<String, String> props) {
        // populate late data handler - should-record action
        props.put("falconInputFeeds", join(inputFeeds.iterator(), '#'));
        props.put("falconInPaths", join(inputPaths.iterator(), '#'));

        // storage type for each corresponding feed sent as a param to LateDataHandler
        // needed to compute usage based on storage type in LateDataHandler
        props.put("falconInputFeedStorageTypes", join(inputFeedStorageTypes.iterator(), '#'));
    }

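    /**
     * Registers a dataset and data-out event for every output, propagates filesystem
     * (stats/meta/tmp) or catalog table properties depending on the feed storage type,
     * and records the output feed names and instance paths for the parent workflow.
     */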
    private void initializeOutputPaths(Cluster cluster, Process process, COORDINATORAPP coord,
                                       Map<String, String> props) throws FalconException {
        if (process.getOutputs() == null) {
            props.put(ARG.feedNames.getPropName(), "NONE");
            props.put(ARG.feedInstancePaths.getPropName(), IGNORE);
            return;
        }

        if (coord.getDatasets() == null) {
            coord.setDatasets(new DATASETS());
        }

        if (coord.getOutputEvents() == null) {
            coord.setOutputEvents(new OUTPUTEVENTS());
        }

        List<String> outputFeeds = new ArrayList<String>();
        List<String> outputPaths = new ArrayList<String>();
        for (Output output : process.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, output.getName(), LocationType.DATA);
            coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

            DATAOUT dataout = createDataOut(output);
            coord.getOutputEvents().getDataOut().add(dataout);

            String outputExpr = "${coord:dataOut('" + output.getName() + "')}";
            outputFeeds.add(feed.getName());
            outputPaths.add(outputExpr);

            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                props.put(output.getName(), outputExpr);

                propagateFileSystemProperties(output, feed, cluster, coord, storage, props);
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                propagateCatalogTableProperties(output, (CatalogStorage) storage, props);
            }
        }

        // Output feed name and path for parent workflow
        props.put(ARG.feedNames.getPropName(), join(outputFeeds.iterator(), ','));
        props.put(ARG.feedInstancePaths.getPropName(), join(outputPaths.iterator(), ','));
    }

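    /**
     * Creates an Oozie SYNCDATASET for the feed on this cluster: frequency, URI template
     * (with thrift replaced by hcat for table storage), initial instance, timezone and done flag.
     */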
    private SYNCDATASET createDataSet(Feed feed, Cluster cluster, Storage storage,
        String datasetName, LocationType locationType) throws FalconException {

        SYNCDATASET syncdataset = new SYNCDATASET();
        syncdataset.setName(datasetName);
        syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");

        String uriTemplate = storage.getUriTemplate(locationType);
        if (storage.getType() == Storage.TYPE.TABLE) {
            uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
        }
        syncdataset.setUriTemplate(uriTemplate);

        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
        syncdataset.setInitialInstance(SchemaHelper.formatDateUTC(feedCluster.getValidity().getStart()));
        syncdataset.setTimezone(feed.getTimezone().getID());

        if (feed.getAvailabilityFlag() == null) {
            syncdataset.setDoneFlag("");
        } else {
            syncdataset.setDoneFlag(feed.getAvailabilityFlag());
        }

        return syncdataset;
    }

    private DATAOUT createDataOut(Output output) {
        DATAOUT dataout = new DATAOUT();
        dataout.setName(output.getName());
        dataout.setDataset(output.getName());
        dataout.setInstance(getELExpression(output.getInstance()));
        return dataout;
    }

    private DATAIN createDataIn(Input input) {
        DATAIN datain = new DATAIN();
        datain.setName(input.getName());
        datain.setDataset(input.getName());
        datain.setStartInstance(getELExpression(input.getStart()));
        datain.setEndInstance(getELExpression(input.getEnd()));
        return datain;
    }

    private void propagateFileSystemProperties(Output output, Feed feed, Cluster cluster, COORDINATORAPP coord,
        Storage storage, Map<String, String> props)
        throws FalconException {

        // stats and meta paths
        createOutputEvent(output, feed, cluster, LocationType.STATS, coord, props, storage);
        createOutputEvent(output, feed, cluster, LocationType.META, coord, props, storage);
        createOutputEvent(output, feed, cluster, LocationType.TMP, coord, props, storage);
    }

    //SUSPEND CHECKSTYLE CHECK ParameterNumberCheck
    private void createOutputEvent(Output output, Feed feed, Cluster cluster, LocationType locType,
        COORDINATORAPP coord, Map<String, String> props, Storage storage)
        throws FalconException {

        String name = output.getName();
        String type = locType.name().toLowerCase();

        SYNCDATASET dataset = createDataSet(feed, cluster, storage, name + type, locType);
        coord.getDatasets().getDatasetOrAsyncDataset().add(dataset);

        DATAOUT dataout = new DATAOUT();
        dataout.setName(name + type);
        dataout.setDataset(name + type);
        dataout.setInstance(getELExpression(output.getInstance()));

        OUTPUTEVENTS outputEvents = coord.getOutputEvents();
        if (outputEvents == null) {
            outputEvents = new OUTPUTEVENTS();
            coord.setOutputEvents(outputEvents);
        }
        outputEvents.getDataOut().add(dataout);

        String outputExpr = "${coord:dataOut('" + name + type + "')}";
        props.put(name + "." + type, outputExpr);
    }
    //RESUME CHECKSTYLE CHECK ParameterNumberCheck

    private void propagateCommonCatalogTableProperties(CatalogStorage tableStorage,
        Map<String, String> props, String prefix) {
        props.put(prefix + "_storage_type", tableStorage.getType().name());
        props.put(prefix + "_catalog_url", tableStorage.getCatalogUrl());
        props.put(prefix + "_database", tableStorage.getDatabase());
        props.put(prefix + "_table", tableStorage.getTable());
    }

    private void propagateCatalogTableProperties(Input input, CatalogStorage tableStorage,
        Map<String, String> props) {
        String prefix = "falcon_" + input.getName();

        propagateCommonCatalogTableProperties(tableStorage, props, prefix);

        props.put(prefix + "_partition_filter_pig",
            "${coord:dataInPartitionFilter('" + input.getName() + "', 'pig')}");
        props.put(prefix + "_partition_filter_hive",
            "${coord:dataInPartitionFilter('" + input.getName() + "', 'hive')}");
        props.put(prefix + "_partition_filter_java",
            "${coord:dataInPartitionFilter('" + input.getName() + "', 'java')}");
    }

    private void propagateCatalogTableProperties(Output output, CatalogStorage tableStorage,
                                                 Map<String, String> props) {
        String prefix = "falcon_" + output.getName();

        propagateCommonCatalogTableProperties(tableStorage, props, prefix);

        props.put(prefix + "_dataout_partitions",
            "${coord:dataOutPartitions('" + output.getName() + "')}");
        props.put(prefix + "_dated_partition_value", "${coord:dataOutPartitionValue('"
            + output.getName() + "', '" + tableStorage.getDatedPartitionKey() + "')}");
    }

    private String join(Iterator<String> itr, char sep) {
        String joinedStr = StringUtils.join(itr, sep);
        if (joinedStr.isEmpty()) {
            joinedStr = "null";
        }
        return joinedStr;
    }

    private String getELExpression(String expr) {
        if (expr != null) {
            expr = "${" + expr + "}";
        }
        return expr;
    }

    @Override
    protected Map<String, String> getEntityProperties() {
        Map<String, String> props = new HashMap<String, String>();
        if (entity.getProperties() != null) {
            for (Property prop : entity.getProperties().getProperties()) {
                props.put(prop.getName(), prop.getValue());
            }
        }
        return props;
    }

    private void propagateUserWorkflowProperties(Workflow processWorkflow,
        Map<String, String> props, String processName) {
        props.put("userWorkflowName", ProcessHelper.getProcessWorkflowName(
            processWorkflow.getName(), processName));
        props.put("userWorkflowVersion", processWorkflow.getVersion());
        props.put("userWorkflowEngine", processWorkflow.getEngine().value());
    }

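    /**
     * Materializes the parent workflow from the bundled template: adds lib extensions,
     * sets up Hive credentials when table storage is used, points the engine-specific
     * user action (oozie sub-workflow, pig or hive) at the staged user workflow, adds
     * retries to Falcon system actions, and marshals the result to the parent workflow path.
     */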
    protected void createWorkflow(Cluster cluster, Process process, Workflow processWorkflow,
                                  String wfName, Path parentWfPath) throws FalconException {
        WORKFLOWAPP wfApp = getWorkflowTemplate(DEFAULT_WF_TEMPLATE);
        wfApp.setName(wfName);
        try {
            addLibExtensionsToWorkflow(cluster, wfApp, EntityType.PROCESS, null);
        } catch (IOException e) {
            throw new FalconException("Failed to add library extensions for the workflow", e);
        }

        final boolean isTableStorageType = isTableStorageType(cluster, process);
        if (isTableStorageType) {
            setupHiveCredentials(cluster, parentWfPath, wfApp);
        }

        String userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent()).toString();
        EngineType engineType = processWorkflow.getEngine();
        for (Object object : wfApp.getDecisionOrForkOrJoin()) {
            if (!(object instanceof ACTION)) {
                continue;
            }

            ACTION action = (ACTION) object;
            String actionName = action.getName();
            if (engineType == EngineType.OOZIE && actionName.equals("user-oozie-workflow")) {
                action.getSubWorkflow().setAppPath("${nameNode}" + userWfPath);
            } else if (engineType == EngineType.PIG && actionName.equals("user-pig-job")) {
                decoratePIGAction(cluster, process, action.getPig(), parentWfPath, isTableStorageType);
            } else if (engineType == EngineType.HIVE && actionName.equals("user-hive-job")) {
                decorateHiveAction(cluster, process, action, parentWfPath);
            } else if (FALCON_ACTIONS.contains(actionName)) {
                decorateWithOozieRetries(action);
                if (isTableStorageType && actionName.equals("recordsize")) {
                    // adds hive-site.xml in actions classpath
                    action.getJava().setJobXml("${wf:appPath()}/conf/hive-site.xml");
                }
            }
        }

        //Create parent workflow
        marshal(cluster, wfApp, parentWfPath);
    }

    private void setupHiveCredentials(Cluster cluster, Path parentWfPath,
                                      WORKFLOWAPP wfApp) throws FalconException {
        // create hive-site.xml file so actions can use it in the classpath
        createHiveConfiguration(cluster, parentWfPath, ""); // DO NOT ADD PREFIX!!!

        if (isSecurityEnabled) {
            // add hcatalog credentials for secure mode and add a reference to each action
            addHCatalogCredentials(wfApp, cluster, HIVE_CREDENTIAL_NAME, FALCON_PROCESS_HIVE_ACTIONS);
        }
    }

    private void decoratePIGAction(Cluster cluster, Process process, PIG pigAction,
                                   Path parentWfPath, boolean isTableStorageType) throws FalconException {
        Path userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent());
        pigAction.setScript("${nameNode}" + userWfPath.toString());

        addPrepareDeleteOutputPath(process, pigAction);

        final List<String> paramList = pigAction.getParam();
        addInputFeedsAsParams(paramList, process, cluster, EngineType.PIG.name().toLowerCase());
        addOutputFeedsAsParams(paramList, process, cluster);

        propagateProcessProperties(pigAction, process);

        if (isTableStorageType) { // adds hive-site.xml in pig classpath
            pigAction.getFile().add("${wf:appPath()}/conf/hive-site.xml");
        }

        addArchiveForCustomJars(cluster, pigAction.getArchive(),
            getUserLibPath(cluster, parentWfPath.getParent()));
    }

    private void decorateHiveAction(Cluster cluster, Process process, ACTION wfAction,
                                    Path parentWfPath) throws FalconException {

        JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(wfAction);
        org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();

        Path userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent());
        hiveAction.setScript("${nameNode}" + userWfPath.toString());

        addPrepareDeleteOutputPath(process, hiveAction);

        final List<String> paramList = hiveAction.getParam();
        addInputFeedsAsParams(paramList, process, cluster, EngineType.HIVE.name().toLowerCase());
        addOutputFeedsAsParams(paramList, process, cluster);

        propagateProcessProperties(hiveAction, process);

        // adds hive-site.xml in hive classpath
        hiveAction.setJobXml("${wf:appPath()}/conf/hive-site.xml");

        addArchiveForCustomJars(cluster, hiveAction.getArchive(),
            getUserLibPath(cluster, parentWfPath.getParent()));

        OozieUtils.marshalHiveAction(wfAction, actionJaxbElement);
    }

    private void addPrepareDeleteOutputPath(Process process,
                                            PIG pigAction) throws FalconException {
        List<String> deleteOutputPathList = getPrepareDeleteOutputPathList(process);
        if (deleteOutputPathList.isEmpty()) {
            return;
        }

        final PREPARE prepare = new PREPARE();
        final List<DELETE> deleteList = prepare.getDelete();

        for (String deletePath : deleteOutputPathList) {
            final DELETE delete = new DELETE();
            delete.setPath(deletePath);
            deleteList.add(delete);
        }

        if (!deleteList.isEmpty()) {
            pigAction.setPrepare(prepare);
        }
    }

    private void addPrepareDeleteOutputPath(Process process, org.apache.falcon.oozie.hive.ACTION hiveAction)
        throws FalconException {

        List<String> deleteOutputPathList = getPrepareDeleteOutputPathList(process);
        if (deleteOutputPathList.isEmpty()) {
            return;
        }

        org.apache.falcon.oozie.hive.PREPARE prepare = new org.apache.falcon.oozie.hive.PREPARE();
        List<org.apache.falcon.oozie.hive.DELETE> deleteList = prepare.getDelete();

        for (String deletePath : deleteOutputPathList) {
            org.apache.falcon.oozie.hive.DELETE delete = new org.apache.falcon.oozie.hive.DELETE();
            delete.setPath(deletePath);
            deleteList.add(delete);
        }

        if (!deleteList.isEmpty()) {
            hiveAction.setPrepare(prepare);
        }
    }

    private List<String> getPrepareDeleteOutputPathList(Process process) throws FalconException {
        final List<String> deleteList = new ArrayList<String>();
        if (process.getOutputs() == null) {
            return deleteList;
        }

        for (Output output : process.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());

            if (FeedHelper.getStorageType(feed) == Storage.TYPE.TABLE) {
                continue; // prepare delete only applies to FileSystem storage
            }

            deleteList.add("${wf:conf('" + output.getName() + "')}");
        }

        return deleteList;
    }

    private void addInputFeedsAsParams(List<String> paramList, Process process, Cluster cluster,
                                       String engineType) throws FalconException {
        if (process.getInputs() == null) {
            return;
        }

        for (Input input : process.getInputs().getInputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            final String inputName = input.getName();
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                paramList.add(inputName + "=${" + inputName + "}"); // no prefix for backwards compatibility
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                final String paramName = "falcon_" + inputName; // prefix 'falcon' for new params
                Map<String, String> props = new HashMap<String, String>();
                propagateCommonCatalogTableProperties((CatalogStorage) storage, props, paramName);
                for (String key : props.keySet()) {
                    paramList.add(key + "=${wf:conf('" + key + "')}");
                }

                paramList.add(paramName + "_filter=${wf:conf('"
                    + paramName + "_partition_filter_" + engineType + "')}");
            }
        }
    }

    private void addOutputFeedsAsParams(List<String> paramList, Process process,
                                        Cluster cluster) throws FalconException {
        if (process.getOutputs() == null) {
            return;
        }

        for (Output output : process.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                final String outputName = output.getName(); // no prefix for backwards compatibility
                paramList.add(outputName + "=${" + outputName + "}");
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                Map<String, String> props = new HashMap<String, String>();
                propagateCatalogTableProperties(output, (CatalogStorage) storage, props); // prefix is auto added
                for (String key : props.keySet()) {
                    paramList.add(key + "=${wf:conf('" + key + "')}");
                }
            }
        }
    }

    private void propagateProcessProperties(PIG pigAction, Process process) {
        org.apache.falcon.entity.v0.process.Properties processProperties = process.getProperties();
        if (processProperties == null) {
            return;
        }

        // Propagate user defined properties to job configuration
        final List<org.apache.falcon.oozie.workflow.CONFIGURATION.Property> configuration =
            pigAction.getConfiguration().getProperty();

        // Propagate user defined properties to pig script as macros
        // passed as parameters -p name=value that can be accessed as $name
        final List<String> paramList = pigAction.getParam();

        for (org.apache.falcon.entity.v0.process.Property property : processProperties.getProperties()) {
            org.apache.falcon.oozie.workflow.CONFIGURATION.Property configProperty =
                new org.apache.falcon.oozie.workflow.CONFIGURATION.Property();
            configProperty.setName(property.getName());
            configProperty.setValue(property.getValue());
            configuration.add(configProperty);

            paramList.add(property.getName() + "=" + property.getValue());
        }
    }

    private void propagateProcessProperties(org.apache.falcon.oozie.hive.ACTION hiveAction, Process process) {
        org.apache.falcon.entity.v0.process.Properties processProperties = process.getProperties();
        if (processProperties == null) {
            return;
        }

        // Propagate user defined properties to job configuration
        final List<org.apache.falcon.oozie.hive.CONFIGURATION.Property> configuration =
            hiveAction.getConfiguration().getProperty();

        // Propagate user defined properties to pig script as macros
        // passed as parameters -p name=value that can be accessed as $name
        final List<String> paramList = hiveAction.getParam();

        for (org.apache.falcon.entity.v0.process.Property property : processProperties.getProperties()) {
            org.apache.falcon.oozie.hive.CONFIGURATION.Property configProperty =
                new org.apache.falcon.oozie.hive.CONFIGURATION.Property();
            configProperty.setName(property.getName());
            configProperty.setValue(property.getValue());
            configuration.add(configProperty);

            paramList.add(property.getName() + "=" + property.getValue());
        }
    }

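    /**
     * Adds the user library jars to the action's archive list: either the single lib file,
     * or every *.jar found directly under the lib directory.
     */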
    private void addArchiveForCustomJars(Cluster cluster, List<String> archiveList,
        Path libPath) throws FalconException {
        if (libPath == null) {
            return;
        }

        try {
            final FileSystem fs = libPath.getFileSystem(ClusterHelper.getConfiguration(cluster));
            if (fs.isFile(libPath)) {  // File, not a Dir
                archiveList.add(libPath.toString());
                return;
            }

            // lib path is a directory, add each file under the lib dir to archive
            final FileStatus[] fileStatuses = fs.listStatus(libPath, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    try {
                        return fs.isFile(path) && path.getName().endsWith(".jar");
                    } catch (IOException ignore) {
                        return false;
                    }
                }
            });

            for (FileStatus fileStatus : fileStatuses) {
                archiveList.add(fileStatus.getPath().toString());
            }
        } catch (IOException e) {
            throw new FalconException("Error adding archive for custom jars under: " + libPath, e);
        }
    }
}
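
Example usage (a minimal, hypothetical sketch): the snippet below is not part of the class above. It assumes the process entity and its clusters have already been submitted to Falcon's configuration store and that the cluster's Hadoop and staging endpoints are reachable. Entity and cluster names are placeholders.

package org.apache.falcon.workflow;

import java.util.Map;
import java.util.Properties;

import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.process.Process;

public class OozieProcessWorkflowBuilderExample {
    public static void main(String[] args) throws Exception {
        // Look up a previously submitted process entity (name is a placeholder).
        Process process = EntityUtil.getEntity(EntityType.PROCESS, "sample-process");

        // Build the per-cluster Oozie job properties used to schedule the process.
        OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(process);
        Map<String, Properties> propertiesByCluster = builder.newWorkflowSchedule("sample-cluster");

        // Each entry holds the scheduling configuration for one cluster.
        for (Map.Entry<String, Properties> entry : propertiesByCluster.entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
    }
}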