Source Code of org.apache.falcon.entity.parser.FeedEntityParser

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.falcon.entity.parser;

import org.apache.commons.lang.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.catalog.CatalogServiceFactory;
import org.apache.falcon.entity.CatalogStorage;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.EntityGraph;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.feed.Cluster;
import org.apache.falcon.entity.v0.feed.ClusterType;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.expression.ExpressionHelper;
import org.apache.falcon.group.FeedGroup;
import org.apache.falcon.group.FeedGroupMap;
import org.apache.falcon.security.SecurityUtil;
import org.apache.log4j.Logger;

import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import java.util.TimeZone;

/**
* Parser that parses and validates a feed entity definition.
*/
public class FeedEntityParser extends EntityParser<Feed> {

    private static final Logger LOG = Logger.getLogger(FeedEntityParser.class);

    public FeedEntityParser() {
        super(EntityType.FEED);
    }

    @Override
    public void validate(Feed feed) throws FalconException {
        if (feed.getTimezone() == null) {
            feed.setTimezone(TimeZone.getTimeZone("UTC"));
        }

        if (feed.getClusters() == null) {
            throw new ValidationException("Feed should have atleast one cluster");
        }

        for (Cluster cluster : feed.getClusters().getClusters()) {
            validateEntityExists(EntityType.CLUSTER, cluster.getName());
            validateClusterValidity(cluster.getValidity().getStart(), cluster.getValidity().getEnd(),
                    cluster.getName());
            validateClusterHasRegistry(feed, cluster);
            validateFeedCutOffPeriod(feed, cluster);
        }

        validateFeedStorage(feed);
        validateFeedPartitionExpression(feed);
        validateFeedGroups(feed);

        // The definition looks valid for a new feed.
        // But is this an update to an existing feed?

        Feed oldFeed = ConfigurationStore.get().get(EntityType.FEED, feed.getName());
        if (oldFeed == null) {
            return; // Not an update case
        }

        // Is actually an update. Need to iterate over all the processes
        // depending on this feed and see if they are valid with the new
        // feed reference
        EntityGraph graph = EntityGraph.get();
        Set<Entity> referenced = graph.getDependents(oldFeed);
        Set<Process> processes = findProcesses(referenced);
        if (processes.isEmpty()) {
            return;
        }

        ensureValidityFor(feed, processes);
    }

    private Set<Process> findProcesses(Set<Entity> referenced) {
        Set<Process> processes = new HashSet<Process>();
        for (Entity entity : referenced) {
            if (entity.getEntityType() == EntityType.PROCESS) {
                processes.add((Process) entity);
            }
        }
        return processes;
    }

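    /**
     * Ensures the date pattern of each cluster's storage path matches the
     * feed's default path pattern, and that every group the feed belongs to
     * can accommodate the feed's frequency and date pattern.
     */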
    private void validateFeedGroups(Feed feed) throws FalconException {
        String[] groupNames = feed.getGroups() != null ? feed.getGroups().split(",") : new String[]{};
        final Storage storage = FeedHelper.createStorage(feed);
        String defaultPath = storage.getUriTemplate(LocationType.DATA);
        for (Cluster cluster : feed.getClusters().getClusters()) {
            final String uriTemplate = FeedHelper.createStorage(cluster, feed).getUriTemplate(LocationType.DATA);
            if (!FeedGroup.getDatePattern(uriTemplate).equals(
                    FeedGroup.getDatePattern(defaultPath))) {
                throw new ValidationException("Feeds default path pattern: "
                        + storage.getUriTemplate(LocationType.DATA)
                        + ", does not match with cluster: "
                        + cluster.getName()
                        + " path pattern: "
                        + uriTemplate);
            }
        }
        for (String groupName : groupNames) {
            FeedGroup group = FeedGroupMap.get().getGroupsMapping().get(groupName);
            if (group != null && !group.canContainFeed(feed)) {
                throw new ValidationException(
                        "Feed " + feed.getName() + "'s frequency: " + feed.getFrequency().toString()
                                + ", path pattern: " + defaultPath
                                + " does not match with group: " + group.getName() + "'s frequency: "
                                + group.getFrequency()
                                + ", date pattern: " + group.getDatePattern());
            }
        }
    }

    private void ensureValidityFor(Feed newFeed, Set<Process> processes) throws FalconException {
        for (Process process : processes) {
            try {
                ensureValidityFor(newFeed, process);
            } catch (FalconException e) {
                throw new ValidationException(
                        "Process " + process.getName() + " is not compatible with changes to feed "
                                + newFeed.getName(), e);
            }
        }
    }

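    /**
     * Re-runs the cross-entity validations for every input and output of the
     * process that references the updated feed, for each process cluster.
     */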
    private void ensureValidityFor(Feed newFeed, Process process) throws FalconException {
        for (org.apache.falcon.entity.v0.process.Cluster cluster : process.getClusters().getClusters()) {
            String clusterName = cluster.getName();
            if (process.getInputs() != null) {
                for (Input input : process.getInputs().getInputs()) {
                    if (!input.getFeed().equals(newFeed.getName())) {
                        continue;
                    }
                    CrossEntityValidations.validateFeedDefinedForCluster(newFeed, clusterName);
                    CrossEntityValidations.validateFeedRetentionPeriod(input.getStart(), newFeed, clusterName);
                    CrossEntityValidations.validateInstanceRange(process, input, newFeed);

                    validateInputPartition(newFeed, input);
                }
            }

            if (process.getOutputs() != null) {
                for (Output output : process.getOutputs().getOutputs()) {
                    if (!output.getFeed().equals(newFeed.getName())) {
                        continue;
                    }
                    CrossEntityValidations.validateFeedDefinedForCluster(newFeed, clusterName);
                    CrossEntityValidations.validateInstance(process, output, newFeed);
                }
            }
            LOG.debug("Verified and found " + process.getName() + " to be valid for new definition of "
                    + newFeed.getName());
        }
    }

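    /**
     * Input partitions are only supported for filesystem-backed feeds.
     */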
    private void validateInputPartition(Feed newFeed, Input input) throws FalconException {
        if (input.getPartition() == null) {
            return;
        }

        final Storage.TYPE baseFeedStorageType = FeedHelper.getStorageType(newFeed);
        if (baseFeedStorageType == Storage.TYPE.FILESYSTEM) {
            CrossEntityValidations.validateInputPartition(input, newFeed);
        } else if (baseFeedStorageType == Storage.TYPE.TABLE) {
            throw new ValidationException("Input partitions are not supported for table storage: " + input.getName());
        }
    }

    private void validateClusterValidity(Date start, Date end, String clusterName) throws FalconException {
        try {
            if (start.after(end)) {
                throw new ValidationException("Feed start time: " + start + " cannot be after feed end time: " + end
                        + " for cluster: " + clusterName);
            }
        } catch (ValidationException e) {
            throw e;
        } catch (Exception e) {
            throw new FalconException(e);
        }
    }

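    /**
     * Ensures the cluster's retention limit is not shorter than the feed's
     * late arrival cut-off period.
     */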
    private void validateFeedCutOffPeriod(Feed feed, Cluster cluster) throws FalconException {
        ExpressionHelper evaluator = ExpressionHelper.get();

        String feedRetention = cluster.getRetention().getLimit().toString();
        long retentionPeriod = evaluator.evaluate(feedRetention, Long.class);

        if (feed.getLateArrival() == null) {
            LOG.debug("Feed's late arrival cut-off not set");
            return;
        }
        String feedCutoff = feed.getLateArrival().getCutOff().toString();
        long feedCutOffPeriod = evaluator.evaluate(feedCutoff, Long.class);

        if (retentionPeriod < feedCutOffPeriod) {
            throw new ValidationException(
                    "Feed's retention limit: " + feedRetention + " of referenced cluster " + cluster.getName()
                            + " should not be less than feed's late arrival cut-off period: " + feedCutoff
                            + " for feed: " + feed.getName());
        }
    }

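    /**
     * Validates the partition expressions across clusters: a cluster may be
     * defined only once, at least one source cluster is required when targets
     * exist, a partition expression (containing a cluster expression) must be
     * specified wherever multiple sources or targets make the mapping
     * ambiguous, and no expression may have more parts than the feed declares
     * partitions.
     */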
    private void validateFeedPartitionExpression(Feed feed) throws FalconException {
        int numSourceClusters = 0, numTrgClusters = 0;
        Set<String> clusters = new HashSet<String>();
        for (Cluster cl : feed.getClusters().getClusters()) {
            if (!clusters.add(cl.getName())) {
                throw new ValidationException("Cluster: " + cl.getName()
                        + " is defined more than once for feed: " + feed.getName());
            }
            if (cl.getType() == ClusterType.SOURCE) {
                numSourceClusters++;
            } else if (cl.getType() == ClusterType.TARGET) {
                numTrgClusters++;
            }
        }

        if (numTrgClusters >= 1 && numSourceClusters == 0) {
            throw new ValidationException("Feed: " + feed.getName()
                    + " should have atleast one source cluster defined");
        }

        int feedParts = feed.getPartitions() != null ? feed.getPartitions().getPartitions().size() : 0;

        for (Cluster cluster : feed.getClusters().getClusters()) {

            if (cluster.getType() == ClusterType.SOURCE && numSourceClusters > 1 && numTrgClusters >= 1) {
                String part = FeedHelper.normalizePartitionExpression(cluster.getPartition());
                if (StringUtils.split(part, '/').length == 0) {
                    throw new ValidationException(
                            "Partition expression has to be specified for cluster " + cluster.getName()
                                    + " as there is more than one source cluster");
                }
                validateClusterExpDefined(cluster);

            } else if (cluster.getType() == ClusterType.TARGET) {

                for (Cluster src : feed.getClusters().getClusters()) {
                    if (src.getType() == ClusterType.SOURCE) {
                        String part = FeedHelper.normalizePartitionExpression(src.getPartition(),
                                cluster.getPartition());
                        int numParts = StringUtils.split(part, '/').length;
                        if (numParts > feedParts) {
                            throw new ValidationException(
                                    "Partition expression for clusters " + src.getName() + " and " + cluster.getName()
                                            + " has more parts than the number of partitions defined in the feed");
                        }
                    }
                }

                if (numTrgClusters > 1 && numSourceClusters >= 1) {
                    validateClusterExpDefined(cluster);
                }
            }
        }
    }

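    /**
     * Ensures at least one tag of the cluster's partition expression is a
     * cluster expression, i.e. the expression does not evaluate to itself.
     */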
    private void validateClusterExpDefined(Cluster cl) throws FalconException {
        if (cl.getPartition() == null) {
            return;
        }

        org.apache.falcon.entity.v0.cluster.Cluster cluster = EntityUtil.getEntity(EntityType.CLUSTER, cl.getName());
        String part = FeedHelper.normalizePartitionExpression(cl.getPartition());
        if (FeedHelper.evaluateClusterExp(cluster, part).equals(part)) {
            throw new ValidationException(
                    "Alteast one of the partition tags has to be a cluster expression for cluster " + cl.getName());
        }
    }

    /**
     * Ensures the table is already defined in the catalog registry.
     * Not applicable to FileSystem storage.
     */
    private void validateFeedStorage(Feed feed) throws FalconException {
        final Storage.TYPE baseFeedStorageType = FeedHelper.getStorageType(feed);
        validateMultipleSourcesExist(feed, baseFeedStorageType);
        validateUniformStorageType(feed, baseFeedStorageType);
        validatePartitions(feed, baseFeedStorageType);
        validateStorageExists(feed);
    }

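    /**
     * Feeds with table storage support only a single source cluster.
     */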
    private void validateMultipleSourcesExist(Feed feed, Storage.TYPE baseFeedStorageType) throws FalconException {
        if (baseFeedStorageType == Storage.TYPE.FILESYSTEM) {
            return;
        }

        // validate that there is only one source cluster
        int numberOfSourceClusters = 0;
        for (Cluster cluster : feed.getClusters().getClusters()) {
            if (cluster.getType() == ClusterType.SOURCE) {
                numberOfSourceClusters++;
            }
        }

        if (numberOfSourceClusters > 1) {
            throw new ValidationException("Multiple sources are not supported for feed with table storage: "
                    + feed.getName());
        }
    }

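    /**
     * Ensures every cluster of the feed uses the same storage type as the
     * feed's base storage type.
     */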
    private void validateUniformStorageType(Feed feed, Storage.TYPE feedStorageType) throws FalconException {
        for (Cluster cluster : feed.getClusters().getClusters()) {
            Storage.TYPE feedClusterStorageType = FeedHelper.getStorageType(feed, cluster);

            if (feedStorageType != feedClusterStorageType) {
                throw new ValidationException("The storage type is not uniform for cluster: " + cluster.getName());
            }
        }
    }

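    /**
     * For table storage, the referenced cluster must define a registry
     * (catalog) interface endpoint.
     */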
    private void validateClusterHasRegistry(Feed feed, Cluster cluster) throws FalconException {
        Storage.TYPE feedClusterStorageType = FeedHelper.getStorageType(feed, cluster);

        if (feedClusterStorageType != Storage.TYPE.TABLE) {
            return;
        }

        org.apache.falcon.entity.v0.cluster.Cluster clusterEntity = EntityUtil.getEntity(EntityType.CLUSTER,
                cluster.getName());
        if (ClusterHelper.getRegistryEndPoint(clusterEntity) == null) {
            throw new ValidationException("Cluster should have registry interface defined: " + clusterEntity.getName());
        }
    }

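    /**
     * Feed-level partitions are not allowed for table storage; they must be
     * part of the table URI instead.
     */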
    private void validatePartitions(Feed feed, Storage.TYPE storageType) throws FalconException {
        if (storageType == Storage.TYPE.TABLE && feed.getPartitions() != null) {
            throw new ValidationException("Partitions are not supported for feeds with table storage. "
                    + "It should be defined as part of the table URI. "
                    + feed.getName());
        }
    }

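    /**
     * For table storage, verifies the catalog table exists on every cluster
     * this colo is responsible for; filesystem storage needs no such check.
     */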
    private void validateStorageExists(Feed feed) throws FalconException {
        StringBuilder buffer = new StringBuilder();
        for (Cluster cluster : feed.getClusters().getClusters()) {
            org.apache.falcon.entity.v0.cluster.Cluster clusterEntity =
                    EntityUtil.getEntity(EntityType.CLUSTER, cluster.getName());
            if (!EntityUtil.responsibleFor(clusterEntity.getColo())) {
                continue;
            }

            final Storage storage = FeedHelper.createStorage(cluster, feed);
            // only table storage needs an existence check; filesystem storage always exists
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                continue;
            }

            CatalogStorage catalogStorage = (CatalogStorage) storage;
            String metaStorePrincipal = ClusterHelper.getPropertyValue(clusterEntity,
                    SecurityUtil.HIVE_METASTORE_PRINCIPAL);
            if (!CatalogServiceFactory.getCatalogService().tableExists(catalogStorage.getCatalogUrl(),
                    catalogStorage.getDatabase(), catalogStorage.getTable(), metaStorePrincipal)) {
                buffer.append("Table [")
                        .append(catalogStorage.getTable())
                        .append("] does not exist for feed: ")
                        .append(feed.getName())
                        .append(" in cluster: ")
                        .append(cluster.getName());
            }
        }

        if (buffer.length() > 0) {
            throw new ValidationException(buffer.toString());
        }
    }
}
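
For orientation, below is a minimal, hypothetical sketch of how this parser is typically obtained and driven. It assumes the EntityParserFactory lookup and the parseAndValidate(InputStream) method inherited from EntityParser, as used elsewhere in the Falcon codebase; treat the exact signatures as assumptions rather than guarantees.

// Hypothetical usage sketch; not part of FeedEntityParser itself.
// Assumes EntityParserFactory.getParser(EntityType) and the inherited
// EntityParser#parseAndValidate(InputStream) behave as in other Falcon modules.
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.falcon.entity.parser.EntityParserFactory;
import org.apache.falcon.entity.parser.FeedEntityParser;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.feed.Feed;

public class FeedParserExample {
    public static void main(String[] args) throws Exception {
        try (InputStream xml = new FileInputStream("feed.xml")) {
            FeedEntityParser parser =
                    (FeedEntityParser) EntityParserFactory.getParser(EntityType.FEED);
            // parseAndValidate unmarshals the XML and then calls validate(feed),
            // so any rule above failing surfaces as a ValidationException.
            Feed feed = parser.parseAndValidate(xml);
            System.out.println("Feed " + feed.getName() + " is valid");
        }
    }
}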