Examples of PCollectionImpl

Examples of org.apache.crunch.impl.dist.collect.PCollectionImpl

            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            Map<NodePath, PCollectionImpl> splitPoints = e.getSplitPoints(outputs);
            for (Map.Entry<NodePath, PCollectionImpl> s : splitPoints.entrySet()) {
              NodePath path = s.getKey();
              PCollectionImpl split = s.getValue();
              InputCollection<?> inputNode = handleSplitTarget(split);
              Vertex splitTail = graph.addVertex(split, true);
              Vertex splitHead = graph.addVertex(inputNode, false);
              NodePath headPath = path.splitAt(split, splitHead.getPCollection());
              graph.getEdge(vertex, splitTail).addNodePath(headPath);

View Full Code Here

Examples of org.apache.crunch.impl.dist.collect.PCollectionImpl

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          ++lastJobID, outputPaths, pipeline.createTempPath());
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        HashMultimap<Target, NodePath> mapSideOutputPaths = HashMultimap.create();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !usedEdges.contains(ep)) { // map-side output
                for (Target t : outputs.get(ep.getTail().getPCollection())) {
                  mapSideOutputPaths.putAll(t, ep.getNodePaths());
                }
                usedEdges.add(ep);
              }
            }
          }
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        prototype.addMapSideOutputs(mapSideOutputPaths);
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !assignment.containsKey(ep.getTail())) { // map-side output
                assignment.put(ep.getTail(), prototype);
              }
            }
          }
        }
        
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }


      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {
        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

Examples of org.apache.crunch.impl.dist.collect.PCollectionImpl

            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            Map<NodePath, PCollectionImpl> splitPoints = e.getSplitPoints(outputs);
            for (Map.Entry<NodePath, PCollectionImpl> s : splitPoints.entrySet()) {
              NodePath path = s.getKey();
              PCollectionImpl split = s.getValue();
              InputCollection<?> inputNode = handleSplitTarget(split);
              Vertex splitTail = graph.addVertex(split, true);
              Vertex splitHead = graph.addVertex(inputNode, false);
              NodePath headPath = path.splitAt(split, splitHead.getPCollection());
              graph.getEdge(vertex, splitTail).addNodePath(headPath);

View Full Code Here

Examples of org.apache.crunch.impl.dist.collect.PCollectionImpl

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          ++lastJobID, outputPaths, pipeline.createTempPath());
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        HashMultimap<Target, NodePath> mapSideOutputPaths = HashMultimap.create();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !usedEdges.contains(ep)) { // map-side output
                for (Target t : outputs.get(ep.getTail().getPCollection())) {
                  mapSideOutputPaths.putAll(t, ep.getNodePaths());
                }
                usedEdges.add(ep);
              }
            }
          }
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        prototype.addMapSideOutputs(mapSideOutputPaths);
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !assignment.containsKey(ep.getTail())) { // map-side output
                assignment.put(ep.getTail(), prototype);
              }
            }
          }
        }
        
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }


      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {
        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

Examples of org.apache.crunch.impl.dist.collect.PCollectionImpl

  }


  private Set<Target> getDependencies(PipelineCallable<?> callable) {
    Set<Target> deps = Sets.newHashSet(callable.getAllTargets().values());
    for (PCollection pc : callable.getAllPCollections().values()) {
      PCollectionImpl pcImpl = (PCollectionImpl) pc;
      deps.addAll(pcImpl.getTargetDependencies());
      MaterializableIterable iter = (MaterializableIterable) pc.materialize();
      Source pcSrc = iter.getSource();
      if (pcSrc instanceof Target) {
        deps.add((Target) pcSrc);
      }

View Full Code Here

Examples of org.apache.crunch.impl.dist.collect.PCollectionImpl

            if (splitPoints.isEmpty()) {
              graph.getEdge(newHead, vertex).addAllNodePaths(e.getNodePaths());
            } else {
              for (Map.Entry<NodePath, PCollectionImpl> s : splitPoints.entrySet()) {
                NodePath path = s.getKey();
                PCollectionImpl split = s.getValue();
                InputCollection<?> inputNode = handleSplitTarget(split);
                Vertex splitTail = graph.addVertex(split, true);
                Vertex splitHead = graph.addVertex(inputNode, false);
                NodePath headPath = path.splitAt(split, splitHead.getPCollection());
                graph.getEdge(newHead, splitTail).addNodePath(headPath);
                graph.getEdge(splitHead, vertex).addNodePath(path);
                // Note the dependency between the vertices in the graph.
                graph.markDependency(splitHead, splitTail);
              }
            }
          }
        }
        for (Edge e : baseVertex.getOutgoingEdges()) {
          if (!e.getTail().isGBK()) {
            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            Map<NodePath, PCollectionImpl> splitPoints = e.getSplitPoints(false /* breakpoints only */);
            for (Map.Entry<NodePath, PCollectionImpl> s : splitPoints.entrySet()) {
              NodePath path = s.getKey();
              PCollectionImpl split = s.getValue();
              InputCollection<?> inputNode = handleSplitTarget(split);
              Vertex splitTail = graph.addVertex(split, true);
              Vertex splitHead = graph.addVertex(inputNode, false);
              NodePath headPath = path.splitAt(split, splitHead.getPCollection());
              graph.getEdge(vertex, splitTail).addNodePath(headPath);

View Full Code Here

Examples of org.apache.crunch.impl.dist.collect.PCollectionImpl

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          ++lastJobID, outputPaths, pipeline.createTempPath());
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        HashMultimap<Target, NodePath> mapSideOutputPaths = HashMultimap.create();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !usedEdges.contains(ep)) { // map-side output
                for (Target t : outputs.get(ep.getTail().getPCollection())) {
                  mapSideOutputPaths.putAll(t, ep.getNodePaths());
                }
                usedEdges.add(ep);
              }
            }
          }
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        prototype.addMapSideOutputs(mapSideOutputPaths);
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !assignment.containsKey(ep.getTail())) { // map-side output
                assignment.put(ep.getTail(), prototype);
              }
            }
          }
        }
        
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }


      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {
        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

Examples of org.apache.crunch.impl.mr.collect.PCollectionImpl

            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            PCollectionImpl split = e.getSplit();
            InputCollection<?> inputNode = handleSplitTarget(split);
            Vertex splitTail = graph.addVertex(split, true);
            Vertex splitHead = graph.addVertex(inputNode, false);
            
            // Divide up the node paths in the edge between the two GBK nodes so

View Full Code Here

Examples of org.apache.crunch.impl.mr.collect.PCollectionImpl

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          ++lastJobID, outputPaths, pipeline.createTempPath());
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        HashMultimap<Target, NodePath> mapSideOutputPaths = HashMultimap.create();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !usedEdges.contains(ep)) { // map-side output
                for (Target t : outputs.get(ep.getTail().getPCollection())) {
                  mapSideOutputPaths.putAll(t, ep.getNodePaths());
                }
                usedEdges.add(ep);
              }
            }
          }
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        prototype.addMapSideOutputs(mapSideOutputPaths);
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !assignment.containsKey(ep.getTail())) { // map-side output
                assignment.put(ep.getTail(), prototype);
              }
            }
          }
        }
        
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }


      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {
        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

Examples of org.apache.crunch.impl.mr.collect.PCollectionImpl

            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            PCollectionImpl split = e.getSplit();
            InputCollection<?> inputNode = handleSplitTarget(split);
            Vertex splitTail = graph.addVertex(split, true);
            Vertex splitHead = graph.addVertex(inputNode, false);
            
            // Divide up the node paths in the edge between the two GBK nodes so

View Full Code Here

0 1

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.