Examples of org.apache.crunch.impl.mr.collect.PCollectionImpl

Package org.apache.crunch.impl.mr.collect

Examples of org.apache.crunch.impl.mr.collect.PCollectionImpl

org.apache.crunch.impl.mr.collect.PCollectionImpl

            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            PCollectionImpl split = e.getSplit();
            InputCollection<?> inputNode = handleSplitTarget(split);
            Vertex splitTail = graph.addVertex(split, true);
            Vertex splitHead = graph.addVertex(inputNode, false);
            
            // Divide up the node paths in the edge between the two GBK nodes so

View Full Code Here

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          ++lastJobID, outputPaths, pipeline.createTempPath());
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        HashMultimap<Target, NodePath> mapSideOutputPaths = HashMultimap.create();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !usedEdges.contains(ep)) { // map-side output
                for (Target t : outputs.get(ep.getTail().getPCollection())) {
                  mapSideOutputPaths.putAll(t, ep.getNodePaths());
                }
                usedEdges.add(ep);
              }
            }
          }
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        prototype.addMapSideOutputs(mapSideOutputPaths);
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !assignment.containsKey(ep.getTail())) { // map-side output
                assignment.put(ep.getTail(), prototype);
              }
            }
          }
        }
        
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }


      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {
        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            PCollectionImpl split = e.getSplit();
            InputCollection<?> inputNode = handleSplitTarget(split);
            Vertex splitTail = graph.addVertex(split, true);
            Vertex splitHead = graph.addVertex(inputNode, false);
            
            // Divide up the node paths in the edge between the two GBK nodes so

View Full Code Here

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          ++lastJobID, outputPaths, pipeline.createTempPath());
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          usedEdges.add(e);
        }
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }
      
      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {


        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            PCollectionImpl split = e.getSplit();
            InputCollection<?> inputNode = handleSplitTarget(split);
            Vertex splitTail = graph.addVertex(split, true);
            Vertex splitHead = graph.addVertex(inputNode, false);
            
            // Divide up the node paths in the edge between the two GBK nodes so

View Full Code Here

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          ++lastJobID, outputPaths, pipeline.createTempPath());
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        HashMultimap<Target, NodePath> mapSideOutputPaths = HashMultimap.create();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !usedEdges.contains(ep)) { // map-side output
                for (Target t : outputs.get(ep.getTail().getPCollection())) {
                  mapSideOutputPaths.putAll(t, ep.getNodePaths());
                }
                usedEdges.add(ep);
              }
            }
          }
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        prototype.addMapSideOutputs(mapSideOutputPaths);
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          if (e.getHead().isInput()) {
            for (Edge ep : e.getHead().getOutgoingEdges()) {
              if (ep.getTail().isOutput() && !assignment.containsKey(ep.getTail())) { // map-side output
                assignment.put(ep.getTail(), prototype);
              }
            }
          }
        }
        
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }


      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {
        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            PCollectionImpl split = e.getSplit();
            InputCollection<?> inputNode = handleSplitTarget(split);
            Vertex splitTail = graph.addVertex(split, true);
            Vertex splitHead = graph.addVertex(inputNode, false);
            
            // Divide up the node paths in the edge between the two GBK nodes so

View Full Code Here

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          outputPaths, pipeline.createTempPath()); 
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
        }
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
        }
        prototype.addReducePaths(outputPaths);
      }
      
      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {


        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!assignment.containsKey(e.getHead())) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && assignment.containsKey(e.getHead())) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
          } else {
            // Execute an Edge split
            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
            PCollectionImpl split = e.getSplit();
            InputCollection<?> inputNode = handleSplitTarget(split);
            Vertex splitTail = graph.addVertex(split, true);
            Vertex splitHead = graph.addVertex(inputNode, false);
            
            // Divide up the node paths in the edge between the two GBK nodes so

View Full Code Here

      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      for (Vertex v : component) {
        if (v.isInput()) {
          for (Edge e : v.getOutgoingEdges()) {
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }
        }
      }
      if (outputPaths.isEmpty()) {
        throw new IllegalStateException("No outputs?");
      }
      JobPrototype prototype = JobPrototype.createMapOnlyJob(
          outputPaths, pipeline.createTempPath()); 
      for (Vertex v : component) {
        assignment.put(v, prototype);
      }
    } else {
      Set<Edge> usedEdges = Sets.newHashSet();
      for (Vertex g : gbks) {
        Set<NodePath> inputs = Sets.newHashSet();
        for (Edge e : g.getIncomingEdges()) {
          inputs.addAll(e.getNodePaths());
          usedEdges.add(e);
        }
        JobPrototype prototype = JobPrototype.createMapReduceJob(
            (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
        assignment.put(g, prototype);
        for (Edge e : g.getIncomingEdges()) {
          assignment.put(e.getHead(), prototype);
          usedEdges.add(e);
        }
        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
        for (Edge e : g.getOutgoingEdges()) {
          Vertex output = e.getTail();
          for (Target t : outputs.get(output.getPCollection())) {
            outputPaths.putAll(t, e.getNodePaths());
          }
          assignment.put(output, prototype);
          usedEdges.add(e);
        }
        prototype.addReducePaths(outputPaths);
      }
      
      // Check for any un-assigned vertices, which should be map-side outputs
      // that we will need to run in a map-only job.
      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
      Set<Vertex> orphans = Sets.newHashSet();
      for (Vertex v : component) {


        // Check if this vertex has multiple inputs but only a subset of
        // them have already been assigned
        boolean vertexHasUnassignedIncomingEdges = false;
        if (v.isOutput()) {
          for (Edge e : v.getIncomingEdges()) {
            if (!usedEdges.contains(e)) {
              vertexHasUnassignedIncomingEdges = true;
            }
          }
        }


        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
          orphans.add(v);
          for (Edge e : v.getIncomingEdges()) {
            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
              // We've already dealt with this incoming edge
              continue;
            }
            orphans.add(e.getHead());
            for (NodePath nodePath : e.getNodePaths()) {
              PCollectionImpl target = nodePath.tail();
              for (Target t : outputs.get(target)) {
                outputPaths.put(t, nodePath);
              }
            }
          }

View Full Code Here

TOP

Related Classes of org.apache.crunch.impl.mr.collect.PCollectionImpl

org.apache.crunch.impl.mr.MRPipeline

org.apache.crunch.impl.mr.plan.MSCRPlanner

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.