Package de.lmu.ifi.dbs.elki.datasource.bundle

Examples of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle


    // Load every source eagerly; bundles.get(k) holds the data of the k-th
    // entry of sources (same iteration order).
    List<MultipleObjectsBundle> bundles = new ArrayList<MultipleObjectsBundle>(sources.size());
    for(DatabaseConnection dbc : sources) {
      bundles.add(dbc.loadData());
    }

    // NOTE(review): bundles.get(0) throws IndexOutOfBoundsException when
    // sources is empty - confirm that callers guarantee at least one source.
    MultipleObjectsBundle first = bundles.get(0);
    // Index from label string to the row number of the first bundle carrying
    // that label; presized with the number of rows in the first bundle.
    Map<String, Integer> labelmap = new HashMap<String, Integer>(first.dataLength());
    // Process first bundle
    {
      // Identify a label column: the first column whose type information is
      // accepted by TypeUtil.GUESSED_LABEL, or -1 if there is none.
      final int lblcol;
      {
        int lblc = -1;
        for(int i = 0; i < first.metaLength(); i++) {
          if(TypeUtil.GUESSED_LABEL.isAssignableFromType(first.meta(i))) {
            lblc = i;
            break;
          }
        }
        lblcol = lblc; // copy the search result into the final variable
      }
      // Without a label column there is nothing to join on.
      if(lblcol == -1) {
        throw new AbortException("No label column found in first source, cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
      }
      // Register the label(s) of every row. Map.put replaces an existing
      // entry, so on a duplicate label the later row wins; a warning is
      // logged that names both rows.
      for(int i = 0; i < first.dataLength(); i++) {
        Object data = first.data(i, lblcol);
        if(data == null) {
          // Rows without a label cannot be matched later; they are left out
          // of the index.
          logger.warning("Object without label encountered.");
          continue;
        }
        if(data instanceof String) {
          // Single plain label.
          Integer old = labelmap.put((String) data, i);
          if(old != null) {
            logger.warning("Duplicate label encountered: " + data + " in rows " + old + " and " + i);
          }
        }
        else if(data instanceof LabelList) {
          // Several labels on one object: each of them maps to this row.
          for(String lbl : (LabelList) data) {
            Integer old = labelmap.put(lbl, i);
            if(old != null) {
              logger.warning("Duplicate label encountered: " + lbl + " in rows " + old + " and " + i);
            }
          }
        }
        else {
          // Any other label type is keyed by its toString() representation.
          String lbl = data.toString();
          Integer old = labelmap.put(lbl, i);
          if(old != null) {
            logger.warning("Duplicate label encountered: " + lbl + " in rows " + old + " and " + i);
          }
        }
      }
    }
    // Process additional sources: every bundle after the first contributes
    // its non-label columns to the first bundle, matched row by row through
    // labelmap.
    for(int c = 1; c < sources.size(); c++) {
      MultipleObjectsBundle cur = bundles.get(c);
      // Label column of this source: the first column whose type information
      // is accepted by TypeUtil.GUESSED_LABEL, or -1 if there is none.
      final int lblcol;
      {
        int lblc = -1;
        for(int i = 0; i < cur.metaLength(); i++) {
          if(TypeUtil.GUESSED_LABEL.isAssignableFromType(cur.meta(i))) {
            lblc = i;
            break;
          }
        }
        lblcol = lblc; // copy the search result into the final variable
      }
      if(lblcol == -1) {
        // The message reports the source with a 1-based number (c + 1).
        throw new AbortException("No label column found in source " + (c + 1) + ", cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
      }
      // Destination columns, indexed like the columns of cur: dcol.get(d) is
      // the list appended to the first bundle for column d of cur, and null
      // at the label column, which is not copied.
      List<ArrayList<Object>> dcol = new ArrayList<ArrayList<Object>>(cur.metaLength());
      for(int i = 0; i < cur.metaLength(); i++) {
        // Skip the label columns
        if(i == lblcol) {
          dcol.add(null);
          continue;
        }
        ArrayList<Object> newcol = new ArrayList<Object>(first.dataLength());
        // Pre-fill with nulls, one per row of the first bundle, so that
        // set(row, value) below is valid for every row and rows without a
        // match in this source simply stay null.
        for(int j = 0; j < first.dataLength(); j++) {
          newcol.add(null);
        }
        first.appendColumn(cur.meta(i), newcol);
        dcol.add(newcol);
      }
      // Copy each row of this source into the matching row of the first
      // bundle.
      for(int i = 0; i < cur.dataLength(); i++) {
        Object data = cur.data(i, lblcol);
        if(data == null) {
          logger.warning("Object without label encountered.");
          continue;
        }
        // Resolve the target row with the same label handling used when the
        // index was built: String directly, LabelList by its first label that
        // is present in the index, anything else via toString().
        Integer row = null;
        if(data instanceof String) {
          row = labelmap.get(data);
        }
        else if(data instanceof LabelList) {
          for(String lbl : (LabelList) data) {
            row = labelmap.get(lbl);
            if(row != null) {
              break;
            }
          }
        }
        else {
          row = labelmap.get(data.toString());
        }
        if(row == null) {
          // No row of the first bundle carries this label; the row is
          // dropped from the join.
          logger.warning("Label not found for join: " + data + " in row " + i);
          continue;
        }
        // Write all non-label cells. If several rows of this source resolve
        // to the same target row, the last one written overwrites the
        // earlier values.
        for(int d = 0; d < cur.metaLength(); d++) {
          if(d == lblcol) {
            continue;
          }
          List<Object> col = dcol.get(d);
          assert (col != null);
          col.set(row, cur.data(i, d));
        }
      }
    }
    for(int i = 0; i < first.dataLength(); i++) {
      for(int d = 0; d < first.metaLength(); d++) {
View Full Code Here


      // normalize objects and transform labels
      if(logger.isDebugging()) {
        logger.debugFine("Invoking filters.");
      }
      MultipleObjectsBundle objects = MultipleObjectsBundle.fromStream(invokeFilters(streamParser));
      return objects;
    }
    else {
      MultipleObjectsBundle parsingResult = parser.parse(in);

      // normalize objects and transform labels
      if(logger.isDebugging()) {
        logger.debugFine("Invoking filters.");
      }
      MultipleObjectsBundle objects = invokeFilters(parsingResult);
      return objects;
    }
  }
View Full Code Here

   * @param bundle the objects to process
   * @return processed objects
   */
  protected MultipleObjectsBundle invokeFilters(MultipleObjectsBundle bundle) {
    BundleStreamSource prevs = null;
    MultipleObjectsBundle prevb = bundle;
    if(filters != null) {
      for(ObjectFilter filter : filters) {
        if(filter instanceof StreamFilter) {
          StreamFilter sfilter = (StreamFilter) filter;
          if(prevs != null) {
View Full Code Here

   * @param bundle the objects to process
   * @return processed objects
   */
  protected BundleStreamSource invokeFilters(BundleStreamSource bundle) {
    BundleStreamSource prevs = bundle;
    MultipleObjectsBundle prevb = null;
    if(filters != null) {
      for(ObjectFilter filter : filters) {
        if(filter instanceof StreamFilter) {
          StreamFilter sfilter = (StreamFilter) filter;
          if(prevs != null) {
View Full Code Here

    this.parser = parser;
  }

  @Override
  public MultipleObjectsBundle loadData() {
    MultipleObjectsBundle objects = new MultipleObjectsBundle();
    objects.appendColumn(TypeUtil.STRING, new ArrayList<Object>());
    for(File file : files) {
      String filestr = file.getPath();
      try {
        InputStream inputStream = new FileInputStream(file);
        inputStream = FileUtil.tryGzipInput(inputStream);

        final BundleStreamSource source;
        if(parser instanceof StreamingParser) {
          final StreamingParser streamParser = (StreamingParser) parser;
          streamParser.initStream(inputStream);
          source = streamParser;
        }
        else {
          MultipleObjectsBundle parsingResult = parser.parse(inputStream);
          // normalize objects and transform labels
          source = new StreamFromBundle(parsingResult);
        }
        BundleMeta meta = null; // NullPointerException on invalid streams
        loop: for(Event e = source.nextEvent();; e = source.nextEvent()) {
View Full Code Here

  public void initialize() {
    if(databaseConnection != null) {
      if(logger.isDebugging()) {
        logger.debugFine("Loading data from database connection.");
      }
      MultipleObjectsBundle objpackages = databaseConnection.loadData();
      // Run at most once.
      databaseConnection = null;

      // Find DBID column
      int idrepnr = findDBIDColumn(objpackages);
      // Build DBID array
      if(idrepnr == -1) {
        this.ids = DBIDUtil.generateStaticDBIDRange(objpackages.dataLength());
      }
      else {
        final ArrayModifiableDBIDs newids = DBIDUtil.newArray(objpackages.dataLength());
        for(int j = 0; j < objpackages.dataLength(); j++) {
          DBID newid = (DBID) objpackages.data(j, idrepnr);
          newids.add(newid);
        }
        this.ids = newids;
      }
      // Replace id representation.
      // TODO: this is an ugly hack
      this.idrep = new DBIDView(this, this.ids);
      relations.add(this.idrep);
      getHierarchy().add(this, idrep);

      // insert into db - note: DBIDs should have been prepared before this!
      Relation<?>[] targets = alignColumns(objpackages);

      for(int j = 0; j < objpackages.dataLength(); j++) {
        // insert object
        final DBID newid = ids.get(j);
        for(int i = 0; i < targets.length; i++) {
          // DBIDs were handled above.
          if(i == idrepnr) {
            continue;
          }
          @SuppressWarnings("unchecked")
          final Relation<Object> relation = (Relation<Object>) targets[i];
          relation.set(newid, objpackages.data(j, i));
        }
      }

      for(Relation<?> relation : relations) {
        SimpleTypeInformation<?> meta = relation.getDataTypeInformation();
View Full Code Here

   * for each object) and indexes and fires a deletion event.
   */
  @Override
  public MultipleObjectsBundle delete(DBIDs ids) {
    // Prepare bundle to return
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    for(Relation<?> relation : relations) {
      ArrayList<Object> data = new ArrayList<Object>(ids.size());
      for(DBID id : ids) {
        data.add(relation.get(id));
      }
      bundle.appendColumn(relation.getDataTypeInformation(), data);
    }
    // remove from db
    for(DBID id : ids) {
      doDelete(id);
    }
View Full Code Here

      // One size slot per declared attribute name; filled in by
      // processColumnTypes (presumably the dimensionality of each target
      // column - confirm against that helper).
      int[] dimsize = new int[names.size()];
      processColumnTypes(names, types, targ, elkitypes, dimsize);

      // Prepare bundle:
      // This is a bit complicated to produce vector fields.
      MultipleObjectsBundle bundle = new MultipleObjectsBundle();
      StreamTokenizer tokenizer = makeArffTokenizer(br);

      // Parser state: 0 = no instance seen yet (bundle headers not set up),
      // 1 = dense instances, 2 = sparse instances. The first instance decides
      // the mode; a later instance of the other kind aborts parsing.
      int state = 0;

      nextToken(tokenizer);
      while(tokenizer.ttype != StreamTokenizer.TT_EOF) {
        // Parse instance
        if(tokenizer.ttype == StreamTokenizer.TT_EOL) {
          // ignore empty lines
        }
        else if(tokenizer.ttype != '{') {
          // Anything that does not start with '{' is read as a dense
          // instance.
          if(state == 0) {
            // Headers are created lazily on the first instance; the last
            // argument is false for the dense case and true for the sparse
            // case.
            setupBundleHeaders(names, targ, elkitypes, dimsize, bundle, false);
            state = 1; // dense
          }
          if(state != 1) {
            throw new AbortException("Mixing dense and sparse vectors is currently not allowed.");
          }
          // Load a dense instance
          bundle.appendSimple(loadDenseInstance(tokenizer, dimsize, elkitypes, bundle.metaLength()));
        }
        else {
          // A leading '{' starts a sparse instance.
          if(state == 0) {
            setupBundleHeaders(names, targ, elkitypes, dimsize, bundle, true);
            state = 2; // sparse
          }
          if(state != 2) {
            throw new AbortException("Mixing dense and sparse vectors is currently not allowed.");
          }
          // Load a sparse instance
          bundle.appendSimple(loadSparseInstance(tokenizer, targ, dimsize, elkitypes, bundle.metaLength()));
        }
        // Move on to the next token unless the current one is already the
        // end of the file.
        if(tokenizer.ttype != StreamTokenizer.TT_EOF) {
          nextToken(tokenizer);
        }
      }
View Full Code Here

TOP

Related Classes of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.