Package org.archive.wayback.util

Source Code of org.archive.wayback.util.ARCCreator

/* ARCCreator
*
* $Id: ARCCreator.java 1668 2007-03-31 00:43:05Z bradtofel $
*
* Created on 6:09:27 PM Mar 29, 2006.
*
* Copyright (C) 2006 Internet Archive.
*
* This file is part of wayback.
*
* wayback is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* any later version.
*
* wayback is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Lesser Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License
* along with wayback; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package org.archive.wayback.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;

import org.archive.io.arc.ARCConstants;
import org.archive.io.arc.ARCWriter;
import org.archive.util.ArchiveUtils;

/**
*
*
* @author brad
* @version $Date: 2007-03-31 01:43:05 +0100 (Sat, 31 Mar 2007) $, $Revision: 1668 $
*/
public class ARCCreator {
    Logger logger = Logger.getLogger(getClass().getName());

  private static String DEFAULT_PREFIX = "test-arc";
  private HashMap<String,RecordComponents> components =
    new HashMap<String,RecordComponents>();


  private RecordComponents getRecordComponents(final String key) {
    RecordComponents rc = components.get(key);
    if(rc == null) {
      rc = new RecordComponents(key);
      components.put(key,rc);
    }
    return rc;
  }

  private void addFile(File file) {
    String key = null;
    String name = file.getName();
   
    RecordComponents rc;

    if(!file.isFile()) {
      throw new RuntimeException("file " + file.getAbsolutePath() +
          "is not a regular file");
    }
   
    if(name.endsWith(".meta")) {
      key = name.substring(0,name.length() - 5);
      rc = getRecordComponents(key);
      rc.noteMeta();
    } else if(name.endsWith(".body")) {
      key = name.substring(0,name.length() - 5);
      rc = getRecordComponents(key);
      rc.noteBody();
    } else if(name.endsWith(".sh")) {
      key = name.substring(0,name.length() - 3);
      rc = getRecordComponents(key);
      rc.noteScript();
    } else {
      throw new RuntimeException("No key for file " +
          file.getAbsolutePath());
    }
  }

  /**
   * Reads all component files (.meta, .body, .sh) in srcDir, and writes
   * one or more ARC files in tgtDir with names beginning with prefix.
   *
   * @param srcDir
   * @param tgtDir
   * @param prefix
   * @throws IOException
   */
  public void directoryToArc(File srcDir, File tgtDir, String prefix)
  throws IOException {
   
    File target[] = {tgtDir};
    ARCWriter writer = new ARCWriter(new AtomicInteger(),
        Arrays.asList(target),prefix,true,
        ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE);
    File sources[] = srcDir.listFiles();
    logger.info("Found " + sources.length + " files in " + srcDir);
    for(int i = 0; i<sources.length; i++) {
      addFile(sources[i]);
    }
    logger.info("Associated " + sources.length + " files in " + srcDir);

    // sort keys and write them all:
    Object arr[] = components.keySet().toArray();
    Arrays.sort(arr);
    for(int i = 0; i < arr.length; i++) {
      String key = (String) arr[i];
      RecordComponents rc = components.get(key);
      rc.writeRecord(writer,srcDir);
      logger.info("Wrote record keyed " + rc.key);     
    }
    writer.close();
    logger.info("Closed arc file named " +
        writer.getFile().getAbsolutePath());
  }
 
  /**
   * @param args
   */
  public static void main(String[] args) {
    if((args.length < 2) || (args.length > 3)) {
      System.err.println("USAGE: srcDir tgtDir [arc_prefix]");
      System.exit(1);
    }
    File srcDir = new File(args[0]);
    File tgtDir = new File(args[1]);
    String prefix = null;
    if(args.length == 3) {
      prefix = args[2];
    } else {
      prefix = DEFAULT_PREFIX;
    }
    ARCCreator creator = new ARCCreator();
    try {
      creator.directoryToArc(srcDir,tgtDir,prefix);
    } catch (IOException e) {
      e.printStackTrace();
      System.exit(2);
    }
  }

 
  private class RecordComponents {
    private String key;
    private boolean meta;
    private boolean body;
    private boolean script;
   
    /**
     * constructor
     *
     * @param key
     */
    public RecordComponents(final String key) {
      super();
      this.key = key;
    }
    private boolean isComplete() {
      return meta && body && script;
    }
    /**
     * notes that the .meta file has been seen
     */
    public void noteMeta()   { meta = true;   }
    /**
     * notes that the .body file has been seen
     */
    public void noteBody()   { body = true;   }
    /**
     * notes that the .sh file has been seen
     */
    public void noteScript() { script = true; }
    /**
     * checks that all required files have been seen for this record, then
     * reads and parses the metafile, then write()s the record on the
     * writer.
     *
     * @param writer
     * @param componentDir
     * @throws IOException
     */
    public void writeRecord(ARCWriter writer, File componentDir)
    throws IOException {
      if(!isComplete()) {
        throw new RuntimeException("Missing components for key " + key +
            " in directory " + componentDir.getAbsolutePath());
      }
      File metaFile = new File(componentDir,key + ".meta");
      RandomAccessFile raFile = new RandomAccessFile(metaFile, "r");
      String metaLine = raFile.readLine();
      if (metaLine == null) {
        throw new IOException("No meta info in " +
            metaFile.getAbsolutePath());
      }
      String metaParts[] = metaLine.split(" ");
      if(metaParts.length != 5) {
        throw new IOException("Should be 5 elements in " +
            metaFile.getAbsolutePath());       
      }
      String uri = metaParts[0];
      String ip = metaParts[1];
      long fetchTS = 0;
      try {
        fetchTS = ArchiveUtils.parse14DigitDate(metaParts[2]).getTime();
      } catch (ParseException e) {
        throw new IOException("unparseable metaline timestamp in " +
            metaFile.getAbsolutePath());
      }
      String type = metaParts[3];
      int length = Integer.valueOf(metaParts[4]).intValue();
     
      File bodyFile = new File(componentDir,key + ".body");
      if(bodyFile.length() != length) {
        throw new IOException("byte mismatch in meta length and body " +
            "byte size for " + bodyFile.getAbsolutePath());
      }
      FileInputStream fis = new FileInputStream(bodyFile);

      writer.write(uri,type,ip,fetchTS,length,fis);
    }
   
  }
}
TOP

Related Classes of org.archive.wayback.util.ARCCreator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); ga('create', 'UA-20639858-1', 'auto'); ga('send', 'pageview');