package com.google.appengine.tools.mapreduce.bigqueryjobs;

import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.appengine.tools.cloudstorage.GcsFilename;
import com.google.appengine.tools.mapreduce.impl.pipeline.DeleteFilesJob;
import com.google.appengine.tools.mapreduce.impl.util.SerializableValue;
import com.google.appengine.tools.pipeline.FutureValue;
import com.google.appengine.tools.pipeline.Job1;
import com.google.appengine.tools.pipeline.Job2;
import com.google.appengine.tools.pipeline.Value;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* A pipeline job that inspects the status of a BigQuery load {@link Job} and either retries the
* load or cleans up the Google Cloud Storage files it loaded.
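*
* <p>Illustrative invocation from a parent pipeline job; the surrounding values
* ({@code dataset}, {@code bundle}, {@code pollResult}, and so on) are hypothetical:
* <pre>{@code
* FutureValue<BigQueryLoadJobReference> result = futureCall(
*     new RetryLoadOrCleanupJob(dataset, tableName, projectId, bundle, schema),
*     pollResult, immediate(0));
* }</pre>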
*/
// TODO: make cleanup optional; there can be cases where users want to retain the files
// after the load.
final class RetryLoadOrCleanupJob extends
Job2<BigQueryLoadJobReference, BigQueryLoadJobReference, Integer> {
private static final long serialVersionUID = 6203149802079995465L;
private static final Logger log = Logger.getLogger(RetryLoadOrCleanupJob.class.getName());
private final String dataset;
private final String tableName;
private final String projectId;
private final List<GcsFilename> bundle;
private final SerializableValue<TableSchema> schema;

/**
* @param dataset the BigQuery dataset to load into
* @param tableName the BigQuery table to load into
* @param projectId the Google Cloud project that owns the load job
* @param bundle the list of GCS files to load
* @param schema serializable wrapper around a non-serializable {@link TableSchema} object
*/
RetryLoadOrCleanupJob(String dataset, String tableName, String projectId,
List<GcsFilename> bundle, SerializableValue<TableSchema> schema) {
this.dataset = dataset;
this.tableName = tableName;
this.projectId = projectId;
this.bundle = bundle;
this.schema = schema;
}

@Override
public Value<BigQueryLoadJobReference> run(BigQueryLoadJobReference pollResult,
Integer numRetries) throws Exception {
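// Fetch the current state of the load job from the BigQuery service.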
Job pollJob = BigQueryLoadGoogleCloudStorageFilesJob.getBigquery().jobs()
.get(pollResult.getJobReference().getProjectId(), pollResult.getJobReference().getJobId())
.execute();
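// errorResult is non-null only if the job failed; getErrors() lists every error encountered,
// including non-fatal ones.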
ErrorProto fatalError = pollJob.getStatus().getErrorResult();
List<ErrorProto> errors = pollJob.getStatus().getErrors();
if (fatalError != null) {
log.severe("BigQuery load job failed; retrying (attempt #" + numRetries
+ "). Error details: " + fatalError.getReason() + ": " + fatalError.getMessage()
+ " at " + fatalError.getLocation());
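// The load failed: kick off a fresh load of the same bundle with an incremented attempt count.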
return futureCall(new BigQueryLoadFileSetJob(dataset, tableName, projectId, bundle, schema),
immediate(++numRetries));
}
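// The job completed, but possibly with non-fatal errors; log them for diagnosis.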
if (errors != null) {
log.log(Level.SEVERE, "BigQuery load job for files " + bundle
+ " completed with the following errors. BigQuery does not consider these errors fatal,"
+ " so the job ran to completion.");
for (ErrorProto error : errors) {
log.log(Level.SEVERE, "Error: [REASON] " + error.getReason() + " [MESSAGE] "
+ error.getMessage() + " [LOCATION] " + error.getLocation());
}
}
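// The load succeeded: delete the source files from GCS, then surface a DONE job reference
// once the deletion completes.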
FutureValue<Void> deleteJob = futureCall(new DeleteFilesJob(), immediate(bundle));
return futureCall(new ReturnResult<BigQueryLoadJobReference>(),
immediate(new BigQueryLoadJobReference("DONE", pollJob.getJobReference())),
waitFor(deleteJob));
}

/**
* Returns its argument unchanged. Used as the final pipeline step so that the result is only
* produced after the {@code waitFor} dependencies (here, the file-deletion job) complete.
*/
private static class ReturnResult<R> extends Job1<R, R> {
private static final long serialVersionUID = -5043192512880493589L;
@Override
public Value<R> run(R r) throws Exception {
return immediate(r);
}
}
}