/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.crunch;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.crunch.PipelineResult;
import org.apache.crunch.PipelineResult.StageResult;
import org.apache.crunch.test.TemporaryPath;
import org.apache.crunch.test.TemporaryPaths;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.crunch.CrunchIndexerToolOptions.PipelineType;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.kitesdk.morphline.api.MorphlineRuntimeException;
import org.kitesdk.morphline.solr.AbstractSolrMorphlineZkTest;
import org.kitesdk.morphline.stdlib.DropRecordBuilder;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.ObjectArrays;

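/**
 * End-to-end test for {@link CrunchIndexerTool} that runs the in-memory Crunch pipeline
 * ({@link PipelineType#memory}) over various input files and morphline configs and
 * verifies the resulting documents in the SolrCloud test cluster provided by
 * {@link AbstractSolrMorphlineZkTest}.
 */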
//@ThreadLeakAction({Action.WARN})
//@ThreadLeakAction({Action.INTERRUPT})
//@ThreadLeakLingering(linger = 0)
//@ThreadLeakZombies(Consequence.CONTINUE)
//@ThreadLeakZombies(Consequence.IGNORE_REMAINING_TESTS)
@ThreadLeakScope(Scope.NONE)
//@ThreadLeakScope(Scope.TEST)
@SuppressCodecs({"Lucene3x", "Lucene40"})
public class MemoryCrunchIndexerToolTest extends AbstractSolrMorphlineZkTest {
 
  @Rule
  public TemporaryPath tmpDir = TemporaryPaths.create();
  private PipelineType pipelineType;
  private boolean isRandomizingWithDoFn;
  private boolean isDryRun;
  private int numExpectedFailedRecords;
  private int numExpectedExceptionRecords;

  private static final String SCHEMA_FILE = "src/test/resources/test-documents/string.avsc";
  private static final String NOP = "nop.conf";
  private static final String LOAD_SOLR_LINE = "loadSolrLine.conf";
  private static final String EXTRACT_AVRO_PATH = "extractAvroPath.conf";
  private static final String READ_AVRO_PARQUET_FILE = "readAvroParquetFile.conf";
  private static final String FAIL_FILE = "fail.conf";
  private static final String THROW_EXCEPTION_FILE = "throwException.conf";
  private static final String RESOURCES_DIR = "target/test-classes";
 
 
  public MemoryCrunchIndexerToolTest() {
    this(PipelineType.memory, false);
  }
 
  protected MemoryCrunchIndexerToolTest(PipelineType pipelineType, boolean isDryRun) {
    this.pipelineType = pipelineType;
    this.isDryRun = isDryRun;
    sliceCount = 1;
    shardCount = 1;
  }
   
  @Before
  public void setUp() throws Exception {
    super.setUp();
  }

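  /**
   * Runs all scenarios sequentially within a single SolrCloud test lifecycle,
   * clearing per-scenario state via {@link #resetTest()} in between.
   */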
  @Override
  public void doTest() throws Exception {
    waitForRecoveriesToFinish(false);
    resetTest();
    testStreamTextInputFile();
    resetTest();
    testSplittableTextInputFile();   
    resetTest();
    testSplittableAvroFile();   
    resetTest();
    testSplittableAvroFileWithReaderSchema();
    resetTest();
    testSplittableAvroFileWithDryRun();
    resetTest();
    testSplittableAvroParquetFile();
    resetTest();
    testStreamAvroParquetFile();
    resetTest();
    testStreamTextInputFiles();
    resetTest();
    testFileList();
    resetTest();
    testFileListWithScheme();
    resetTest();
    testRecursiveInputDir();
    resetTest();
    testRandomizeInputFiles();
    resetTest();
    testHelp();
    resetTest();
    testHelpWithoutArgs();
    resetTest();
    testCommandThatFails();
    if (pipelineType != PipelineType.spark) { // FIXME
      resetTest();
      testCommandThatThrowsException();
      resetTest();
      testIllegalCommandLineArgument();
      resetTest();
      testIllegalCommandLineClassNameArgument();
    }
    cloudClient.shutdown();
  }
 
  @Override
  protected void commit() throws Exception {
    morphline = new DropRecordBuilder().build(null, null, null, null); // just a dummy to make the superclass happy
    super.commit();
  }
 
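  /** Clears the per-scenario flags and expected counters and deletes all documents from Solr. */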
  private void resetTest() throws SolrServerException, IOException {
    //tmpDir.delete();
    isRandomizingWithDoFn = false;
    numExpectedFailedRecords = 0;
    numExpectedExceptionRecords = 0;
   
    cloudClient.deleteByQuery("*:*"); // delete everything!
    cloudClient.commit();
  }
 
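  /**
   * Builds the command line arguments common to all scenarios: log4j config, --chatty,
   * the pipeline type, and optionally the morphline file/id plus the --dry-run flag.
   */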
  private String[] getInitialArgs(String morphlineConfigFile) {
    String[] args = new String[] {
        "--log4j=" + RESOURCES_DIR + "/log4j.properties",
        "--chatty",
        "--pipeline-type=" + pipelineType,
    };
    if (morphlineConfigFile != null) {
      args = ObjectArrays.concat(args, "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/" + morphlineConfigFile);
      args = ObjectArrays.concat(args, "--morphline-id=morphline1");
    }
    if (isDryRun) {
      args = ObjectArrays.concat(args, "--dry-run");     
    }
    return args;
  }

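  // Most scenarios below feed one or more input files through a specific morphline config
  // and verify the indexed documents via runIntoSolr(); the --help and illegal-argument
  // scenarios instead check the tool's exit code directly.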
  private void testStreamTextInputFile() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] expected = new String[] {"hello foo", "hello world"};
    String[] args = getInitialArgs(LOAD_SOLR_LINE);
    args = ObjectArrays.concat(args, inputPath);   
    PipelineResult pipelineResult = runIntoSolr(args, expected);
    Assert.assertTrue(pipelineResult.getStageResults().get(0).getCounterValue("morphline", "morphline.app.numRecords") > 0);
  }
 
  private void testSplittableTextInputFile() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] expected = new String[] {"hello foo", "hello world"};
    String[] args = getInitialArgs("readSplittableLines.conf");
    args = ObjectArrays.concat(args, "--input-file-format=text");
    args = ObjectArrays.concat(args, inputPath);
    runIntoSolr(args, expected);
  }

  private void testSplittableAvroFile() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/strings-2.avro");
    String[] expected = new String[] {"hello foo", "hello world"};
    String[] args = getInitialArgs(EXTRACT_AVRO_PATH);
    args = ObjectArrays.concat(args, "--input-file-format=avro");
    args = ObjectArrays.concat(args, inputPath);   
    runIntoSolr(args, expected);   
  }
 
  private void testSplittableAvroFileWithReaderSchema() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/strings-2.avro");
    String[] expected = new String[] {"hello foo", "hello world"};
    String[] args = getInitialArgs(EXTRACT_AVRO_PATH);
    args = ObjectArrays.concat(args, "--input-file-format=avro");
    args = ObjectArrays.concat(args, "--input-file-reader-schema=" + SCHEMA_FILE);
    args = ObjectArrays.concat(args, inputPath);   
    runIntoSolr(args, expected);   
  }
 
  private void testSplittableAvroFileWithDryRun() throws Exception {
    boolean oldValue = isDryRun;
    isDryRun = true;
    try {
      testSplittableAvroFile();
    } finally {
      isDryRun = oldValue;
    }
  }
 
  private void testSplittableAvroParquetFile() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/strings-2.parquet");
    String[] expected = new String[] {"hello foo", "hello world"};
    String[] args = getInitialArgs(EXTRACT_AVRO_PATH);
    args = ObjectArrays.concat(args, "--input-file-format=avroParquet");
    args = ObjectArrays.concat(args, "--input-file-reader-schema=" + SCHEMA_FILE);
    args = ObjectArrays.concat(args, inputPath);   
    runIntoSolr(args, expected);   
  }
 
  private void testStreamAvroParquetFile() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/strings-2.parquet");
    String[] expected = new String[] {"hello foo", "hello world"};
    String[] args = getInitialArgs(READ_AVRO_PARQUET_FILE);
    args = ObjectArrays.concat(args, inputPath);   
    runIntoSolr(args, expected);   
  }
 
  private void testStreamTextInputFiles() throws Exception {
    String inputPath1 = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String inputPath2 = tmpDir.copyResourceFileName("test-documents/hello2.txt");
   
    String[] expected = new String[] {
        "hello foo",
        "hello world",
        "hello2 file",
        };
    String[] args = getInitialArgs(LOAD_SOLR_LINE);
    args = ObjectArrays.concat(args, new String[]{inputPath1, inputPath2}, String.class);   
    runIntoSolr(args, expected);
  }
 
  private void testFileList() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/filelist1.txt");
    String[] expected = new String[] {"hello foo", "hello world", "hello2 file"};
    String[] args = getInitialArgs(LOAD_SOLR_LINE);
    args = ObjectArrays.concat(args, "--input-file-list=" + inputPath);   
    runIntoSolr(args, expected);
  }
 
  private void testFileListWithScheme() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/filelist1.txt");
    String[] expected = new String[] {"hello foo", "hello world", "hello2 file"};
    String[] args = getInitialArgs(LOAD_SOLR_LINE);
    args = ObjectArrays.concat(args, "--input-file-list=file:" + inputPath);   
    runIntoSolr(args, expected);
  }
 
  private void testRecursiveInputDir() throws Exception {
    String[] expected = new String[] {"hello nadja"};
    String[] args = getInitialArgs(LOAD_SOLR_LINE);
    args = ObjectArrays.concat(args, RESOURCES_DIR + "/test-documents/subdir");   
    runIntoSolr(args, expected);
  }
 
  private void testRandomizeInputFiles() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] expected = new String[] {"hello foo", "hello world"};
    String[] args = getInitialArgs(LOAD_SOLR_LINE);
    args = ObjectArrays.concat(args, inputPath);
    isRandomizingWithDoFn = true;
    runIntoSolr(args, expected);   
  }
 
  private void testCommandThatFails() throws Exception {
    if (pipelineType == PipelineType.memory) {
      return;
    }
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] expected = new String[] {};
    String[] args = getInitialArgs(FAIL_FILE);
    args = ObjectArrays.concat(args, inputPath);   
    numExpectedFailedRecords = 1;
    PipelineResult pipelineResult = runIntoSolr(args, expected);
    Assert.assertTrue(pipelineResult.getStageResults().get(0).getCounterValue("morphline", "morphline.app.numRecords") > 0);
  }
 
  private void testCommandThatThrowsException() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] expected = new String[] {};
    String[] args = getInitialArgs(THROW_EXCEPTION_FILE);
    args = ObjectArrays.concat(args, inputPath);   
    numExpectedExceptionRecords = 1;
    if (pipelineType != PipelineType.memory) {
      PipelineResult pipelineResult = runIntoSolr(args, expected);
      Assert.assertTrue(pipelineResult.getStageResults().get(0).getCounterValue("morphline", "morphline.app.numRecords") > 0);
    } else {
      try {
        runIntoSolr(args, expected);
        Assert.fail();
      } catch (MorphlineRuntimeException e) {
        ; // expected
      }
    }
  }
 
  private void testHelp() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] args = getInitialArgs(NOP);
    args = ObjectArrays.concat(args, inputPath);   
    args = ObjectArrays.concat(args, "--help");
    CrunchIndexerTool tool = new CrunchIndexerTool();
    int res = ToolRunner.run(tmpDir.getDefaultConfiguration(), tool, args);
    Assert.assertEquals(0, res);
    Assert.assertNull(tool.pipelineResult);
  }
 
  private void testHelpWithoutArgs() throws Exception {
    String[] args = new String[0];
    CrunchIndexerTool tool = new CrunchIndexerTool();
    int res = ToolRunner.run(tmpDir.getDefaultConfiguration(), tool, args);
    Assert.assertEquals(0, res);
    Assert.assertNull(tool.pipelineResult);
  }
 
  private void testIllegalCommandLineArgument() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] args = getInitialArgs(NOP);
    args = ObjectArrays.concat(args, "--illegalParam=foo");
    args = ObjectArrays.concat(args, inputPath);   
    CrunchIndexerTool tool = new CrunchIndexerTool();
    int res = ToolRunner.run(tmpDir.getDefaultConfiguration(), tool, args);
    Assert.assertEquals(1, res);
    Assert.assertNull(tool.pipelineResult);
  }
 
  private void testIllegalCommandLineClassNameArgument() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("test-documents/hello1.txt");
    String[] args = getInitialArgs(NOP);
    args = ObjectArrays.concat(args, "--input-file-format=" + ProcessBuilder.class.getName());
    args = ObjectArrays.concat(args, inputPath);   
    CrunchIndexerTool tool = new CrunchIndexerTool();
    int res = ToolRunner.run(tmpDir.getDefaultConfiguration(), tool, args);
    Assert.assertEquals(1, res);
    Assert.assertNull(tool.pipelineResult);
  }
 
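  /**
   * Runs the pipeline and, unless this is a dry run, commits and asserts that the
   * "text" field values indexed into Solr match the expected strings.
   */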
  private PipelineResult runIntoSolr(String[] args, String[] expected) throws Exception {
    PipelineResult pipelineResult = runPipeline(args);   
    if (!isDryRun) {
      List<Map<String, Object>> records = new ArrayList<Map<String, Object>>();
      commit();       
      QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("text", SolrQuery.ORDER.asc));
      //System.out.println(rsp);
      Iterator<SolrDocument> iter = rsp.getResults().iterator();
      while (iter.hasNext()) {
        SolrDocument doc = iter.next();
        System.out.println("mydoc = "+ doc);
        records.add(ImmutableMap.of("text", doc.getFirstValue("text")));
      }
      records = sort(records);
     
      Assert.assertEquals(expected.length, records.size());
      for (int i = 0; i < expected.length; i++) {
        Assert.assertEquals(ImmutableMap.of("text", expected[i]), records.get(i));
      }
    }   
    return pipelineResult;
  }
 
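  /**
   * Invokes {@link CrunchIndexerTool} via ToolRunner, passing the morphline variables
   * (ZK_HOST, myMorphlineVar) through the Hadoop Configuration, then asserts that the
   * pipeline succeeded and that the failed/exception record counters match expectations.
   */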
  private PipelineResult runPipeline(String[] args) throws Exception {
    CrunchIndexerTool tool = new CrunchIndexerTool();
    Configuration config = tmpDir.getDefaultConfiguration();
    config.set(CrunchIndexerTool.MORPHLINE_VARIABLE_PARAM + ".ZK_HOST", zkServer.getZkAddress());
    config.set(CrunchIndexerTool.MORPHLINE_VARIABLE_PARAM + ".myMorphlineVar", "foo");
    if (isRandomizingWithDoFn) {
      config.setInt(CrunchIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD, -1);
    }
    int res = ToolRunner.run(config, tool, args);
    Assert.assertEquals(0, res);
    Assert.assertTrue(tool.pipelineResult.succeeded());     
    Assert.assertEquals(1, tool.pipelineResult.getStageResults().size());
    StageResult stageResult = tool.pipelineResult.getStageResults().get(0);
    Assert.assertEquals(numExpectedFailedRecords, stageResult.getCounterValue("morphline", "morphline.app.numFailedRecords"));
    Assert.assertEquals(numExpectedExceptionRecords, stageResult.getCounterValue("morphline", "morphline.app.numExceptionRecords"));
    return tool.pipelineResult;
  }
 
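  /** Sorts records by the first value of each map so the comparison against the expected output is deterministic. */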
  private List<Map<String, Object>> sort(List<Map<String, Object>> records) {
    Collections.sort(records, new Comparator<Map>() {

      @Override
      public int compare(Map o1, Map o2) {
        Comparable c1 = Iterables.toArray(o1.values(), Comparable.class)[0];
        Comparable c2 = Iterables.toArray(o2.values(), Comparable.class)[0];
        return c1.compareTo(c2);
      }
     
    });
    return records;   
  }   
}