Examples of org.elasticsearch.hadoop.cfg.Settings

org.elasticsearch.hadoop.cfg.Settings
Holder class containing the various configuration bits used by ElasticSearch Hadoop. Handles internally the fall back to defaults when looking for undefined, optional settings.


    @Override
    public TupleEntryIterator openForRead(FlowProcess<Properties> flowProcess, ScrollQuery input) throws IOException {
        if (input == null) {
            // get original copy
            Settings settings = CascadingUtils.addDefaultsToSettings(CascadingUtils.extractOriginalProperties(flowProcess.getConfigCopy()), tapProperties, log);


            // will be closed by the query is finished
            RestRepository client = new RestRepository(settings);
            Field mapping = client.getMapping();
            Collection<String> fields = CascadingUtils.fieldToAlias(settings, getSourceFields());


            // validate if possible
            FieldPresenceValidation validation = settings.getFieldExistanceValidation();
            if (validation.isRequired()) {
                MappingUtils.validateMapping(fields, mapping, validation, log);
            }


            input = QueryBuilder.query(settings).fields(StringUtils.concatenateAndUriEncode(fields,  ",")).build(client,
                          new ScrollReader(new JdkValueReader(), mapping, settings.getReadMetadata(), settings.getReadMetadataField()));
        }
        return new TupleEntrySchemeIterator<Properties, ScrollQuery>(flowProcess, getScheme(), input, getIdentifier());
    }

View Full Code Here


        Object[] context = new Object[3];
        context[0] = sourceCall.getInput().createKey();
        context[1] = sourceCall.getInput().createValue();
        // as the tuple _might_ vary (some objects might be missing), we use a map rather than a collection
        Settings settings = loadSettings(flowProcess.getConfigCopy(), true);
        context[2] = CascadingUtils.alias(settings);
        sourceCall.setContext(context);
        IS_ES_10 = SettingsUtils.isEs10(settings);
    }

View Full Code Here

    public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
        super.sinkPrepare(flowProcess, sinkCall);


        Object[] context = new Object[1];
        // the tuple is fixed, so we can just use a collection/index
        Settings settings = loadSettings(flowProcess.getConfigCopy(), false);
        context[0] = CascadingUtils.fieldToAlias(settings, getSinkFields());
        sinkCall.setContext(context);
        IS_ES_10 = SettingsUtils.isEs10(settings);
    }

View Full Code Here

    }


    @Override
    public void sourceConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
        conf.setInputFormat(EsInputFormat.class);
        Settings set = loadSettings(conf, true);


        Collection<String> fields = CascadingUtils.fieldToAlias(set, getSourceFields());
        // load only the necessary fields
        conf.set(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS, StringUtils.concatenateAndUriEncode(fields, ","));

View Full Code Here

    @Override
    public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {


        conf.setOutputFormat(EsOutputFormat.class);
        // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat
        Settings set = loadSettings(conf, false);


        Log log = LogFactory.getLog(EsTap.class);
        InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log);
        InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log);
        InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log);
        InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log);


        // NB: we need to set this property even though it is not being used - and since a URI causes problems, use only the resource/file
        //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource());
        HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite());
        HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName());


        if (log.isTraceEnabled()) {
            log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf));
        }

View Full Code Here


    // Note: data written to the JobConf will be silently discarded
    @Override
    public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {


        Settings settings = HadoopSettingsManager.loadFrom(job);
        Collection<PartitionDefinition> partitions = RestService.findPartitions(settings, log);
        ShardInputSplit[] splits = new ShardInputSplit[partitions.size()];


        int index = 0;
        for (PartitionDefinition part : partitions) {

View Full Code Here

    @Override
    public void sourcePrepare(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
        super.sourcePrepare(flowProcess, sourceCall);


        Object[] context = new Object[1];
        Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
        context[0] = CascadingUtils.alias(settings);
        sourceCall.setContext(context);
        IS_ES_10 = SettingsUtils.isEs10(settings);
    }

View Full Code Here

    @Override
    public void sinkPrepare(FlowProcess<Properties> flowProcess, SinkCall<Object[], Object> sinkCall) throws IOException {
        super.sinkPrepare(flowProcess, sinkCall);


        Object[] context = new Object[1];
        Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
        context[0] = CascadingUtils.fieldToAlias(settings, getSinkFields());
        sinkCall.setContext(context);
    }

View Full Code Here

        InitializationUtils.checkIndexExistence(client);
    }


    private void initClient(Properties props, boolean read) {
        if (client == null) {
            Settings settings = CascadingUtils.addDefaultsToSettings(props, this.props, LogFactory.getLog(EsTap.class));
            CascadingUtils.init(settings, host, port, resource, query, read);
            client = new RestRepository(settings);
        }
    }

View Full Code Here

            init((ShardInputSplit) split, compatContext.getConfiguration(), compatContext);
        }


        void init(ShardInputSplit esSplit, Configuration cfg, Progressable progressable) {
            // get a copy to override the host/port
            Settings settings = HadoopSettingsManager.loadFrom(cfg).copy().load(esSplit.settings);


            if (log.isTraceEnabled()) {
                log.trace(String.format("Init shard reader from cfg %s", HadoopCfgUtils.asProperties(cfg)));
                log.trace(String.format("Init shard reader w/ settings %s", esSplit.settings));
            }


            this.esSplit = esSplit;


            // initialize mapping/ scroll reader
            InitializationUtils.setValueReaderIfNotSet(settings, WritableValueReader.class, log);


            PartitionDefinition part = new PartitionDefinition(esSplit.nodeIp, esSplit.httpPort, esSplit.nodeName, esSplit.nodeId, esSplit.shardId, settings.save(), esSplit.mapping);
            PartitionReader partitionReader = RestService.createReader(settings, part, log);


            this.scrollReader = partitionReader.scrollReader;
            this.client = partitionReader.client;
            this.queryBuilder = partitionReader.queryBuilder;


            // heart-beat
            beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);


            this.progressable = progressable;


            if (log.isDebugEnabled()) {
                log.debug(String.format("Initializing RecordReader for [%s]", esSplit));

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of org.elasticsearch.hadoop.cfg.Settings

org.elasticsearch.hadoop.cascading.CascadingUtils

org.elasticsearch.hadoop.cascading.EsHadoopScheme

org.elasticsearch.hadoop.cascading.EsLocalScheme

org.elasticsearch.hadoop.cascading.EsLocalTap

org.elasticsearch.hadoop.hive.EsHiveInputFormat

org.elasticsearch.hadoop.hive.EsHiveOutputFormat

org.elasticsearch.hadoop.hive.EsSerDe

org.elasticsearch.hadoop.hive.EsStorageHandler

org.elasticsearch.hadoop.integration.rest.AbstractRestQueryTest

org.elasticsearch.hadoop.mr.EsInputFormat

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.