Package com.asakusafw.runtime.directio.hadoop

Examples of com.asakusafw.runtime.directio.hadoop.BlockMap
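BlockMap models the block layout of a single file so that a byte range (an ORC stripe, a Parquet row group, or a plain split) can be resolved into a DirectInputFragment, typically so that splits can be placed near the data they read. Every example below uses the same small surface of the class: BlockMap.create(path, length, BlockMap.computeBlocks(fs, status), false) followed by blockMap.get(begin, end). The following is a minimal sketch of that pattern, assembled only from the call sites shown on this page; the import location of DirectInputFragment and the exact meaning of the boolean flag are assumptions, not facts taken from the snippets.

    import java.io.IOException;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    import com.asakusafw.runtime.directio.DirectInputFragment;
    import com.asakusafw.runtime.directio.hadoop.BlockMap;

    public class BlockMapUsageSketch {

        // Resolve a byte range of a file into a DirectInputFragment,
        // following the pattern used by the ORC/Parquet examples below.
        static DirectInputFragment fragmentFor(
                FileSystem fs, Path file, long begin, long end) throws IOException {
            FileStatus status = fs.getFileStatus(file);
            BlockMap blockMap = BlockMap.create(
                    status.getPath().toString(),
                    status.getLen(),
                    BlockMap.computeBlocks(fs, status),
                    false); // the snippets on this page always pass 'false' here
            return blockMap.get(begin, end);
        }
    }

The ORC and Parquet examples below apply exactly this pattern, once per stripe and once per row group.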


Example: mapping ORC stripes onto DirectInputFragment instances. The snippet opens in the middle of a logging call, so its first lines are trailing arguments of that call.

                        context.getDataType().getSimpleName(),
                        status.getPath(),
                        orc.getNumberOfRows(),
                        orc.getRawDataSize()));
            }
            BlockMap blockMap = BlockMap.create(
                    status.getPath().toString(),
                    status.getLen(),
                    BlockMap.computeBlocks(context.getFileSystem(), status),
                    false);
            // TODO configurable split
            // resolve each ORC stripe's byte range into an input fragment
            for (StripeInformation stripe : orc.getStripes()) {
                long begin = stripe.getOffset();
                long end = begin + stripe.getLength();
                DirectInputFragment fragment = blockMap.get(begin, end);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Detect ORCFile stripe: path={0}, rows={1}, range={2}+{3}, allocation={4}",
                            fragment.getPath(),
                            stripe.getNumberOfRows(),
… (snippet truncated)


Example: mapping Parquet row groups (blocks) onto DirectInputFragment instances. The snippet opens inside a logging guard whose enclosing if statement is not shown.

                LOG.info(MessageFormat.format(
                        "Analyzing Parquet file metadata ({0}): {1}",
                        context.getDataType().getSimpleName(),
                        status.getPath()));
            }
            BlockMap blockMap = BlockMap.create(
                    status.getPath().toString(),
                    status.getLen(),
                    BlockMap.computeBlocks(context.getFileSystem(), status),
                    false);
            for (BlockMetaData block : footer.getParquetMetadata().getBlocks()) {
                if (block.getColumns().isEmpty()) {
                    continue;
                }
                // byte range covered by this row group, across all of its column chunks
                long begin = Long.MAX_VALUE;
                long end = -1L;
                for (ColumnChunkMetaData column : block.getColumns()) {
                    long offset = column.getFirstDataPageOffset();
                    long size = column.getTotalSize();
                    begin = Math.min(begin, offset);
                    end = Math.max(end, offset + size);
                }
                assert begin >= 0;
                assert end >= 0;
                DirectInputFragment fragment = blockMap.get(begin, end);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Detect Parquet file block: path={0}, rows={1}, range={2}+{3}, allocation={4}",
                            status.getPath(),
                            block.getRowCount(),
… (snippet truncated)

Example: expanding a glob pattern and computing splits for every matching file. The snippet sits inside a loop over input path patterns, which is not shown.

            FileStatus[] statuses = fs.globStatus(path);
            if (statuses == null) {
                continue;
            }
            for (FileStatus status : statuses) {
                BlockMap blockMap = BlockMap.create(
                        status.getPath().toString(),
                        status.getLen(),
                        BlockMap.computeBlocks(fs, status),
                        false);
                results.addAll(computeSplits(status.getPath(), blockMap, splitSize));
… (snippet truncated)
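This example computes the block layout once per globbed file and then derives splits from it. For orientation, the data that BlockMap.computeBlocks consolidates is presumably Hadoop's own block location information; the sketch below is an assumption about that raw input, not the implementation of computeBlocks, and simply prints each block's offset, length, and preferred hosts using plain Hadoop APIs.

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class BlockLayoutDump {

        // Print the raw block layout of a file: one line per block with its
        // offset, length, and the hosts that store a replica.
        public static void main(String[] args) throws IOException {
            Path path = new Path(args[0]);
            FileSystem fs = path.getFileSystem(new Configuration());
            FileStatus status = fs.getFileStatus(path);
            for (BlockLocation block : fs.getFileBlockLocations(status, 0, status.getLen())) {
                System.out.printf("offset=%,d length=%,d hosts=%s%n",
                        block.getOffset(), block.getLength(), Arrays.toString(block.getHosts()));
            }
        }
    }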

    /**
     * simple case for computing splits.
     */
    @Test
    public void splits_simple() {
        BlockMap blocks = blocks("testing", m(10));
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(64));

        assertThat(splits, hasSize(1));
        FileSplit s0 = find(splits, 0);
        assertThat(s0.getLength(), is(m(10)));
… (snippet truncated)
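The tests on this page rely on helpers that the snippets do not show: blocks(path, sizes...) presumably builds a BlockMap over a synthetic file whose block lengths are the given sizes, m(n) converts megabytes to bytes, and find(splits, n) selects one split from the result. Below is a hypothetical reconstruction of the two simpler helpers, inferred only from their call sites; the FileSplit import location is also an assumption, and the real test class may define these differently.

    import java.util.List;

    import org.apache.hadoop.mapreduce.lib.input.FileSplit;

    final class SplitTestHelpers {

        private SplitTestHelpers() { }

        // m(n): n megabytes expressed in bytes
        static long m(int megabytes) {
            return (long) megabytes * 1024 * 1024;
        }

        // find(splits, start): the split that begins at the given byte offset
        static FileSplit find(List<FileSplit> splits, long start) {
            return splits.stream()
                    .filter(s -> s.getStart() == start)
                    .findFirst()
                    .orElseThrow(() -> new AssertionError("no split starting at " + start));
        }
    }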

    /**
     * computing splits with already aligned blocks.
     */
    @Test
    public void splits_aligned() {
        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE, TemporaryFile.BLOCK_SIZE);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(64));

        assertThat(splits, hasSize(2));

        FileSplit s0 = find(splits, 0);
… (snippet truncated)

    /**
     * computing splits with unaligned blocks.
     */
    @Test
    public void splits_unaligned() {
        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE - 10, TemporaryFile.BLOCK_SIZE);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(128));

        assertThat(splits, hasSize(2));

        FileSplit s0 = find(splits, 0);
… (snippet truncated)

    /**
     * computing splits with aligned blocks plus a small remainder.
     */
    @Test
    public void splits_aligned_rest() {
        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE, TemporaryFile.BLOCK_SIZE + 10);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(64));

        assertThat(splits, hasSize(2));

        FileSplit s0 = find(splits, 0);
… (snippet truncated)

    /**
     * computing splits with forced splitting (split size just over one block).
     */
    @Test
    public void splits_force() {
        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE * 10);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(
                new Path("testing"), blocks, TemporaryFile.BLOCK_SIZE + 1);

        assertThat(splits, hasSize(5));

… (snippet truncated)

    /**
     * computing splits with splitting suppressed (split size of 0).
     */
    @Test
    public void splits_suppress() {
        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE * 10);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, 0);

        assertThat(splits, hasSize(1));
        FileSplit s0 = find(splits, 0);
        assertThat(s0.getLength(), is((long) TemporaryFile.BLOCK_SIZE * 10));
 
… (snippet truncated)


