Package org.apache.hadoop.hive.ql.io

Examples of org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat


                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                  return;
                }
                HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf,
                    partDesc.getOverlayedProperties().getProperty(
                    hive_metastoreConstants.META_TABLE_STORAGE));
                if (handler instanceof InputEstimator) {
                  long total = 0;
                  TableDesc tableDesc = partDesc.getTableDesc();
                  InputEstimator estimator = (InputEstimator) handler;
                  for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                    JobConf jobConf = new JobConf(myJobConf);
                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                    Utilities.setColumnNameList(jobConf, scanOp, true);
                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                    total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength();
                  }
                  resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                }
                // todo: should nullify summary for non-native tables,
                // not to be selected as a mapjoin target
                FileSystem fs = p.getFileSystem(myConf);
                resultMap.put(pathStr, fs.getContentSummary(p));
              } catch (Exception e) {
                // We safely ignore this exception for summary data.
                // We don't update the cache to protect it from polluting other
                // usages. The worst case is that IOException will always be
                // retried for another getInputSummary(), which is fine as
                // IOException is not considered as a common case.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };

          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }

        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted when waiting threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
          }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }

        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
View Full Code Here


                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                  return;
                }
                HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf,
                    partDesc.getOverlayedProperties().getProperty(
                    hive_metastoreConstants.META_TABLE_STORAGE));
                if (handler instanceof InputEstimator) {
                  long total = 0;
                  TableDesc tableDesc = partDesc.getTableDesc();
                  InputEstimator estimator = (InputEstimator) handler;
                  for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                    JobConf jobConf = new JobConf(myJobConf);
                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                    Utilities.setColumnNameList(jobConf, scanOp, true);
                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                    total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength();
                  }
                  resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                }
                // todo: should nullify summary for non-native tables,
                // not to be selected as a mapjoin target
                FileSystem fs = p.getFileSystem(myConf);
                resultMap.put(pathStr, fs.getContentSummary(p));
              } catch (Exception e) {
                // We safely ignore this exception for summary data.
                // We don't update the cache to protect it from polluting other
                // usages. The worst case is that IOException will always be
                // retried for another getInputSummary(), which is fine as
                // IOException is not considered as a common case.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };

          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }

        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted when waiting threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
          }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }

        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
View Full Code Here

                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                  return;
                }
                HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf,
                    SerDeUtils.createOverlayedProperties(
                        partDesc.getTableDesc().getProperties(),
                        partDesc.getProperties())
                        .getProperty(hive_metastoreConstants.META_TABLE_STORAGE));
                if (handler instanceof InputEstimator) {
                  long total = 0;
                  TableDesc tableDesc = partDesc.getTableDesc();
                  InputEstimator estimator = (InputEstimator) handler;
                  for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                    JobConf jobConf = new JobConf(myJobConf);
                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                    Utilities.setColumnNameList(jobConf, scanOp, true);
                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                    total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength();
                  }
                  resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                }
                // todo: should nullify summary for non-native tables,
                // not to be selected as a mapjoin target
                FileSystem fs = p.getFileSystem(myConf);
                resultMap.put(pathStr, fs.getContentSummary(p));
              } catch (Exception e) {
                // We safely ignore this exception for summary data.
                // We don't update the cache to protect it from polluting other
                // usages. The worst case is that IOException will always be
                // retried for another getInputSummary(), which is fine as
                // IOException is not considered as a common case.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };

          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }

        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted when waiting threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
          }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }

        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
View Full Code Here

                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                  return;
                }
                HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf,
                    partDesc.getOverlayedProperties().getProperty(
                    hive_metastoreConstants.META_TABLE_STORAGE));
                if (handler instanceof InputEstimator) {
                  long total = 0;
                  TableDesc tableDesc = partDesc.getTableDesc();
                  InputEstimator estimator = (InputEstimator) handler;
                  for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                    JobConf jobConf = new JobConf(myJobConf);
                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                    Utilities.setColumnNameList(jobConf, scanOp, true);
                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                    total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength();
                  }
                  resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                }
                // todo: should nullify summary for non-native tables,
                // not to be selected as a mapjoin target
                FileSystem fs = p.getFileSystem(myConf);
                resultMap.put(pathStr, fs.getContentSummary(p));
              } catch (Exception e) {
                // We safely ignore this exception for summary data.
                // We don't update the cache to protect it from polluting other
                // usages. The worst case is that IOException will always be
                // retried for another getInputSummary(), which is fine as
                // IOException is not considered as a common case.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };

          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }

        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted when waiting threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
          }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }

        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
View Full Code Here

                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                  return;
                }
                HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf,
                    partDesc.getOverlayedProperties().getProperty(
                    hive_metastoreConstants.META_TABLE_STORAGE));
                if (handler instanceof InputEstimator) {
                  long total = 0;
                  TableDesc tableDesc = partDesc.getTableDesc();
                  InputEstimator estimator = (InputEstimator) handler;
                  for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                    JobConf jobConf = new JobConf(myJobConf);
                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                    Utilities.setColumnNameList(jobConf, scanOp, true);
                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                    total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength();
                  }
                  resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                }
                // todo: should nullify summary for non-native tables,
                // not to be selected as a mapjoin target
                FileSystem fs = p.getFileSystem(myConf);
                resultMap.put(pathStr, fs.getContentSummary(p));
              } catch (Exception e) {
                // We safely ignore this exception for summary data.
                // We don't update the cache to protect it from polluting other
                // usages. The worst case is that IOException will always be
                // retried for another getInputSummary(), which is fine as
                // IOException is not considered as a common case.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };

          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }

        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted when waiting threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
          }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }

        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.