Package org.docx4j.convert.in

Source Code of org.docx4j.convert.in.Doc

/*
*  Copyright 2007-2008, Plutext Pty Ltd.
*  
*  This file is part of docx4j.

    docx4j is licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

*/

package org.docx4j.convert.in;

import java.io.InputStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.docx4j.UnitsOfMeasurement;
import org.docx4j.XmlUtils;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;


/**
* @author jason
*
*/
public class Doc {

  private static Logger log = LoggerFactory.getLogger(Doc.class);

  /**
   * @param in
   *            doc file
   * @return new WordprocessingMLPackage containing the results of the
   *         conversion
   * @throws Exception
   */
  public static WordprocessingMLPackage convert(InputStream in)
      throws Exception {

    HWPFDocument doc = new HWPFDocument(in);

    WordprocessingMLPackage out = WordprocessingMLPackage.createPackage();

    convert(doc, out);

    return out;
  }

  // public static boolean convert(FileInputStream in, WordprocessingMLPackage
  // out) throws Exception {
  // HWPFDocument doc = new HWPFDocument(in);
  // return convert(doc, out);
  // }

  // public static boolean convert(org.apache.commons.vfs.FileObject in,
  // WordprocessingMLPackage out) throws Exception {

  /**
   * This method is private, since the fact that conversion is (currently)
   * performed using POI's HWPF should be encapsulated.
   *
   * @param doc
   * @param wordMLPackage
   * @return success or failure
   */
  private static void convert(HWPFDocument doc,
      WordprocessingMLPackage wordMLPackage) throws Exception {

    // Convert styles
    org.apache.poi.hwpf.model.StyleSheet stylesheet = doc.getStyleSheet();
    // TODO - higher priority
    // At present, a default set of styles are defined in the output
    // document.

    // Convert lists
    org.apache.poi.hwpf.model.ListTables listTables = doc.getListTables();
    // TODO

    // Convert document properties
    org.apache.poi.hwpf.model.DocumentProperties docProps = doc
        .getDocProperties();
    // TODO

    // Convert main document part

    MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
    org.docx4j.wml.ObjectFactory factory = new org.docx4j.wml.ObjectFactory();

    Range r = doc.getRange();

    for (int x = 0; x < r.numSections(); x++) {
      Section s = r.getSection(x);

      // TODO - convert section

      for (int y = 0; y < s.numParagraphs(); y++) {
        Paragraph p = s.getParagraph(y);

        if (p.isInTable()) {
          Table t = s.getTable(p);
          int cl = numCol(t);

          log.info("Found " + t.numRows() + "x" + cl
              + " table - TODO - convert");

          handleTable(wordMLPackage, doc,t, stylesheet, documentPart, factory);

          // addTODO(factory, wmlP, "[TABLE " + + t.numRows() + "x" +
          // cl
          // + " - can't convert tables yet]");

          y += t.numParagraphs() - 1;
        }

        else {
          org.docx4j.wml.P paraToAdd = handleP(wordMLPackage, doc,
              p, stylesheet, documentPart, factory);

          documentPart.addObject(paraToAdd);
        }

      }
    }

  }

  private static int ID1 = 1;
  private static int ID2 = 2;

  private static org.docx4j.wml.P handleP(
      WordprocessingMLPackage wordMLPackage, HWPFDocument doc,
      Paragraph p, org.apache.poi.hwpf.model.StyleSheet stylesheet,
      MainDocumentPart documentPart, org.docx4j.wml.ObjectFactory factory) {

    org.docx4j.wml.P wmlP = null;

    if (p.getStyleIndex() > 0) {
      log.debug("Styled paragraph, with index: " + p.getStyleIndex());
      String styleName = stylesheet
          .getStyleDescription(p.getStyleIndex()).getName();

      log.debug(styleName);

      wmlP = documentPart.createStyledParagraphOfText(
          stripSpace(styleName), null);

    } else {
      wmlP = documentPart.createParagraphOfText(null);
    }

    // LineSpacingDescriptor lsd = p.getLineSpacing();
    // if (lsd==null || lsd.isEmpty()) {
    // // do nothing
    // } else {
    // PPr pPr = wmlP.getPPr();
    // if (pPr==null) {
    // pPr = Context.getWmlObjectFactory().createPPr();
    // wmlP.setPPr(pPr);
    // }
    // Spacing spacing =
    // Context.getWmlObjectFactory().createPPrBaseSpacing();
    // spacing.setLine(lsd._dyaLine); // not visible
    // spacing.setLineRule(STLineSpacingRule.AUTO);
    // pPr.setSpacing(spacing);
    // }

   
    for (int z = 0; z < p.numCharacterRuns(); z++) {
      // character run
      CharacterRun run = p.getCharacterRun(z);

      // No character styles defined in there??

      org.docx4j.wml.RPr rPr = null;

      if (run.isBold()) {

        // TODO - HIGH PRIORITY- handle other run properties
        // esp underline, font size
        if (rPr == null) {
          rPr = factory.createRPr();
        }

        org.docx4j.wml.BooleanDefaultTrue boldOn = factory
            .createBooleanDefaultTrue();
        boldOn.setVal(Boolean.TRUE);

        rPr.setB(boldOn);

      }

      //Process image
      if (doc instanceof HWPFDocument
          && ((HWPFDocument) doc).getPicturesTable().hasPicture(run)) {
       
        Picture picture = doc.getPicturesTable().extractPicture(run,
            true);
        Inline inline;
        try {
          BinaryPartAbstractImage imagePart = BinaryPartAbstractImage
              .createImagePart(wordMLPackage, picture.getContent());

          long cx = UnitsOfMeasurement
              .twipToEMU(Math.round((double) imagePart
                  .getImageInfo().getSize().getWidthMpt()
                  * ((double) picture
                      .getHorizontalScalingFactor() * 0.00001d))) * 2L;
          long cy = UnitsOfMeasurement
              .twipToEMU(Math.round((double) imagePart
                  .getImageInfo().getSize().getHeightMpt()
                  * ((double) picture
                      .getVerticalScalingFactor() * 0.00001d))) * 2L;

          inline = imagePart.createImageInline(null, "", ID1++,
              ID2++, cx, cy, false);

          org.docx4j.wml.R imgrun = factory.createR();
          org.docx4j.wml.Drawing drawing = factory.createDrawing();
          imgrun.getContent().add(drawing);
          drawing.getAnchorOrInline().add(inline);
          wmlP.getContent().add(imgrun);
        } catch (Exception e1) {
          // TODO Auto-generated catch block
          e1.printStackTrace();
        }

      } else {
        // character run text
        String text = run.text();

        // show us the text
        log.debug("Processing: " + text);

        String cleansed = stripNonValidXMLCharacters(text);
        // Necessary to avoid org.xml.sax.SAXParseException: An invalid
        // XML character
        // (Unicode: 0xb) was found in the element content of the
        // document.
        // when trying to open the resulting docx.
        // ie JAXB happily writes (marshals) it, but doesn't want to
        // unmarshall.

        if (!text.equals(cleansed)) {
          log.warn("Cleansed..");
        }

        org.docx4j.wml.Text t = factory.createText();
        t.setValue(cleansed);

        org.docx4j.wml.R wmlRun = factory.createR();

        if (rPr != null) {
          wmlRun.setRPr(rPr);
        }
        wmlRun.getRunContent().add(t);
        wmlP.getParagraphContent().add(wmlRun);
      }
    }

    System.out.println(XmlUtils.marshaltoString(wmlP, true, true));

    return wmlP;

  }

  private static String stripSpace(String in) {

    StringBuffer sb = new StringBuffer();

    for (int i = 0; i < in.length(); i++) {
      if (in.charAt(i) != ' ') {
        sb.append(in.charAt(i));
      }
    }

    return sb.toString();
  }

  private static void addTODO(org.docx4j.wml.ObjectFactory factory,
      org.docx4j.wml.P wmlP, String message) {

    org.docx4j.wml.Text t = factory.createText();
    t.setValue(message);

    org.docx4j.wml.R wmlRun = factory.createR();
    wmlRun.getRunContent().add(t);

    wmlP.getParagraphContent().add(wmlRun);

  }

  /**
   * This method ensures that the output String has only valid XML unicode
   * characters as specified by the XML 1.0 standard. For reference, please
   * see <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
   * standard</a>. This method will return an empty String if the input is
   * null or empty.
   *
   * See http://cse-mjmcl.cse.bris.ac.uk/blog/2007/02/14/1171465494443.html
   *
   * @param in
   *            The String whose non-valid characters we want to remove.
   * @return The in String, stripped of non-valid characters.
   */
  public static String stripNonValidXMLCharacters(String in) {
    StringBuffer out = new StringBuffer(); // Used to hold the output.
    char current; // Used to reference the current character.

    if (in == null || ("".equals(in)))
      return ""; // vacancy test.
    for (int i = 0; i < in.length(); i++) {
      current = in.charAt(i); // NOTE: No IndexOutOfBoundsException caught
                  // here; it should not happen.
      if ((current == 0x9) || (current == 0xA) || (current == 0xD)
          || ((current >= 0x20) && (current <= 0xD7FF))
          || ((current >= 0xE000) && (current <= 0xFFFD))
          || ((current >= 0x10000) && (current <= 0x10FFFF))) {
        out.append(current);
      } else {
        out.append("[#?]");
      }
    }
    return out.toString();
  }

  private static int numCol(Table t) {
    int col = 0;
    for (int i = 0; i < t.numRows(); i++) {
      if (t.getRow(i).numCells() > col)
        col = t.getRow(i).numCells();
    }

    return col;
  }

  private static void handleTable(WordprocessingMLPackage wordMLPackage, HWPFDocument doc,Table t,
      org.apache.poi.hwpf.model.StyleSheet stylesheet,
      MainDocumentPart documentPart, org.docx4j.wml.ObjectFactory factory) {

    org.docx4j.wml.Tbl tbl = factory.createTbl();
    documentPart.addObject(tbl);

    org.docx4j.wml.TblPr tblPr = factory.createTblPr();
    tbl.setTblPr(tblPr);
    // TODO - set tblPr values

    org.docx4j.wml.TblGrid tblGrid = factory.createTblGrid();
    tbl.setTblGrid(tblGrid);
    // TODO - set tblGrid values

    for (int i = 0; i < t.numRows(); i++) {
      TableRow tr = t.getRow(i);

      org.docx4j.wml.Tr trOut = factory.createTr();
      tbl.getEGContentRowContent().add(trOut);

      for (int j = 0; j < tr.numCells(); j++) {
        TableCell tc = tr.getCell(j);

        org.docx4j.wml.Tc tcOut = factory.createTc();
        trOut.getEGContentCellContent().add(tcOut);

        // System.out.println("CELL[" + i + "][" + j + "]=" +
        // tc.text());

        for (int y = 0; y < tc.numParagraphs(); y++) {
          Paragraph p = tc.getParagraph(y);

          // Nested tables?
          // if (p.isInTable()) ???

          // TOTO fill up parameters
          org.docx4j.wml.P paraToAdd = handleP(wordMLPackage, doc, p,
              stylesheet, documentPart, factory);

          tcOut.getEGBlockLevelElts().add(paraToAdd);

          log.debug("Added p to tc");
        }

      }
    }
  }

  public static void main(String[] args) throws Exception {

    String localPath = System.getProperty("user.dir") + "/LineSpacing.doc";

    WordprocessingMLPackage out = convert(new java.io.FileInputStream(localPath));

    // String outputfilepath = "/home/dev/tmp/test-out.docx";
    //
    // SaveToZipFile saver = new SaveToZipFile(out);
    // saver.save(outputfilepath);
    // log.info("Done - saved docx as " + outputfilepath);

  }
}
TOP

Related Classes of org.docx4j.convert.in.Doc

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.