Package org.apache.poi.hwpf.usermodel

Examples of org.apache.poi.hwpf.usermodel.Paragraph


    public void testExtractFromModel() {
      Range r = doc.getRange();
     
      String[] text = new String[r.numParagraphs()];
      for(int i=0; i < r.numParagraphs(); i++) {
        Paragraph p = r.getParagraph(i);
        text[i] = p.text();
      }
     
      assertEquals(p_text.length, text.length);
      for(int i=0; i<p_text.length; i++) {
        assertEquals(p_text[i], text[i]);
View Full Code Here


        protected static String[] getParagraphText(Range r) {
                String[] ret;
                ret = new String[r.numParagraphs()];
                for (int i = 0; i < ret.length; i++) {
                        Paragraph p = r.getParagraph(i);
                        ret[i] = p.text();

                        // Fix the line ending
                        if (ret[i].endsWith("\r")) {
                                ret[i] = ret[i] + "\n";
                        }
View Full Code Here

                    } else if ("Document".equals(name)) {
                        HWPFDocument doc = new HWPFDocument(data.getData());
                        //read the word document
                        Range r = doc.getRange();
                        for(int k = 0; k < r.numParagraphs(); k++) {
                            Paragraph p = r.getParagraph(k);
                            System.out.println(p.text());
                         }

                        //save on disk
                        FileOutputStream out = new FileOutputStream(name + "-("+(j)+").doc");
                        doc.write(out);
                        out.close();
                     else {
                        FileOutputStream out = new FileOutputStream(ole.getProgID() + "-"+(j+1)+".dat");
                        InputStream dis = data.getData();
                        byte[] chunk = new byte[2048];
                        int count;
                        while ((count = dis.read(chunk)) >= 0) {
                          out.write(chunk,0,count);
                        }
                        is.close();
                        out.close();
                    }
                }

            }
        }

        //Pictures
        for (int i = 0; i < slide.length; i++) {
            Shape[] shape = slide[i].getShapes();
            for (int j = 0; j < shape.length; j++) {
                if (shape[j] instanceof Picture) {
                    Picture p = (Picture) shape[j];
                    PictureData data = p.getPictureData();
                    String name = p.getPictureName();
                    int type = data.getType();
                    String ext;
                    switch (type) {
                        case Picture.JPEG:
                            ext = ".jpg";
View Full Code Here

    int sectionLevel = 0;
    int lenParagraph = r.numParagraphs ();
    boolean inCode = false;
    for (int x = 0; x < lenParagraph; x++)
    {
      Paragraph p = r.getParagraph (x);
      String text = p.text ();
      if (text.trim ().length () == 0)
      {
        continue;
      }
      StyleDescription paragraphStyle = styleSheet.getStyleDescription (p.
        getStyleIndex ());
      String styleName = paragraphStyle.getName();
      if (styleName.startsWith ("Heading"))
      {
        if (inCode)
        {
          closeSource();
          inCode = false;
        }

        int headerLevel = Integer.parseInt (styleName.substring (8));
        if (headerLevel > sectionLevel)
        {
          openSection ();
        }
        else
        {
          for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++)
          {
            closeSection ();
          }
          openSection ();
        }
        sectionLevel = headerLevel;
        openTitle ();
        writePlainText (text);
        closeTitle ();
      }
      else
      {
        int cruns = p.numCharacterRuns ();
        CharacterRun run = p.getCharacterRun (0);
        String fontName = run.getFontName();
        if (fontName.startsWith ("Courier"))
        {
          if (!inCode)
          {
            openSource ();
            inCode = true;
          }
          writePlainText (p.text());
        }
        else
        {
          if (inCode)
          {
            inCode = false;
            closeSource();
          }
          openParagraph();
          writePlainText(p.text());
          closeParagraph();
        }
      }
    }
    for (int x = 0; x < sectionLevel; x++)
View Full Code Here

      for (int x = 0; x < r.numSections(); x++)
      {
        Section s = r.getSection(x);
        for (int y = 0; y < s.numParagraphs(); y++)
        {
          Paragraph p = s.getParagraph(y);
          for (int z = 0; z < p.numCharacterRuns(); z++)
          {
            //character run
            CharacterRun run = p.getCharacterRun(z);
            //character run text
            String text = run.text();
            // show us the text
            System.out.print(text);
          }
View Full Code Here

        Range range = document.getRange();

        StyleSheet stylesheet = document.getStyleSheet();

        for (int i = 0; i < range.numParagraphs(); i++) {
            Paragraph paragraph = range.getParagraph(i);

            String styleName = stylesheet.getStyleDescription(paragraph.getStyleIndex()).getName();

            if (styleName.startsWith(HEADER_PREFIX)) {
                String rawLevelNum = styleName.substring(HEADER_PREFIX.length() + 1).trim();
                int levelNum = 0;

                try {
                    levelNum = Integer.parseInt(rawLevelNum);
                } catch (NumberFormatException nfe) {
                    log.debug("Could not parse heading level from: " + styleName);
                }

                String text = Paragraph.stripFields(paragraph.text());

                if ('\r' == text.charAt(text.length() - 1)) {
                    text = text.substring(0, text.length() - 1);
                }
View Full Code Here

    public void dumpParagraphsDom( boolean withText )
    {
        Range range = _doc.getOverallRange();
        for ( int p = 0; p < range.numParagraphs(); p++ )
        {
            Paragraph paragraph = range.getParagraph( p );
            System.out.println( p + ":\t" + paragraph.toString() );

            if ( withText )
                System.out.println( paragraph.text() );
        }
    }
View Full Code Here

        int currentListInfo = 0;

        final int paragraphs = range.numParagraphs();
        for ( int p = 0; p < paragraphs; p++ )
        {
            Paragraph paragraph = range.getParagraph( p );

            if ( paragraph.isInTable()
                    && paragraph.getTableLevel() != currentTableLevel )
            {
                if ( paragraph.getTableLevel() < currentTableLevel )
                    throw new IllegalStateException(
                            "Trying to process table cell with higher level ("
                                    + paragraph.getTableLevel()
                                    + ") than current table level ("
                                    + currentTableLevel
                                    + ") as inner table part" );

                Table table = range.getTable( paragraph );
                processTable( wordDocument, flow, table );

                p += table.numParagraphs();
                p--;
                continue;
            }

            if ( paragraph.text().equals( "\u000c" ) )
            {
                processPageBreak( wordDocument, flow );
            }

            if ( paragraph.getIlfo() != currentListInfo )
            {
                currentListInfo = paragraph.getIlfo();
            }

            if ( currentListInfo != 0 )
            {
                if ( listTables != null )
                {
                    final ListFormatOverride listFormatOverride = listTables
                            .getOverride( paragraph.getIlfo() );

                    String label = AbstractWordUtils.getBulletText( listTables,
                            paragraph, listFormatOverride.getLsid() );

                    processParagraph( wordDocument, flow, currentTableLevel,
                            paragraph, label );
                }
                else
                {
                    logger.log( POILogger.WARN,
                            "Paragraph #" + paragraph.getStartOffset() + "-"
                                    + paragraph.getEndOffset()
                                    + " has reference to list structure #"
                                    + currentListInfo
                                    + ", but listTables not defined in file" );

                    processParagraph( wordDocument, flow, currentTableLevel,
View Full Code Here

        PicturesSource pictures = new PicturesSource(document);
       
        // Do the main paragraph text
        Range r = document.getRange();
        for(int i=0; i<r.numParagraphs(); i++) {
           Paragraph p = r.getParagraph(i);
           i += handleParagraph(p, 0, r, document, pictures, pictureTable, xhtml);
        }

        // Do everything else
        for (String paragraph : wordExtractor.getFootnoteText()) {
View Full Code Here

             for(int cn=0; cn<row.numCells(); cn++) {
                TableCell cell = row.getCell(cn);
                xhtml.startElement("td");

                for(int pn=0; pn<cell.numParagraphs(); pn++) {
                   Paragraph cellP = cell.getParagraph(pn);
                   handleParagraph(cellP, p.getTableLevel(), cell, document, pictures, pictureTable, xhtml);
                }
                xhtml.endElement("td");
             }
             xhtml.endElement("tr");
View Full Code Here

TOP

Related Classes of org.apache.poi.hwpf.usermodel.Paragraph

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.