Package org.apache.poi.hwpf

Examples of org.apache.poi.hwpf.HWPFDocument


    assertTrue(b.toString().contains("TestEndnote"));
  }

  public void testComments() {
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile(filename6);
    extractor = new WordExtractor(doc);

    String[] text = extractor.getCommentsText();
    StringBuffer b = new StringBuffer();
    for (int i = 0; i < text.length; i++) {
View Full Code Here


          assertEquals(p_text1_block, extractor.getText());
       }

       // Open via a HWPFDocument
       for(DirectoryNode dir : files) {
          HWPFDocument doc = new HWPFDocument(dir);
          WordExtractor extractor = new WordExtractor(doc);
          assertEquals(p_text1_block, extractor.getText());
       }
      
       npoifsFileSystem.close();
View Full Code Here

    {
        final POIFSFileSystem poifsFileSystem = HWPFDocumentCore
                .verifyAndBuildPOIFS( inputStream );
        try
        {
            return new HWPFDocument( poifsFileSystem );
        }
        catch ( OldWordFileFormatException exc )
        {
            return new HWPFOldDocument( poifsFileSystem );
        }
View Full Code Here

        {
            System.out.println( "Word 95 not supported so far" );
            return;
        }

        HWPFDocument document = (HWPFDocument) _doc;
        Bookmarks bookmarks = document.getBookmarks();
        for ( int b = 0; b < bookmarks.getBookmarksCount(); b++ )
        {
            Bookmark bookmark = bookmarks.getBookmark( b );
            System.out.println( "[" + bookmark.getStart() + "; "
                    + bookmark.getEnd() + "): " + bookmark.getName() );
View Full Code Here

    /**
     * Check that we do the positions correctly when working with pure-ascii
     */
    public void testAsciiParts() throws Exception {
        HWPFDocument doc = HWPFTestDataSamples
                .openSampleFile("ThreeColHeadFoot.doc");
        TextPieceTable tbl = doc.getTextTable();

        // All ascii, so stored in one big lump
        assertEquals(1, tbl.getTextPieces().size());
        TextPiece tp = tbl.getTextPieces().get(0);

        assertEquals(0, tp.getStart());
        assertEquals(339, tp.getEnd());
        assertEquals(339, tp.characterLength());
        assertEquals(339, tp.bytesLength());
        assertTrue(tp.getStringBuilder().toString()
                .startsWith("This is a sample word document"));

        // Save and re-load
        HWPFDocument docB = saveAndReload(doc);
        tbl = docB.getTextTable();

        assertEquals(1, tbl.getTextPieces().size());
        tp = tbl.getTextPieces().get(0);

        assertEquals(0, tp.getStart());
View Full Code Here

        {
            System.out.println( "Word 95 not supported so far" );
            return;
        }

        HWPFDocument document = (HWPFDocument) _doc;

        for ( FieldsDocumentPart part : FieldsDocumentPart.values() )
        {
            System.out.println( "=== Document part: " + part + " ===" );
            for ( Field field : document.getFields().getFields( part ) )
            {
                System.out.println( field );
            }
        }
    }
View Full Code Here

        {
            System.out.println( "Word 95 not supported so far" );
            return;
        }

        HWPFDocument document = (HWPFDocument) _doc;

        if ( document.getOfficeDrawingsHeaders() != null )
        {
            System.out.println( "=== Document part: HEADER ===" );
            for ( OfficeDrawing officeDrawing : document
                    .getOfficeDrawingsHeaders().getOfficeDrawings() )
            {
                System.out.println( officeDrawing );
            }
        }

        if ( document.getOfficeDrawingsHeaders() != null )
        {
            System.out.println( "=== Document part: MAIN ===" );
            for ( OfficeDrawing officeDrawing : document
                    .getOfficeDrawingsMain().getOfficeDrawings() )
            {
                System.out.println( officeDrawing );
            }
        }
View Full Code Here

    /**
     * Check that we do the positions correctly when working with a mix ascii,
     * unicode file
     */
    public void testUnicodeParts() throws Exception {
        HWPFDocument doc = HWPFTestDataSamples
                .openSampleFile("HeaderFooterUnicode.doc");
        TextPieceTable tbl = doc.getTextTable();

        // In three bits, split every 512 bytes
        assertEquals(3, tbl.getTextPieces().size());
        TextPiece tpA = tbl.getTextPieces().get(0);
        TextPiece tpB = tbl.getTextPieces().get(1);
        TextPiece tpC = tbl.getTextPieces().get(2);

        assertTrue(tpA.isUnicode());
        assertTrue(tpB.isUnicode());
        assertTrue(tpC.isUnicode());

        assertEquals(256, tpA.characterLength());
        assertEquals(256, tpB.characterLength());
        assertEquals(19, tpC.characterLength());

        assertEquals(512, tpA.bytesLength());
        assertEquals(512, tpB.bytesLength());
        assertEquals(38, tpC.bytesLength());

        assertEquals(0, tpA.getStart());
        assertEquals(256, tpA.getEnd());
        assertEquals(256, tpB.getStart());
        assertEquals(512, tpB.getEnd());
        assertEquals(512, tpC.getStart());
        assertEquals(531, tpC.getEnd());

        // Save and re-load
        HWPFDocument docB = saveAndReload(doc);
        tbl = docB.getTextTable();

        assertEquals(3, tbl.getTextPieces().size());
        tpA = tbl.getTextPieces().get(0);
        tpB = tbl.getTextPieces().get(1);
        tpC = tbl.getTextPieces().get(2);
View Full Code Here

    {
        if ( _doc instanceof HWPFDocument )
        {
            System.out.println( "binary PAP pages " );

            HWPFDocument doc = (HWPFDocument) _doc;

            java.lang.reflect.Field fMainStream = HWPFDocumentCore.class
                    .getDeclaredField( "_mainStream" );
            fMainStream.setAccessible( true );
            byte[] mainStream = (byte[]) fMainStream.get( _doc );

            PlexOfCps binTable = new PlexOfCps( doc.getTableStream(), doc
                    .getFileInformationBlock().getFcPlcfbtePapx(), doc
                    .getFileInformationBlock().getLcbPlcfbtePapx(), 4 );

            List<PAPX> papxs = new ArrayList<PAPX>();

            int length = binTable.length();
            for ( int x = 0; x < length; x++ )
            {
                GenericPropertyNode node = binTable.getProperty( x );

                int pageNum = LittleEndian.getInt( node.getBytes() );
                int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
                        * pageNum;

                PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
                        mainStream, doc.getDataStream(), pageOffset,
                        doc.getTextTable() );

                System.out.println( "* PFKP: " + pfkp );

                for ( PAPX papx : pfkp.getPAPXs() )
                {
View Full Code Here

    protected HWPFDocument saveAndReload(HWPFDocument doc) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        doc.write(baos);

        return new HWPFDocument(new ByteArrayInputStream(baos.toByteArray()));
    }
View Full Code Here

TOP

Related Classes of org.apache.poi.hwpf.HWPFDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.