Package datasoul.util

Source Code of datasoul.util.OfficeTextExtractorFrame

/*
* Copyright 2005-2010 Samuel Mello
*
*   This program is free software; you can redistribute it and/or modify
*   it under the terms of the GNU General Public License as published by
*   the Free Software Foundation; version 2 or later of the License.
*
*   This program is distributed in the hope that it will be useful,
*   but WITHOUT ANY WARRANTY; without even the implied warranty of
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*   GNU General Public License for more details.
*
*/

/*
* OfficeTextExtractorFrame.java
*
* Created on 29/07/2010, 21:37:12
*/

package datasoul.util;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import javax.swing.JFileChooser;
import javax.swing.filechooser.FileNameExtensionFilter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import datasoul.DatasoulMainForm;
import datasoul.config.ConfigObj;
import datasoul.config.DisplayControlConfig;
import datasoul.serviceitems.song.AllSongsListTable;
import datasoul.serviceitems.song.Song;
import datasoul.serviceitems.text.TextServiceItem;

/**
*
* @author samuel
*/
public class OfficeTextExtractorFrame extends javax.swing.JFrame {

    /**
   *
   */
  private static final long serialVersionUID = -5452527503567755096L;

  /** Creates new form OfficeTextExtractorFrame */
    public OfficeTextExtractorFrame() {
        initComponents();
        DatasoulMainForm.setDatasoulIcon(this);
    }

    /** This method is called from within the constructor to
     * initialize the form.
     * WARNING: Do NOT modify this code. The content of this method is
     * always regenerated by the Form Editor.
     */
    @SuppressWarnings("unchecked")
    // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
    private void initComponents() {

        jProgressBar1 = new javax.swing.JProgressBar();
        jLabel1 = new javax.swing.JLabel();
        lblName = new javax.swing.JLabel();

        setDefaultCloseOperation(javax.swing.WindowConstants.DISPOSE_ON_CLOSE);
        java.util.ResourceBundle bundle = java.util.ResourceBundle.getBundle("datasoul/internationalize"); // NOI18N
        setTitle(bundle.getString("CONVERTING OFFICE FILES")); // NOI18N
        setResizable(false);

        jLabel1.setText(bundle.getString("CONVERTING:")); // NOI18N

        lblName.setText("jLabel2");

        javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
        getContentPane().setLayout(layout);
        layout.setHorizontalGroup(
            layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
            .addGroup(layout.createSequentialGroup()
                .addContainerGap()
                .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
                    .addComponent(jProgressBar1, javax.swing.GroupLayout.DEFAULT_SIZE, 470, Short.MAX_VALUE)
                    .addGroup(layout.createSequentialGroup()
                        .addComponent(jLabel1)
                        .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
                        .addComponent(lblName)))
                .addContainerGap())
        );
        layout.setVerticalGroup(
            layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
            .addGroup(layout.createSequentialGroup()
                .addContainerGap()
                .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                    .addComponent(jLabel1)
                    .addComponent(lblName))
                .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
                .addComponent(jProgressBar1, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
                .addContainerGap())
        );

        pack();
    }// </editor-fold>//GEN-END:initComponents

    // Variables declaration - do not modify//GEN-BEGIN:variables
    private javax.swing.JLabel jLabel1;
    private javax.swing.JProgressBar jProgressBar1;
    private javax.swing.JLabel lblName;
    // End of variables declaration//GEN-END:variables

    public void findText(Node n, StringBuffer sb){
        if (n.getNodeName().equals("text:span")){
            sb.append(n.getTextContent());
            sb.append("\n");
        }else if (n.getNodeName().equals("text:p")){
            sb.append(n.getTextContent());
            sb.append("\n");
        }else{
            NodeList childs = n.getChildNodes();
            for (int i=0; i<childs.getLength(); i++){
                if (childs.item(i).getNodeType() == Node.ELEMENT_NODE)
                findText(childs.item(i), sb);
            }
        }
    }

    public void convertPresentationToText (File f, StringBuffer sb) throws IOException {

        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

        //Using factory get an instance of document builder
        DocumentBuilder db;
        try {
            db = dbf.newDocumentBuilder();
        } catch (ParserConfigurationException ex) {
            throw new IOException(ex);
        }

        ZipFile zip;
        Document dom;

        // Newer format, zip
        zip = new ZipFile(f);
        try{
            ZipEntry ze = zip.getEntry("content.xml");

            if (ze == null){
                zip.close();
                throw new IOException(java.util.ResourceBundle.getBundle("datasoul/internationalize").getString("INVALID OPENOFFICE PRESENTATION FILE"));
            }

            dom = db.parse(zip.getInputStream(ze));

            NodeList pagelist = dom.getElementsByTagName("draw:page");

            for (int i=0; i<pagelist.getLength(); i++){
                StringBuffer temp = new StringBuffer();
                findText(pagelist.item(i), temp);

                // if there is any text, add it
                String text = temp.toString().trim();
                if (text.length() > 0){
                    sb.append(text);
                    sb.append("\n");
                    if (i+1 < pagelist.getLength()){
                        sb.append(TextServiceItem.CHORUS_MARK);
                        sb.append("\n");
                    }
                }
            }
        }catch(SAXException e){
            throw new IOException(e);
        }finally{
            // Close the input stream
            zip.close();
        }
    }


    public void processOfficeFile(File officeFile) throws IOException, InterruptedException{

        // Determine the name
        String origname = officeFile.getName();
        origname = origname.substring(0, origname.lastIndexOf("."));

        // Find a valid name
        int count = 0;
        String name = origname;
        String path = ConfigObj.getActiveInstance().getStoragePathSongs();
        File songFile = new File(path + File.separator + name + ".song");
        while (songFile.exists()){
            count++;
            name = origname + count;
            songFile = new File(path + File.separator + name + ".song");
        }


        // Create song object
        Song song = new Song();
        song.setTitle(name);
        song.setFilePath(songFile.getAbsolutePath());
        song.setTemplate(DisplayControlConfig.getInstance().getDefaultTemplateSong());

        // If needed, convert the file to ODP
        File processFile = officeFile;
        if (officeFile.getName().toLowerCase().endsWith("ppt") ||
                officeFile.getName().toLowerCase().endsWith("pptx")){
            OpenofficeHelper helper = new OpenofficeHelper();
            String convFile = helper.convertToODP(officeFile);
            processFile = new File(convFile);
            if (!processFile.exists()){
                throw new IOException(java.util.ResourceBundle.getBundle("datasoul/internationalize").getString("UNABLE TO CONVERT DOCUMENT: ")+officeFile.getName());
            }
            helper.dispose();
        }

        // Extract text from Office file
        StringBuffer sb = new StringBuffer();
        convertPresentationToText(processFile, sb);
        song.setText(sb.toString());

        // If needed, cleanup converted ODP file
        if (processFile != officeFile){
            processFile.delete();
        }

        // Write the file
        try{
            Node node = song.writeObject(null);
            Document doc = node.getOwnerDocument();
            doc.appendChild( node);                        // Add Root to Document
            FileOutputStream fos = new FileOutputStream(song.getFilePath());

            Source source = new DOMSource(doc);

            // Prepare the output file
            Result result = new StreamResult(fos);

            // Write the DOM document to the file
            Transformer xformer = TransformerFactory.newInstance().newTransformer();
            xformer.setOutputProperty(OutputKeys.INDENT, "yes");
            xformer.transform(source, result);

            fos.close();

        } catch(Exception e){
            ShowDialog.showWriteFileError(song.getFileName(), e);
        }

        // Update table
        AllSongsListTable.getInstance().addItem(song);

    }

    public void showConvertDialog(){

        final JFileChooser fc = new JFileChooser();
        fc.setAcceptAllFileFilterUsed(false);
        fc.setMultiSelectionEnabled(true);
        fc.addChoosableFileFilter( new FileNameExtensionFilter(java.util.ResourceBundle.getBundle("datasoul/internationalize").getString("PRESENTATION FILES")+" (*.odp,*.ppt,*.pptx)", "odp", "ppt", "pptx") );
        if (fc.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {

            jProgressBar1.setMaximum(fc.getSelectedFiles().length);
            lblName.setText("");
            setVisible(true);

            Thread process = new Thread(){
                public void run(){
                    int i = 0;
                    for (File f : fc.getSelectedFiles()){

                        // Update graphic
                        lblName.setText(f.getName());
                        jProgressBar1.setValue(++i);

                        // Process file
                        try {
                            processOfficeFile(f);
                        } catch (IOException ex) {
                            ShowDialog.showWriteFileError(f.getName(), ex);
                        } catch (InterruptedException ex2) {
                            ShowDialog.showWriteFileError(f.getName(), ex2);
                        }
                    }
                    AllSongsListTable.getInstance().sortByName();

                    OfficeTextExtractorFrame.this.dispose();
                }
            };

            process.start();
        }
    }


}
TOP

Related Classes of datasoul.util.OfficeTextExtractorFrame

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.