// Source Code of org.apache.roller.weblogger.ui.struts2.editor.WeblogExport

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  The ASF licenses this file to You
 * under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.  For additional information regarding
 * copyright in this work, please see the NOTICE file in the top level
 * directory of this distribution.
 */
package org.apache.roller.weblogger.ui.struts2.editor;


import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.LinkedHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
// import java.net.URL;
// import java.net.MalformedURLException;


import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;


// import org.apache.abdera.Abdera;
// import org.apache.abdera.ext.thread.ThreadHelper;
// import org.apache.abdera.model.Entry;
// import org.apache.abdera.model.Feed;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.roller.RollerException;
import org.apache.roller.weblogger.WebloggerException;
import org.apache.roller.weblogger.business.MediaFileManager;
import org.apache.roller.weblogger.business.URLStrategy;
import org.apache.roller.weblogger.business.WeblogEntryManager;
import org.apache.roller.weblogger.business.WebloggerFactory;
import org.apache.roller.weblogger.config.WebloggerConfig;
import org.apache.roller.weblogger.pojos.MediaFile;
import org.apache.roller.weblogger.pojos.MediaFileDirectory;
import org.apache.roller.weblogger.pojos.WeblogEntry;
import org.apache.roller.weblogger.pojos.WeblogPermission;
import org.apache.roller.weblogger.pojos.wrapper.WeblogEntryCommentWrapper;
// import org.apache.roller.weblogger.pojos.wrapper.WeblogEntryTagWrapper;
import org.apache.roller.weblogger.pojos.wrapper.WeblogEntryWrapper;
import org.apache.roller.weblogger.ui.struts2.util.UIAction;
import org.apache.struts2.interceptor.ServletResponseAware;


/**
 * Provides export functionality for the author of a weblog.
 */
public final class WeblogExport extends UIAction
        implements ServletResponseAware {


    // Static Variables --------------------------------------------------------
    private static final Log log = LogFactory.getLog(WeblogExport.class);


    // Matches a non-whitespace character immediately followed by "/>", i.e. a
    // self-closing tag with no space before the slash (group 2 is that
    // character). processEntry uses it to insert the missing space.
    private static final Pattern SC_TAG_PATTERN =
            Pattern.compile("(([\\S])(/>))");


    // Matches a complete, attribute-less "<pre>" element, as short as possible,
    // so processEntry can leave its newlines untouched.
    private static final Pattern PRE_TAG_PATTERN =
            Pattern.compile("<pre>[\\s\\S]+?</pre>");


    // Matches one line break; "\r\n" is tried first so it counts as one break.
    private static final Pattern NEWLINE_PATTERN =
            Pattern.compile("\\r\\n|\\r|\\n");


    // TODO: Perhaps add enum to manage the different MT constants
    // Separators written after each section and after each whole entry of the
    // mtimport output.
    private static final String MT_SECTION_DIVIDER = "-----\n";
    private static final String MT_ENTRY_DIVIDER = "--------\n";


    // Date layouts for the mtimport DATE lines and for Atom ids (the latter is
    // only referenced from the commented-out Atom formatter).
    // NOTE(review): SimpleDateFormat is not thread-safe and these instances are
    // static, hence shared by every request; formatting on them concurrently
    // needs external synchronization or a private copy.
    private static final SimpleDateFormat MT_DATE_FORMAT =
            new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
    private static final SimpleDateFormat ATOM_ID_DATE_FORMAT =
            new SimpleDateFormat("yyyy-MM-dd");


    // Identifiers of the export formats. MT_PLUS_FORMAT starts with MT_FORMAT,
    // which processEntry relies on via startsWith().
    private static final String MT_FORMAT = "mtimport";
    private static final String MT_PLUS_FORMAT = "mtimportplus";
    private static final String ATOM_FORMAT = "atom";


    // private static final Abdera abdera = new Abdera();


    // Instance Variables ------------------------------------------------------
    // Pattern locating links to this weblog's resource files; compiled by
    // exportEntries and consumed by processEntry.
    private Pattern baseUrlPattern;


    // Response the export methods write to; supplied via setServletResponse.
    private HttpServletResponse response;
    // Optional replacement prefix for resource links; null or empty disables
    // the replacement in processEntry.
    private String baseUrl;
    // Selected export format; the constructor defaults it to MT_FORMAT.
    private String format;


    // Constructors ------------------------------------------------------------
    public WeblogExport() {
        this.actionName = "weblogExport";
        this.desiredMenu = "editor";
        this.pageTitle = "weblogExport.title";


        // Set the default format
        this.format = MT_FORMAT;
    }


    // Public Methods ----------------------------------------------------------
    /**
     * Keeps a reference to the current HTTP servlet response object.
     *
     * @param httpServletResponse The HTTP servlet response.
     */
    public void setServletResponse(HttpServletResponse httpServletResponse) {
        this.response = httpServletResponse;
    }


    /**
     * Sets the base URL to be used when replacing references to resource files.
     *
     * @param baseUrl The desired base URL.
     */
    public void setBaseUrl(String baseUrl) {
        this.baseUrl = baseUrl;
    }


    /**
     * Get the current format.
     *
     * @return The current format.
     */
    public String getFormat() {
        return format;
    }


    /**
     * Sets the desired export format.
     *
     * @param format The desired export format.
     */
    public void setFormat(String format) {
        this.format = format;
    }


    /**
     * Gets the list of supported export formats.
     *
     * @return A list of string made of of supported export formats.
     */
    public Map<String, String> getFormatOptions() {
        Map<String, String> options;
        options = new LinkedHashMap<String, String>();


        options.put(MT_FORMAT, getText("weblogExport.format.mtimport"));
        options.put(MT_PLUS_FORMAT, getText("weblogExport.format.mtimportplus"));
        // options.put(ATOM_FORMAT, getText("weblogExport.format.atom"));


        return options;
    }


    /**
     * Requires the weblog ADMIN permission before allowing export
     * functionality.
     *
     * @return A single-element list containing
     *         {@code WeblogPermission.ADMIN}.
     */
    @Override
    public List<String> requiredWeblogPermissionActions() {
        return Collections.singletonList(WeblogPermission.ADMIN);
    }


    /**
     * Simply triggers the display of the export options UI.
     */
    @Override
    public String execute() throws WebloggerException {


        if (!WebloggerConfig.getBooleanProperty("weblog.export.enabled")) {
            throw new WebloggerException("ERROR: export is disabled");
        }


        // We need to gather some more info before we can attempt an export
        return INPUT;
    }


    /**
     * Returns an output stream to the client containing a text file of all
     * entries and comments. This will include draft entries as well.
     *
     * Currently the only file format supported is mtimport.
     */
    public void exportEntries() throws WebloggerException {


        if (!WebloggerConfig.getBooleanProperty("weblog.export.enabled")) {
            throw new WebloggerException("ERROR: export is disabled");
        }


        try {
            WeblogEntryManager wmgr =
                    WebloggerFactory.getWeblogger().getWeblogEntryManager();


            URLStrategy urlStrategy;
            urlStrategy = WebloggerFactory.getWeblogger().getUrlStrategy();


            List rawEntries;
            rawEntries = wmgr.getWeblogEntries(getActionWeblog(), null, null,
                    null, null, null, null, null, null, null, null, 0, -1);


            List<WeblogEntryWrapper> entries;
            entries = new ArrayList<WeblogEntryWrapper>();


            for (Object entry : rawEntries) {
                entries.add(WeblogEntryWrapper.wrap((WeblogEntry) entry,
                        urlStrategy));
            }


            // Compile the resource URL pattern using the weblog handle
            baseUrlPattern = Pattern.compile(
                    "(<[\\s\\S]+?=[\"'])(http[s]*?://[\\S]+/" +
                            getActionWeblog().getHandle() + "/resource/|/" +
                            getActionWeblog().getHandle() + "/resource/)");


            // Produce the selected output format
            String output;
            output = formatAsMoveableType(entries);
            /*
            if (format.equals(ATOM_FORMAT)) {
                output = formatAsAtom(entries);
            }
            else {
                output = formatAsMoveableType(entries);
            }
            */


            if (!response.isCommitted()) {
                response.reset();


                SimpleDateFormat dateFormat;
                dateFormat = new SimpleDateFormat("MMddyyyy'T'HHmmss");


                StringBuilder fileName;
                fileName = new StringBuilder();
                fileName.append(getActionWeblog().getHandle());
                fileName.append("-entries-");
                fileName.append(dateFormat.format(System.currentTimeMillis()));


                if (format.equals(ATOM_FORMAT)) {
                    fileName.append(".xml");
                }
                else {
                    fileName.append(".txt");
                }


                // Force the browser to download the export file
                response.setContentType(
                        "application/octet-stream; charset=utf-8");
                response.setContentLength(
                        output.getBytes("UTF-8").length);
                response.setHeader(
                        "Content-Disposition", "attachment; filename=\"" +
                                fileName.toString() + "\"");


                ServletOutputStream outputStream;
                outputStream = response.getOutputStream();
                outputStream.print(output);
                outputStream.flush();
                outputStream.close();
            }
        } catch (WebloggerException e) {
            log.error("Error looking up entries: ", e);
        } catch (IOException e) {
            log.error("Error getting output stream: ", e);
        }
    }


    /**
     * Returns an output stream to the client of all uploaded resource files as
     * a ZIP archive.
     */
    public void exportResources() {
        SimpleDateFormat dateFormat;
        dateFormat = new SimpleDateFormat("MMddyyyy'T'HHmmss");


        StringBuilder fileName;
        fileName = new StringBuilder();
        fileName.append(getActionWeblog().getHandle());
        fileName.append("-resources-");
        fileName.append(dateFormat.format(System.currentTimeMillis()));
        fileName.append(".zip");


        if (!response.isCommitted()) {
            response.reset();


            response.setContentType("application/zip");
            response.setHeader("Content-Disposition",
                    "attachment; filename=\"" + fileName.toString() + "\"");


            try {
                MediaFileManager fmgr =
                        WebloggerFactory.getWeblogger().getMediaFileManager();


                List<MediaFile> resources = new ArrayList<MediaFile>();


                // Load the contents of any sub-directories
                for (MediaFileDirectory mdir : fmgr.getMediaFileDirectories(getActionWeblog())) {
                    loadResources(resources, mdir);
                }


                // Load the files at the root of the specific upload directory
                loadResources(resources, null);


                // Create a buffer for reading the files
                byte[] buffer;
                buffer = new byte[1024];


                ServletOutputStream servletOutput;
                servletOutput = response.getOutputStream();


                ZipOutputStream zipOutput;
                zipOutput = new ZipOutputStream(servletOutput);


                for (MediaFile resource : resources) {
                    InputStream input;
                    input = resource.getInputStream();


                    // Add a new ZIP entry to output stream
                    zipOutput.putNextEntry(new ZipEntry(resource.getPath()));


                    int length;
                    while ((length = input.read(buffer)) > 0) {
                        zipOutput.write(buffer, 0, length);
                    }


                    // Cleanup the entry
                    input.close();
                    zipOutput.closeEntry();
                }


                // Cleanup the output stream
                zipOutput.flush();
                zipOutput.close();
            } catch (Exception e) {
                log.error("Error exporting resources: " + e.getMessage());
            }
        }
    }


    // Private Methods ---------------------------------------------------------
    /**
     * Formats all entries and comments, including draft entries, in the
     * Atom Syndication Format.
     *
     * @param entries A collection of entries to format.
     * @return A String of all entries and comments formatted as Atom
     */
    /*
    private String formatAsAtom(List<WeblogEntryWrapper> entries) {
        Weblog weblog;
        weblog = getActionWeblog();


        String hostname;
        URL absoluteUrl;
        try {
            absoluteUrl = new URL(weblog.getAbsoluteURL());
            hostname = absoluteUrl.getHost();
        }
        catch (MalformedURLException e) {
            log.error("Unable to parse the absolute URL: " + e.getMessage());
            hostname = "unknown";
        }


        // Feed
        StringBuilder feedId;
        feedId = new StringBuilder();


        feedId.append("tag:");
        feedId.append(hostname);
        feedId.append(",");
        feedId.append(ATOM_ID_DATE_FORMAT.format(weblog.getDateCreated()));
        feedId.append(":");
        feedId.append(weblog.getId());


        Feed feed;
        feed = abdera.newFeed();


        feed.setId(feedId.toString());
        feed.setTitle(weblog.getName());


        if (weblog.getDescription() != null &&
                !weblog.getDescription().equals("")) {
            feed.setSubtitle(weblog.getDescription());
        }


        // TODO: Maybe want to add all authors
        feed.addAuthor(weblog.getCreator().getScreenName());


        // TODO: Really need a "self" link, but what to use?
        feed.addLink(weblog.getAbsoluteURL(), "self");
        feed.addLink(weblog.getAbsoluteURL(), "alternate");


        feed.setUpdated(weblog.getLastModified());


        feed.setGenerator("http://roller.apache.org",
                WebloggerFactory.getWeblogger().getVersion(), "Apache Roller");


        // Entries
        for (WeblogEntryWrapper entryWrapper : entries) {
            StringBuilder entryId;
            entryId = new StringBuilder();


            entryId.append("tag:");
            entryId.append(hostname);
            entryId.append(",");
            entryId.append(ATOM_ID_DATE_FORMAT.format(
                    (entryWrapper.getPubTime() != null) ?
                            entryWrapper.getPubTime() :
                            entryWrapper.getUpdateTime()));
            entryId.append(":");
            entryId.append(entryWrapper.getId());


            Entry entry;
            entry = feed.addEntry();


            entry.setId(entryId.toString());
            entry.setTitle(entryWrapper.getTitle());
            entry.addAuthor(entryWrapper.getCreator().getScreenName());
            entry.addLink(entryWrapper.getPermalink(), "alternate");
            entry.setPublished(entryWrapper.getPubTime());
            entry.setUpdated(entryWrapper.getUpdateTime());


            // Category
            entry.addCategory(null, entryWrapper.getCategory().getPath(),
                    entryWrapper.getCategory().getName());


            // Tags
            for (Object tagWrapperObj : entryWrapper.getTags()) {
                WeblogEntryTagWrapper tagWrapper;
                tagWrapper = (WeblogEntryTagWrapper) tagWrapperObj;


                entry.addCategory("http://roller.apache.org/ns/tags/",
                        tagWrapper.getName(), tagWrapper.getName());
            }


            // Enclosure
            String enclosureUrl;
            enclosureUrl = entryWrapper.findEntryAttribute(
                    "att_mediacast_url");


            if (enclosureUrl != null && !enclosureUrl.equals("")) {
                String enclosureType;
                enclosureType = entryWrapper.findEntryAttribute(
                        "att_mediacast_type");


                Long enclosureLength;
                try {
                    enclosureLength = Long.parseLong(
                            entryWrapper.findEntryAttribute(
                                    "att_mediacast_length"));
                }
                catch (NumberFormatException e) {
                    log.error("Unable to parse 'att_mediacast_length': " +
                            e.getMessage());
                    enclosureLength = (long) 0;
                }


                entry.addLink(enclosureUrl, "enclosure", enclosureType, null,
                        null, enclosureLength);
            }


            // Summary
            if (entryWrapper.getSummary() != null &&
                    !entryWrapper.getSummary().equals("")) {
                entry.setSummaryAsHtml(processEntry(
                        entryWrapper.getSummary().trim()));
            }


            // Content
            if (entryWrapper.getText() != null &&
                    !entryWrapper.getText().equals("")) {
                entry.setContentAsHtml(processEntry(
                        entryWrapper.getText().trim()));
            }


            // Comments in reply to the entry
            for (Object commentObj : entryWrapper.getComments()) {
                WeblogEntryCommentWrapper commentEntryWrapper;
                commentEntryWrapper = (WeblogEntryCommentWrapper) commentObj;


                StringBuilder commentEntryId;
                commentEntryId = new StringBuilder();


                commentEntryId.append("tag:");
                commentEntryId.append(hostname);
                commentEntryId.append(",");
                commentEntryId.append(ATOM_ID_DATE_FORMAT.format(
                        commentEntryWrapper.getPostTime()));
                commentEntryId.append(":");
                commentEntryId.append(commentEntryWrapper.getId());


                Entry commentEntry;
                commentEntry = feed.addEntry();


                commentEntry.setId(commentEntryId.toString());
                commentEntry.setTitle("Re: " + entryWrapper.getTitle());


                // Author
                if (commentEntryWrapper.getName() != null &&
                        ! commentEntryWrapper.getName().equals("")) {
                    commentEntry.addAuthor(commentEntryWrapper.getName());
                }
                else {
                    commentEntry.addAuthor("Anonymous");
                }


                commentEntry.addLink(entryWrapper.getPermalink() +
                        "#comment-" + commentEntryWrapper.getTimestamp(),
                        "alternate");
                commentEntry.setPublished(commentEntryWrapper.getPostTime());
                commentEntry.setUpdated(commentEntryWrapper.getPostTime());
                commentEntry.setContentAsHtml(commentEntryWrapper.getContent());


                // The important bit
                ThreadHelper.addInReplyTo(commentEntry, entry);
            }
        }


        return feed.toString();
    }
    */


    /**
     * Formats all entries and comments, including draft entries, in the
     * Moveable Type Import Format (mtimport). This format can be imported
     * into both Moveable Type and WordPress blogging platforms.
     *
     * @param entries A collection of entries to format.
     * @return A String of all entries and comments formatted as mtimport
     */
    private String formatAsMoveableType(List<WeblogEntryWrapper> entries) {
        StringBuilder result;
        result = new StringBuilder();


        for (WeblogEntryWrapper entry : entries) {
            // Author
            result.append("AUTHOR: ");
            result.append(entry.getCreator().getScreenName());
            result.append("\n");


            // Title
            result.append("TITLE: ");
            result.append(entry.getTitle());
            result.append("\n");


            // Date
            result.append("DATE: ");
            if (entry.getStatus().equals(WeblogEntry.PUBLISHED)) {
                result.append(MT_DATE_FORMAT.format(entry.getPubTime()));
            } else {
                result.append(MT_DATE_FORMAT.format(entry.getUpdateTime()));
            }
            result.append("\n");


            // Primary category
            result.append("PRIMARY CATEGORY: ");
            result.append(entry.getCategory().getName());
            result.append("\n");


            // Status
            result.append("STATUS: ");
            if (entry.getStatus().equals(WeblogEntry.PUBLISHED)) {
                result.append("publish");
            } else {
                result.append("draft");
            }
            result.append("\n");


            // Allow comments
            result.append("ALLOW COMMENTS: ");
            if (entry.getAllowComments()) {
                result.append("1");
            } else {
                result.append("0");
            }
            result.append("\n");


            result.append(MT_SECTION_DIVIDER);


            // Body
            // TODO: May want to use transformed text here
            result.append("BODY: \n");
            result.append(processEntry(entry.getText().trim()));
            result.append("\n");


            result.append(MT_SECTION_DIVIDER);


            // Excerpt
            if (entry.getSummary() != null && !entry.getSummary().equals("")) {
                // TODO: May want to use transformed summary here
                result.append("EXCERPT: \n");
                result.append(processEntry(entry.getSummary().trim()));
                result.append("\n");


                result.append(MT_SECTION_DIVIDER);
            }


            for (Object commentObj : entry.getComments()) {
                WeblogEntryCommentWrapper comment;
                comment = (WeblogEntryCommentWrapper) commentObj;
                result.append("COMMENT: \n");


                result.append("AUTHOR: ");
                result.append(comment.getName());
                result.append("\n");


                result.append("EMAIL: ");
                result.append(comment.getEmail());
                result.append("\n");


                result.append("URL: ");
                result.append(comment.getUrl());
                result.append("\n");


                result.append("DATE: ");
                result.append(MT_DATE_FORMAT.format(comment.getPostTime()));
                result.append("\n");


                result.append(comment.getContent());
                result.append("\n");


                result.append(MT_SECTION_DIVIDER);
            }


            result.append(MT_ENTRY_DIVIDER);
        }


        return result.toString();
    }


    /**
     * Performs some pre-processing of entry text. It fixes a problem when
     * WordPress imports a self-closing HTML tag that does not have a space
     * preceding the "/>" characters. It also provides a replacment base URL
     * for all referenced resource files if requested.
     *
     * @param text The entry text to process.
     * @return The resulting String after processing has taken place.
     */
    private String processEntry(String text) {
        String result;
        result = text;


        // Some special processing is needed for mtimport
        if (format.startsWith(MT_FORMAT)) {
            // Fix self closing tags that are missing a space,
            // replaceing <foo bar="foobar"/> with <foo bar="foobar" />
            Matcher badSelfClosingTagMatcher;
            badSelfClosingTagMatcher = SC_TAG_PATTERN.matcher(result);


            result = badSelfClosingTagMatcher.replaceAll("$2 />");


            if (format.equals(MT_PLUS_FORMAT)) {
                // Replace all newlines with spaces leaving "<pre>" blocks
                // alone. WordPress will automatically convert newlines to
                // "<br />" which alters the intended formatting.
                Matcher preTagMatcher;
                preTagMatcher = PRE_TAG_PATTERN.matcher(result);


                StringBuilder replacedNewLines;
                replacedNewLines = new StringBuilder();


                int index;
                index = 0;


                while (preTagMatcher.find()) {
                    replacedNewLines.append(NEWLINE_PATTERN.matcher(
                            result.substring(index, preTagMatcher.start())).
                            replaceAll(" "));
                    replacedNewLines.append(preTagMatcher.group());
                    index = preTagMatcher.end();
                }


                replacedNewLines.append(NEWLINE_PATTERN.matcher(
                            result.substring(index, result.length())).
                            replaceAll(" "));


                result = replacedNewLines.toString();
            }
        }


        // Replace all /weblog-handle/resource/ links with a specified base URL
        if (baseUrl != null && !baseUrl.equals("")) {
            Matcher baseUrlMatcher;
            baseUrlMatcher = baseUrlPattern.matcher(result);


            try {
                result = baseUrlMatcher.replaceAll("$1" + baseUrl);
            } catch (IllegalArgumentException e) {
                log.error("Invalid base URL submitted: " + baseUrl + ": " +
                        e.getMessage());
            }
        }


        return result;
    }


    /**
     * Adds all the non-directory files for the specified path to the provided
     * List.
     *
     * @param mfiles The List in which to add the resource objects.
     * @param mdir The path from which to load. If null, the root path is used.
     */
    private void loadResources(List<MediaFile> mfiles, MediaFileDirectory mdir) {
        try {
            // Load the non-directory files
            mfiles.addAll(mdir.getMediaFiles());
        } catch (Exception e) {
            log.error("Error loading resources: " + e.getMessage());
        }
    }
}
// Source Code of org.apache.roller.weblogger.ui.struts2.editor.WeblogExport

// Related Classes of org.apache.roller.weblogger.ui.struts2.editor.WeblogExport