/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.roller.weblogger.ui.struts2.editor;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.LinkedHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
// import java.net.URL;
// import java.net.MalformedURLException;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
// import org.apache.abdera.Abdera;
// import org.apache.abdera.ext.thread.ThreadHelper;
// import org.apache.abdera.model.Entry;
// import org.apache.abdera.model.Feed;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.roller.RollerException;
import org.apache.roller.weblogger.WebloggerException;
import org.apache.roller.weblogger.business.MediaFileManager;
import org.apache.roller.weblogger.business.URLStrategy;
import org.apache.roller.weblogger.business.WeblogEntryManager;
import org.apache.roller.weblogger.business.WebloggerFactory;
import org.apache.roller.weblogger.config.WebloggerConfig;
import org.apache.roller.weblogger.pojos.MediaFile;
import org.apache.roller.weblogger.pojos.MediaFileDirectory;
import org.apache.roller.weblogger.pojos.WeblogEntry;
import org.apache.roller.weblogger.pojos.WeblogPermission;
import org.apache.roller.weblogger.pojos.wrapper.WeblogEntryCommentWrapper;
// import org.apache.roller.weblogger.pojos.wrapper.WeblogEntryTagWrapper;
import org.apache.roller.weblogger.pojos.wrapper.WeblogEntryWrapper;
import org.apache.roller.weblogger.ui.struts2.util.UIAction;
import org.apache.struts2.interceptor.ServletResponseAware;
/**
 * Struts2 action that exports the content of a weblog.
 *
 * <p>Two exports are offered: all entries and their comments as a
 * MovableType import file ({@link #exportEntries()}), and all uploaded media
 * files as a ZIP archive ({@link #exportResources()}). Both write directly to
 * the servlet response. The action requires the {@code ADMIN} weblog
 * permission and is only available when the {@code weblog.export.enabled}
 * configuration property is true.</p>
 */
public final class WeblogExport extends UIAction
        implements ServletResponseAware {

    // Static Variables --------------------------------------------------------

    private static final Log log = LogFactory.getLog(WeblogExport.class);

    /** Configuration property that switches the export feature on or off. */
    private static final String EXPORT_ENABLED_PROPERTY = "weblog.export.enabled";

    /** Matches a self-closing tag end ("/>") directly preceded by a non-space. */
    private static final Pattern SC_TAG_PATTERN =
            Pattern.compile("(([\\S])(/>))");

    /** Matches a complete {@code <pre>...</pre>} block (non-greedy). */
    private static final Pattern PRE_TAG_PATTERN =
            Pattern.compile("<pre>[\\s\\S]+?</pre>");

    /** Matches any single line break (CRLF, CR or LF). */
    private static final Pattern NEWLINE_PATTERN =
            Pattern.compile("\\r\\n|\\r|\\n");

    // TODO: Perhaps add enum to manage the different MT constants
    private static final String MT_SECTION_DIVIDER = "-----\n";
    private static final String MT_ENTRY_DIVIDER = "--------\n";

    // SimpleDateFormat is not thread-safe, so only the patterns are shared;
    // a formatter instance is created for each export.
    private static final String MT_DATE_PATTERN = "MM/dd/yyyy HH:mm:ss";
    private static final String FILE_TIMESTAMP_PATTERN = "MMddyyyy'T'HHmmss";

    // Only referenced by the disabled Atom export further down. If that code
    // is re-enabled this must not stay a shared instance (SimpleDateFormat is
    // not thread-safe).
    private static final SimpleDateFormat ATOM_ID_DATE_FORMAT =
            new SimpleDateFormat("yyyy-MM-dd");

    private static final String MT_FORMAT = "mtimport";
    private static final String MT_PLUS_FORMAT = "mtimportplus";
    private static final String ATOM_FORMAT = "atom";

    /** Size of the buffer used when copying media files into the archive. */
    private static final int COPY_BUFFER_SIZE = 8192;

    // private static final Abdera abdera = new Abdera();

    // Instance Variables ------------------------------------------------------

    /** Compiled in exportEntries(); matches links into this weblog's resources. */
    private Pattern baseUrlPattern;
    private HttpServletResponse response;
    private String baseUrl;
    private String format;

    // Constructors ------------------------------------------------------------

    /**
     * Creates the action and selects {@code mtimport} as the default format.
     */
    public WeblogExport() {
        this.actionName = "weblogExport";
        this.desiredMenu = "editor";
        this.pageTitle = "weblogExport.title";

        // Set the default format
        this.format = MT_FORMAT;
    }

    // Public Methods ----------------------------------------------------------

    /**
     * Keeps a reference to the current HTTP servlet response object.
     *
     * @param httpServletResponse The HTTP servlet response.
     */
    public void setServletResponse(HttpServletResponse httpServletResponse) {
        this.response = httpServletResponse;
    }

    /**
     * Sets the base URL to be used when replacing references to resource files.
     *
     * @param baseUrl The desired base URL. Null or empty disables replacement.
     */
    public void setBaseUrl(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Gets the current format.
     *
     * @return The current format.
     */
    public String getFormat() {
        return format;
    }

    /**
     * Sets the desired export format.
     *
     * @param format The desired export format.
     */
    public void setFormat(String format) {
        this.format = format;
    }

    /**
     * Gets the supported export formats.
     *
     * @return A map from format key to its localized label, in display order.
     */
    public Map<String, String> getFormatOptions() {
        Map<String, String> options = new LinkedHashMap<String, String>();
        options.put(MT_FORMAT, getText("weblogExport.format.mtimport"));
        options.put(MT_PLUS_FORMAT, getText("weblogExport.format.mtimportplus"));
        // options.put(ATOM_FORMAT, getText("weblogExport.format.atom"));
        return options;
    }

    /**
     * Requires the ADMIN weblog permission before allowing export
     * functionality.
     *
     * @return A single-element list containing {@code WeblogPermission.ADMIN}.
     */
    @Override
    public List<String> requiredWeblogPermissionActions() {
        return Collections.singletonList(WeblogPermission.ADMIN);
    }

    /**
     * Simply triggers the display of the export options UI.
     *
     * @return {@code INPUT}, so that the options form is shown.
     * @throws WebloggerException If export is disabled in the configuration.
     */
    @Override
    public String execute() throws WebloggerException {
        if (!WebloggerConfig.getBooleanProperty(EXPORT_ENABLED_PROPERTY)) {
            throw new WebloggerException("ERROR: export is disabled");
        }

        // We need to gather some more info before we can attempt an export
        return INPUT;
    }

    /**
     * Writes a text file of all entries and comments to the client as a
     * download. This will include draft entries as well.
     *
     * Currently the only file format supported is mtimport. Lookup and I/O
     * failures are logged rather than propagated.
     *
     * @throws WebloggerException If export is disabled in the configuration.
     */
    public void exportEntries() throws WebloggerException {
        if (!WebloggerConfig.getBooleanProperty(EXPORT_ENABLED_PROPERTY)) {
            throw new WebloggerException("ERROR: export is disabled");
        }

        try {
            WeblogEntryManager wmgr =
                    WebloggerFactory.getWeblogger().getWeblogEntryManager();
            URLStrategy urlStrategy =
                    WebloggerFactory.getWeblogger().getUrlStrategy();

            List<?> rawEntries = wmgr.getWeblogEntries(getActionWeblog(), null,
                    null, null, null, null, null, null, null, null, null, 0, -1);

            List<WeblogEntryWrapper> entries =
                    new ArrayList<WeblogEntryWrapper>(rawEntries.size());
            for (Object entry : rawEntries) {
                entries.add(WeblogEntryWrapper.wrap((WeblogEntry) entry,
                        urlStrategy));
            }

            // Compile the resource URL pattern using the weblog handle. The
            // handle is quoted so that it is always matched literally, even
            // if it contains regex metacharacters.
            String quotedHandle = Pattern.quote(getActionWeblog().getHandle());
            baseUrlPattern = Pattern.compile(
                    "(<[\\s\\S]+?=[\"'])(http[s]*?://[\\S]+/"
                    + quotedHandle + "/resource/|/"
                    + quotedHandle + "/resource/)");

            // Produce the output. Only the MovableType formatter is active;
            // the Atom formatter further down is disabled.
            String output = formatAsMoveableType(entries);

            if (!response.isCommitted()) {
                response.reset();

                String extension = ATOM_FORMAT.equals(format) ? ".xml" : ".txt";
                String fileName = buildFileName("-entries-", extension);

                // Encode once and send exactly these bytes, so that the
                // declared Content-Length always matches the body.
                // ServletOutputStream.print(String) does not encode as UTF-8.
                byte[] payload = output.getBytes("UTF-8");

                // Force the browser to download the export file
                response.setContentType(
                        "application/octet-stream; charset=utf-8");
                response.setContentLength(payload.length);
                response.setHeader("Content-Disposition",
                        "attachment; filename=\"" + fileName + "\"");

                ServletOutputStream outputStream = response.getOutputStream();
                try {
                    outputStream.write(payload);
                    outputStream.flush();
                } finally {
                    outputStream.close();
                }
            }
        } catch (WebloggerException e) {
            log.error("Error looking up entries: ", e);
        } catch (IOException e) {
            log.error("Error getting output stream: ", e);
        }
    }

    /**
     * Writes all uploaded resource files to the client as a ZIP archive
     * download. Nothing is written when export is disabled or the response is
     * already committed. Failures are logged rather than propagated.
     */
    public void exportResources() {
        // Same feature switch as execute() and exportEntries(). This method
        // declares no checked exception, so a disabled export is logged
        // instead of thrown.
        if (!WebloggerConfig.getBooleanProperty(EXPORT_ENABLED_PROPERTY)) {
            log.error("Resource export requested but export is disabled");
            return;
        }

        if (response.isCommitted()) {
            return;
        }

        String fileName = buildFileName("-resources-", ".zip");

        response.reset();
        response.setContentType("application/zip");
        response.setHeader("Content-Disposition",
                "attachment; filename=\"" + fileName + "\"");

        ZipOutputStream zipOutput = null;
        try {
            MediaFileManager fmgr =
                    WebloggerFactory.getWeblogger().getMediaFileManager();

            // Collect the files of every media directory of this weblog.
            // NOTE(review): an earlier extra call passed a null directory to
            // loadResources(), which always failed with a
            // NullPointerException and contributed no files. Confirm that
            // getMediaFileDirectories() also returns the root directory.
            List<MediaFile> resources = new ArrayList<MediaFile>();
            for (MediaFileDirectory mdir
                    : fmgr.getMediaFileDirectories(getActionWeblog())) {
                loadResources(resources, mdir);
            }

            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            zipOutput = new ZipOutputStream(response.getOutputStream());

            for (MediaFile resource : resources) {
                InputStream input = resource.getInputStream();
                try {
                    // NOTE(review): assumes getPath() is unique per file;
                    // duplicate entry names make putNextEntry() fail.
                    zipOutput.putNextEntry(new ZipEntry(resource.getPath()));

                    int length;
                    while ((length = input.read(buffer)) > 0) {
                        zipOutput.write(buffer, 0, length);
                    }
                    zipOutput.closeEntry();
                } finally {
                    // Release the file even when copying fails
                    if (input != null) {
                        input.close();
                    }
                }
            }

            zipOutput.flush();
        } catch (Exception e) {
            log.error("Error exporting resources: " + e.getMessage(), e);
        } finally {
            if (zipOutput != null) {
                try {
                    zipOutput.close();
                } catch (IOException e) {
                    log.error("Error closing resource archive: "
                            + e.getMessage(), e);
                }
            }
        }
    }

    // Private Methods ---------------------------------------------------------

    /**
     * Builds a download file name of the form
     * {@code <handle><qualifier><timestamp><extension>}.
     *
     * @param qualifier The text placed between handle and timestamp.
     * @param extension The file extension including the leading dot.
     * @return The assembled file name.
     */
    private String buildFileName(String qualifier, String extension) {
        SimpleDateFormat timestampFormat =
                new SimpleDateFormat(FILE_TIMESTAMP_PATTERN);

        StringBuilder fileName = new StringBuilder();
        fileName.append(getActionWeblog().getHandle());
        fileName.append(qualifier);
        fileName.append(timestampFormat.format(System.currentTimeMillis()));
        fileName.append(extension);
        return fileName.toString();
    }

    /**
     * Formats all entries and comments, including draft entries, in the
     * Atom Syndication Format.
     *
     * @param entries A collection of entries to format.
     * @return A String of all entries and comments formatted as Atom
     */
    /*
    private String formatAsAtom(List<WeblogEntryWrapper> entries) {
        Weblog weblog;
        weblog = getActionWeblog();
        String hostname;
        URL absoluteUrl;
        try {
            absoluteUrl = new URL(weblog.getAbsoluteURL());
            hostname = absoluteUrl.getHost();
        }
        catch (MalformedURLException e) {
            log.error("Unable to parse the absolute URL: " + e.getMessage());
            hostname = "unknown";
        }
        // Feed
        StringBuilder feedId;
        feedId = new StringBuilder();
        feedId.append("tag:");
        feedId.append(hostname);
        feedId.append(",");
        feedId.append(ATOM_ID_DATE_FORMAT.format(weblog.getDateCreated()));
        feedId.append(":");
        feedId.append(weblog.getId());
        Feed feed;
        feed = abdera.newFeed();
        feed.setId(feedId.toString());
        feed.setTitle(weblog.getName());
        if (weblog.getDescription() != null &&
                !weblog.getDescription().equals("")) {
            feed.setSubtitle(weblog.getDescription());
        }
        // TODO: Maybe want to add all authors
        feed.addAuthor(weblog.getCreator().getScreenName());
        // TODO: Really need a "self" link, but what to use?
        feed.addLink(weblog.getAbsoluteURL(), "self");
        feed.addLink(weblog.getAbsoluteURL(), "alternate");
        feed.setUpdated(weblog.getLastModified());
        feed.setGenerator("http://roller.apache.org",
                WebloggerFactory.getWeblogger().getVersion(), "Apache Roller");
        // Entries
        for (WeblogEntryWrapper entryWrapper : entries) {
            StringBuilder entryId;
            entryId = new StringBuilder();
            entryId.append("tag:");
            entryId.append(hostname);
            entryId.append(",");
            entryId.append(ATOM_ID_DATE_FORMAT.format(
                    (entryWrapper.getPubTime() != null) ?
                    entryWrapper.getPubTime() :
                    entryWrapper.getUpdateTime()));
            entryId.append(":");
            entryId.append(entryWrapper.getId());
            Entry entry;
            entry = feed.addEntry();
            entry.setId(entryId.toString());
            entry.setTitle(entryWrapper.getTitle());
            entry.addAuthor(entryWrapper.getCreator().getScreenName());
            entry.addLink(entryWrapper.getPermalink(), "alternate");
            entry.setPublished(entryWrapper.getPubTime());
            entry.setUpdated(entryWrapper.getUpdateTime());
            // Category
            entry.addCategory(null, entryWrapper.getCategory().getPath(),
                    entryWrapper.getCategory().getName());
            // Tags
            for (Object tagWrapperObj : entryWrapper.getTags()) {
                WeblogEntryTagWrapper tagWrapper;
                tagWrapper = (WeblogEntryTagWrapper) tagWrapperObj;
                entry.addCategory("http://roller.apache.org/ns/tags/",
                        tagWrapper.getName(), tagWrapper.getName());
            }
            // Enclosure
            String enclosureUrl;
            enclosureUrl = entryWrapper.findEntryAttribute(
                    "att_mediacast_url");
            if (enclosureUrl != null && !enclosureUrl.equals("")) {
                String enclosureType;
                enclosureType = entryWrapper.findEntryAttribute(
                        "att_mediacast_type");
                Long enclosureLength;
                try {
                    enclosureLength = Long.parseLong(
                            entryWrapper.findEntryAttribute(
                            "att_mediacast_length"));
                }
                catch (NumberFormatException e) {
                    log.error("Unable to parse 'att_mediacast_length': " +
                            e.getMessage());
                    enclosureLength = (long) 0;
                }
                entry.addLink(enclosureUrl, "enclosure", enclosureType, null,
                        null, enclosureLength);
            }
            // Summary
            if (entryWrapper.getSummary() != null &&
                    !entryWrapper.getSummary().equals("")) {
                entry.setSummaryAsHtml(processEntry(
                        entryWrapper.getSummary().trim()));
            }
            // Content
            if (entryWrapper.getText() != null &&
                    !entryWrapper.getText().equals("")) {
                entry.setContentAsHtml(processEntry(
                        entryWrapper.getText().trim()));
            }
            // Comments in reply to the entry
            for (Object commentObj : entryWrapper.getComments()) {
                WeblogEntryCommentWrapper commentEntryWrapper;
                commentEntryWrapper = (WeblogEntryCommentWrapper) commentObj;
                StringBuilder commentEntryId;
                commentEntryId = new StringBuilder();
                commentEntryId.append("tag:");
                commentEntryId.append(hostname);
                commentEntryId.append(",");
                commentEntryId.append(ATOM_ID_DATE_FORMAT.format(
                        commentEntryWrapper.getPostTime()));
                commentEntryId.append(":");
                commentEntryId.append(commentEntryWrapper.getId());
                Entry commentEntry;
                commentEntry = feed.addEntry();
                commentEntry.setId(commentEntryId.toString());
                commentEntry.setTitle("Re: " + entryWrapper.getTitle());
                // Author
                if (commentEntryWrapper.getName() != null &&
                        ! commentEntryWrapper.getName().equals("")) {
                    commentEntry.addAuthor(commentEntryWrapper.getName());
                }
                else {
                    commentEntry.addAuthor("Anonymous");
                }
                commentEntry.addLink(entryWrapper.getPermalink() +
                        "#comment-" + commentEntryWrapper.getTimestamp(),
                        "alternate");
                commentEntry.setPublished(commentEntryWrapper.getPostTime());
                commentEntry.setUpdated(commentEntryWrapper.getPostTime());
                commentEntry.setContentAsHtml(commentEntryWrapper.getContent());
                // The important bit
                ThreadHelper.addInReplyTo(commentEntry, entry);
            }
        }
        return feed.toString();
    }
    */

    /**
     * Formats all entries and comments, including draft entries, in the
     * Moveable Type Import Format (mtimport). This format can be imported
     * into both Moveable Type and WordPress blogging platforms.
     *
     * Missing (null) text values are written as empty strings rather than
     * the literal "null".
     *
     * @param entries A collection of entries to format.
     * @return A String of all entries and comments formatted as mtimport
     */
    private String formatAsMoveableType(List<WeblogEntryWrapper> entries) {
        // One formatter per export; SimpleDateFormat must not be shared
        // between threads.
        SimpleDateFormat mtDateFormat = new SimpleDateFormat(MT_DATE_PATTERN);
        StringBuilder result = new StringBuilder();

        for (WeblogEntryWrapper entry : entries) {
            boolean published = WeblogEntry.PUBLISHED.equals(entry.getStatus());

            // Author
            result.append("AUTHOR: ");
            result.append(nullToEmpty(entry.getCreator().getScreenName()));
            result.append("\n");

            // Title
            result.append("TITLE: ");
            result.append(nullToEmpty(entry.getTitle()));
            result.append("\n");

            // Date: publish time for published entries, otherwise the last
            // update time. Falls back to the update time when a published
            // entry carries no publish time.
            Object entryDate =
                    published ? entry.getPubTime() : entry.getUpdateTime();
            if (entryDate == null) {
                entryDate = entry.getUpdateTime();
            }
            result.append("DATE: ");
            result.append(formatDate(mtDateFormat, entryDate));
            result.append("\n");

            // Primary category
            result.append("PRIMARY CATEGORY: ");
            if (entry.getCategory() != null) {
                result.append(nullToEmpty(entry.getCategory().getName()));
            }
            result.append("\n");

            // Status
            result.append("STATUS: ");
            result.append(published ? "publish" : "draft");
            result.append("\n");

            // Allow comments
            result.append("ALLOW COMMENTS: ");
            result.append(entry.getAllowComments() ? "1" : "0");
            result.append("\n");
            result.append(MT_SECTION_DIVIDER);

            // Body
            // TODO: May want to use transformed text here
            result.append("BODY: \n");
            result.append(processEntry(trimToEmpty(entry.getText())));
            result.append("\n");
            result.append(MT_SECTION_DIVIDER);

            // Excerpt
            if (entry.getSummary() != null && !entry.getSummary().equals("")) {
                // TODO: May want to use transformed summary here
                result.append("EXCERPT: \n");
                result.append(processEntry(entry.getSummary().trim()));
                result.append("\n");
                result.append(MT_SECTION_DIVIDER);
            }

            // Comments
            for (Object commentObj : entry.getComments()) {
                WeblogEntryCommentWrapper comment =
                        (WeblogEntryCommentWrapper) commentObj;

                result.append("COMMENT: \n");
                result.append("AUTHOR: ");
                result.append(nullToEmpty(comment.getName()));
                result.append("\n");
                result.append("EMAIL: ");
                result.append(nullToEmpty(comment.getEmail()));
                result.append("\n");
                result.append("URL: ");
                result.append(nullToEmpty(comment.getUrl()));
                result.append("\n");
                result.append("DATE: ");
                result.append(formatDate(mtDateFormat, comment.getPostTime()));
                result.append("\n");
                result.append(nullToEmpty(comment.getContent()));
                result.append("\n");
                result.append(MT_SECTION_DIVIDER);
            }

            result.append(MT_ENTRY_DIVIDER);
        }

        return result.toString();
    }

    /**
     * Performs some pre-processing of entry text. It fixes a problem when
     * WordPress imports a self-closing HTML tag that does not have a space
     * preceding the "/>" characters. It also provides a replacement base URL
     * for all referenced resource files if requested.
     *
     * @param text The entry text to process. Null is treated as empty.
     * @return The resulting String after processing has taken place.
     */
    private String processEntry(String text) {
        if (text == null) {
            return "";
        }

        String result = text;

        // Some special processing is needed for mtimport
        if (format != null && format.startsWith(MT_FORMAT)) {
            // Fix self closing tags that are missing a space,
            // replacing <foo bar="foobar"/> with <foo bar="foobar" />
            result = SC_TAG_PATTERN.matcher(result).replaceAll("$2 />");

            if (MT_PLUS_FORMAT.equals(format)) {
                // Replace all newlines with spaces leaving "<pre>" blocks
                // alone. WordPress will automatically convert newlines to
                // "<br />" which alters the intended formatting.
                result = collapseNewlinesOutsidePre(result);
            }
        }

        // Replace all /weblog-handle/resource/ links with a specified base URL
        if (baseUrl != null && !baseUrl.equals("")) {
            // The base URL is user input: quote it so that '$' and '\' are
            // inserted literally instead of being read as group references.
            result = baseUrlPattern.matcher(result).replaceAll(
                    "$1" + Matcher.quoteReplacement(baseUrl));
        }

        return result;
    }

    /**
     * Replaces every line break outside of {@code <pre>} blocks with a single
     * space; the {@code <pre>} blocks themselves are copied unchanged.
     *
     * @param text The text to process; must not be null.
     * @return The text with line breaks outside {@code <pre>} collapsed.
     */
    private static String collapseNewlinesOutsidePre(String text) {
        Matcher preTagMatcher = PRE_TAG_PATTERN.matcher(text);
        StringBuilder collapsed = new StringBuilder(text.length());
        int index = 0;

        while (preTagMatcher.find()) {
            // Text between the previous <pre> block (or start) and this one
            collapsed.append(NEWLINE_PATTERN.matcher(
                    text.substring(index, preTagMatcher.start()))
                    .replaceAll(" "));
            collapsed.append(preTagMatcher.group());
            index = preTagMatcher.end();
        }

        // Remainder after the last <pre> block
        collapsed.append(NEWLINE_PATTERN.matcher(
                text.substring(index)).replaceAll(" "));

        return collapsed.toString();
    }

    /**
     * Formats a date value, writing an empty string when it is missing.
     *
     * @param dateFormat The formatter to use.
     * @param date The date value to format; may be null.
     * @return The formatted date, or an empty string if date is null.
     */
    private static String formatDate(SimpleDateFormat dateFormat, Object date) {
        return (date == null) ? "" : dateFormat.format(date);
    }

    /**
     * Converts a value to its string form, mapping null to an empty string.
     *
     * @param value The value to convert; may be null.
     * @return The string form of value, or an empty string if it is null.
     */
    private static String nullToEmpty(Object value) {
        return (value == null) ? "" : value.toString();
    }

    /**
     * Trims a string, mapping null to an empty string.
     *
     * @param value The string to trim; may be null.
     * @return The trimmed string, or an empty string if value is null.
     */
    private static String trimToEmpty(String value) {
        return (value == null) ? "" : value.trim();
    }

    /**
     * Adds all the media files of the specified directory to the provided
     * List. Failures are logged and leave the list unchanged.
     *
     * @param mfiles The List in which to add the resource objects.
     * @param mdir The directory from which to load. A null directory is
     *             ignored.
     */
    private void loadResources(List<MediaFile> mfiles, MediaFileDirectory mdir) {
        if (mdir == null) {
            return;
        }

        try {
            // Load the non-directory files
            mfiles.addAll(mdir.getMediaFiles());
        } catch (Exception e) {
            log.error("Error loading resources: " + e.getMessage(), e);
        }
    }
}