Package org.opoo.press.importer

Source Code of org.opoo.press.importer.WordPressImporter

/*
* Copyright 2013 Alex Lin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opoo.press.importer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.QName;
import org.dom4j.io.SAXReader;
import org.opoo.press.Category;
import org.opoo.press.Site;
import org.opoo.press.Tag;
import org.opoo.press.source.Source;

/**
* Import posts and pages from WordPress exported XML file.
*
* @author Alex Lin
*
*/
public class WordPressImporter implements Importer {
  private static final Logger log = LoggerFactory.getLogger(WordPressImporter.class);
  /**
   * xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
   */
  public static Namespace NS_EXCERPT = new Namespace("excerpt", "http://wordpress.org/export/1.2/excerpt/");
  /**
   * xmlns:content="http://purl.org/rss/1.0/modules/content/"
   */
  public static Namespace NS_CONTENT = new Namespace("content", "http://purl.org/rss/1.0/modules/content/");
  /**
   * xmlns:wfw="http://wellformedweb.org/CommentAPI/"
   */
  public static Namespace NS_WFW = new Namespace("wfw", "http://wellformedweb.org/CommentAPI/");
  /**
   * xmlns:dc="http://purl.org/dc/elements/1.1/"
   */
  public static Namespace NS_DC = new Namespace("dc", "http://purl.org/dc/elements/1.1/");
  /**
   * xmlns:wp="http://wordpress.org/export/1.2/"
   */
  public static Namespace NS_WP = new Namespace("wp", "http://wordpress.org/export/1.2/");
  /**
   * DateFormat for parse the post date in WordPress XML exported file.
   */
  private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  /**
   * for the file name.
   */
  private static final SimpleDateFormat NAME_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
 
  public WordPressImporter(){
  }
 
 
  @Override
  public void doImport(Site site, Map<String,Object> props) throws ImportException{
    String fileStr = (String) props.get("file");
    File file = new File(fileStr);
   
    if(!file.exists()){
      throw new ImportException("File not found: " + file);
    }
   
    try {
      importFromtFile(site, file, props);
    } catch (DocumentException e) {
      throw new ImportException(e);
    } catch (FileNotFoundException e) {
      throw new ImportException(e);
    } catch (ParseException e) {
      throw new ImportException(e);
    } catch (IOException e) {
      throw new ImportException(e);
    }
  }
 
  @SuppressWarnings("unchecked")
  private void importFromtFile(Site site, File file, Map<String,Object> props) throws DocumentException, ParseException, IOException {
    FileInputStream fileInputStream = new FileInputStream(file);
    InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, "UTF-8");
    Reader reader = new BufferedReader(inputStreamReader);
    try{
      SAXReader xmlReader = new SAXReader();
      Document doc = xmlReader.read(reader);
     
      Element root = doc.getRootElement();
      Element channel = root.element("channel");
   
      List<Element> list = channel.elements("item");
      for(Element e: list){
        String postType = e.elementText("post_type");
        if("post".equals(postType) || "page".equals(postType)){
          importPostOrPage(site, postType, e, props);
        }
      }
    }finally{
      IOUtils.closeQuietly(reader);
      IOUtils.closeQuietly(inputStreamReader);
      IOUtils.closeQuietly(fileInputStream);
    }
  }

  @SuppressWarnings("unchecked")
  private void importPostOrPage(Site site, String postType, Element e, Map<String,Object> props) throws ParseException, IOException {
    boolean includeDrafts = "true".equals(props.get("include_drafts"));

    String status = e.elementTextTrim("status");
    boolean published = "publish".equals(status);
    String title = e.elementTextTrim("title");
    String name = e.elementTextTrim("post_name");
    boolean comments = "open".equals(e.elementTextTrim("comment_status"));
    String date = e.elementTextTrim("post_date");
    String author = e.elementTextTrim("creator");
    String postid = e.elementTextTrim("post_id");
    String link = e.elementTextTrim("link");
    String excerpt = e.elementTextTrim(new QName("encoded", NS_EXCERPT));
    String content = e.elementText(new QName("encoded", NS_CONTENT));
   
    boolean isPage = "page".equals(postType);
    Date parse = DATE_FORMAT.parse(date);
   
    String postname = name;
    if(postname.startsWith("%")){
      postname = title;
    }
    String url = buildURL(parse, postname, postid, author, props);
   
    if(!includeDrafts && !published){
      log.info(name + " is draft, skiping import. Set 'include_drafts' peroperty to enabled import drafts.");
      return;
    }
   
    //excerpt
    boolean excerpted = StringUtils.isNotBlank(excerpt);
    StringBuilder excerptBuilder = excerpted ? null : new StringBuilder();
    if(isPage){
      excerptBuilder = null;
    }
   
    //replace the content
    Map<String,String> rp = (Map<String, String>) props.get("content_replacements");
    if(rp != null){
      for(Map.Entry<String, String> en: rp.entrySet()){
        content = StringUtils.replace(content, en.getKey(), en.getValue());
      }
    }
   
    List<String> contentLines = processContent(content, excerptBuilder);
   
    if(!isPage && !excerpted){
      excerpt = excerptBuilder.toString();
    }
   
    //Categories and tags
    List<String> cats = new ArrayList<String>();
    List<String> tags = new ArrayList<String>();
    List<Element> list = e.elements("category");
    for(Element n: list){
      String domain = n.attributeValue("domain");
      String nicename = n.attributeValue("nicename");
      String text = n.getTextTrim();

      if("post_tag".equals(domain)){
        String stringTag = text;
        Tag tag = site.getTag(nicename);
        if(tag != null){
          stringTag = tag.getSlug();
        }
       
        tags.add(stringTag);
      }
      if("category".equals(domain)){
        String stringCategory = text;
        Category category = site.getCategory(nicename);
        if(category != null){
          stringCategory = category.getNicename();
        }
        cats.add(stringCategory);
      }
    }
   
    List<String> lines = new ArrayList<String>();
    lines.add(Source.TRIPLE_DASHED_LINE);
    lines.add("layout: " + postType);
    lines.add("title: '" + title + "'");
//    lines.add("name", name);
    lines.add("comments: " +  comments);
    lines.add("published: " + published);
    lines.add("date: '" + date + "'");
   
    if("true".equals(props.get("include_author"))){
      lines.add("author: " + author);
    }
   
    lines.add("link: " + link);
    lines.add("post_id: " + postid);
    if(url != null){
      lines.add("url: '" + url + "'");
    }

    if(StringUtils.isNotBlank(excerpt)){
      excerpt = excerpt.replace('"', '\'');
      lines.add("excerpt: \"" + excerpt + "\"");
    }
   
    if(!cats.isEmpty()){
      lines.add("categories: " + cats);
    }
    if(!tags.isEmpty()){
      lines.add("tags: " + tags);
    }
   
    List<Element> meta = e.elements("postmeta");
    for(Element n: meta){
      String key = n.elementTextTrim("meta_key");
      String value = n.elementTextTrim("meta_value");
      if(key.startsWith("_")){
        log.debug("It's a WordPress intenal meta, skip parse: " + key);
        continue;
      }
      lines.add(key + ": \"" + value + "\"");
    }
   
   
    lines.add(Source.TRIPLE_DASHED_LINE);
    lines.addAll(contentLines);
   
    //filename
    String filename = NAME_FORMAT.format(parse) + "-" + postname + ".html";

    String importDir = (String) props.get("import_dir");
    if(StringUtils.isBlank(importDir)){
      importDir = "wordpress";
    }
   
    File dir = site.getSource();
    File file = new File(dir, importDir + "/" + filename);

    if(!file.getParentFile().exists()){
      file.getParentFile().mkdir();
    }
    log.info("Writing file " + file);
    FileUtils.writeLines(file, "UTF-8", lines);
  }
 
  private List<String> processContent(String content, StringBuilder excerptBuilder) {
    boolean excerptFound = false;
    List<String> contentLines = new ArrayList<String>();
    boolean lastLineIsBlank = true;
    LineIterator it = IOUtils.lineIterator(new StringReader(content));
    int preCount = 0;
    while(it.hasNext()){
      String line = it.next();
      boolean isBlank = StringUtils.isBlank(line);
      if(!isBlank){
        String lower = line.toLowerCase().trim();
        if(preCount == 0 && lastLineIsBlank && !lower.startsWith("<h") && !lower.startsWith("<!--more-->")){
          line = "<p>" + line;
        }
        contentLines.add(line);
       
        if(excerptBuilder != null){
          int indexOf = line.indexOf("<!--more-->");
          if(indexOf >= 0){
            excerptBuilder.append(line.substring(0, indexOf));
            excerptFound = true;
          }else{
            if(!excerptFound){
              excerptBuilder.append(line);
            }
          }
        }
        if(lower.contains("<pre>") || lower.contains("<pre ")){
          preCount++;
        }
        if(lower.contains("</pre>")){
          preCount--;
        }
        lastLineIsBlank = false;
      }else{
        lastLineIsBlank = true;
        contentLines.add(line);
      }
    }
    return contentLines;
  }
 
  /**
   *  Build post/page url.
   *
   * <p>Permalink details: http://codex.wordpress.org/Using_Permalinks.
   * <p> %year%, %monthnum%, %day%, %hour%, %minute%, %second%, %postname%, %post_id%,
   * %category%,%tag%,%author%</p>
   * @param date
   * @param postname
   * @param post_id
   * @param author
   * @param props
   * @return page/post url, return null if no 'permalink_style' defined.
   */
  private String buildURL(Date date, String postname, String post_id, String author, Map<String,Object> props){
    String permalinkStyle = (String) props.get("permalink_style");
    if(StringUtils.isBlank(permalinkStyle)){
      return null;
    }
   
    Calendar c = Calendar.getInstance();
    c.setTime(date);
    int year = c.get(Calendar.YEAR);
    int monthnum = c.get(Calendar.MONTH) + 1;
    int day = c.get(Calendar.DAY_OF_MONTH);
    int hour = c.get(Calendar.HOUR_OF_DAY);
    int minute = c.get(Calendar.MINUTE);
    int second = c.get(Calendar.SECOND);
   
    permalinkStyle = StringUtils.replace(permalinkStyle, "%postname%", postname);
    permalinkStyle = StringUtils.replace(permalinkStyle, "%post_id%", post_id);
    permalinkStyle = StringUtils.replace(permalinkStyle, "%author%", author);
    permalinkStyle = StringUtils.replace(permalinkStyle, "%year%", year + "");
    permalinkStyle = StringUtils.replace(permalinkStyle, "%monthnum%", StringUtils.leftPad(monthnum + "", 2, '0'));
    permalinkStyle = StringUtils.replace(permalinkStyle, "%day%", StringUtils.leftPad(day + "", 2, '0'));
    permalinkStyle = StringUtils.replace(permalinkStyle, "%hour%", StringUtils.leftPad(hour + "", 2, '0'));
    permalinkStyle = StringUtils.replace(permalinkStyle, "%minute%", StringUtils.leftPad(minute + "", 2, '0'));
    permalinkStyle = StringUtils.replace(permalinkStyle, "%second%", StringUtils.leftPad(second + "", 2, '0'));
   
    return permalinkStyle;
  }
}
TOP

Related Classes of org.opoo.press.importer.WordPressImporter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.