/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.build.i18n;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Properties;
import java.util.Stack;
import org.h2.build.doc.XMLParser;
import org.h2.server.web.PageParser;
import org.h2.util.IOUtils;
import org.h2.util.New;
import org.h2.util.SortedProperties;
import org.h2.util.StringUtils;
/**
* This class updates the translation source code files by parsing
* the HTML documentation. It also generates the translated HTML
* documentation.
*/
public class PrepareTranslation {
private static final String MAIN_LANGUAGE = "en";
private static final String[] EXCLUDE = { "datatypes.html", "functions.html", "grammar.html" };
/**
* This method is called when executing this application from the command
* line.
*
* @param args the command line parameters
*/
public static void main(String... args) throws Exception {
String baseDir = "src/docsrc/textbase";
prepare(baseDir, "src/main/org/h2/res", true);
prepare(baseDir, "src/main/org/h2/server/web/res", true);
// convert the txt files to properties files
PropertiesToUTF8.textUTF8ToProperties("src/docsrc/text/_docs_de.utf8.txt",
"src/docsrc/text/_docs_de.properties");
PropertiesToUTF8.textUTF8ToProperties("src/docsrc/text/_docs_ja.utf8.txt",
"src/docsrc/text/_docs_ja.properties");
// create the .jsp files and extract the text in the main language
extractFromHtml("docs/html", "src/docsrc/text");
// add missing translations and create a new baseline
prepare(baseDir, "src/docsrc/text", false);
// create the translated documentation
buildHtml("src/docsrc/text", "docs/html", "en");
// buildHtml("src/docsrc/text", "docs/html", "de");
// buildHtml("src/docsrc/text", "docs/html", "ja");
// convert the properties files back to utf8 text files, including the
// main language (to be used as a template)
PropertiesToUTF8.propertiesToTextUTF8("src/docsrc/text/_docs_en.properties",
"src/docsrc/text/_docs_en.utf8.txt");
PropertiesToUTF8.propertiesToTextUTF8("src/docsrc/text/_docs_de.properties",
"src/docsrc/text/_docs_de.utf8.txt");
PropertiesToUTF8.propertiesToTextUTF8("src/docsrc/text/_docs_ja.properties",
"src/docsrc/text/_docs_ja.utf8.txt");
// delete temporary files
for (File f : new File("src/docsrc/text").listFiles()) {
if (!f.getName().endsWith(".utf8.txt")) {
f.delete();
}
}
}
private static void buildHtml(String templateDir, String targetDir, String language) throws IOException {
File[] list = new File(templateDir).listFiles();
new File(targetDir).mkdirs();
// load the main 'translation'
String propName = templateDir + "/_docs_" + MAIN_LANGUAGE + ".properties";
Properties prop = load(propName, false);
propName = templateDir + "/_docs_" + language + ".properties";
if (!(new File(propName)).exists()) {
throw new IOException("Translation not found: " + propName);
}
Properties transProp = load(propName, false);
for (Object k : transProp.keySet()) {
String key = (String) k;
String t = transProp.getProperty(key);
// overload with translations, but not the ones starting with #
if (t.startsWith("##")) {
prop.put(key, t.substring(2));
} else if (!t.startsWith("#")) {
prop.put(key, t);
}
}
ArrayList <String>fileNames = new ArrayList<String>();
for (File f : list) {
String name = f.getName();
if (!name.endsWith(".jsp")) {
continue;
}
// remove '.jsp'
name = name.substring(0, name.length() - 4);
fileNames.add(name);
}
for (File f : list) {
String name = f.getName();
if (!name.endsWith(".jsp")) {
continue;
}
// remove '.jsp'
name = name.substring(0, name.length() - 4);
String template = IOUtils.readStringAndClose(new FileReader(templateDir + "/" + name + ".jsp"), -1);
HashMap<String, Object> map = New.hashMap();
for (Object k : prop.keySet()) {
map.put(k.toString(), prop.get(k));
}
String html = PageParser.parse(template, map);
html = StringUtils.replaceAll(html, "lang=\"" + MAIN_LANGUAGE + "\"", "lang=\"" + language + "\"");
for (String n : fileNames) {
if ("frame".equals(n)) {
// don't translate 'frame.html' to 'frame_ja.html',
// otherwise we can't switch back to English
continue;
}
html = StringUtils.replaceAll(html, n + ".html\"", n + "_" + language + ".html\"");
}
html = StringUtils.replaceAll(html, "_" + MAIN_LANGUAGE + ".html\"", ".html\"");
String target;
if (language.equals(MAIN_LANGUAGE)) {
target = targetDir + "/" + name + ".html";
} else {
target = targetDir + "/" + name + "_" + language + ".html";
}
OutputStream out = new FileOutputStream(target);
OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8");
writer.write(html);
writer.close();
}
}
private static boolean exclude(String fileName) {
for (String e : EXCLUDE) {
if (fileName.endsWith(e)) {
return true;
}
}
return false;
}
private static void extractFromHtml(String dir, String target) throws Exception {
for (File f : new File(dir).listFiles()) {
String name = f.getName();
if (!name.endsWith(".html")) {
continue;
}
if (exclude(name)) {
continue;
}
// remove '.html'
name = name.substring(0, name.length() - 5);
if (name.indexOf('_') >= 0) {
// ignore translated files
continue;
}
String template = extract(name, f, target);
FileWriter writer = new FileWriter(target + "/" + name + ".jsp");
writer.write(template);
writer.close();
}
}
// private static boolean isText(String s) {
// if (s.length() < 2) {
// return false;
// }
// for (int i = 0; i < s.length(); i++) {
// char c = s.charAt(i);
// if (!Character.isDigit(c) && c != '.' && c != '-' && c != '+') {
// return true;
// }
// }
// return false;
// }
private static String getSpace(String s, boolean start) {
if (start) {
for (int i = 0; i < s.length(); i++) {
if (!Character.isSpaceChar(s.charAt(i))) {
if (i == 0) {
return "";
}
return s.substring(0, i);
}
}
return s;
}
for (int i = s.length() - 1; i >= 0; i--) {
if (!Character.isSpaceChar(s.charAt(i))) {
if (i == s.length() - 1) {
return "";
}
return s.substring(i + 1, s.length());
}
}
// if all spaces, return an empty string to avoid duplicate spaces
return "";
}
private static String extract(String documentName, File f, String target) throws Exception {
String xml = IOUtils.readStringAndClose(new InputStreamReader(new FileInputStream(f), "UTF-8"), -1);
// the template contains ${} instead of text
StringBuilder template = new StringBuilder(xml.length());
int id = 0;
SortedProperties prop = new SortedProperties();
XMLParser parser = new XMLParser(xml);
StringBuilder buff = new StringBuilder();
Stack<String> stack = new Stack<String>();
String tag = "";
boolean ignoreEnd = false;
String nextKey = "";
// for debugging
boolean templateIsCopy = false;
while (true) {
int event = parser.next();
if (event == XMLParser.END_DOCUMENT) {
break;
} else if (event == XMLParser.CHARACTERS) {
String s = parser.getText();
if (s.trim().length() == 0) {
if (buff.length() > 0) {
buff.append(s);
} else {
template.append(s);
}
} else if ("p".equals(tag) || "li".equals(tag) || "a".equals(tag) || "td".equals(tag)
|| "th".equals(tag) || "h1".equals(tag) || "h2".equals(tag) || "h3".equals(tag)
|| "h4".equals(tag) || "body".equals(tag) || "b".equals(tag) || "code".equals(tag)
|| "form".equals(tag) || "span".equals(tag) || "em".equals(tag) || "div".equals(tag)
|| "label".equals(tag)) {
if (buff.length() == 0) {
nextKey = documentName + "_" + (1000 + id++) + "_" + tag;
template.append(getSpace(s, true));
} else if (templateIsCopy) {
buff.append(getSpace(s, true));
}
buff.append(s);
} else if ("pre".equals(tag) || "title".equals(tag) || "script".equals(tag) || "style".equals(tag)) {
// ignore, don't translate
template.append(s);
} else {
System.out.println(f.getName() + " invalid wrapper tag for text: " + tag + " text: " + s);
System.out.println(parser.getRemaining());
throw new Exception();
}
} else if (event == XMLParser.START_ELEMENT) {
stack.add(tag);
String name = parser.getName();
if ("code".equals(name) || "a".equals(name) || "b".equals(name) || "span".equals(name)) {
// keep tags if wrapped, but not if this is the wrapper
if (buff.length() > 0) {
buff.append(parser.getToken());
ignoreEnd = false;
} else {
ignoreEnd = true;
template.append(parser.getToken());
}
} else if ("p".equals(tag) || "li".equals(tag) || "td".equals(tag) || "th".equals(tag)
|| "h1".equals(tag) || "h2".equals(tag) || "h3".equals(tag) || "h4".equals(tag)
|| "body".equals(tag) || "form".equals(tag)) {
if (buff.length() > 0) {
if (templateIsCopy) {
template.append(buff.toString());
} else {
template.append("${" + nextKey + "}");
}
add(prop, nextKey, buff);
}
template.append(parser.getToken());
} else {
template.append(parser.getToken());
}
tag = name;
} else if (event == XMLParser.END_ELEMENT) {
String name = parser.getName();
if ("code".equals(name) || "a".equals(name) || "b".equals(name) || "span".equals(name)
|| "em".equals(name)) {
if (ignoreEnd) {
if (buff.length() > 0) {
if (templateIsCopy) {
template.append(buff.toString());
} else {
template.append("${" + nextKey + "}");
}
add(prop, nextKey, buff);
}
template.append(parser.getToken());
} else {
if (buff.length() > 0) {
buff.append(parser.getToken());
}
}
} else {
if (buff.length() > 0) {
if (templateIsCopy) {
template.append(buff.toString());
} else {
template.append("${" + nextKey + "}");
}
add(prop, nextKey, buff);
}
template.append(parser.getToken());
}
tag = stack.pop();
} else if (event == XMLParser.DTD) {
template.append(parser.getToken());
} else if (event == XMLParser.COMMENT) {
template.append(parser.getToken());
} else {
int eventType = parser.getEventType();
throw new Exception("Unexpected event " + eventType + " at " + parser.getRemaining());
}
// if(!xml.startsWith(template.toString())) {
// System.out.println(nextKey);
// System.out.println(template.substring(template.length()-60)
// +";");
// System.out.println(xml.substring(template.length()-60,
// template.length()));
// System.out.println(template.substring(template.length()-55)
// +";");
// System.out.println(xml.substring(template.length()-55,
// template.length()));
// break;
// }
}
new File(target).mkdirs();
String propFileName = target + "/_docs_" + MAIN_LANGUAGE + ".properties";
Properties old = load(propFileName, false);
prop.putAll(old);
store(prop, propFileName, false);
String t = template.toString();
if (templateIsCopy && !t.equals(xml)) {
for (int i = 0; i < Math.min(t.length(), xml.length()); i++) {
if (t.charAt(i) != xml.charAt(i)) {
int start = Math.max(0, i - 30), end = Math.min(i + 30, xml.length());
t = t.substring(start, end);
xml = xml.substring(start, end);
}
}
System.out.println("xml--------------------------------------------------: ");
System.out.println(xml);
System.out.println("t---------------------------------------------------: ");
System.out.println(t);
System.exit(1);
}
return t;
}
private static String clean(String text) {
if (text.indexOf('\r') < 0 && text.indexOf('\n') < 0) {
return text;
}
text = text.replace('\r', ' ');
text = text.replace('\n', ' ');
while (true) {
String s = StringUtils.replaceAll(text, " ", " ");
if (s.equals(text)) {
break;
}
text = s;
}
return text;
}
private static void add(Properties prop, String document, StringBuilder text) {
String s = clean(text.toString());
text.setLength(0);
prop.setProperty(document, s);
}
private static void prepare(String baseDir, String path, boolean utf8) throws IOException {
String suffix = utf8 ? ".prop" : ".properties";
File dir = new File(path);
File main = null;
ArrayList<File> translations = new ArrayList<File>();
for (File f : dir.listFiles()) {
if (f.getName().endsWith(suffix) && f.getName().indexOf('_') >= 0) {
if (f.getName().endsWith("_" + MAIN_LANGUAGE + suffix)) {
main = f;
} else {
translations.add(f);
}
}
}
SortedProperties p = load(main.getAbsolutePath(), utf8);
Properties base = load(baseDir + "/" + main.getName(), utf8);
store(p, main.getAbsolutePath(), utf8);
for (File trans : translations) {
String language = trans.getName();
language = language.substring(language.lastIndexOf('_') + 1, language.lastIndexOf('.'));
prepare(p, base, trans, utf8);
}
store(p, baseDir + "/" + main.getName(), utf8);
}
private static SortedProperties load(String fileName, boolean utf8) throws IOException {
if (utf8) {
String s = new String(IOUtils.readBytesAndClose(new FileInputStream(fileName), -1), "UTF-8");
return SortedProperties.fromLines(s);
}
return SortedProperties.loadProperties(fileName);
}
private static void store(SortedProperties p, String fileName, boolean utf8) throws IOException {
if (utf8) {
String s = p.toLines();
FileOutputStream f = new FileOutputStream(fileName);
f.write(s.getBytes("UTF-8"));
f.close();
} else {
p.store(fileName);
}
}
private static void prepare(Properties main, Properties base, File trans, boolean utf8) throws IOException {
SortedProperties p = load(trans.getAbsolutePath(), utf8);
Properties oldTranslations = new Properties();
for (Object k : base.keySet()) {
String key = (String) k;
String m = base.getProperty(key);
String t = p.getProperty(key);
if (t != null && !t.startsWith("#")) {
oldTranslations.setProperty(m, t);
}
}
HashSet<String> toTranslate = new HashSet<String>();
// add missing keys, using # and the value from the main file
for (Object k : main.keySet()) {
String key = (String) k;
String now = main.getProperty(key);
if (!p.containsKey(key)) {
String t = oldTranslations.getProperty(now);
if (t == null) {
// System.out.println(trans.getName() +
// ": key " + key + " not found in " +
// "translation file; added # 'translation'");
t = "#" + now;
p.put(key, t);
} else {
p.put(key, t);
}
} else {
String t = p.getProperty(key);
String last = base.getProperty(key);
if (t.startsWith("#") && !t.startsWith("##")) {
// not translated before
t = oldTranslations.getProperty(now);
if (t == null) {
t = "#" + now;
}
p.put(key, t);
} else if (last != null && !last.equals(now)) {
t = oldTranslations.getProperty(now);
if (t == null) {
// main data changed since the last run: review translation
System.out.println(trans.getName() + ": key " + key + " changed, please review; last=" + last
+ " now=" + now);
String old = p.getProperty(key);
t = "#" + now + " #" + old;
p.put(key, t);
} else {
p.put(key, t);
}
}
}
}
for (String key : toTranslate) {
String now = main.getProperty(key);
String t;
System.out.println(trans.getName() + ": key " + key + " not found in translation file; added dummy # 'translation'");
t = "#" + now;
p.put(key, t);
}
// remove keys that don't exist in the main file (deleted or typo in the key)
for (Object k : new ArrayList<Object>(p.keySet())) {
String key = (String) k;
if (!main.containsKey(key)) {
p.remove(key);
}
}
store(p, trans.getAbsolutePath(), utf8);
}
}