// Package
//
// Source Code of GenerateNFKC

/**
* Copyright (C) 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
*
* Author: Oliver Hitz
*
* This file is part of GNU Libidn.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * Command-line generator that reads <code>UnicodeData.txt</code> and
 * <code>CompositionExclusions.txt</code> from the current directory and
 * writes four Java source files with lookup tables:
 * <code>CombiningClass.java</code>, <code>DecompositionKeys.java</code>,
 * <code>DecompositionMappings.java</code> and <code>Composition.java</code>.
 *
 * Only code points written with exactly four hexadecimal digits (that
 * is, code points up to 0xffff) are processed; all others are skipped.
 */
public class GenerateNFKC
{
  /**
   * Removes a trailing comment from a line.
   *
   * @param in the line to process
   * @return the part of <code>in</code> before the first '#', or
   *         <code>in</code> itself when it contains no '#'
   */
  static String stripComment(String in)
  {
    int c = in.indexOf('#');
    if (c == -1) {
      return in;
    }
    return in.substring(0, c);
  }

  /**
   * Splits a string at every occurrence of a separator character.
   * Empty fields are preserved as empty strings, including a trailing
   * empty field (and the single field of an empty input), so the
   * returned array never contains <code>null</code>.
   *
   * @param in the string to split
   * @param sep the separator character
   * @return an array with one element more than the number of
   *         separators found in <code>in</code>
   */
  static String[] split(String in, char sep)
  {
    int fields = 1;
    for (int i = 0; i < in.length(); i++) {
      if (in.charAt(i) == sep) {
        fields++;
      }
    }

    String[] out = new String[fields];
    int field = 0;
    int start = 0;
    for (int i = 0; i < in.length(); i++) {
      if (in.charAt(i) == sep) {
        out[field++] = in.substring(start, i);
        start = i + 1;
      }
    }
    // The last field is stored unconditionally; it is "" when the
    // input is empty or ends with the separator.
    out[field] = in.substring(start);
    return out;
  }

  /**
   * Tells whether a decomposition field starts with a formatting tag
   * such as <code>&lt;compat&gt;</code>.
   *
   * @param in the decomposition field
   * @return true if <code>in</code> is non-empty and starts with '&lt;'
   */
  static boolean isCompatibilityMapping(String in)
  {
    return in.length() > 0 && in.charAt(0) == '<';
  }

  /**
   * Removes the leading tag from a tagged decomposition field.
   *
   * @param in a field of the form <code>&lt;tag&gt; XXXX ...</code>
   * @return everything after the first '&gt;' and the single
   *         character that follows it
   */
  static String stripCompatibilityTag(String in)
  {
    return in.substring(in.indexOf('>')+2);
  }

  /**
   * Converts a space separated list of hexadecimal code points into
   * the body of a Java string or character literal. The code points
   * 005C (backslash) and 0022 (double quote) are written as escaped
   * backslash and escaped quote; every other code point is written as
   * a Unicode escape built from its hexadecimal digits.
   *
   * @param in space separated hexadecimal code points
   * @return the escaped text (without surrounding quotes)
   */
  static String toJavaString(String in)
  {
    StringBuffer out = new StringBuffer();
    String[] chars = split(in, ' ');
    for (int i = 0; i < chars.length; i++) {
      if (chars[i].equals("005C")) {
        out.append("\\\\");
      } else if (chars[i].equals("0022")) {
        out.append("\\\"");
      } else {
        out.append("\\u");
        out.append(chars[i]);
      }
    }
    return out.toString();
  }

  /**
   * Recursively expands every code point of a sequence that has an
   * entry in the given mapping table.
   *
   * @param in space separated hexadecimal code points
   * @param mappings table from code point (String) to its replacement
   *        sequence (String of space separated code points)
   * @return the fully expanded, space separated sequence
   */
  static String decompose(String in, TreeMap mappings)
  {
    StringBuffer out = new StringBuffer("");
    String[] c = split(in, ' ');

    for (int i = 0; i < c.length; i++) {
      if (out.length() > 0) {
        out.append(" ");
      }
      if (mappings.containsKey(c[i])) {
        out.append(decompose((String) mappings.get(c[i]), mappings));
      } else {
        out.append(c[i]);
      }
    }

    return out.toString();
  }

  /**
   * Adds one to the Integer counter stored under <code>key</code>,
   * starting at 1 when the key is not present yet.
   */
  private static void increment(TreeMap counts, String key)
  {
    Integer n = (Integer) counts.get(key);
    if (n == null) {
      counts.put(key, new Integer(1));
    } else {
      counts.put(key, new Integer(n.intValue()+1));
    }
  }

  /**
   * Throws if the given writer recorded an I/O error. PrintWriter
   * never throws on its own, so without this check a failed write
   * would go unnoticed.
   */
  private static void failOnWriteError(PrintWriter w, String fileName)
    throws IOException
  {
    if (w.checkError()) {
      throw new IOException("Error while writing "+fileName);
    }
  }

  /**
   * Reads the Unicode data files and writes the generated table
   * classes into the current directory. Exits with status 1 when an
   * input file is missing or when a canonical mapping is longer than
   * two code points or ambiguous.
   *
   * @param arg ignored
   * @throws Exception if reading an input file or writing an output
   *         file fails
   */
  public static void main(String[] arg)
    throws Exception
  {
    // Check if the unicode files exist
    {
      File f1 = new File("CompositionExclusions.txt");
      File f2 = new File("UnicodeData.txt");
      if (!f1.exists() || !f2.exists()) {
        System.err.println("Unable to find UnicodeData.txt or CompositionExclusions.txt.");
        System.err.println("Please download the latest version of these file from:");
        System.err.println("http://www.unicode.org/Public/UNIDATA/");
        System.exit(1);
      }
    }

    // Read CompositionExclusions
    TreeSet exclusions = new TreeSet();
    {
      BufferedReader r = new BufferedReader(new FileReader("CompositionExclusions.txt"));
      try {
        String line;
        while (null != (line = r.readLine())) {
          line = stripComment(line);
          line = line.trim();
          // Empty lines and code points > 0xffff are skipped
          if (line.length() == 4) {
            exclusions.add(line);
          }
        }
      } finally {
        r.close();
      }
    }

    // Read UnicodeData
    TreeMap canonical = new TreeMap();        // code point -> canonical mapping
    TreeMap compatibility = new TreeMap();    // code point -> any mapping
    TreeMap combiningClasses = new TreeMap(); // Integer code point -> class

    {
      BufferedReader r = new BufferedReader(new FileReader("UnicodeData.txt"));
      try {
        String line;
        while (null != (line = r.readLine())) {
          line = stripComment(line);
          line = line.trim();

          if (line.length() == 0) {
            // Empty line
            continue;
          }

          String[] f = split(line, ';');

          if (f[0].length() != 4) {
            // Skip code points > 0xffff
            continue;
          }

          // Field 5 is the decomposition mapping, field 3 the
          // combining class.
          if (!f[5].equals("")) {
            if (isCompatibilityMapping(f[5])) {
              compatibility.put(f[0], stripCompatibilityTag(f[5]));
            } else {
              compatibility.put(f[0], f[5]);
              if (!exclusions.contains(f[0])) {
                canonical.put(f[0], f[5]);
              }
            }
          }
          if (!f[3].equals("0")) {
            combiningClasses.put(new Integer(Integer.parseInt(f[0], 16)), f[3]);
          }
        }
      } finally {
        r.close();
      }
    }

    // Recursively apply compatibility mappings until a full pass
    // changes nothing. Replacing the value of an existing key does
    // not structurally modify the map, so the iterator stays valid.
    while (true) {
      boolean replaced = false;

      Iterator i = compatibility.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        String v = (String) compatibility.get(k);

        String d = decompose(v, compatibility);
        if (!d.equals(v)) {
          replaced = true;
          compatibility.put(k, d);
        }
      }

      if (!replaced) {
        break;
      }
    }

    // Eliminate duplicate mappings: every distinct mapping is stored
    // once and each key refers to it by index.
    TreeMap compatibilityKeys = new TreeMap();
    ArrayList compatibilityMappings = new ArrayList();

    {
      Iterator i = compatibility.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        String v = (String) compatibility.get(k);

        int index = compatibilityMappings.indexOf(v);
        if (index == -1) {
          index = compatibilityMappings.size();
          compatibilityMappings.add(v);
        }
        compatibilityKeys.put(k, new Integer(index));
      }
    }

    // Create composition tables: count how often each code point
    // occurs as first and as second element of a two element
    // canonical mapping.
    TreeMap firstMap = new TreeMap();
    TreeMap secondMap = new TreeMap();

    {
      Iterator i = canonical.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        String v = (String) canonical.get(k);

        String[] s = split(v, ' ');

        if (s.length == 2) {
          // A pair whose first character has a non-zero combining
          // class is dropped from the composition tables. The
          // previous condition,
          //   cc1 != null || (cc1 != null && cc1.equals(cc2)),
          // is logically the same as this single test.
          String cc1 = (String) combiningClasses.get(new Integer(Integer.parseInt(s[0], 16)));
          if (cc1 != null) {
            // Ignore this composition
            i.remove();
            continue;
          }

          increment(firstMap, s[0]);
          increment(secondMap, s[1]);
        } else if (s.length > 2) {
          System.err.println("? wrong canonical mapping for "+k);
          System.exit(1);
        }
      }
    }

    TreeMap singleFirstComposition = new TreeMap();
    TreeMap singleSecondComposition = new TreeMap();
    TreeMap complexComposition = new TreeMap();

    // Highest code point that takes part in any composition
    int composeLookupMax = 0;
    {
      Iterator i = canonical.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        String v = (String) canonical.get(k);

        String[] s = split(v, ' ');

        if (s.length != 2) {
          continue;
        }

        Integer first = (Integer) firstMap.get(s[0]);
        Integer second = (Integer) secondMap.get(s[1]);

        if (first.intValue() == 1) {
          // s[0] starts exactly one composition
          singleFirstComposition.put(s[0], new String[] { s[1], k });
          composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[0], 16));
        } else if (second.intValue() == 1) {
          // s[1] ends exactly one composition
          singleSecondComposition.put(s[1], new String[] { s[0], k });
          composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[1], 16));
        } else {
          TreeMap m = (TreeMap) complexComposition.get(s[0]);
          if (m == null) {
            m = new TreeMap();
            complexComposition.put(s[0], m);
          } else if (m.containsKey(s[1])) {
            System.err.println("? ambiguous canonical mapping for "+s[0]);
            System.exit(1);
          }
          m.put(s[1], k);
          composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[0], 16));
          composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[1], 16));
        }
      }
    }

    System.out.print("Creating CombiningClass.java...");

    // Dump combining classes: one 256 entry page per high byte that
    // has at least one non-zero class, plus an index from high byte
    // to page number (-1 for pages that are all zero).
    {
      PrintWriter w = new PrintWriter(new FileWriter("CombiningClass.java"));
      try {
        w.println("/* Do not edit - automatically generated */");
        w.println();
        w.println("public class CombiningClass");
        w.println("{");
        w.println("  public final static int[][] c = new int[][] {");
        StringBuffer index = new StringBuffer();

        int count = 0;

        for (int i = 0; i < 256; i++) {
          boolean empty = true;

          StringBuffer page = new StringBuffer();
          page.append("    { /* Page "+i+" */");

          for (int j = 0; j < 256; j++) {
            Integer c = new Integer((i << 8) + j);
            String cc = (String) combiningClasses.get(c);

            if (0 == (j & 31)) {
              page.append("\n      ");
            }
            if (cc == null) {
              page.append("0, ");
            } else {
              page.append(cc+", ");
              empty = false;
            }
          }
          page.append("\n    },");

          index.append("    ");

          if (!empty) {
            w.println(page.toString());
            index.append(count++);
            index.append(",\n");
          } else {
            index.append("-1,\n");
          }
        }
        w.println("  };\n");

        w.println("  public final static int[] i = new int[] {");
        w.print(index.toString());
        w.println("  };");
        w.println("}");
      } finally {
        w.close();
      }
      failOnWriteError(w, "CombiningClass.java");
    }

    System.out.println(" Ok.");
    System.out.print("Creating DecompositionKeys.java...");

    // Dump compatibility decomposition: pairs of code point and index
    // into the mappings table.
    {
      PrintWriter w = new PrintWriter(new FileWriter("DecompositionKeys.java"));
      try {
        w.println("/* Do not edit - automatically generated */");
        w.println();
        w.println("public class DecompositionKeys");
        w.println("{");
        w.println("  public final static int[] k = new int[] {");
        Iterator i = compatibilityKeys.keySet().iterator();
        while (i.hasNext()) {
          String k  = (String) i.next();
          int index = ((Integer) compatibilityKeys.get(k)).intValue();
          w.println("    '\\u"+k+"', "+index+",");
        }
        w.println("  };");
        w.println("}");
      } finally {
        w.close();
      }
      failOnWriteError(w, "DecompositionKeys.java");
    }

    System.out.println(" Ok.");
    System.out.print("Creating DecompositionMappings.java...");

    {
      PrintWriter w = new PrintWriter(new FileWriter("DecompositionMappings.java"));
      try {
        w.println("/* Do not edit - automatically generated */");
        w.println();
        w.println("public class DecompositionMappings");
        w.println("{");
        w.println("  public final static String[] m = new String[] {");
        Iterator i = compatibilityMappings.iterator();
        while (i.hasNext()) {
          String m = (String) i.next();
          w.println("    \""+toJavaString(m)+"\",");
        }
        w.println("  };");
        w.println("}");
      } finally {
        w.close();
      }
      failOnWriteError(w, "DecompositionMappings.java");
    }

    System.out.println(" Ok.");
    System.out.print("Creating Composition.java...");

    // Dump canonical composition
    {
      PrintWriter w = new PrintWriter(new FileWriter("Composition.java"));
      try {
        w.println("/* Do not edit - automatically generated */");
        w.println();
        w.println("public class Composition");
        w.println("{");

        Iterator i;
        int index = 0;

        // Integer code point -> Integer index; indices are handed out
        // in the order: complex firsts, complex seconds, single
        // firsts, single seconds.
        TreeMap indices = new TreeMap();

        i = complexComposition.keySet().iterator();
        while (i.hasNext()) {
          String s0 = (String) i.next();
          indices.put(new Integer(Integer.parseInt(s0, 16)), new Integer(index));
          index++;
        }

        int multiSecondStart = index;

        w.println("  public final static char[][] multiFirst = new char[][] {");
        i = complexComposition.keySet().iterator();
        while (i.hasNext()) {
          String s0 = (String) i.next();
          TreeMap m = (TreeMap) complexComposition.get(s0);

          // Index of second character -> composed character
          TreeMap line = new TreeMap();
          int maxIndex = 1;

          Iterator i2 = m.keySet().iterator();
          while (i2.hasNext()) {
            String s1 = (String) i2.next();
            String k = (String) m.get(s1);

            Integer s1i = new Integer(Integer.parseInt(s1, 16));

            if (!indices.containsKey(s1i)) {
              indices.put(s1i, new Integer(index));
              index++;
            }
            line.put(indices.get(s1i), k);
            maxIndex = Math.max(maxIndex, ((Integer) indices.get(s1i)).intValue());
          }

          w.print("    { ");
          for (int j = multiSecondStart; j <= maxIndex; j++) {
            if (line.containsKey(new Integer(j))) {
              String s = (String) line.get(new Integer(j));
              w.print("'"+toJavaString(s)+"', ");
            } else {
              w.print("       0, ");
            }
          }
          w.println("},");
        }
        w.println("  };");

        int singleFirstStart = index;

        w.println("  public final static char[][] singleFirst = new char[][] {");
        i = singleFirstComposition.keySet().iterator();
        while (i.hasNext()) {
          String k = (String) i.next();
          String[] v = ((String[]) singleFirstComposition.get(k));
          w.println("    { '"+toJavaString(v[0])+"', '"+toJavaString(v[1])+"' },");

          if (indices.containsKey(new Integer(Integer.parseInt(k, 16)))) {
            System.out.println(k+" already indexed!");
          }

          indices.put(new Integer(Integer.parseInt(k, 16)), new Integer(index));
          index++;
        }
        w.println("  };");

        int singleSecondStart = index;

        w.println("  public final static char[][] singleSecond = new char[][] {");
        i = singleSecondComposition.keySet().iterator();
        while (i.hasNext()) {
          String k = (String) i.next();
          String[] v = ((String[]) singleSecondComposition.get(k));
          w.println("    { '"+toJavaString(v[0])+"', '"+toJavaString(v[1])+"' },");

          indices.put(new Integer(Integer.parseInt(k, 16)), new Integer(index));
          index++;
        }
        w.println("  };");

        w.println("  public final static int multiSecondStart = "+multiSecondStart+";");
        w.println("  public final static int singleFirstStart = "+singleFirstStart+";");
        w.println("  public final static int singleSecondStart = "+singleSecondStart+";");

        StringBuffer compositionPages = new StringBuffer();

        // Two level lookup: composePage maps a high byte to a page
        // number (-1 when the page has no entries), composeData holds
        // the non-empty pages.
        w.println("  public final static int[] composePage = new int[] {");
        int pageCount = 0;
        for (int j = 0; j*256 < composeLookupMax+255; j++) {
          boolean empty = true;
          StringBuffer page = new StringBuffer();
          for (int k = 0; k < 256; k++) {
            if (k % 16 == 0) {
              page.append("\n      ");
            }
            if (indices.containsKey(new Integer(j*256+k))) {
              page.append(indices.get(new Integer(j*256+k)));
              page.append(", ");
              empty = false;
            } else {
              page.append("-1, ");
            }
          }

          if (empty) {
            w.println("    -1,");
          } else {
            w.println("    "+pageCount+",");
            compositionPages.append("    {");
            compositionPages.append(page);
            compositionPages.append("\n    },\n");
            pageCount++;
          }
        }
        w.println("  };");

        w.println("  public final static int[][] composeData = new int[][] {");
        w.print(compositionPages);
        w.println("  };");

        w.println("}");
      } finally {
        w.close();
      }
      failOnWriteError(w, "Composition.java");
    }

    System.out.println(" Ok.");
  }
}
// TOP
//
// Related Classes of GenerateNFKC
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.