Package com.caucho.quercus.lib.i18n

Source Code of com.caucho.quercus.lib.i18n.MbstringModule

/*
* Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT.  See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
*
*   Free Software Foundation, Inc.
*   59 Temple Place, Suite 330
*   Boston, MA 02111-1307  USA
*
* @author Nam Nguyen
*/

package com.caucho.quercus.lib.i18n;

import com.caucho.quercus.QuercusModuleException;
import com.caucho.quercus.UnimplementedException;
import com.caucho.quercus.annotation.Optional;
import com.caucho.quercus.annotation.Reference;
import com.caucho.quercus.annotation.VariableArguments;
import com.caucho.quercus.env.*;
import com.caucho.quercus.lib.MailModule;
import com.caucho.quercus.lib.regexp.Eregi;
import com.caucho.quercus.lib.regexp.RegexpModule;
import com.caucho.quercus.lib.regexp.UnicodeEreg;
import com.caucho.quercus.lib.regexp.UnicodeEregi;
import com.caucho.quercus.lib.string.StringModule;
import com.caucho.quercus.module.AbstractQuercusModule;
import com.caucho.quercus.module.IniDefinitions;
import com.caucho.quercus.module.IniDefinition;
import com.caucho.util.L10N;
import com.caucho.vfs.Encoding;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

public class MbstringModule
  extends AbstractQuercusModule
{
  // Logger used by this module for FINE-level diagnostics.
  private static final Logger log
    = Logger.getLogger(MbstringModule.class.getName());
 
  // Localized message factory for warnings and log messages.
  private static final L10N L = new L10N(MbstringModule.class);
 
  // php.ini definitions returned by getIniDefinitions().
  private static final IniDefinitions _iniDefinitions = new IniDefinitions();

  // Mode values accepted by mb_convert_case().
  public static final int MB_CASE_UPPER = 0;
  public static final int MB_CASE_LOWER = 1;
  public static final int MB_CASE_TITLE = 2;

  /**
   * Returns the extensions implemented by the module.
   */
  public String []getLoadedExtensions()
  {
    return new String[] { "mbstring" };
  }

  /**
   * Returns the default php.ini values.
   */
  public IniDefinitions getIniDefinitions()
  {
    return _iniDefinitions;
  }
 
  /**
   * Checks if the string is correctly encoded.
   * XXX: no args
   */
  public boolean mb_check_encoding(Env env,
                                   @Optional Value var,
                                   @Optional String encoding)
  {
    if (encoding == null || encoding.length() == 0)
      encoding = getEncoding(env);
  
    Decoder decoder = Decoder.create(encoding);
   
    if (! var.isDefault())
      return decoder.isDecodable(env, var.toStringValue(env));
  
    else
      throw new UnimplementedException("mb_check_encoding() with no args");
  }

  /**
   * Upper-cases, lower-cases, or capitalizes first letter of words.
   */
  public static StringValue mb_convert_case(Env env,
                              StringValue str,
                              int mode,
                              @Optional("") String encoding)
  {
    if (mode == MB_CASE_TITLE) {
      encoding = getEncoding(env, encoding);
     
      CharSequence unicodeStr = decode(env, str, encoding);

      unicodeStr = toUpperCaseTitle(env, unicodeStr);
     
      return encode(env, unicodeStr, encoding);
    }
    else if (mode == MB_CASE_LOWER)
      return mb_strtolower(env, str, encoding);
    else if (mode == MB_CASE_UPPER)
      return mb_strtoupper(env, str, encoding);
    else
      return str;
  }

  /**
   * Converts string of one encoding to another.
   */
  public static Value mb_convert_encoding(Env env,
                                          StringValue str,
                                          String destEncoding,
                                          @Optional Value fromEncodings)
  {
    ArrayList<String> charsetList = getEncodingList(env, fromEncodings);
   
    CharSequence unicodeStr = null;
    for (int i = 0; i < charsetList.size(); i++) {
      String charset = charsetList.get(i);

      try {
        unicodeStr = decode(env, str, charset);
      } catch (UnsupportedCharsetException e) {
        // should probably not log anything here because this is normal
        // behavior because of fallback encodings
        continue;
      }
    }
   
    if (unicodeStr == null) {
      log.log(Level.FINE, L.l("unsupported character encoding {0}", fromEncodings));
      env.warning(L.l("unsupported character encoding {0}", fromEncodings));
      return str;
    }
   
    try {
      return encode(env, unicodeStr, destEncoding);
    } catch (UnsupportedCharsetException e) {
      log.log(Level.FINE, e.getMessage(), e);
      env.warning(L.l("unsupported character encoding {0}", fromEncodings));

      return BooleanValue.FALSE;
    }
  }

  /**
   * Performs Japanese specific charset conversion.
   */
  public static StringValue mb_convert_kana(Env env,
                              StringValue str,
                              @Optional("") String option,
                              @Optional("") String encoding)
  {
    throw new UnimplementedException("mb_convert_kana");
  }

  /**
   * Decodes and then encodes variables.
   *
   * XXX: variable arguments to convert.
   */
  @VariableArguments
  public static StringValue mb_convert_variables(Env env,
                                                  String toEncoding,
                                                  String fromEncodings,
                                                  @Reference Value vars)
  {
    // XXX: fallback encoding
    int tail = fromEncodings.indexOf(',', 1);

    if (tail < 0)
      tail = fromEncodings.length();

    String srcEncoding;

    if (tail < 0)
      srcEncoding = fromEncodings;
    else
      srcEncoding = getEncoding(env, fromEncodings.substring(0, tail).trim());

    Value decoded = decodeAll(env, vars, srcEncoding);
    vars.set(encodeAll(env, decoded, toEncoding));

    return env.createStringOld(srcEncoding);
  }

  /**
   * Decodes mime field.
   */
  public static Value mb_decode_mimeheader(Env env,
                              StringValue str)
  {
    String encoding = getEncoding(env);

    try {
      return QuercusMimeUtility.decodeMime(env, str, encoding);

    } catch (UnsupportedEncodingException e) {
      throw new QuercusModuleException(e.getMessage());
    }
  }

  /**
   * Decodes HTML numeric entity.
   */
  public static StringValue mb_decode_numericentity(Env env,
                              StringValue str,
                              ArrayValue convmap,
                              @Optional String encoding)
  {
    throw new UnimplementedException("mb_decode_numericentity");
  }

  /**
   * Detects encoding of string.
   */
  public static Value mb_detect_encoding(Env env,
                                         StringValue str,
                                         @Optional Value encodingV,
                                         @Optional boolean isStrict)
  {
    // XXX: strict
   
    ArrayList<String> encodingList = getDetectOrderList(env, encodingV);  
   
    int len = encodingList.size();
    for (int i = 0; i < len; i++) {
      String charset = encodingList.get(i);
     
      Decoder decoder = Decoder.create(charset);
     
      if (decoder.isDecodable(env, str))
        return env.createStringOld(charset);
    }
   
    return BooleanValue.FALSE;
  }

  /**
   * Specifies order of charsets to test when detecting encoding.
   */
  public static Value mb_detect_order(Env env,
                                      @Optional Value encodingV)
  {
    if (encodingV.isDefault()) {
      ArrayValue array = new ArrayValueImpl();
     
      ArrayList<String> list = getDetectOrderList(env, encodingV);
     
      for (String encoding : list) {
        array.put(encoding);
      }
     
      return array;
    }
    else {
      ArrayList<String> list = new ArrayList<String>();
     
      if (encodingV.isArray()) {
        Iterator<Value> iter = encodingV.getValueIterator(env);
       
        while (iter.hasNext()) {
          list.add(iter.next().toString());
        }
      }
      else
        parseCommaSeparatedList(list, encodingV.toString());
     
      env.setSpecialValue("mb.detect_order", list);
     
      return BooleanValue.TRUE;
    }
  }
 
  @SuppressWarnings("unchecked")
  private static ArrayList<String> getDetectOrderList(Env env,
                                                         Value encodingV)
  {
    if (encodingV.isDefault() && env.getSpecialValue("mb.detect_order") != null)
      return (ArrayList<String>) env.getSpecialValue("mb.detect_order");
   
    ArrayList<String> list = new ArrayList<String>();
   
    if (encodingV.isDefault()) {
      String encodings = env.getIniString("mbstring.detect_order");
     
      if (encodings != null)
        parseCommaSeparatedList(list, encodings);
      else {
        list.add("ASCII");
        list.add("UTF-8");
      }
    }
    else if (encodingV.isArray()) {
      Iterator<Value> iter = encodingV.getValueIterator(env);
     
      while (iter.hasNext()) {
        list.add(iter.next().toString());
      }
    }
    else {
      String encodings = encodingV.toString();
     
      if (encodings.equalsIgnoreCase("auto")) {
        list.add("ASCII");
        list.add("JIS");
        list.add("UTF-8");
        list.add("EUC-JP");
        list.add("SJIS");
      }
      else
        parseCommaSeparatedList(list, encodingV.toString());
    }
   
    return list;
  }
 
  private static void parseCommaSeparatedList(ArrayList<String> list,
                                              String str)
  {
    int start = 0;
    int index;
   
    while ((index = str.indexOf(",", start)) >= 0) {
      String charset = str.substring(start, index).trim();
     
      start = index + 1;
     
      list.add(charset);
    }
   
    list.add(str.substring(start).trim());
  }

  /**
   * Encodes a string into mime.
   */
  public static StringValue mb_encode_mimeheader(Env env,
                              StringValue str,
                              @Optional("") String charset,
                              @Optional("B") String transfer_encoding,
                              @Optional("") String linefeed)
  {
    charset = getEncoding(env, charset);

    try {
      String mime = QuercusMimeUtility.encodeMimeWord(str.toString(),
                                                      charset,
                                                      transfer_encoding,
                                                      linefeed,
                                                      76);
      return env.createStringOld(mime);

    } catch (UnsupportedEncodingException e) {
      throw new QuercusModuleException(e.getMessage());
    }

  }

  /**
   * Encodes HTML numeric string entity.
   */
  public static StringValue mb_encode_numericentity(Env env,
                              StringValue str,
                              ArrayValue convmap,
                              @Optional String encoding)
  {
    throw new UnimplementedException();
  }

  /**
   * Returns true if pattern matches a part of string.
   */
  public static BooleanValue mb_ereg_match(Env env,
                                           UnicodeEreg ereg,
                                           StringValue string,
                                           @Optional String option)
  {
    String encoding = getEncoding(env);

    string = string.convertToUnicode(env, encoding);

    // XXX: option

    Value val = RegexpModule.eregImpl(env, ereg, string, null);

    if (val == BooleanValue.FALSE)
      return BooleanValue.FALSE;
    else
      return BooleanValue.TRUE;
  }

  /**
   * Multibyte version of ereg_replace.
   */
  public static Value mb_ereg_replace(Env env,
                                      UnicodeEreg ereg,
                                      StringValue replacement,
                                      StringValue subject,
                                      @Optional String option)
  {
    String encoding = getEncoding(env);

    replacement = replacement.convertToUnicode(env, encoding);
    subject = subject.convertToUnicode(env, encoding);

    //XXX: option
   
    Value val = RegexpModule.ereg_replace(env,
                                          ereg,
                                          replacement,
                                          subject);

    return encodeAll(env, val, encoding);
  }

  /**
   * Multibyte version of ereg.
   */
  public static Value mb_ereg(Env env,
                              UnicodeEreg ereg,
                              StringValue string,
                              @Optional ArrayValue regs)
  {
    return eregImpl(env, ereg, string, regs);
  }

  /**
   * Multibyte version of eregi_replace.
   */
  public static Value mb_eregi_replace(Env env,
                              StringValue pattern,
                              StringValue replacement,
                              StringValue subject,
                              @Optional String option)
  {
    String encoding = getEncoding(env);

    pattern = pattern.convertToUnicode(env, encoding);
    replacement = replacement.convertToUnicode(env, encoding);
    subject = subject.convertToUnicode(env, encoding);

    //XXX: option

    Eregi regexp = RegexpModule.createEregi(env, pattern);
   
    Value val = RegexpModule.eregi_replace(env, regexp, replacement, subject);

    return encodeAll(env, val, encoding);
  }

  /**
   * Multibyte version of eregi.
   */
  public static Value mb_eregi(Env env,
                               UnicodeEregi eregi,
                               StringValue string,
                               @Optional ArrayValue regs)
  {
    return eregImpl(env, eregi, string, regs);
  }

  private static Value eregImpl(Env env,
                                UnicodeEreg ereg,
                                StringValue string,
                                ArrayValue regs)
  {
    String encoding = getEncoding(env);

    string = string.convertToUnicode(env, encoding);

    if (regs == null) {
      return RegexpModule.eregImpl(env, ereg, string, null);
    }

    Value val;
    Var regVar = new Var();

    val = RegexpModule.eregImpl(env, ereg, string, regVar);
   
    if (regVar.isset()) {
      regs.clear();
      ArrayValue results = regVar.toArrayValue(env);

      for (Map.Entry<Value,Value> entry : results.entrySet()) {

        Value bytes = encodeAll(env, entry.getValue(), encoding);
        regs.put(entry.getKey(), bytes);
      }

      val = LongValue.create(
              regs.get(LongValue.ZERO).toStringValue(env).length());
    }

    return val;
  }

  /**
   * Gets current position of ereg state object.
   */
  public static LongValue mb_ereg_search_getpos(Env env)
  {
    EregSearch ereg = getEreg(env);

    if (ereg == null)
      return LongValue.ZERO;

    return LongValue.create(ereg._position);
  }

  /**
   * Gets the last match of ereg state object from previous matching.
   */
  public static Value mb_ereg_search_getregs(Env env)
  {
    EregSearch ereg = getEreg(env);

    if (ereg == null || ereg._lastMatch == null)
      return BooleanValue.FALSE;

    return ereg._lastMatch;
  }

  /**
   * Initializes a ereg state object.
   */
  public static BooleanValue mb_ereg_search_init(Env env,
                                                 StringValue string,
                                                 @Optional Value rawRegexp,
                                                 @Optional Value option)
  {
    UnicodeEregi regexp = null;
   
    if (! rawRegexp.isDefault()) {
      regexp
        = RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
    }
   
    EregSearch ereg = new EregSearch(env, string, regexp, option);
    env.setSpecialValue("mb.search", ereg);

    return BooleanValue.TRUE;
  }

  /**
   * Returns index and position after matching.
   */
  public static Value mb_ereg_search_pos(Env env,
                                         @Optional Value rawRegexp,
                                         @Optional Value option)
  {
    UnicodeEregi regexp = null;
   
    if (! rawRegexp.isDefault()) {
      regexp
        = RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
    }
   
    EregSearch ereg = getEreg(env, regexp, option);

    if (ereg == null) {
      env.warning(L.l("Regular expression not set"));
      return BooleanValue.FALSE;
    }

    return ereg.search(env, true);
  }

  /**
   * Returns match array after matching.
   */
  public static Value mb_ereg_search_regs(Env env,
                                          @Optional Value rawRegexp,
                                          @Optional Value option)
  {
    UnicodeEregi regexp = null;
   
    if (! rawRegexp.isDefault()) {
      regexp
        = RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
    }
   
    EregSearch ereg = getEreg(env, regexp, option);

    if (ereg == null) {
      env.warning(L.l("Regular expression not set"));
      return BooleanValue.FALSE;
    }

    if (ereg.search(env, false) == BooleanValue.FALSE)
      return BooleanValue.FALSE;

    return ereg._lastMatch;
  }

  /**
   * Sets the position of the ereg state object.
   */
  public static BooleanValue mb_ereg_search_setpos(Env env,
                              int position)
  {
    EregSearch ereg = getEreg(env);

    if (ereg == null)
      return BooleanValue.FALSE;

    ereg._position = position;
    return BooleanValue.TRUE;
  }

  /**
   * Returns whether or not pattern matches string.
   */
  public static BooleanValue mb_ereg_search(Env env,
                                            @Optional Value rawRegexp,
                                            @Optional Value option)
  {
    UnicodeEregi regexp = null;
   
    if (! rawRegexp.isDefault()) {
      regexp
        = RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
    }
   
    EregSearch ereg = getEreg(env, regexp, option);

    if (ereg == null) {
      env.warning(L.l("Regular expression not set"));
      return BooleanValue.FALSE;
    }

    Value result = ereg.search(env, false);

    return BooleanValue.create(result.toBoolean());
  }

  /**
   * Returns the ereg state object from the environment.
   */
  private static EregSearch getEreg(Env env)
  {
    Object obj = env.getSpecialValue("mb.search");

    if (obj == null)
      return null;

    return (EregSearch) obj;
  }

  /**
   * Returns the ereg state object from the environment iff the ereg object
   * is a valid one.
   */
  private static EregSearch getEreg(Env env,
                                    UnicodeEregi regexp,
                                    Value option)
  {
    Object obj = env.getSpecialValue("mb.search");

    if (obj != null) {
      EregSearch ereg = (EregSearch) obj;

      if (regexp != null)
        ereg.init(regexp, option);

      if (ereg._isValidRegexp)
        return ereg;
      else
        return null;
    }
    else
      return null;
  }

  /**
   * Returns current mb settings.
   */
  public static Value mb_get_info(Env env,
                              @Optional("") String type)
  {
    if (type.length() == 0) {
      ArrayValue array = new ArrayValueImpl();

      array.put(env.createStringOld("internal_encoding"),
                env.createStringOld(getEncoding(env)));
     
      array.put(env.createStringOld("http_output"),
                env.createStringOld(getOutputEncoding(env)));

      return array;
    }
    else if (type.equals("internal_encoding")) {
      return env.createStringOld(getEncoding(env));
    }
    else if (type.equals("http_output")) {
      return env.createStringOld(getOutputEncoding(env));
    }
    else {
      env.warning(L.l("unsupported option: {0}", type));
     
      return BooleanValue.FALSE;
    }
  }

  /**
   * Returns and/or sets the http input encoding
   */
  public static Value mb_http_input(Env env,
                              @Optional String type)
  {
    throw new UnimplementedException("mb_http_input");
  }
  /**
   * Returns and/or sets the http output encoding
   */
  public static Value mb_http_output(Env env,
                                     @Optional String encoding)
  {
    if (encoding.length() == 0) {
      return env.createStringOld(getOutputEncoding(env));
    }
    else {
      env.setIni("mbstring.http_output", encoding);
     
      return BooleanValue.TRUE;
    }
  }

  /**
   * Returns and/or sets the internal encoding.
   */
  public static Value mb_internal_encoding(Env env,
                              @Optional String encoding)
  {
    if (encoding.length() == 0)
      return env.createStringOld(getEncoding(env));
    else {
      setEncoding(env, encoding);
      return BooleanValue.TRUE;
    }
  }

  /**
   * Returns and/or sets the encoding for mail.
   */
  public static Value mb_language(Env env,
                                  @Optional String language)
  {
    String encoding = getEncodingLanguage(env);

    if (language == null || language.length() == 0) {
      if (encoding.equalsIgnoreCase("ISO-2022-JP"))
        return env.createStringOld("Japanese");
      else if (encoding.equalsIgnoreCase("ISO-8859-1"))
        return env.createStringOld("English");
      else if (encoding.equalsIgnoreCase("UTF-8"))
        return env.createStringOld("uni");
      else
        return env.createStringOld(encoding);
    }
    else if (language.equals("Japanese") || language.equals("ja"))
      setEncodingLanguage(env, "ISO-2022-JP");
    else if (language.equals("English") || language.equals("en"))
      setEncodingLanguage(env, "ISO-8859-1");
    else if (language.equals("uni"))
      setEncodingLanguage(env, "UTF-8");
    else
      return BooleanValue.FALSE;

    return BooleanValue.TRUE;
  }
 
  private static String getEncodingLanguage(Env env)
  {
    String encoding = (String) env.getSpecialValue("mb.internal_encoding");
   
    if (encoding == null)
      return "ISO-8859-1";
   
    return encoding;
  }
 
  private static void setEncodingLanguage(Env env, String encoding)
  {
    env.setSpecialValue("mb.internal_encoding", encoding);
  }

  /**
   * Get all supported encodings.
   */
  public static ArrayValue mb_list_encodings(Env env)
  {
    ArrayValue array = new ArrayValueImpl();

    Map<String,Charset> charsetMap = Charset.availableCharsets();

    for (String name : charsetMap.keySet()) {
      array.put(env.createStringOld(name));
    }

    return array;
  }

  /**
   * ob_start() handler
   */
  public static StringValue mb_output_handler(Env env,
                                              StringValue contents,
                                              int status)
  {
    // XXX: status?

    String toEncoding = getOutputEncoding(env);

    if (toEncoding.equals("pass"))
      return contents;
   
    String fromEncoding = getEncoding(env);
   
    Decoder decoder = getDecoder(env, fromEncoding);
    CharSequence contentsUnicode = decoder.decode(env, contents);

    Encoder encoder = getEncoder(env, toEncoding);
    return encoder.encode(env, contentsUnicode);
  }

  /**
   * Multibyte version of parse_str.
   */
  public static BooleanValue mb_parse_str(Env env,
                              StringValue strValue,
                              @Optional @Reference Value result)
  {
    String encoding = getEncoding(env);
    StringModule.parse_str(env,strValue.toString(), result);

    if (result == null) {
      // XXX: encode newly added global variables
      return BooleanValue.TRUE;
    }
    else {
      Value array = encodeAll(env, result, encoding);
      result.set(array);

      return BooleanValue.TRUE;
    }
  }

  /**
   * Returns the preferred mime name of this encoding.
   */
  public static StringValue mb_preferred_mime_name(Env env,
                              StringValue encoding)
  {
    String mimeName = Encoding.getMimeName(encoding.toString());

    return env.createStringOld(mimeName);
  }

  /**
   * Returns and/or sets encoding for mb regular expressions.
   */
  public static Value mb_regex_encoding(Env env,
                              @Optional("") String encoding)
  {
    return mb_internal_encoding(env, encoding);
  }

  /**
   * XXX: what does this actually do?
   */
  public static StringValue mb_regex_set_options(Env env,
                              @Optional String options)
  {
    throw new UnimplementedException("mb_regex_set_options");
  }

  /**
   * Multibyte version of mail.
   */
  public static BooleanValue mb_send_mail(Env env,
                              StringValue to,
                              StringValue subject,
                              StringValue message,
                              @Optional StringValue additionalHeaders,
                              @Optional StringValue additionalParameters)
  {
    //XXX: not correct
   
    String encoding = getEncoding(env);

    subject = subject.toBinaryValue(encoding);
    message = message.toBinaryValue(encoding);
    additionalHeaders = additionalHeaders.toBinaryValue(encoding);

    boolean result = MailModule.mail(env,
                                     to.toString(),
                                     subject.toString(),
                                     message,
                                     additionalHeaders.toString(),
                                     additionalParameters.toString());

    return BooleanValue.create(result);
  }

  /**
   * Multibyte version of split.
   */
  public static Value mb_split(Env env,
                               UnicodeEreg ereg,
                              StringValue string,
                              @Optional("-1") long limit)
  {
    String encoding = getEncoding(env);

    string = string.convertToUnicode(env, encoding);
   
    Value val = RegexpModule.split(env, ereg, string, limit);

    return encodeAll(env, val, encoding);
  }

  /**
   * Similar to substr except start index is at the beginning of char
   * boundaries.
   */
  public static StringValue mb_strcut(Env env,
                              final StringValue str,
                              int start,
                              @Optional("7fffffff") int length,
                              @Optional String encoding)
  {
    encoding = getEncoding(env, encoding);
   
    CharSequence unicodeStr = decode(env, str, encoding);

    int len = unicodeStr.length();
    int end = start + length;

    if (end > len)
      end = len;

    if (start < 0 || start > end)
      return StringValue.EMPTY;

    // XXX: not quite exactly the same behavior as PHP
    if (start < len && Character.isHighSurrogate(unicodeStr.charAt(start)))
      start--;

    StringBuilder sb = new StringBuilder();
   
    sb.append(unicodeStr, start, end);

    return encode(env, sb, encoding);
  }

  /**
   * Truncates the string.
   */
  public static StringValue mb_strimwidth(Env env,
                              final StringValue str,
                              int start,
                              int width,
                              @Optional() StringValue trimmarker,
                              @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);
   
    CharSequence unicodeStr = decode(env, str, encoding);

    int len = unicodeStr.length();
    int end = start + width;

    if (end > len)
      end = len;

    if (start < 0 || start > end)
      return StringValue.EMPTY;

    StringBuilder sb = new StringBuilder();

    if (end < len && trimmarker.length() > 0) {
      sb.append(unicodeStr, start, end - 1);
      sb.append(decode(env, trimmarker, encoding));

      unicodeStr = sb;
    }
    else
      sb.append(unicodeStr, start, end);

    return encode(env, sb, encoding);
  }

  /**
   * Multibyte version of strlen.
   */
  public static LongValue mb_strlen(Env env,
                              StringValue str,
                              @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);

    str = str.convertToUnicode(env, encoding);

    return LongValue.create(str.length());
  }

  /**
   * Multibyte version of strpos.
   */
  public static Value mb_strpos(Env env,
                              StringValue haystack,
                              StringValue needle,
                              @Optional("0") int offset,
                              @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);

    haystack = haystack.convertToUnicode(env, encoding);
    needle = needle.convertToUnicode(env, encoding);

    return StringModule.strpos(env, haystack, needle, offset);
  }

  /**
   * Multibyte version of strrpos.
   */
  public static Value mb_strrpos(Env env,
                              StringValue haystack,
                              StringValue needle,
                              @Optional Value offsetV,
                              @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);

    haystack = haystack.convertToUnicode(env, encoding);
    needle = needle.convertToUnicode(env, encoding);

    return StringModule.strrpos(env, haystack, needle, offsetV);
  }

  /**
   * Converts all characters to lower-case.
   */
  public static StringValue mb_strtolower(Env env,
                                          StringValue str,
                                          @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);

    StringValue unicodeStr = str.convertToUnicode(env, encoding);
    unicodeStr = StringModule.strtolower(str);

    return str.create(env, unicodeStr, encoding);
  }

  /**
   * Converts all characters to upper-case.
   */
  public static StringValue mb_strtoupper(Env env,
                                          StringValue str,
                                          @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);

    StringValue unicodeStr = str.convertToUnicode(env, encoding);
    unicodeStr = StringModule.strtoupper(str);

    return str.create(env, unicodeStr, encoding);
  }

  /**
   * Returns the width of this multibyte string.
   */
  public static LongValue mb_strwidth(Env env,
                              StringValue str,
                              @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);

    str = str.convertToUnicode(env, encoding);

    return LongValue.create(str.length());

/*
    int width = 0;
    int len = string.length();

    // Per PHP manual
    for (int i = 0; i < len; i++) {
      char ch = string.charAt(i);

      if (ch <= 0x19)
        continue;
      else if (ch <= 0x1fff)
        width += 1;
      else if (ch <= 0xff60)
        width += 2;
      else if (ch <= 0xff9f)
        width += 1;
      else
        width += 2;
    }

    return LongValue.create(width);
*/
  }

  /**
   * Sets the character to use when decoding/encoding fails on a character.
   */
  public static Value mb_substitute_character(Value substrchar)
  {
    throw new UnimplementedException("mb_substitute_character");
  }

  public static LongValue mb_substr_count(Env env,
                              StringValue haystack,
                              StringValue needle,
                              @Optional("") String encoding)
  {
    encoding = getEncoding(env, encoding);

    haystack = haystack.convertToUnicode(env, encoding);
    needle = needle.convertToUnicode(env, encoding);

    int count = 0;
    int sublen = needle.length();

    int i = haystack.indexOf(needle);

    while (i >= 0) {
      i = haystack.indexOf(needle, i + sublen);
      count++;
    }

    return LongValue.create(count);
  }

  /**
   * Multibyte version of substr.
   */
  public static StringValue mb_substr(Env env,
                                      StringValue str,
                                      int start,
                                      @Optional Value lengthV,
                                      @Optional String encoding)
  {
    encoding = getEncoding(env, encoding);

    StringValue unicodeStr = str.convertToUnicode(env, encoding);

    Value val = StringModule.substr(env, unicodeStr, start, lengthV);

    if (val == BooleanValue.FALSE)
      return StringValue.EMPTY;
   
    return encode(env, val.toStringValue(env), encoding);
  }


  // Private helper functions

  /**
   * Returns string with words capitalized and intermediate letters are
   * made lower-case.
   */
  private static CharSequence toUpperCaseTitle(Env env, CharSequence str)
  {
    StringBuilder sb = new StringBuilder();

    int strLen = str.length();
    boolean isWordStart = true;

    for (int i = 0; i < strLen; i++) {
      char ch = str.charAt(i);

      switch (ch) {
      case ' ': case '\t': case '\r': case '\n':
        isWordStart = true;
        sb.append(ch);
        break;
      default:
        if (isWordStart) {
          sb.append(Character.toUpperCase(ch));
          isWordStart = false;
        }
        else
          sb.append(Character.toLowerCase(ch));
        break;
      }
    }

    return sb;
  }
 
  private static CharSequence decode(Env env,
                                     StringValue str,
                                     String encoding)
  {
    if (str.isUnicode())
      return str;
   
    Decoder decoder = getDecoder(env, encoding);
   
    return decoder.decode(env, str);
  }
 
  private static Decoder getDecoder(Env env, String encoding)
  {
    Decoder decoder = Decoder.create(encoding);
   
    String ini = env.getIniString("mbstring.substitute_character");
   
    if (ini == null) {
      decoder.setReplacement("?");
    }
    else if (ini.equalsIgnoreCase("none")) {
      decoder.setIgnoreErrors(true);
    }
    else if (ini.equalsIgnoreCase("long")) {
      decoder.setReplaceUnicode(true);
    }
    else {
      int len = ini.length();
     
      int value = 0;
     
      for (int i = 0; i < len; i++) {
        char ch = ini.charAt(i);
       
        if ('0' <= ch && ch <= '9')
          value = value * 10 + ini.charAt(i) - '0';
        else
          break;
      }
     
      // XXX: surrogate pairs
      decoder.setReplacement("" + (char) value);
    }
   
    return decoder;
  }
 
  private static StringValue encode(Env env,
                                    CharSequence str,
                                    String encoding)
  {
    Encoder encoder = getEncoder(env, encoding);

    return encoder.encode(env, str);
  }
 
  private static Encoder getEncoder(Env env, String encoding)
  {
    Encoder encoder = Encoder.create(encoding);
   
    String ini = env.getIniString("mbstring.substitute_character");
   
    if (ini == null) {
      encoder.setReplacement("?");
    }
    else if (ini.equalsIgnoreCase("none")) {
      encoder.setIgnoreErrors(true);
    }
    else if (ini.equalsIgnoreCase("long")) {
      encoder.setReplaceUnicode(true);
    }
    else {
      int len = ini.length();
     
      int value = 0;
     
      for (int i = 0; i < len; i++) {
        char ch = ini.charAt(i);
       
        if ('0' <= ch && ch <= '9')
          value = value * 10 + ini.charAt(i) - '0';
        else
          break;
      }
     
      // XXX: surrogate pairs
      encoder.setReplacement("" + (char) value);
    }
   
    return encoder;
  }

  public static String getEncoding(Env env)
  {
    Value encoding = env.getIni("mbstring.internal_encoding");
   
    if (encoding.length() > 0)
      return encoding.toString();
    else
      return env.getRuntimeEncoding();
  }

  private static String getEncoding(Env env, String encoding)
  {
    if (encoding == null || encoding.length() == 0)
      return getEncoding(env);
    else
      return encoding;
  }

  private static void setEncoding(Env env, String encoding)
  {
    env.setIni("mbstring.internal_encoding", encoding);
  }
 
  private static String getOutputEncoding(Env env)
  {
    Value encoding = env.getIni("mbstring.http_output");
   
    if (encoding.length() != 0)
      return encoding.toString();
    else
      return env.getOutputEncoding();
  }
 
  private static ArrayList<String> getEncodingList(Env env, Value encodingV)
  {
    ArrayList<String> list = new ArrayList<String>();
   
    if (encodingV.isDefault()) {
      list.add(getEncoding(env));
    }
    else if (encodingV.isArray()) {
      Iterator<Value> iter = encodingV.getValueIterator(env);
     
      while (iter.hasNext()) {
        list.add(iter.next().toString());
      }
    }
    else {
      String encodings = encodingV.toString();
     
      if (encodings.equals("auto")) {
        list.add("ASCII");
        list.add("JIS");
        list.add("UTF-8");
        list.add("EUC-JP");
        list.add("SJIS");
      }
      else {
        int start = 0;
        int index;
       
        while ((index = encodings.indexOf(",", start)) >= 0) {
          String charset = encodings.substring(start, index).trim();
         
          start = index + 1;
         
          list.add(charset);
        }
       
        list.add(encodings.substring(start).trim());
      }
    }
   
    return list;
  }

  /**
   * Recursively decodes objects and arrays.
   */
  private static Value decodeAll(Env env,
                                 Value val,
                                 String encoding)
  {
    Decoder decoder = getDecoder(env, encoding);
   
    return decodeAll(env, val, decoder);
  }
 
  /**
   * Recursively decodes objects and arrays.
   */
  private static Value decodeAll(Env env,
                                 Value val,
                                 Decoder decoder)
  {
    decoder.reset();
   
    val = val.toValue();

    if (val.isString()) {
      return decoder.decodeUnicode(env, val.toStringValue(env));
    }

    else if (val.isArray()) {
      ArrayValue array = new ArrayValueImpl();

      for (Map.Entry<Value,Value> entry : ((ArrayValue)val).entrySet()) {
        array.put(entry.getKey(),
                  decodeAll(env, entry.getValue(), decoder));
      }

      return array;
    } else if (val.isObject()) {

      ObjectValue obj = (ObjectValue) val.toObject(env);

      for (Map.Entry<Value,Value> entry : obj.entrySet()) {
        obj.putThisField(env,
                         entry.getKey().toStringValue(env),
                         decodeAll(env, entry.getValue(), decoder));
      }

      return obj;
    } else
      return val;
  }

  /**
   * Recursively encodes objects and arrays.
   */
  private static Value encodeAll(Env env,
                                 Value val,
                                 String encoding)
  {
    Encoder encoder = getEncoder(env, encoding);
   
    return encodeAll(env, val, encoder);
  }
 
  /**
   * Recursively encodes objects and arrays.
   */
  private static Value encodeAll(Env env,
                                 Value val,
                                 Encoder encoder)
  {
    val = val.toValue();

    if (val.isString()) {
      return encoder.encode(env, val.toStringValue(env), true);
    }
    else if (val.isArray()) {
      ArrayValue array = new ArrayValueImpl();

      for (Map.Entry<Value,Value> entry : ((ArrayValue)val).entrySet()) {
        array.put(entry.getKey(),
                  encodeAll(env, entry.getValue(), encoder));
      }

      return array;
    } else if (val.isObject()) {

      ObjectValue obj = (ObjectValue)val;

      for (Map.Entry<Value,Value> entry : obj.entrySet()) {
        obj.putThisField(env,
                         entry.getKey().toStringValue(env),
                         encodeAll(env, entry.getValue(), encoder));
      }

      return obj;
    } else
      return val;
  }

  /**
   * ereg state object (saves previous match and other info)
   *
   * XXX: option
   */
  static class EregSearch {
    private StringValue _string;
    private UnicodeEregi _ereg;
    @SuppressWarnings("unused")
    private Value _option;
    private int _length;

    ArrayValue _lastMatch;
    int _position;
    boolean _isValidRegexp;

    EregSearch(Env env,
               StringValue string,
               UnicodeEregi ereg,
               Value option)
    {
      _string = string.convertToUnicode(env, getEncoding(env));
      _position = 0;
      _length = _string.length();
     
      _ereg = ereg;
      _isValidRegexp = ereg != null;
     
      _option = option;
    }
   
    void init(UnicodeEregi ereg, Value option)
    {
      _ereg = ereg;
      _isValidRegexp = ereg != null;
     
      _option = option;
    }

    StringValue getString(Env env)
    {
      if (_position == 0)
        return _string;
      else if (_position < _length)
        return _string.substring(_position);
      else
        return StringValue.EMPTY;
    }

    Value search(Env env, boolean isArrayReturn)
    {
      if (_position < 0)
        return BooleanValue.FALSE;
     
      StringValue string = getString(env);

      ArrayValue regs = new ArrayValueImpl();
      Value val = eregImpl(env, _ereg, string, regs);

      if (val == BooleanValue.FALSE)
        return BooleanValue.FALSE;

      StringValue match = regs.get(LongValue.ZERO).toStringValue(env);

      int matchIndex = _string.indexOf(match, _position);
      int matchLength = match.length();

      _position = matchIndex + matchLength;

      _lastMatch = regs;

      if (isArrayReturn) {
        ArrayValue array = new ArrayValueImpl();

        array.put(LongValue.create(matchIndex));
        array.put(LongValue.create(matchLength));

        return array;

      } else
        return BooleanValue.TRUE;
    }
  }

  // Ini entries registered with this module's definition table.
  // NOTE(review): "pass" is presumably the default value and PHP_INI_ALL
  // the scope in which the entry may be changed -- confirm against
  // IniDefinitions.add().
  static final IniDefinition INI_MBSTRING_HTTP_INPUT
    = _iniDefinitions.add("mbstring.http_input", "pass", PHP_INI_ALL);
  // "mbstring.http_output" is the setting read by getOutputEncoding().
  static final IniDefinition INI_MBSTRING_HTTP_OUTPUT
    = _iniDefinitions.add("mbstring.http_output", "pass", PHP_INI_ALL);
}
TOP

Related Classes of com.caucho.quercus.lib.i18n.MbstringModule

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.