Package org.jruby.util.io

Source Code of org.jruby.util.io.EncodingUtils$ResizeFunction

package org.jruby.util.io;

import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.Ptr;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jcodings.specific.UTF16BEEncoding;
import org.jcodings.specific.UTF16LEEncoding;
import org.jcodings.specific.UTF32BEEncoding;
import org.jcodings.specific.UTF32LEEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.transcode.EConv;
import org.jcodings.transcode.EConvFlags;
import org.jcodings.transcode.EConvResult;
import org.jcodings.transcode.Transcoder;
import org.jcodings.transcode.TranscoderDB;
import org.jcodings.transcode.Transcoding;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyBasicObject;
import org.jruby.RubyConverter;
import org.jruby.RubyEncoding;
import org.jruby.RubyFixnum;
import org.jruby.RubyHash;
import org.jruby.RubyIO;
import org.jruby.RubyInteger;
import org.jruby.RubyMethod;
import org.jruby.RubyNumeric;
import org.jruby.RubyProc;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.exceptions.RaiseException;
import org.jruby.platform.Platform;
import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.encoding.EncodingCapable;
import org.jruby.runtime.encoding.EncodingService;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;

import java.util.Arrays;

public class EncodingUtils {
    public static final int ECONV_DEFAULT_NEWLINE_DECORATOR = Platform.IS_WINDOWS ? EConvFlags.CRLF_NEWLINE_DECORATOR : 0;
    public static final int DEFAULT_TEXTMODE = Platform.IS_WINDOWS ? OpenFile.TEXTMODE : 0;
    public static final int TEXTMODE_NEWLINE_DECORATOR_ON_WRITE = Platform.IS_WINDOWS ? EConvFlags.CRLF_NEWLINE_DECORATOR : 0;
   
    private static final byte[] NULL_BYTE_ARRAY = new byte[0];

    // rb_to_encoding
    public static Encoding rbToEncoding(ThreadContext context, IRubyObject enc) {
        if (enc instanceof RubyEncoding) return ((RubyEncoding) enc).getEncoding();
       
        return toEncoding(context, enc);
    }
   
    // to_encoding
    public static Encoding toEncoding(ThreadContext context, IRubyObject enc) {
        RubyString encStr = enc.convertToString();
        if (!encStr.getEncoding().isAsciiCompatible()) {
            throw context.runtime.newArgumentError("invalid name encoding (non ASCII)");
        }
        Encoding idx = context.runtime.getEncodingService().getEncodingFromObject(enc);
        // check for missing encoding is in getEncodingFromObject
        return idx;
    }
   
    public static IRubyObject[] openArgsToArgs(Ruby runtime, IRubyObject firstElement, RubyHash options) {
        IRubyObject value = hashARef(runtime, options, "open_args");
       
        if (value.isNil()) return new IRubyObject[] { firstElement, options };
       
        RubyArray array = value.convertToArray();
       
        IRubyObject[] openArgs = new IRubyObject[array.size()];
        value.convertToArray().toArray(openArgs);
        IRubyObject[] args = new IRubyObject[openArgs.length + 1];
       
        args[0] = firstElement;
       
        System.arraycopy(openArgs, 0, args, 1, openArgs.length);
       
        return args;
    }

    // FIXME: This could be smarter amount determining whether optionsArg is a RubyHash and !null (invariant)
    // mri: extract_binmode
    public static void extractBinmode(Ruby runtime, IRubyObject optionsArg, int[] fmode_p) {
        int fmodeMask = 0;
       
        IRubyObject v = hashARef(runtime, optionsArg, "textmode");
        if (!v.isNil() && v.isTrue()) fmodeMask |= OpenFile.TEXTMODE;
       
        v = hashARef(runtime, optionsArg, "binmode");
        if (!v.isNil() && v.isTrue()) fmodeMask |= OpenFile.BINMODE;

        if ((fmodeMask & OpenFile.BINMODE) != 0 && (fmodeMask & OpenFile.TEXTMODE) != 0) {
            throw runtime.newArgumentError("both textmode and binmode specified");
        }
       
        fmode_p[0] |= fmodeMask;
    }
   
    private static IRubyObject hashARef(Ruby runtime, IRubyObject hash, String symbol) {
        if (hash == null || !(hash instanceof RubyHash)) return runtime.getNil();
       
        IRubyObject value = ((RubyHash) hash).fastARef(runtime.newSymbol(symbol));
       
        return value == null ? runtime.getNil() : value;
    }
   
    public static Encoding ascii8bitEncoding(Ruby runtime) {
        return runtime.getEncodingService().getAscii8bitEncoding();  
    }

    static final int VMODE = 0;
    static final int PERM = 1;

    public static Object vmodeVperm(IRubyObject vmode, IRubyObject vperm) {
        return new IRubyObject[] {vmode, vperm};
    }

    public static IRubyObject vmode(Object vmodeVperm) {
        return ((IRubyObject[])vmodeVperm)[VMODE];
    }

    public static void vmode(Object vmodeVperm, IRubyObject vmode) {
        ((IRubyObject[])vmodeVperm)[VMODE] = vmode;
    }

    public static IRubyObject vperm(Object vmodeVperm) {
        return ((IRubyObject[])vmodeVperm)[PERM];
    }

    public static void vperm(Object vmodeVperm, IRubyObject vperm) {
        ((IRubyObject[])vmodeVperm)[PERM] = vperm;
    }
   
    public static final int MODE_BTMODE(int fmode, int a, int b, int c) {
        if ((fmode & OpenFile.BINMODE) != 0) {
            return b;
        } else if ((fmode & OpenFile.TEXTMODE) != 0) {
            return c;
        }
        return a;
    }
   
    public static int SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(Encoding enc2, int ecflags) {
        if (enc2 != null && (ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) != 0) {
            return ecflags | EConvFlags.UNIVERSAL_NEWLINE_DECORATOR;
        }
        return ecflags;
    }
   
    /*
     * This is a wacky method which is a very near port from MRI.  pm passes in
     * a permissions value and a mode value.  As a side-effect mode will get set
     * if this found any 'mode'-like stuff so the caller can know whether mode
     * has been handled yet.   The same story for permission value.  If it has
     * not been set then we know it needs to default permissions from the caller.
     */
    // mri: rb_io_extract_modeenc
    public static void extractModeEncoding(ThreadContext context,
            IOEncodable ioEncodable, Object vmodeAndVperm_p, IRubyObject options, int[] oflags_p, int[] fmode_p) {
        Ruby runtime = context.runtime;
        int ecflags;
        IRubyObject[] ecopts_p = {context.nil};
        boolean hasEnc = false, hasVmode = false;
        IRubyObject intmode;
       
        // Give default encodings
        ioExtIntToEncs(context, ioEncodable, null, null, 0);

        vmode_handle: do {
            if (vmode(vmodeAndVperm_p) == null || vmode(vmodeAndVperm_p).isNil()) {
                fmode_p[0] = OpenFile.READABLE;
                oflags_p[0] = ModeFlags.RDONLY;
            } else {
                intmode = TypeConverter.checkIntegerType(context.runtime, vmode(vmodeAndVperm_p), "to_int");

                if (!intmode.isNil()) {
                    vmode(vmodeAndVperm_p, intmode);
                    oflags_p[0] = RubyNumeric.num2int(intmode);
                    fmode_p[0] = ModeFlags.getOpenFileFlagsFor(oflags_p[0]);
                } else {
                    String p = vmode(vmodeAndVperm_p).convertToString().asJavaString();
                    fmode_p[0] = OpenFile.ioModestrFmode(runtime, p);
                    oflags_p[0] = OpenFile.ioFmodeOflags(fmode_p[0]);
                    int colonSplit = p.indexOf(":");

                    if (colonSplit != -1) {
                        hasEnc = true;
                        parseModeEncoding(context, ioEncodable, p.substring(colonSplit + 1), fmode_p);
                    } else {
                        Encoding e = (fmode_p[0] & OpenFile.BINMODE) != 0 ? ascii8bitEncoding(context.runtime) : null;
                        ioExtIntToEncs(context, ioEncodable, e, null, fmode_p[0]);
                    }
                }
            }

            if (options == null || options.isNil()) {
                ecflags = (fmode_p[0] & OpenFile.READABLE) != 0
                        ? MODE_BTMODE(fmode_p[0], ECONV_DEFAULT_NEWLINE_DECORATOR, 0, EConvFlags.UNIVERSAL_NEWLINE_DECORATOR)
                        : 0;
                if (TEXTMODE_NEWLINE_DECORATOR_ON_WRITE != 0) {
                    ecflags |= (fmode_p[0] & OpenFile.WRITABLE) != 0
                            ? MODE_BTMODE(fmode_p[0], TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE)
                            : 0;
                }
                ecflags = SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(ioEncodable.getEnc2(), ecflags);
                ecopts_p[0] = context.nil;
            } else {
                extractBinmode(context.runtime, options, fmode_p);
                // Differs from MRI but we open with ModeFlags
                if ((fmode_p[0] & OpenFile.BINMODE) != 0) {
                    oflags_p[0] |= ModeFlags.BINARY;
               
                    if (!hasEnc) {
                        ioExtIntToEncs(context, ioEncodable, ascii8bitEncoding(context.runtime), null, fmode_p[0]);
                    }
                } else if (DEFAULT_TEXTMODE != 0 && (vmode(vmodeAndVperm_p) == null || vmode(vmodeAndVperm_p).isNil())) {
                    fmode_p[0] |= DEFAULT_TEXTMODE;
                }

                if (!hasVmode) {
                    IRubyObject v = hashARef(context.runtime, options, "mode");

                    if (!v.isNil()) {
                        if (vmode(vmodeAndVperm_p) != null && !vmode(vmodeAndVperm_p).isNil()) {
                            throw context.runtime.newArgumentError("mode specified twice");
                        }
                        hasVmode = true;
                        vmode(vmodeAndVperm_p, v);

                        continue vmode_handle;
                    }
                }
                IRubyObject v = hashARef(context.runtime, options, "perm");
                if (!v.isNil()) {
                    if (vperm(vmodeAndVperm_p) != null) {
                        if (!vperm(vmodeAndVperm_p).isNil()) throw context.runtime.newArgumentError("perm specified twice");

                        vperm(vmodeAndVperm_p, v);
                    }
                }
               
                ecflags = (fmode_p[0] & OpenFile.READABLE) != 0 ?
                        MODE_BTMODE(fmode_p[0], ECONV_DEFAULT_NEWLINE_DECORATOR, 0, EConvFlags.UNIVERSAL_NEWLINE_DECORATOR) : 0;
                if (TEXTMODE_NEWLINE_DECORATOR_ON_WRITE != -1) {
                    ecflags |= (fmode_p[0] & OpenFile.WRITABLE) != 0 ?
                            MODE_BTMODE(fmode_p[0], TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
                }

                if (ioExtractEncodingOption(context, ioEncodable, options, fmode_p)) {
                    if (hasEnc) throw context.runtime.newArgumentError("encoding specified twice");
                }
               
                ecflags = SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(ioEncodable.getEnc2(), ecflags);
                ecflags = econvPrepareOptions(context, options, ecopts_p, ecflags);
            }
           
            EncodingUtils.validateEncodingBinmode(context, fmode_p, ecflags, ioEncodable);
           
            ioEncodable.setEcflags(ecflags);
            ioEncodable.setEcopts(ecopts_p[0]);
            return;
        } while (true);
    }

    // mri: rb_io_extract_encoding_option
    public static boolean ioExtractEncodingOption(ThreadContext context, IOEncodable ioEncodable, IRubyObject options, int[] fmode_p) {
        Ruby runtime = context.runtime;
       
        IRubyObject encoding = context.nil;
        IRubyObject extenc = null;
        IRubyObject intenc = null;
        IRubyObject tmp;
        boolean extracted = false;
        Encoding extencoding = null;
        Encoding intencoding = null;
       
        if (options != null || !options.isNil()) {
            RubyHash opts = (RubyHash) options;

            IRubyObject encodingOpt = opts.op_aref(context, runtime.newSymbol("encoding"));
            if (!encodingOpt.isNil()) encoding = encodingOpt;
            IRubyObject externalOpt = opts.op_aref(context, runtime.newSymbol("external_encoding"));
            if (!externalOpt.isNil()) extenc = externalOpt;
            IRubyObject internalOpt = opts.op_aref(context, runtime.newSymbol("internal_encoding"));
            if (!internalOpt.isNil()) intenc = internalOpt;
        }
       
        if ((extenc != null || intenc != null) && !encoding.isNil()) {
            if (runtime.isVerbose()) {
                    runtime.getWarnings().warn("Ignoring encoding parameter '" + encoding + "': " +
                            (extenc == null ? "internal" : "external") + "_encoding is used");
            }
            encoding = context.nil;
        }
       
        if (extenc != null && !extenc.isNil()) {
            extencoding = rbToEncoding(context, extenc);
        }

        if (intenc != null) {
            if (intenc.isNil()) {
                intencoding = null;
            } else if (!(tmp = intenc.checkStringType19()).isNil()) {
                String p = tmp.toString();
                if (p.equals("-")) {
                    intencoding = null;
                } else {
                    intencoding = rbToEncoding(context, intenc);
                }
            } else {
                intencoding = rbToEncoding(context, intenc);
            }
            if (extencoding == intencoding) {
                intencoding = null;
            }
        }
       
        if (!encoding.isNil()) {
            extracted = true;
           
            if (!(tmp = encoding.checkStringType19()).isNil()) {
                parseModeEncoding(context, ioEncodable, tmp.asJavaString(), fmode_p);
            } else {
                ioExtIntToEncs(context, ioEncodable, rbToEncoding(context, encoding), null, 0);
            }
        } else if (extenc != null || intenc != null) {
            extracted = true;
            ioExtIntToEncs(context, ioEncodable, extencoding, intencoding, 0);
        }
       
        return extracted;
    }
   
    // mri: rb_io_ext_int_to_encs
    public static void ioExtIntToEncs(ThreadContext context, IOEncodable encodable, Encoding external, Encoding internal, int fmode) {
        boolean defaultExternal = false;
       
        if (external == null) {
            external = context.runtime.getDefaultExternalEncoding();
            defaultExternal = true;
        }
       
        if (external == ascii8bitEncoding(context.runtime)) {
            internal = null;
        } else if (internal == null) {
            internal = context.runtime.getDefaultInternalEncoding();
        }
       
        if (internal == null ||
                ((fmode & OpenFile.SETENC_BY_BOM) == 0) && internal == external) {
            encodable.setEnc((defaultExternal && internal != external) ? null : external);
            encodable.setEnc2(null);
        } else {
            encodable.setEnc(internal);
            encodable.setEnc2(external);
        }
    }   

    // mri: parse_mode_enc
    public static void parseModeEncoding(ThreadContext context, IOEncodable ioEncodable, String option, int[] fmode_p) {
        Ruby runtime = context.runtime;
        EncodingService service = runtime.getEncodingService();
        Encoding idx, idx2 = null;
        Encoding intEnc, extEnc;
        if (fmode_p == null) fmode_p = new int[]{0};
        String estr;

        String[] encs = option.split(":", 2);

        if (encs.length == 2) {
            estr = encs[0];
            if (estr.toLowerCase().startsWith("bom|utf-")) {
                fmode_p[0] |= OpenFile.SETENC_BY_BOM;
                ioEncodable.setBOM(true);
                estr = estr.substring(4);
            }
            idx = service.getEncodingFromString(estr);
        } else {
            estr = option;
            if (estr.toLowerCase().startsWith("bom|utf-")) {
                fmode_p[0] |= OpenFile.SETENC_BY_BOM;
                ioEncodable.setBOM(true);
                estr = estr.substring(4);
            }
            idx = service.getEncodingFromString(estr);
        }

        extEnc = idx;
       
        intEnc = null;
        if (encs.length == 2) {
            if (encs[1].equals("-")) {
                intEnc = null;
            } else {
                idx2 = service.getEncodingFromString(encs[1]);
                if (idx2 == null) {
                    context.runtime.getWarnings().warn("ignoring internal encoding " + idx2 + ": it is identical to external encoding " + idx);
                    intEnc = null;
                } else {
                    intEnc = idx2;
                }
            }
        }

        ioExtIntToEncs(context, ioEncodable, extEnc, intEnc, fmode_p[0]);
    }

    // rb_econv_str_convert
    public static ByteList econvStrConvert(ThreadContext context, EConv ec, ByteList src, int flags) {
        return econvSubstrAppend(context, ec, src, null, flags);
    }

    // rb_econv_substr_append
    public static ByteList econvSubstrAppend(ThreadContext context, EConv ec, ByteList src, ByteList dst, int flags) {
        return econvAppend(context, ec, src, dst, flags);
    }

    // rb_econv_append
    public static ByteList econvAppend(ThreadContext context, EConv ec, ByteList sByteList, ByteList dst, int flags) {
        int len = sByteList.getRealSize();

        Ptr sp = new Ptr(0);
        int se = 0;
        int ds = 0;
        int ss = sByteList.getBegin();
        byte[] dBytes;
        Ptr dp = new Ptr(0);
        int de = 0;
        EConvResult res;
        int maxOutput;

        if (dst == null) {
            dst = new ByteList(len);
            if (ec.destinationEncoding != null) {
                dst.setEncoding(ec.destinationEncoding);
            }
        }

        if (ec.lastTranscoding != null) {
            maxOutput = ec.lastTranscoding.transcoder.maxOutput;
        } else {
            maxOutput = 1;
        }

        do {
            int dlen = dst.getRealSize();
            if ((dst.getUnsafeBytes().length - dst.getBegin()) - dlen < len + maxOutput) {
                long newCapa = dlen + len + maxOutput;
                if (Integer.MAX_VALUE < newCapa) {
                    throw context.runtime.newArgumentError("too long string");
                }
                dst.ensure((int)newCapa);
                dst.setRealSize(dlen);
            }
            sp.p = ss;
            se = sp.p + len;
            dBytes = dst.getUnsafeBytes();
            ds = dst.getBegin();
            de = dBytes.length;
            dp.p = ds += dlen;
            res = ec.convert(sByteList.getUnsafeBytes(), sp, se, dBytes, dp, de, flags);
            len -= sp.p - ss;
            ss = sp.p;
            dst.setRealSize(dlen + (dp.p - ds));
            EncodingUtils.econvCheckError(context, ec);
        } while (res == EConvResult.DestinationBufferFull);

        return dst;
    }

    // rb_econv_check_error
    public static void econvCheckError(ThreadContext context, EConv ec) {
        RaiseException re = makeEconvException(context.runtime, ec);
        if (re != null) throw re;
    }

    // rb_econv_prepare_opts
    public static int econvPrepareOpts(ThreadContext context, IRubyObject opthash, IRubyObject[] opts) {
        return econvPrepareOptions(context, opthash, opts, 0);
    }
   
    // rb_econv_prepare_options
    public static int econvPrepareOptions(ThreadContext context, IRubyObject opthash, IRubyObject[] opts, int ecflags) {
        IRubyObject newhash = context.nil;
        IRubyObject v;
       
        if (opthash.isNil()) {
            opts[0] = context.nil;
            return ecflags;
        }

        RubyHash optHash2 = (RubyHash)opthash;
        ecflags = econvOpts(context, opthash, ecflags);
       
        v = optHash2.op_aref(context, context.runtime.newSymbol("replace"));
        if (!v.isNil()) {
            RubyString v_str = v.convertToString();
            if (v_str.scanForCodeRange() == StringSupport.CR_BROKEN) {
                throw context.runtime.newArgumentError("replacement string is broken: " + v_str);
            }
            v = v_str.freeze(context);
            newhash = RubyHash.newHash(context.runtime);
            ((RubyHash)newhash).op_aset(context, context.runtime.newSymbol("replace"), v);
        }
       
        v = optHash2.op_aref(context, context.runtime.newSymbol("fallback"));
        if (!v.isNil()) {
            IRubyObject h = TypeConverter.checkHashType(context.runtime, v);
            boolean condition;
            if (h.isNil()) {
                condition = (v instanceof RubyProc || v instanceof RubyMethod || v.respondsTo("[]"));
            } else {
                v = h;
                condition = true;
            }
           
            if (condition) {
                if (newhash.isNil()) {
                    newhash = RubyHash.newHash(context.runtime);
                }
                ((RubyHash)newhash).op_aset(context, context.runtime.newSymbol("fallback"), v);
            }
        }
       
        if (!newhash.isNil()) {
            newhash.setFrozen(true);
        }
        opts[0] = newhash;
       
        return ecflags;
    }
   
    // econv_opts
    public static int econvOpts(ThreadContext context, IRubyObject opt, int ecflags) {
        Ruby runtime = context.runtime;
        IRubyObject v;
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("invalid"));
        if (v.isNil()) {
        } else if (v.toString().equals("replace")) {
            ecflags |= EConvFlags.INVALID_REPLACE;
        } else {
            throw runtime.newArgumentError("unknown value for invalid character option");
        }
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("undef"));
        if (v.isNil()) {
        } else if (v.toString().equals("replace")) {
            ecflags |= EConvFlags.UNDEF_REPLACE;
        } else {
            throw runtime.newArgumentError("unknown value for undefined character option");
        }
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("replace"));
        if (!v.isNil() && (ecflags & EConvFlags.INVALID_REPLACE) != 0) {
            ecflags |= EConvFlags.UNDEF_REPLACE;
        }
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("xml"));
        if (!v.isNil()) {
            if (v.toString().equals("text")) {
                ecflags |= EConvFlags.XML_TEXT_DECORATOR | EConvFlags.UNDEF_HEX_CHARREF;
            } else if (v.toString().equals("attr")) {
                ecflags |= EConvFlags.XML_ATTR_CONTENT_DECORATOR | EConvFlags.XML_ATTR_QUOTE_DECORATOR | EConvFlags.UNDEF_HEX_CHARREF;
            } else {
                throw runtime.newArgumentError("unexpected value for xml option: " + v);
            }
        }
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("newline"));
        if (!v.isNil()) {
            ecflags &= ~EConvFlags.NEWLINE_DECORATOR_MASK;
            if (v.toString().equals("universal")) {
                ecflags |= EConvFlags.UNIVERSAL_NEWLINE_DECORATOR;
            } else if (v.toString().equals("crlf")) {
                ecflags |= EConvFlags.CRLF_NEWLINE_DECORATOR;
            } else if (v.toString().equals("cr")) {
                ecflags |= EConvFlags.CR_NEWLINE_DECORATOR;
            } else if (v.toString().equals("lf")) {
//                ecflags |= ECONV_LF_NEWLINE_DECORATOR;
            } else if (v instanceof RubySymbol) {
                throw runtime.newArgumentError("unexpected value for newline option: " + ((RubySymbol) v).to_s(context).toString());
            } else {
                throw runtime.newArgumentError("unexpected value for newline option");
            }
        }
       
        int setflags = 0;
        boolean newlineflag = false;
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("universal_newline"));
        if (v.isTrue()) {
            setflags |= EConvFlags.UNIVERSAL_NEWLINE_DECORATOR;
        }
        newlineflag |= !v.isNil();
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("crlf_newline"));
        if (v.isTrue()) {
            setflags |= EConvFlags.CRLF_NEWLINE_DECORATOR;
        }
        newlineflag |= !v.isNil();
       
        v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("cr_newline"));
        if (v.isTrue()) {
            setflags |= EConvFlags.CR_NEWLINE_DECORATOR;
        }
        newlineflag |= !v.isNil();
       
        if (newlineflag) {
            ecflags &= ~EConvFlags.NEWLINE_DECORATOR_MASK;
            ecflags |= setflags;
        }
       
        return ecflags;
    }
   
    // rb_econv_open_opts
    public static EConv econvOpenOpts(ThreadContext context, byte[] sourceEncoding, byte[] destinationEncoding, int ecflags, IRubyObject opthash) {
        Ruby runtime = context.runtime;
        IRubyObject replacement;
       
        if (opthash == null || opthash.isNil()) {
            replacement = context.nil;
        } else {
            if (!(opthash instanceof RubyHash) || !opthash.isFrozen()) {
                throw runtime.newRuntimeError("bug: EncodingUtils.econvOpenOpts called with invalid opthash");
            }
            replacement = ((RubyHash)opthash).op_aref(context, runtime.newSymbol("replace"));
        }
       
        EConv ec = TranscoderDB.open(sourceEncoding, destinationEncoding, ecflags);
        if (ec == null) return ec;

        if (!replacement.isNil()) {
            int ret;
            RubyString replStr = (RubyString)replacement;
            ByteList replBL = replStr.getByteList();
            ec.makeReplacement();

            ret = ec.setReplacement(replBL.getUnsafeBytes(), replBL.getBegin(), replBL.getRealSize(), replBL.getEncoding().getName());

            if (ret == -1) {
                ec.close();
                return null;
            }
        }

        return ec;
    }
   
    // rb_econv_open_exc
    public static RaiseException econvOpenExc(ThreadContext context, byte[] sourceEncoding, byte[] destinationEncoding, int ecflags) {
        String message = econvDescription(context, sourceEncoding, destinationEncoding, ecflags, "code converter not found (") + ")";
        return context.runtime.newConverterNotFoundError(message);
    }
   
    // rb_econv_description
    public static String econvDescription(ThreadContext context, byte[] sourceEncoding, byte[] destinationEncoding, int ecflags, String message) {
        // limited port for now
        return message + new String(sourceEncoding) + " to " + new String(destinationEncoding);
    }
   
    // rb_econv_asciicompat_encoding
    // Missing proper logic from transcoding subsystem
    public static Encoding econvAsciicompatEncoding(Encoding enc) {
        return RubyConverter.NONASCII_TO_ASCII.get(enc);
    }
   
    // rb_enc_asciicompat
    public static boolean encAsciicompat(Encoding enc) {
        return encMbminlen(enc) == 1 && !encDummy(enc);
    }
   
    // rb_enc_ascget
    public static int encAscget(byte[] pBytes, int p, int e, int[] len, Encoding enc) {
        int c;
        int l;

        // if e < p check unnecessary

        if (encAsciicompat(enc)) {
            c = pBytes[p] & 0xFF;
            if (!Encoding.isAscii((byte)c)) {
                return -1;
            }
            if (len != null) len[0] = 1;
            return c;
        }
        l = StringSupport.preciseLength(enc, pBytes, p, e);
        if (!StringSupport.MBCLEN_CHARFOUND_P(l)) {
            return -1;
        }
        c = enc.mbcToCode(pBytes, p, e);
        if (!Encoding.isAscii(c)) {
            return -1;
        }
        if (len != null) len[0] = l;
        return c;
    }
   
    // rb_enc_mbminlen
    public static int encMbminlen(Encoding encoding) {
        return encoding.minLength();
    }
   
    // rb_enc_dummy_p
    public static boolean encDummy(Encoding enc) {
        return enc.isDummy();
    }
   
    // rb_enc_get
    public static Encoding encGet(ThreadContext context, IRubyObject obj) {
        if (obj instanceof EncodingCapable) {
            return ((EncodingCapable)obj).getEncoding();
        }
       
        return context.runtime.getDefaultInternalEncoding();
    }
   
    // encoding_equal
    public static boolean encodingEqual(byte[] enc1, byte[] enc2) {
        return new String(enc1).equalsIgnoreCase(new String(enc2));
    }
   
    // enc_arg
    public static Encoding encArg(ThreadContext context, IRubyObject encval, byte[][] name_p, Encoding[] enc_p) {
        Encoding enc;
        if ((enc = toEncodingIndex(context, encval)) == null) {
            name_p[0] = encval.convertToString().getBytes();
        } else {
            name_p[0] = enc.getName();
        }

        return enc_p[0] = enc;
    }
   
    // rb_to_encoding_index
    public static Encoding toEncodingIndex(ThreadContext context, IRubyObject enc) {
        if (enc instanceof RubyEncoding) {
            return ((RubyEncoding)enc).getEncoding();
        } else if ((enc = enc.checkStringType19()).isNil()) {
            return null;
        }
        if (!((RubyString)enc).getEncoding().isAsciiCompatible()) {
            return null;
        }
        return context.runtime.getEncodingService().getEncodingFromObjectNoError(enc);
    }
   
    // encoded_dup
    public static IRubyObject encodedDup(ThreadContext context, IRubyObject newstr, IRubyObject str, Encoding encindex) {
        if (encindex == null) return str.dup();
        if (newstr == str) {
            newstr = str.dup();
        } else {
            // set to same superclass
            ((RubyBasicObject)newstr).setMetaClass(str.getMetaClass());
        }
        ((RubyString)newstr).modify19();
        return strEncodeAssociate(context, newstr, encindex);
    }
   
    // str_encode_associate
    public static IRubyObject strEncodeAssociate(ThreadContext context, IRubyObject str, Encoding encidx) {
        encAssociateIndex(str, encidx);
       
        if (encAsciicompat(encidx)) {
            ((RubyString)str).scanForCodeRange();
        } else {
            ((RubyString)str).setCodeRange(StringSupport.CR_VALID);
        }
       
        return str;
    }
   
    // rb_enc_associate_index
    public static IRubyObject encAssociateIndex(IRubyObject obj, Encoding encidx) {
        ((RubyBasicObject)obj).checkFrozen();
        if (encidx == null) encidx = ASCIIEncoding.INSTANCE;
        if (((EncodingCapable)obj).getEncoding() == encidx) {
            return obj;
        }
        if (obj instanceof RubyString &&
                !((RubyString)obj).isCodeRangeAsciiOnly() ||
                encAsciicompat(encidx)) {
            ((RubyString)obj).clearCodeRange();
        }
        ((EncodingCapable)obj).setEncoding(encidx);
        return obj;
    }
   
    // str_encode
    public static IRubyObject strEncode(ThreadContext context, IRubyObject str, IRubyObject... args) {
        IRubyObject[] newstr_p = {str};
       
        Encoding dencindex = strTranscode(context, args, newstr_p);
       
        return encodedDup(context, newstr_p[0], str, dencindex);
    }
   
    // rb_str_encode
    public static IRubyObject rbStrEncode(ThreadContext context, IRubyObject str, IRubyObject to, int ecflags, IRubyObject ecopt) {
        IRubyObject[] newstr_p = {str};
       
        Encoding dencindex = strTranscode0(context, 1, new IRubyObject[]{to}, newstr_p, ecflags, ecopt);
       
        return encodedDup(context, newstr_p[0], str, dencindex);
    }
   
    // str_transcode
    public static Encoding strTranscode(ThreadContext context, IRubyObject[] args, IRubyObject[] self_p) {
        int ecflags = 0;
        int argc = args.length;
        IRubyObject[] ecopts_p = {context.nil};

        if (args.length >= 1) {
            IRubyObject tmp = TypeConverter.checkHashType(context.runtime, args[args.length - 1]);
            if (!tmp.isNil()) {
                argc--;
                ecflags = econvPrepareOpts(context, tmp, ecopts_p);
            }
        }
       
        return strTranscode0(context, argc, args, self_p, ecflags, ecopts_p[0]);
    }
   
    // str_transcode0
    public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObject[] args, IRubyObject[] self_p, int ecflags, IRubyObject ecopts) {
        Ruby runtime = context.runtime;
       
        IRubyObject str = self_p[0];
        IRubyObject arg1, arg2;
        Encoding[] senc_p = {null}, denc_p = {null};
        byte[][] sname_p = {null}, dname_p = {null};
        Encoding dencindex;
        boolean explicitlyInvalidReplace = true;
       
        if (argc > 2) {
            throw context.runtime.newArgumentError(args.length, 2);
        }
       
        if (argc == 0) {
            arg1 = runtime.getEncodingService().getDefaultInternal();
            if (arg1 == null || arg1.isNil()) {
                if (ecflags == 0) return null;
                arg1 = objEncoding(context, str);
            }
            if ((ecflags & EConvFlags.INVALID_MASK) == 0) {
                explicitlyInvalidReplace = false;
            }
            ecflags |= EConvFlags.INVALID_REPLACE | EConvFlags.UNDEF_REPLACE;
        } else {
            arg1 = args[0];
        }
       
        arg2 = argc <= 1 ? context.nil : args[1];
        dencindex = strTranscodeEncArgs(context, str, arg1, arg2, sname_p, senc_p, dname_p, denc_p);

        IRubyObject dest;
       
        if ((ecflags & (EConvFlags.NEWLINE_DECORATOR_MASK
                | EConvFlags.XML_TEXT_DECORATOR
                | EConvFlags.XML_ATTR_CONTENT_DECORATOR
                | EConvFlags.XML_ATTR_QUOTE_DECORATOR)) == 0) {
            if (senc_p[0] != null && senc_p[0] == denc_p[0]) {
                if ((ecflags & EConvFlags.INVALID_MASK) != 0 && explicitlyInvalidReplace) {
                    IRubyObject rep = context.nil;
                    if (!ecopts.isNil()) {
                        rep = ((RubyHash)ecopts).op_aref(context, runtime.newString("replace"));
                    }
                    dest = ((RubyString)str).scrub(context, rep, Block.NULL_BLOCK);
                    if (dest.isNil()) dest = str;
                    self_p[0] = dest;
                    return dencindex;
                }
                return arg2.isNil() ? null : dencindex;
            } else if (senc_p[0] != null && denc_p[0] != null && senc_p[0].isAsciiCompatible() && denc_p[0].isAsciiCompatible()) {
                if (((RubyString)str).scanForCodeRange() == StringSupport.CR_7BIT) {
                    return dencindex;
                }
            }
            if (encodingEqual(sname_p[0], dname_p[0])) {
                return arg2.isNil() ? null : dencindex;
            }
        } else {
            if (encodingEqual(sname_p[0], dname_p[0])) {
                sname_p[0] = NULL_BYTE_ARRAY;
                dname_p[0] = NULL_BYTE_ARRAY;
            }
        }

        ByteList sp = ((RubyString)str).getByteList();
        ByteList fromp = sp;
        int slen = ((RubyString)str).size();
        int blen = slen + 30;
        dest = RubyString.newStringLight(runtime, blen);
        ByteList destp = ((RubyString)dest).getByteList();

        byte[] frompBytes = fromp.unsafeBytes();
        byte[] destpBytes = destp.unsafeBytes();
        Ptr frompPos = new Ptr(fromp.getBegin());
        Ptr destpPos = new Ptr(destp.getBegin());
        transcodeLoop(context, frompBytes, frompPos, destpBytes, destpPos, frompPos.p + slen, destpPos.p + blen, destp, strTranscodingResize, sname_p[0], dname_p[0], ecflags, ecopts);

        if (frompPos.p != sp.begin() + slen) {
            throw runtime.newArgumentError("not fully converted, " + (slen - frompPos.p) + " bytes left");
        }
        destp.setRealSize(destpPos.p);
       
        if (denc_p[0] == null) {
            dencindex = defineDummyEncoding(context, dname_p[0]);
        }
       
        self_p[0] = dest;
       
        return dencindex;
    }
   
    // rb_obj_encoding
    public static IRubyObject objEncoding(ThreadContext context, IRubyObject obj) {
        Encoding enc = encGet(context, obj);
        if (enc == null) {
            throw context.runtime.newTypeError("unknown encoding");
        }
        return context.runtime.getEncodingService().convertEncodingToRubyEncoding(enc);
    }
   
    public static Encoding strTranscodeEncArgs(ThreadContext context, IRubyObject str, IRubyObject arg1, IRubyObject arg2, byte[][] sname_p, Encoding[] senc_p, byte[][] dname_p, Encoding[] denc_p) {
        Encoding dencindex;
       
        dencindex = encArg(context, arg1, dname_p, denc_p);
       
        if (arg2.isNil()) {
            senc_p[0] = encGet(context, str);
            sname_p[0] = senc_p[0].getName();
        } else {
            encArg(context, arg2, sname_p, senc_p);
        }
       
        return dencindex;
    }
   
    public static boolean encRegistered(byte[] name) {
        return EncodingDB.getEncodings().get(name) != null;
    }
   
    // enc_check_duplication
    public static void encCheckDuplication(ThreadContext context, byte[] name) {
        if (encRegistered(name)) {
            throw context.runtime.newArgumentError("encoding " + new String(name) + " is already registered");
        }
    }
   
    // rb_enc_replicate
    public static Encoding encReplicate(ThreadContext context, byte[] name, Encoding encoding) {
        encCheckDuplication(context, name);
        EncodingDB.replicate(new String(name), new String(encoding.getName()));
        return EncodingDB.getEncodings().get(name).getEncoding();
    }
   
    // rb_define_dummy_encoding
    public static Encoding defineDummyEncoding(ThreadContext context, byte[] name) {
        Encoding dummy = encReplicate(context, name, ascii8bitEncoding(context.runtime));
        // TODO: set dummy on encoding; this probably should live in jcodings
        return dummy;
    }

    public static boolean DECORATOR_P(byte[] sname, byte[] dname) {
        return sname == null || sname.length == 0 || sname[0] == 0;
    }

    // TODO: Get rid of this and get consumers calling with existing RubyString
    public static ByteList strConvEncOpts(ThreadContext context, ByteList str, Encoding fromEncoding,
                                            Encoding toEncoding, int ecflags, IRubyObject ecopts) {
        return strConvEncOpts(
                context,
                RubyString.newString(context.runtime, str),
                fromEncoding, toEncoding, ecflags, ecopts).getByteList();
    }

    /**
     * This will try and transcode the supplied ByteList to the supplied toEncoding.  It will use
     * forceEncoding as its encoding if it is supplied; otherwise it will use the encoding it has
     * tucked away in the bytelist.  This will return a new copy of a ByteList in the request
     * encoding or die trying (ConverterNotFound).
     *
     * c: rb_str_conv_enc_opts
     */
    public static RubyString strConvEncOpts(ThreadContext context, RubyString str, Encoding fromEncoding,
            Encoding toEncoding, int ecflags, IRubyObject ecopts) {

        if (toEncoding == null) return str;
        if (fromEncoding == null) fromEncoding = str.getEncoding();
        if (fromEncoding == toEncoding) return str;
        if ((toEncoding.isAsciiCompatible() && str.isAsciiOnly()) ||
                toEncoding == ASCIIEncoding.INSTANCE) {
            if (str.getEncoding() != toEncoding) {
                str = (RubyString)str.dup();
                str.setEncoding(toEncoding);
            }
            return str;
        }

        ByteList strByteList = str.getByteList();
        int len = strByteList.getRealSize();
        ByteList newStr = new ByteList(len);
        int olen = len;

        EConv ec = econvOpenOpts(context, fromEncoding.getName(), toEncoding.getName(), ecflags, ecopts);
        if (ec == null) return str;

        byte[] sbytes = strByteList.getUnsafeBytes();
        Ptr sp = new Ptr(strByteList.getBegin());
        int start = sp.p;

        byte[] destbytes;
        Ptr dp = new Ptr(0);
        EConvResult ret;
        int convertedOutput = 0;

        destbytes = newStr.getUnsafeBytes();
        int dest = newStr.begin();
        dp.p = dest + convertedOutput;
        ret = ec.convert(sbytes, sp, start + len, destbytes, dp, dest + olen, 0);
        while (ret == EConvResult.DestinationBufferFull) {
            int convertedInput = sp.p - start;
            int rest = len - convertedInput;
            convertedOutput = dp.p - dest;
            newStr.setRealSize(convertedOutput);
            if (convertedInput != 0 && convertedOutput != 0 &&
                    rest < (Integer.MAX_VALUE / convertedOutput)) {
                rest = (rest * convertedOutput) / convertedInput;
            } else {
                rest = olen;
            }
            olen += rest < 2 ? 2 : rest;
            newStr.ensure(olen);
        }
        ec.close();

        switch (ret) {
            case Finished:
                len = dp.p;
                newStr.setRealSize(len);
                newStr.setEncoding(toEncoding);
                return RubyString.newString(context.runtime, newStr);

            default:
                // some error, return original
                return str;
        }
    }

    // rb_str_conv_enc
    public static RubyString strConvEnc(ThreadContext context, RubyString value, Encoding fromEncoding, Encoding toEncoding) {
        return strConvEncOpts(context, value, fromEncoding, toEncoding, 0, context.nil);
    }

    public static ByteList strConvEnc(ThreadContext context, ByteList value, Encoding fromEncoding, Encoding toEncoding) {
        return strConvEncOpts(context, value, fromEncoding, toEncoding, 0, context.nil);
    }

    public static IRubyObject setStrBuf(Ruby runtime, IRubyObject str, int len) {
        if (str == null || str.isNil()) {
            str = RubyString.newStringLight(runtime, len);
        } else {
            RubyString s = str.convertToString();
            int clen = s.size();
            if (clen >= len) {
                s.modify();
                return s;
            }
            str = s;
            len -= clen;
            ((RubyString)str).modify(((RubyString)str).size() + len);
        }
        return str;
    }

    public interface ResizeFunction {
        /**
         * Resize the destination, returning the new begin offset.
         *
         * @param destination
         * @param len
         * @param new_len
         * @return
         */
        int resize(ByteList destination, int len, int new_len);
    }

    public static final ResizeFunction strTranscodingResize = new ResizeFunction() {
        @Override
        public int resize(ByteList destination, int len, int new_len) {
            destination.setRealSize(len);
            destination.ensure(new_len);
            return destination.getBegin();
        }
    };

    public interface TranscodeFallback {
        IRubyObject call(ThreadContext context, IRubyObject fallback, IRubyObject c);
    }

    private static final TranscodeFallback HASH_FALLBACK = new TranscodeFallback() {
        @Override
        public IRubyObject call(ThreadContext context, IRubyObject fallback, IRubyObject c) {
            return ((RubyHash)fallback).op_aref(context, c);
        }
    };

    private static final TranscodeFallback PROC_FALLBACK = new TranscodeFallback() {
        @Override
        public IRubyObject call(ThreadContext context, IRubyObject fallback, IRubyObject c) {
            return ((RubyProc)fallback).call(context, new IRubyObject[]{c});
        }
    };

    private static final TranscodeFallback METHOD_FALLBACK = new TranscodeFallback() {
        @Override
        public IRubyObject call(ThreadContext context, IRubyObject fallback, IRubyObject c) {
            return fallback.callMethod(context, "call", c);
        }
    };

    private static final TranscodeFallback AREF_FALLBACK = new TranscodeFallback() {
        @Override
        public IRubyObject call(ThreadContext context, IRubyObject fallback, IRubyObject c) {
            return fallback.callMethod(context, "[]", c);
        }
    };
   
    // transcode_loop
    public static void transcodeLoop(ThreadContext context, byte[] inBytes, Ptr inPos, byte[] outBytes, Ptr outPos, int inStop, int _outStop, ByteList destination, ResizeFunction resizeFunction, byte[] sname, byte[] dname, int ecflags, IRubyObject ecopts) {
        Ruby runtime = context.runtime;
        EConv ec;
        Ptr outStop = new Ptr(_outStop);
        IRubyObject fallback = context.nil;
        TranscodeFallback fallbackFunc = null;
       
        ec = econvOpenOpts(context, sname, dname, ecflags, ecopts);
       
        if (ec == null) {
            throw econvOpenExc(context, sname, dname, ecflags);
        }

        if (!ecopts.isNil() && ecopts instanceof RubyHash) {
            fallback = ((RubyHash)ecopts).op_aref(context, runtime.newSymbol("fallback"));
            if (fallback instanceof RubyHash) {
                fallbackFunc = HASH_FALLBACK;
            } else if (fallback instanceof RubyProc) { // not quite same check as MRI
                fallbackFunc = PROC_FALLBACK;
            } else if (fallback instanceof RubyMethod) { // not quite same check as MRI
                fallbackFunc = METHOD_FALLBACK;
            } else {
                fallbackFunc = AREF_FALLBACK;
            }
        }

        Transcoding lastTC = ec.lastTranscoding;
        int maxOutput = lastTC != null ? lastTC.transcoder.maxOutput : 1;

        Ptr outStart = new Ptr(outPos.p);

        // resume:
        while (true) {
            EConvResult ret = ec.convert(inBytes, inPos, inStop, outBytes, outPos, outStop.p, 0);

            if (!fallback.isNil() && ret == EConvResult.UndefinedConversion) {
                IRubyObject rep = RubyString.newStringNoCopy(
                        runtime,
                        new ByteList(
                                ec.lastError.getErrorBytes(),
                                ec.lastError.getErrorBytesP(),
                                ec.lastError.getErrorBytesLength(),
                                runtime.getEncodingService().findEncodingOrAliasEntry(ec.lastError.getSource()).getEncoding(),
                                false)
                );
                rep = fallbackFunc.call(context, fallback, rep);
                if (!rep.isNil()) {
                    rep = rep.convertToString();
                    Encoding repEnc = ((RubyString)rep).getEncoding();
                    ByteList repByteList = ((RubyString)rep).getByteList();
                    ec.insertOutput(repByteList.getUnsafeBytes(), repByteList.begin(), repByteList.getRealSize(), repEnc.getName());

                    // TODO: check for too-large replacement

                    continue;
                }
            }

            if (ret == EConvResult.InvalidByteSequence ||
                    ret == EConvResult.IncompleteInput ||
                    ret == EConvResult.UndefinedConversion) {
                RaiseException re = makeEconvException(runtime, ec);
                ec.close();
                throw re;
            }

            if (ret == EConvResult.DestinationBufferFull) {
                moreOutputBuffer(destination, resizeFunction, maxOutput, outStart, outPos, outStop);
                outBytes = destination.getUnsafeBytes();
                continue;
            }

            ec.close();

            return;
        }
    }

    // make_econv_exception
    public static RaiseException makeEconvException(Ruby runtime, EConv ec) {
        String mesg;
        RaiseException exc;

        if (ec.lastError.getResult() == EConvResult.InvalidByteSequence ||
                ec.lastError.getResult() == EConvResult.IncompleteInput) {
            byte[] errBytes = ec.lastError.getErrorBytes();
            int errBytesP = ec.lastError.getErrorBytesP();
            int errorLen = ec.lastError.getErrorBytesLength();
            ByteList _bytes = new ByteList(errBytes, errBytesP, errorLen - errBytesP);
            RubyString bytes = RubyString.newString(runtime, _bytes);
            RubyString dumped = (RubyString)bytes.dump();
            int readagainLen = ec.lastError.getReadAgainLength();
            IRubyObject bytes2 = runtime.getNil();
            IRubyObject dumped2;
            int idx;
            if (ec.lastError.getResult() == EConvResult.IncompleteInput) {
                mesg = "incomplete " + dumped + " on " + new String(ec.lastError.getSource());
            } else if (readagainLen != 0) {
                bytes2 = RubyString.newString(runtime, new ByteList(errBytes, errorLen + errBytesP, ec.lastError.getReadAgainLength()));
                dumped2 = ((RubyString)bytes2).dump();
                mesg = dumped + " followed by " + dumped2 + " on " + new String(ec.lastError.getSource());
            } else {
                mesg = dumped + " on " + new String(ec.lastError.getSource());
            }

            exc = runtime.newInvalidByteSequenceError(mesg);
            exc.getException().setInternalVariable("error_bytes", bytes);
            exc.getException().setInternalVariable("readagain_bytes", bytes2);
            exc.getException().setInternalVariable("incomplete_input", ec.lastError.getResult() == EConvResult.IncompleteInput ? runtime.getTrue() : runtime.getFalse());

            return makeEConvExceptionSetEncs(exc, runtime, ec);
        } else if (ec.lastError.getResult() == EConvResult.UndefinedConversion) {
            byte[] errBytes = ec.lastError.getErrorBytes();
            int errBytesP = ec.lastError.getErrorBytesP();
            int errorLen = ec.lastError.getErrorBytesLength();
            ByteList _bytes = new ByteList(errBytes, errBytesP, errorLen - errBytesP);
            RubyString bytes = RubyString.newString(runtime, _bytes);
            if (Arrays.equals(ec.lastError.getSource(), "UTF-8".getBytes())) {
                // prepare dumped form
            }
            RubyString dumped = (RubyString)bytes.dump();

            if (Arrays.equals(ec.lastError.getSource(), ec.source) &&
                    Arrays.equals(ec.lastError.getDestination(), ec.destination)) {
                mesg = dumped + " from " + new String(ec.lastError.getSource()) + " to " + new String(ec.lastError.getDestination());
            } else {
                mesg = dumped + " to " + new String(ec.lastError.getDestination()) + " in conversion from " + new String(ec.source);
                for (int i = 0; i < ec.numTranscoders; i++) {
                    mesg += " to " + new String(ec.elements[i].transcoding.transcoder.getDestination());
                }
            }

            exc = runtime.newUndefinedConversionError(mesg);

            EncodingDB.Entry entry = runtime.getEncodingService().findEncodingOrAliasEntry(ec.lastError.getSource());
            if (entry != null) {
                bytes.setEncoding(entry.getEncoding());
                exc.getException().setInternalVariable("error_char", bytes);
            }

            return makeEConvExceptionSetEncs(exc, runtime, ec);
        }
        return null;
    }

    private static RaiseException makeEConvExceptionSetEncs(RaiseException exc, Ruby runtime, EConv ec) {
        exc.getException().setInternalVariable("source_encoding_name", RubyString.newString(runtime, ec.lastError.getSource()));
        exc.getException().setInternalVariable("destination_encoding_name", RubyString.newString(runtime, ec.lastError.getDestination()));

        EncodingDB.Entry entry = runtime.getEncodingService().findEncodingOrAliasEntry(ec.lastError.getSource());
        if (entry != null) {
            exc.getException().setInternalVariable("source_encoding", runtime.getEncodingService().convertEncodingToRubyEncoding(entry.getEncoding()));
        }
        entry = runtime.getEncodingService().findEncodingOrAliasEntry(ec.lastError.getDestination());
        if (entry != null) {
            exc.getException().setInternalVariable("destination_encoding", runtime.getEncodingService().convertEncodingToRubyEncoding(entry.getEncoding()));
        }

        return exc;
    }

    // more_output_buffer
    static void moreOutputBuffer(ByteList destination, ResizeFunction resizeDestination, int maxOutput, Ptr outStart, Ptr outPos, Ptr outStop) {
        int len = outPos.p - outStart.p;
        int newLen = (len + maxOutput) * 2;
        outStart.p = resizeDestination.resize(destination, len, newLen);
        outPos.p = outStart.p + len;
        outStop.p = outStart.p + newLen;
    }
   
    // io_set_encoding_by_bom
    public static void ioSetEncodingByBOM(ThreadContext context, RubyIO io) {
        Ruby runtime = context.runtime;
        Encoding bomEncoding = ioStripBOM(io);
       
        if (bomEncoding != null) {
            // FIXME: Wonky that we acquire RubyEncoding to pass these encodings through
            IRubyObject theBom = runtime.getEncodingService().getEncoding(bomEncoding);
            IRubyObject theInternal = io.internal_encoding(context);

            io.setEncoding(runtime.getCurrentContext(), theBom, theInternal, context.nil);
        }
    }
   
    // mri: io_strip_bom
    @Deprecated
    public static Encoding ioStripBOM(RubyIO io) {
        return ioStripBOM(io.getRuntime().getCurrentContext(), io);
    }
    public static Encoding ioStripBOM(ThreadContext context, RubyIO io) {
        IRubyObject b1, b2, b3, b4;

        if ((b1 = io.getbyte(context)).isNil()) return null;
        switch ((int)((RubyFixnum)b1).getLongValue()) {
            case 0xEF:
                if ((b2 = io.getbyte(context)).isNil()) break;
                if (((RubyFixnum)b2).getLongValue() == 0xBB && !(b3 = io.getbyte(context)).isNil()) {
                    if (((RubyFixnum)b3).getLongValue() == 0xBF) {
                        return UTF8Encoding.INSTANCE;
                    }
                    io.ungetbyte(context, b3);
                }
                io.ungetbyte(context, b2);
                break;
            case 0xFE:
                if ((b2 = io.getbyte(context)).isNil()) break;
                if (((RubyFixnum)b2).getLongValue() == 0xFF) {
                    return UTF16BEEncoding.INSTANCE;
                }
                io.ungetbyte(context, b2);
                break;
            case 0xFF:
                if ((b2 = io.getbyte(context)).isNil()) break;
                if (((RubyFixnum)b2).getLongValue() == 0xFE) {
                    b3 = io.getbyte(context);
                    if (((RubyFixnum)b3).getLongValue() == 0 && !(b4 = io.getbyte(context)).isNil()) {
                        if (((RubyFixnum)b4).getLongValue() == 0) {
                            return UTF32LEEncoding.INSTANCE;
                        }
                        io.ungetbyte(context, b4);
                    } else {
                        io.ungetbyte(context, b3);
                        return UTF16LEEncoding.INSTANCE;
                    }
                    io.ungetbyte(context, b3);
                }
                io.ungetbyte(context, b2);
                break;
            case 0:
                if ((b2 = io.getbyte(context)).isNil()) break;
                if (((RubyFixnum)b2).getLongValue() == 0 && !(b3 = io.getbyte(context)).isNil()) {
                    if (((RubyFixnum)b3).getLongValue() == 0xFE && !(b4 = io.getbyte(context)).isNil()) {
                        if (((RubyFixnum)b4).getLongValue() == 0xFF) {
                            return UTF32BEEncoding.INSTANCE;
                        }
                        io.ungetbyte(context, b4);
                    }
                    io.ungetbyte(context, b3);
                }
                io.ungetbyte(context, b2);
                break;
        }
        io.ungetbyte(context, b1);
        return null;
    }
   
    // validate_enc_binmode
    public static void validateEncodingBinmode(ThreadContext context, int[] fmode_p, int ecflags, IOEncodable ioEncodable) {
        Ruby runtime = context.runtime;
        int fmode = fmode_p[0];
       
        if ((fmode & OpenFile.READABLE) != 0 &&
                ioEncodable.getEnc2() == null &&
                (fmode & OpenFile.BINMODE) == 0 &&
                !(ioEncodable.getEnc() != null ? ioEncodable.getEnc() : runtime.getDefaultExternalEncoding()).isAsciiCompatible()) {
            throw runtime.newArgumentError("ASCII incompatible encoding needs binmode");
        }
       
        if ((fmode & OpenFile.BINMODE) == 0 && (EncodingUtils.DEFAULT_TEXTMODE != 0 || (ecflags & EConvFlags.NEWLINE_DECORATOR_MASK) != 0)) {
            fmode |= EncodingUtils.DEFAULT_TEXTMODE;
            fmode_p[0] = fmode;
        } else if (EncodingUtils.DEFAULT_TEXTMODE == 0 && (ecflags & EConvFlags.NEWLINE_DECORATOR_MASK) == 0) {
            fmode &= ~OpenFile.TEXTMODE;
            fmode_p[0] = fmode;
        }
    }
   
    // rb_enc_set_default_external
    public static void rbEncSetDefaultExternal(ThreadContext context, IRubyObject encoding) {
        if (encoding.isNil()) {
            throw context.runtime.newArgumentError("default external can not be nil");
        }
       
        Encoding[] enc_p = {context.runtime.getDefaultExternalEncoding()};
        encSetDefaultEncoding(context, enc_p, encoding, "external");
        context.runtime.setDefaultExternalEncoding(enc_p[0]);
    }
   
    // rb_enc_set_default_internal
    public static void rbEncSetDefaultInternal(ThreadContext context, IRubyObject encoding) {
        Encoding[] enc_p = {context.runtime.getDefaultInternalEncoding()};
        encSetDefaultEncoding(context, enc_p, encoding, "internal");
        context.runtime.setDefaultInternalEncoding(enc_p[0]);
    }
   
    // enc_set_default_encoding
    public static boolean encSetDefaultEncoding(ThreadContext context, Encoding[] def_p, IRubyObject encoding, String name) {
        boolean overridden = false;
       
        if (def_p != null) {
            overridden = true;
        }
       
        if (encoding.isNil()) {
            def_p[0] = null;
            // don't set back into encoding table since it defers to us
        } else {
            def_p[0] = rbToEncoding(context, encoding);
            // don't set back into encoding table since it defers to us
        }
       
        if (name.equals("external")) {
            // TODO: set filesystem encoding
        }
       
        return overridden;
    }
   
    // rb_default_external_encoding
    public static Encoding defaultExternalEncoding(Ruby runtime) {
        if (runtime.getDefaultExternalEncoding() != null) return runtime.getDefaultExternalEncoding();
       
        return runtime.getEncodingService().getLocaleEncoding();
    }

    // rb_str_buf_cat
    public static void  rbStrBufCat(Ruby runtime, RubyString str, ByteList ptr) {
        if (ptr.length() == 0) return;
        // negative length check here, we shouldn't need
        strBufCat(runtime, str, ptr);
    }
    public static void  rbStrBufCat(Ruby runtime, RubyString str, byte[] ptrBytes, int ptr, int len) {
        if (len == 0) return;
        // negative length check here, we shouldn't need
        strBufCat(runtime, str, ptrBytes, ptr, len);
    }
    public static void  rbStrBufCat(Ruby runtime, ByteList str, byte[] ptrBytes, int ptr, int len) {
        if (len == 0) return;
        // negative length check here, we shouldn't need
        strBufCat(str, ptrBytes, ptr, len);
    }

    // str_buf_cat
    public static void strBufCat(Ruby runtime, RubyString str, ByteList ptr) {
        strBufCat(runtime, str, ptr.getUnsafeBytes(), ptr.getBegin(), ptr.getRealSize());
    }
    public static void strBufCat(Ruby runtime, RubyString str, byte[] ptrBytes, int ptr, int len) {
        str.modify();
        strBufCat(str.getByteList(), ptrBytes, ptr, len);
    }
    public static void strBufCat(ByteList str, byte[] ptrBytes, int ptr, int len) {
        int total, off = -1;

        // termlen is not relevant since we have no termination sequence

        // missing: if ptr string is inside str, off = ptr start minus str start

//        str.modify();
        if (len == 0) return;

        // much logic is missing here, since we don't manually manage the ByteList buffer

        total = str.getRealSize() + len;
        str.ensure(total);
        str.append(ptrBytes, ptr, len);
    }

    // rb_enc_str_buf_cat
    public static void encStrBufCat(Ruby runtime, RubyString str, ByteList ptr, Encoding enc) {
        encCrStrBufCat(runtime, str, ptr.getUnsafeBytes(), ptr.getBegin(), ptr.getRealSize(),
                enc, StringSupport.CR_UNKNOWN, null);
    }
    public static void encStrBufCat(Ruby runtime, RubyString str, byte[] ptrBytes, int ptr, int len, Encoding enc) {
        encCrStrBufCat(runtime, str, ptrBytes, ptr, len,
                enc, StringSupport.CR_UNKNOWN, null);
    }

    // rb_enc_cr_str_buf_cat
    public static void encCrStrBufCat(Ruby runtime, RubyString str, ByteList ptr, Encoding ptrEnc, int ptr_cr, int[] ptr_cr_ret) {
        encCrStrBufCat(runtime, str, ptr.getUnsafeBytes(), ptr.getBegin(), ptr.getRealSize(), ptrEnc, ptr_cr, ptr_cr_ret);
    }
    public static void encCrStrBufCat(Ruby runtime, RubyString str, byte[] ptrBytes, int ptr, int len, Encoding ptrEnc, int ptr_cr, int[] ptr_cr_ret) {
        Encoding strEnc = str.getEncoding();
        Encoding resEnc;
        int str_cr, res_cr;
        boolean incompatible = false;

        str_cr = str.size() > 0 ? str.getCodeRange() : StringSupport.CR_7BIT;

        if (strEnc == ptrEnc) {
            if (str_cr == StringSupport.CR_UNKNOWN) {
                ptr_cr = StringSupport.CR_UNKNOWN;
            } else if (ptr_cr == StringSupport.CR_UNKNOWN) {
                ptr_cr = StringSupport.codeRangeScan(ptrEnc, ptrBytes, ptr, len);
            }
        } else {
            if (!EncodingUtils.encAsciicompat(strEnc) || !EncodingUtils.encAsciicompat(ptrEnc)) {
                if (len == 0) {
                    return;
                }
                if (str.size() == 0) {
                    rbStrBufCat(runtime, str, ptrBytes, ptr, len);
                    str.setEncodingAndCodeRange(ptrEnc, ptr_cr);
                    return;
                }
                incompatible = true;
            }
            if (!incompatible) {
                if (ptr_cr == StringSupport.CR_UNKNOWN) {
                    ptr_cr = StringSupport.codeRangeScan(ptrEnc, ptrBytes, ptr, len);
                }
                if (str_cr == StringSupport.CR_UNKNOWN) {
                    if (strEnc == ASCIIEncoding.INSTANCE || ptr_cr != StringSupport.CR_7BIT) {
                        str_cr = str.scanForCodeRange();
                    }
                }
            }
        }
        if (ptr_cr_ret != null) {
            ptr_cr_ret[0] = ptr_cr;
        }

        if (incompatible ||
                (strEnc != ptrEnc &&
                str_cr != StringSupport.CR_7BIT &&
                ptr_cr != StringSupport.CR_7BIT)) {
            throw runtime.newEncodingCompatibilityError("incompatible encodings: " + strEnc + " and " + ptrEnc);
        }

        if (str_cr == StringSupport.CR_UNKNOWN) {
            resEnc = strEnc;
            res_cr = StringSupport.CR_UNKNOWN;
        } else if (str_cr == StringSupport.CR_7BIT) {
            if (ptr_cr == StringSupport.CR_7BIT) {
                resEnc = strEnc;
                res_cr = StringSupport.CR_7BIT;
            } else {
                resEnc = ptrEnc;
                res_cr = ptr_cr;
            }
        } else if (str_cr == StringSupport.CR_VALID) {
            resEnc = strEnc;
            if (ptr_cr == StringSupport.CR_7BIT || ptr_cr == StringSupport.CR_VALID) {
                res_cr = str_cr;
            } else {
                res_cr = ptr_cr;
            }
        } else { // str_cr must be BROKEN at this point
            resEnc = strEnc;
            res_cr = str_cr;
            if (0 < len) res_cr = StringSupport.CR_UNKNOWN;
        }

        // MRI checks for len < 0 here, but I don't think that's possible for us

        strBufCat(runtime, str, ptrBytes, ptr, len);
        str.setEncodingAndCodeRange(resEnc, res_cr);
    }

    // econv_args
    public static void econvArgs(ThreadContext context, IRubyObject[] args, byte[][] encNames, Encoding[] encs, int[] ecflags_p, IRubyObject[] ecopts_p) {
        Ruby runtime = context.runtime;
        IRubyObject snamev = context.nil;
        IRubyObject dnamev = context.nil;
        IRubyObject flags = context.nil;
        IRubyObject opt = context.nil;

        // scan args logic
        {
            switch (args.length) {
                case 3:
                    flags = args[2];
                case 2:
                    dnamev = args[1];
                case 1:
                    snamev = args[0];
            }

            IRubyObject tmp;
            if (!(tmp = TypeConverter.checkHashType(runtime, flags)).isNil()) {
                opt = tmp;
                flags = context.nil;
            }
        }

        if (!flags.isNil()) {
            if (!opt.isNil()) {
                throw runtime.newArgumentError(args.length, 3);
            }
            ecflags_p[0] = (int)flags.convertToInteger().getLongValue();
            ecopts_p[0] = context.nil;
        } else if (!opt.isNil()) {
            ecflags_p[0] = EncodingUtils.econvPrepareOpts(context, opt, ecopts_p);
        } else {
            ecflags_p[0] = 0;
            ecopts_p[0] = context.nil;
        }

        encs[0] = runtime.getEncodingService().getEncodingFromObjectNoError(snamev);
        if (encs[0] == null) {
            snamev = snamev.convertToString();
        }
        encs[1] = runtime.getEncodingService().getEncodingFromObjectNoError(dnamev);
        if (encs[1] == null) {
            dnamev = dnamev.convertToString();
        }

        encNames[0] = encs[0] != null ? encs[0].getName() : ((RubyString)snamev).getBytes();
        encNames[1] = encs[1] != null ? encs[1].getName() : ((RubyString)dnamev).getBytes();

        return;
    }

    // rb_econv_init_by_convpath
    public static EConv econvInitByConvpath(ThreadContext context, IRubyObject convpath, byte[][] encNames, Encoding[] encs) {
        final Ruby runtime = context.runtime;
        final EConv ec = TranscoderDB.alloc(convpath.convertToArray().size());

        IRubyObject[] sname_v = {context.nil};
        IRubyObject[] dname_v = {context.nil};
        byte[][] sname = {null};
        byte[][] dname = {null};
        Encoding[] senc = {null};
        Encoding[] denc = {null};

        boolean first = true;

        for (int i = 0; i < ((RubyArray)convpath).size(); i++) {
            IRubyObject elt = ((RubyArray)convpath).eltOk(i);
            IRubyObject pair;
            if (!(pair = elt.checkArrayType()).isNil()) {
                if (((RubyArray)pair).size() != 2) {
                    throw context.runtime.newArgumentError("not a 2-element array in convpath");
                }
                sname_v[0] = ((RubyArray)pair).eltOk(0);
                encArg(context, sname_v[0], sname, senc);
                dname_v[0] = ((RubyArray)pair).eltOk(1);
                encArg(context, dname_v[0], dname, denc);
            } else {
                sname[0] = NULL_BYTE_ARRAY;
                dname[0] = elt.convertToString().getBytes();
            }
            if (DECORATOR_P(sname[0], dname[0])) {
                boolean ret = ec.addConverter(sname[0], dname[0], ec.numTranscoders);
                if (!ret) {
                    throw runtime.newArgumentError("decoration failed: " + new String(dname[0]));
                }
            } else {
                int j = ec.numTranscoders;
                final int[] arg = {j,0};
                int ret = TranscoderDB.searchPath(sname[0], dname[0], new TranscoderDB.SearchPathCallback() {
                    @Override
                    public void call(byte[] source, byte[] destination, int depth) {
                        if (arg[1] == -1) return;

                        arg[1] = ec.addConverter(source, destination, arg[0]) ? 0 : -1;
                    }
                });
                if (ret == -1 || arg[1] == -1) {
                    throw runtime.newArgumentError("adding conversion failed: " + new String(sname[0]) + " to " + new String(dname[0]));
                }
                if (first) {
                    first = false;
                    encs[0] = senc[0];
                    encNames[0] = ec.elements[j].transcoding.transcoder.getSource();
                }
                encs[1] = denc[0];
                encNames[1] = ec.elements[ec.numTranscoders - 1].transcoding.transcoder.getDestination();
            }
        }

        if (first) {
            encs[0] = null;
            encs[1] = null;
            encNames[0] = new byte[0];
            encNames[1] = new byte[0];
        }

        ec.source = encNames[0];
        ec.destination = encNames[0];

        return ec;
    }

    // decorate_convpath
    public static int decorateConvpath(ThreadContext context, IRubyObject convpath, int ecflags) {
        Ruby runtime = context.runtime;
        int num_decorators;
        byte[][] decorators = new byte[EConvFlags.MAX_ECFLAGS_DECORATORS][];
        int i;
        int n, len;

        num_decorators = TranscoderDB.decoratorNames(ecflags, decorators);
        if (num_decorators == -1)
            return -1;

        len = n = ((RubyArray)convpath).size();
        if (n != 0) {
            IRubyObject pair = ((RubyArray)convpath).eltOk(n - 1);
            if (pair instanceof RubyArray) {
                byte[] sname = runtime.getEncodingService().getEncodingFromObject(((RubyArray)pair).eltOk(0)).getName();
                byte[] dname = runtime.getEncodingService().getEncodingFromObject(((RubyArray)pair).eltOk(1)).getName();
                TranscoderDB.Entry entry = TranscoderDB.getEntry(sname, dname);
                Transcoder tr = entry.getTranscoder();
                if (tr == null)
                    return -1;
                if (!DECORATOR_P(tr.getSource(), tr.getDestination()) &&
                        tr.compatibility.isEncoder()) {
                    n--;
                    ((RubyArray)convpath).store(len + num_decorators - 1, pair);
                }
            } else {
                ((RubyArray)convpath).store(len + num_decorators - 1, pair);
            }
        }

        for (i = 0; i < num_decorators; i++)
            ((RubyArray)convpath).store(n + i, RubyString.newString(runtime, decorators[i]));

        return 0;
    }

    // io_enc_str
    public static IRubyObject ioEncStr(Ruby runtime, IRubyObject str, OpenFile fptr)
    {
        str.setTaint(true);
        ((RubyString)str).setEncoding(fptr.readEncoding(runtime));
        return str;
    }

    // rb_enc_uint_chr
    public static IRubyObject encUintChr(ThreadContext context, int code, Encoding enc) {
        Ruby runtime = context.runtime;

        if (!Character.isValidCodePoint(code)) {
            // inefficient to create a fixnum for this
            return new RubyFixnum(runtime, code).chr19(context);
        }

        char[] chars = Character.toChars(code);
        RubyString str = RubyString.newString(runtime, new String(chars), enc);
//        ByteList strByteList = str.getByteList();
//        if (StringSupport.preciseLength(enc, strByteList.unsafeBytes(), strByteList.getBegin(), strByteList.getBegin() + strByteList.getRealSize()) != n) {
//            rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
//        }
        return str;

    }

    // rb_enc_mbcput
    public static void encMbcput(int c, byte[] buf, int p, Encoding enc) {
        enc.codeToMbc(c, buf, p);
    }

    // rb_enc_codepoint_len
    public static int encCodepointLength(Ruby runtime, byte[] pBytes, int p, int e, int[] len_p, Encoding enc) {
        int r;
        if (e <= p)
            throw runtime.newArgumentError("empty string");
        r = StringSupport.preciseLength(enc, pBytes, p, e);
        if (!StringSupport.MBCLEN_CHARFOUND_P(r)) {
            throw runtime.newArgumentError("invalid byte sequence in " + enc);
        }
        if (len_p != null) len_p[0] = StringSupport.MBCLEN_CHARFOUND_LEN(r);
        return StringSupport.codePoint(runtime, enc, pBytes, p, e);
    }

    // MRI: str_compat_and_valid
    public static IRubyObject strCompatAndValid(ThreadContext context, IRubyObject _str, Encoding enc) {
        int cr;
        RubyString str = _str.convertToString();
        cr = str.scanForCodeRange();
        if (cr == StringSupport.CR_BROKEN) {
            throw context.runtime.newArgumentError("replacement must be valid byte sequence '" + str + "'");
        }
        else if (cr == StringSupport.CR_7BIT) {
            Encoding e = str.getEncoding();
            if (!enc.isAsciiCompatible()) {
                throw context.runtime.newEncodingCompatibilityError("incompatible character encodings: " + enc + " and " + e);
            }
        }
        else { /* ENC_CODERANGE_VALID */
            Encoding e = str.getEncoding();
            if (enc != e) {
                throw context.runtime.newEncodingCompatibilityError("incompatible character encodings: " + enc + " and " + e);
            }
        }
        return str;
    }
}
TOP

Related Classes of org.jruby.util.io.EncodingUtils$ResizeFunction

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.