Examples of com.ibm.icu.impl.Normalizer2Impl$UTF16Plus

com.ibm.icu.impl.Normalizer2Impl
Writable buffer that takes care of canonical ordering. Its Appendable methods behave like the C++ implementation's appendZeroCC() methods.
If dest is a StringBuilder, then the buffer writes directly to it. Otherwise, the buffer maintains a StringBuilder for intermediate text segments until no further changes are necessary and whole segments are appended. append() methods that take combining-class values always write to the StringBuilder. Other append() methods flush and append to the Appendable.

        }
        // first: b = NFKC(Fold(a))
        StringBuffer folded=new StringBuffer();
        int folded1Length=csp.toFullFolding(c, folded, 0);
        if(folded1Length<0) {
            Normalizer2Impl nfkcImpl=((Norm2AllModes.Normalizer2WithImpl)nfkc).impl;
            if(nfkcImpl.getCompQuickCheck(nfkcImpl.getNorm16(c))!=0) {
                return "";  // c does not change at all under CaseFolding+NFKC
            }
            folded.appendCodePoint(c);
        } else {
            if(folded1Length>UCaseProps.MAX_STRING_LENGTH) {

View Full Code Here

     */
    private static final int COMPARE_EQUIV=0x80000;


    /* internal function; package visibility for use by UTF16.StringComparator */
    /*package*/ static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
        Normalizer2Impl nfcImpl;
        UCaseProps csp;


        /* current-level start/limit - s1/s2 as current */
        int s1, s2, limit1, limit2;


        /* decomposition and case folding variables */
        int length;


        /* stacks of previous-level start/current/limit */
        CmpEquivLevel[] stack1=null, stack2=null;


        /* buffers for algorithmic decompositions */
        String decomp1, decomp2;


        /* case folding buffers, only use current-level start/limit */
        StringBuffer fold1, fold2;


        /* track which is the current level per string */
        int level1, level2;


        /* current code units, and code points for lookups */
        int c1, c2, cp1, cp2;


        /* no argument error checking because this itself is not an API */


        /*
         * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
         * otherwise this function must behave exactly as uprv_strCompare()
         * not checking for that here makes testing this function easier
         */


        /* normalization/properties data loaded? */
        if((options&COMPARE_EQUIV)!=0) {
            nfcImpl=Norm2AllModes.getNFCInstance().impl;
        } else {
            nfcImpl=null;
        }
        if((options&COMPARE_IGNORE_CASE)!=0) {
            try {
                csp=UCaseProps.getSingleton();
            } catch(IOException e) {
                throw new RuntimeException(e);
            }
            fold1=new StringBuffer();
            fold2=new StringBuffer();
        } else {
            csp=null;
            fold1=fold2=null;
        }


        /* initialize */
        s1=0;
        limit1=cs1.length();
        s2=0;
        limit2=cs2.length();


        level1=level2=0;
        c1=c2=-1;


        /* comparison loop */
        for(;;) {
            /*
             * here a code unit value of -1 means "get another code unit"
             * below it will mean "this source is finished"
             */


            if(c1<0) {
                /* get next code unit from string 1, post-increment */
                for(;;) {
                    if(s1==limit1) {
                        if(level1==0) {
                            c1=-1;
                            break;
                        }
                    } else {
                        c1=cs1.charAt(s1++);
                        break;
                    }


                    /* reached end of level buffer, pop one level */
                    do {
                        --level1;
                        cs1=stack1[level1].cs;
                    } while(cs1==null);
                    s1=stack1[level1].s;
                    limit1=cs1.length();
                }
            }


            if(c2<0) {
                /* get next code unit from string 2, post-increment */
                for(;;) {
                    if(s2==limit2) {
                        if(level2==0) {
                            c2=-1;
                            break;
                        }
                    } else {
                        c2=cs2.charAt(s2++);
                        break;
                    }


                    /* reached end of level buffer, pop one level */
                    do {
                        --level2;
                        cs2=stack2[level2].cs;
                    } while(cs2==null);
                    s2=stack2[level2].s;
                    limit2=cs2.length();
                }
            }


            /*
             * compare c1 and c2
             * either variable c1, c2 is -1 only if the corresponding string is finished
             */
            if(c1==c2) {
                if(c1<0) {
                    return 0;   /* c1==c2==-1 indicating end of strings */
                }
                c1=c2=-1;       /* make us fetch new code units */
                continue;
            } else if(c1<0) {
                return -1;      /* string 1 ends before string 2 */
            } else if(c2<0) {
                return 1;       /* string 2 ends before string 1 */
            }
            /* c1!=c2 && c1>=0 && c2>=0 */


            /* get complete code points for c1, c2 for lookups if either is a surrogate */
            cp1=c1;
            if(UTF16.isSurrogate((char)c1)) {
                char c;


                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                    if(s1!=limit1 && Character.isLowSurrogate(c=cs1.charAt(s1))) {
                        /* advance ++s1; only below if cp1 decomposes/case-folds */
                        cp1=Character.toCodePoint((char)c1, c);
                    }
                } else /* isTrail(c1) */ {
                    if(0<=(s1-2) && Character.isHighSurrogate(c=cs1.charAt(s1-2))) {
                        cp1=Character.toCodePoint(c, (char)c1);
                    }
                }
            }


            cp2=c2;
            if(UTF16.isSurrogate((char)c2)) {
                char c;


                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                    if(s2!=limit2 && Character.isLowSurrogate(c=cs2.charAt(s2))) {
                        /* advance ++s2; only below if cp2 decomposes/case-folds */
                        cp2=Character.toCodePoint((char)c2, c);
                    }
                } else /* isTrail(c2) */ {
                    if(0<=(s2-2) && Character.isHighSurrogate(c=cs2.charAt(s2-2))) {
                        cp2=Character.toCodePoint(c, (char)c2);
                    }
                }
            }


            /*
             * go down one level for each string
             * continue with the main loop as soon as there is a real change
             */


            if( level1==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
                (length=csp.toFullFolding(cp1, fold1, options))>=0
            ) {
                /* cp1 case-folds to the code point "length" or to p[length] */
                if(UTF16.isSurrogate((char)c1)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                        /* advance beyond source surrogate pair if it case-folds */
                        ++s1;
                    } else /* isTrail(c1) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s2;
                        c2=cs2.charAt(s2-1);
                    }
                }


                /* push current level pointers */
                if(stack1==null) {
                    stack1=createCmpEquivLevelStack();
                }
                stack1[0].cs=cs1;
                stack1[0].s=s1;
                ++level1;


                /* copy the folding result to fold1[] */
                /* Java: the buffer was probably not empty, remove the old contents */
                if(length<=UCaseProps.MAX_STRING_LENGTH) {
                    fold1.delete(0, fold1.length()-length);
                } else {
                    fold1.setLength(0);
                    fold1.appendCodePoint(length);
                }


                /* set next level pointers to case folding */
                cs1=fold1;
                s1=0;
                limit1=fold1.length();


                /* get ready to read from decomposition, continue with loop */
                c1=-1;
                continue;
            }


            if( level2==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
                (length=csp.toFullFolding(cp2, fold2, options))>=0
            ) {
                /* cp2 case-folds to the code point "length" or to p[length] */
                if(UTF16.isSurrogate((char)c2)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                        /* advance beyond source surrogate pair if it case-folds */
                        ++s2;
                    } else /* isTrail(c2) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s1;
                        c1=cs1.charAt(s1-1);
                    }
                }


                /* push current level pointers */
                if(stack2==null) {
                    stack2=createCmpEquivLevelStack();
                }
                stack2[0].cs=cs2;
                stack2[0].s=s2;
                ++level2;


                /* copy the folding result to fold2[] */
                /* Java: the buffer was probably not empty, remove the old contents */
                if(length<=UCaseProps.MAX_STRING_LENGTH) {
                    fold2.delete(0, fold2.length()-length);
                } else {
                    fold2.setLength(0);
                    fold2.appendCodePoint(length);
                }


                /* set next level pointers to case folding */
                cs2=fold2;
                s2=0;
                limit2=fold2.length();


                /* get ready to read from decomposition, continue with loop */
                c2=-1;
                continue;
            }


            if( level1<2 && (options&COMPARE_EQUIV)!=0 &&
                (decomp1=nfcImpl.getDecomposition(cp1))!=null
            ) {
                /* cp1 decomposes into p[length] */
                if(UTF16.isSurrogate((char)c1)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                        /* advance beyond source surrogate pair if it decomposes */
                        ++s1;
                    } else /* isTrail(c1) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s2;
                        c2=cs2.charAt(s2-1);
                    }
                }


                /* push current level pointers */
                if(stack1==null) {
                    stack1=createCmpEquivLevelStack();
                }
                stack1[level1].cs=cs1;
                stack1[level1].s=s1;
                ++level1;


                /* set empty intermediate level if skipped */
                if(level1<2) {
                    stack1[level1++].cs=null;
                }


                /* set next level pointers to decomposition */
                cs1=decomp1;
                s1=0;
                limit1=decomp1.length();


                /* get ready to read from decomposition, continue with loop */
                c1=-1;
                continue;
            }


            if( level2<2 && (options&COMPARE_EQUIV)!=0 &&
                (decomp2=nfcImpl.getDecomposition(cp2))!=null
            ) {
                /* cp2 decomposes into p[length] */
                if(UTF16.isSurrogate((char)c2)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                        /* advance beyond source surrogate pair if it decomposes */

View Full Code Here

    public static int getCombiningClass(int ch)
    {
        if (ch < MIN_VALUE || ch > MAX_VALUE) {
            throw new IllegalArgumentException("Codepoint out of bounds");
        }
        Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl;
        return impl.getCC(impl.getNorm16(ch));
    }

View Full Code Here

        UCaseProps csp=UCaseProps.INSTANCE;
        // first: b = NFKC(Fold(a))
        StringBuilder folded=new StringBuilder();
        int folded1Length=csp.toFullFolding(c, folded, 0);
        if(folded1Length<0) {
            Normalizer2Impl nfkcImpl=((Norm2AllModes.Normalizer2WithImpl)nfkc).impl;
            if(nfkcImpl.getCompQuickCheck(nfkcImpl.getNorm16(c))!=0) {
                return "";  // c does not change at all under CaseFolding+NFKC
            }
            folded.appendCodePoint(c);
        } else {
            if(folded1Length>UCaseProps.MAX_STRING_LENGTH) {

View Full Code Here

     */
    private static final int COMPARE_EQUIV=0x80000;


    /* internal function; package visibility for use by UTF16.StringComparator */
    /*package*/ static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
        Normalizer2Impl nfcImpl;
        UCaseProps csp;


        /* current-level start/limit - s1/s2 as current */
        int s1, s2, limit1, limit2;


        /* decomposition and case folding variables */
        int length;


        /* stacks of previous-level start/current/limit */
        CmpEquivLevel[] stack1=null, stack2=null;


        /* buffers for algorithmic decompositions */
        String decomp1, decomp2;


        /* case folding buffers, only use current-level start/limit */
        StringBuilder fold1, fold2;


        /* track which is the current level per string */
        int level1, level2;


        /* current code units, and code points for lookups */
        int c1, c2, cp1, cp2;


        /* no argument error checking because this itself is not an API */


        /*
         * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
         * otherwise this function must behave exactly as uprv_strCompare()
         * not checking for that here makes testing this function easier
         */


        /* normalization/properties data loaded? */
        if((options&COMPARE_EQUIV)!=0) {
            nfcImpl=Norm2AllModes.getNFCInstance().impl;
        } else {
            nfcImpl=null;
        }
        if((options&COMPARE_IGNORE_CASE)!=0) {
            csp=UCaseProps.INSTANCE;
            fold1=new StringBuilder();
            fold2=new StringBuilder();
        } else {
            csp=null;
            fold1=fold2=null;
        }


        /* initialize */
        s1=0;
        limit1=cs1.length();
        s2=0;
        limit2=cs2.length();


        level1=level2=0;
        c1=c2=-1;


        /* comparison loop */
        for(;;) {
            /*
             * here a code unit value of -1 means "get another code unit"
             * below it will mean "this source is finished"
             */


            if(c1<0) {
                /* get next code unit from string 1, post-increment */
                for(;;) {
                    if(s1==limit1) {
                        if(level1==0) {
                            c1=-1;
                            break;
                        }
                    } else {
                        c1=cs1.charAt(s1++);
                        break;
                    }


                    /* reached end of level buffer, pop one level */
                    do {
                        --level1;
                        cs1=stack1[level1].cs;
                    } while(cs1==null);
                    s1=stack1[level1].s;
                    limit1=cs1.length();
                }
            }


            if(c2<0) {
                /* get next code unit from string 2, post-increment */
                for(;;) {
                    if(s2==limit2) {
                        if(level2==0) {
                            c2=-1;
                            break;
                        }
                    } else {
                        c2=cs2.charAt(s2++);
                        break;
                    }


                    /* reached end of level buffer, pop one level */
                    do {
                        --level2;
                        cs2=stack2[level2].cs;
                    } while(cs2==null);
                    s2=stack2[level2].s;
                    limit2=cs2.length();
                }
            }


            /*
             * compare c1 and c2
             * either variable c1, c2 is -1 only if the corresponding string is finished
             */
            if(c1==c2) {
                if(c1<0) {
                    return 0;   /* c1==c2==-1 indicating end of strings */
                }
                c1=c2=-1;       /* make us fetch new code units */
                continue;
            } else if(c1<0) {
                return -1;      /* string 1 ends before string 2 */
            } else if(c2<0) {
                return 1;       /* string 2 ends before string 1 */
            }
            /* c1!=c2 && c1>=0 && c2>=0 */


            /* get complete code points for c1, c2 for lookups if either is a surrogate */
            cp1=c1;
            if(UTF16.isSurrogate((char)c1)) {
                char c;


                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                    if(s1!=limit1 && Character.isLowSurrogate(c=cs1.charAt(s1))) {
                        /* advance ++s1; only below if cp1 decomposes/case-folds */
                        cp1=Character.toCodePoint((char)c1, c);
                    }
                } else /* isTrail(c1) */ {
                    if(0<=(s1-2) && Character.isHighSurrogate(c=cs1.charAt(s1-2))) {
                        cp1=Character.toCodePoint(c, (char)c1);
                    }
                }
            }


            cp2=c2;
            if(UTF16.isSurrogate((char)c2)) {
                char c;


                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                    if(s2!=limit2 && Character.isLowSurrogate(c=cs2.charAt(s2))) {
                        /* advance ++s2; only below if cp2 decomposes/case-folds */
                        cp2=Character.toCodePoint((char)c2, c);
                    }
                } else /* isTrail(c2) */ {
                    if(0<=(s2-2) && Character.isHighSurrogate(c=cs2.charAt(s2-2))) {
                        cp2=Character.toCodePoint(c, (char)c2);
                    }
                }
            }


            /*
             * go down one level for each string
             * continue with the main loop as soon as there is a real change
             */


            if( level1==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
                (length=csp.toFullFolding(cp1, fold1, options))>=0
            ) {
                /* cp1 case-folds to the code point "length" or to p[length] */
                if(UTF16.isSurrogate((char)c1)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                        /* advance beyond source surrogate pair if it case-folds */
                        ++s1;
                    } else /* isTrail(c1) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s2;
                        c2=cs2.charAt(s2-1);
                    }
                }


                /* push current level pointers */
                if(stack1==null) {
                    stack1=createCmpEquivLevelStack();
                }
                stack1[0].cs=cs1;
                stack1[0].s=s1;
                ++level1;


                /* copy the folding result to fold1[] */
                /* Java: the buffer was probably not empty, remove the old contents */
                if(length<=UCaseProps.MAX_STRING_LENGTH) {
                    fold1.delete(0, fold1.length()-length);
                } else {
                    fold1.setLength(0);
                    fold1.appendCodePoint(length);
                }


                /* set next level pointers to case folding */
                cs1=fold1;
                s1=0;
                limit1=fold1.length();


                /* get ready to read from decomposition, continue with loop */
                c1=-1;
                continue;
            }


            if( level2==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
                (length=csp.toFullFolding(cp2, fold2, options))>=0
            ) {
                /* cp2 case-folds to the code point "length" or to p[length] */
                if(UTF16.isSurrogate((char)c2)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                        /* advance beyond source surrogate pair if it case-folds */
                        ++s2;
                    } else /* isTrail(c2) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s1;
                        c1=cs1.charAt(s1-1);
                    }
                }


                /* push current level pointers */
                if(stack2==null) {
                    stack2=createCmpEquivLevelStack();
                }
                stack2[0].cs=cs2;
                stack2[0].s=s2;
                ++level2;


                /* copy the folding result to fold2[] */
                /* Java: the buffer was probably not empty, remove the old contents */
                if(length<=UCaseProps.MAX_STRING_LENGTH) {
                    fold2.delete(0, fold2.length()-length);
                } else {
                    fold2.setLength(0);
                    fold2.appendCodePoint(length);
                }


                /* set next level pointers to case folding */
                cs2=fold2;
                s2=0;
                limit2=fold2.length();


                /* get ready to read from decomposition, continue with loop */
                c2=-1;
                continue;
            }


            if( level1<2 && (options&COMPARE_EQUIV)!=0 &&
                (decomp1=nfcImpl.getDecomposition(cp1))!=null
            ) {
                /* cp1 decomposes into p[length] */
                if(UTF16.isSurrogate((char)c1)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                        /* advance beyond source surrogate pair if it decomposes */
                        ++s1;
                    } else /* isTrail(c1) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s2;
                        c2=cs2.charAt(s2-1);
                    }
                }


                /* push current level pointers */
                if(stack1==null) {
                    stack1=createCmpEquivLevelStack();
                }
                stack1[level1].cs=cs1;
                stack1[level1].s=s1;
                ++level1;


                /* set empty intermediate level if skipped */
                if(level1<2) {
                    stack1[level1++].cs=null;
                }


                /* set next level pointers to decomposition */
                cs1=decomp1;
                s1=0;
                limit1=decomp1.length();


                /* get ready to read from decomposition, continue with loop */
                c1=-1;
                continue;
            }


            if( level2<2 && (options&COMPARE_EQUIV)!=0 &&
                (decomp2=nfcImpl.getDecomposition(cp2))!=null
            ) {
                /* cp2 decomposes into p[length] */
                if(UTF16.isSurrogate((char)c2)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                        /* advance beyond source surrogate pair if it decomposes */

View Full Code Here

TOP

Related Classes of com.ibm.icu.impl.Normalizer2Impl$UTF16Plus

com.ibm.icu.lang.UCharacter

com.ibm.icu.text.Normalizer

com.ibm.icu.text.UnicodeSet

java.io.IOException

java.io.BufferedInputStream

java.io.DataInputStream

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.