Package com.foundationdb.server.collation

Source Code of com.foundationdb.server.collation.CollationSpecifier

/**
* Copyright (C) 2009-2014 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package com.foundationdb.server.collation;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;

import com.foundationdb.server.error.AmbiguousCollationException;
import com.foundationdb.server.error.InvalidCollationKeywordException;
import com.foundationdb.server.error.InvalidCollationSchemeException;
import com.foundationdb.server.error.UnsupportedCollationException;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;

public class CollationSpecifier {

    // Only the region needs to be checked, for ambiguity
    private final static int REGION_NDX = 1;

    private final static String CASE_SENSITIVE = "cs";
    private final static String CASE_INSENSITIVE = "ci";
    private final static String ACCENT_SENSITIVE = "co";
    private final static String ACCENT_INSENSITIVE = "cx";

    private final static String DEFAULT_CASE = CASE_SENSITIVE;
    private final static String DEFAULT_ACCENT = ACCENT_SENSITIVE;

    // Used to check the validity of requested locales
    private final static HashSet<ULocale> locales = new HashSet<ULocale>(Arrays.asList(ULocale.getAvailableLocales()));

    private final String scheme; // the original, user-created scheme; use toString() instead for a standardized version
    private final String locale;
    private final boolean caseSensitive;
    private final boolean accentSensitive;
    private final HashMap<String, String> keywordsToValues = new HashMap<String, String>();

    public CollationSpecifier(String scheme) {
        this.scheme = scheme;
        String[] pieces = scheme.toLowerCase().split("_");

        StringBuilder localeBuilder = new StringBuilder();
        boolean localeStarted = false;
        boolean localeFinished = false;
        boolean caseSet = false;
        boolean accentSet = false;
        boolean caseSensitive = false;
        boolean accentSensitive = false;
        for (int i = 0; i < pieces.length; i++) {
            if (pieces[i].contains("=")) {
                addKeyword(pieces[i], scheme);
                localeFinished = true;
            }
            else if (isCaseShortcut(pieces[i]) || (isAccentShortcut(pieces[i]))) {
                if (i == REGION_NDX) {
                    if (localeStarted) localeBuilder.append("_");
                    localeStarted = true;
                    localeBuilder.append(pieces[i]);
                } else if (isCaseShortcut(pieces[i])){
                    if (caseSet) {
                        throw new InvalidCollationSchemeException(scheme, "can't set the case sensitivity twice");
                    }
                    caseSensitive = CASE_SENSITIVE.equalsIgnoreCase(pieces[i]);
                    localeFinished = true;
                    caseSet = true;
                } else {
                    if (accentSet) {
                        throw new InvalidCollationSchemeException(scheme, "can't set the accent sensitivity twice");
                    }
                    accentSensitive = ACCENT_SENSITIVE.equalsIgnoreCase(pieces[i]);
                    localeFinished = true;
                    accentSet = true;
                }
            }
            else if (localeFinished) {
                throw new InvalidCollationSchemeException(scheme, "can't define locale after keywords or shortcuts");
            } else {
                if (localeStarted) localeBuilder.append("_");
                localeStarted = true;
                localeBuilder.append(pieces[i]);
            }
        }

        // if the locale is just a language, need to append an underscore
        // to avoid ambiguity in toString()
        if (localeBuilder.indexOf("_") == -1) {
            localeBuilder.append("_");
        }
        locale = localeBuilder.toString();

        checkKeywordsAndShortcuts(caseSet, accentSet);
        checkAmbiguous(pieces, caseSensitive, accentSensitive, caseSet, accentSet);

        if (caseSet) {
            this.caseSensitive = caseSensitive;
        } else {
            this.caseSensitive = CASE_SENSITIVE.equalsIgnoreCase(DEFAULT_CASE);
        }
        if (accentSet) {
            this.accentSensitive = accentSensitive;
        } else {
            this. accentSensitive = ACCENT_SENSITIVE.equalsIgnoreCase(DEFAULT_ACCENT);
        }
    }

    private void checkKeywordsAndShortcuts(boolean caseSet, boolean accentSet) {
        if (!keywordsToValues.isEmpty() && (caseSet || accentSet)) {
            throw new InvalidCollationSchemeException(scheme, "can't include both keywords and case/accent shortcuts");
        }
    }

    private void checkAmbiguous(String[] pieces, boolean caseSensitive, boolean accentSensitive,
            boolean caseSet, boolean accentSet) {
        if (pieces.length < REGION_NDX + 1) return;
        if ((isCaseShortcut(pieces[REGION_NDX]) && !caseSet) ||
                (isAccentShortcut(pieces[REGION_NDX]) && !accentSet)) {
            String providedCase = !caseSet ? DEFAULT_CASE
                                           : caseSensitive ? CASE_SENSITIVE : CASE_INSENSITIVE;
            String providedAccent = !accentSet ? DEFAULT_ACCENT
                                               : accentSensitive ? ACCENT_SENSITIVE: ACCENT_INSENSITIVE;

            String possibility1case = isCaseShortcut(pieces[REGION_NDX]) ? pieces[REGION_NDX] : providedCase;
            String possibility1accent = isAccentShortcut(pieces[REGION_NDX]) ? pieces[REGION_NDX] : providedAccent;
            String possibility1 = new StringBuilder().append(locale.replace(pieces[REGION_NDX], ""))
                                                     .append("_")
                                                     .append(possibility1case)
                                                     .append("_")
                                                     .append(possibility1accent)
                                                     .toString();
            String possibility2 = new StringBuilder().append(locale)
                                                     .append("_")
                                                     .append(providedCase)
                                                     .append("_")
                                                     .append(providedAccent)
                                                     .toString();
            throw new AmbiguousCollationException(scheme, possibility1, possibility2);
        }
    }

    public RuleBasedCollator createCollator() {
        ULocale ulocale = new ULocale(locale);
        checkLocale(ulocale, scheme);
        ulocale = setKeywords(ulocale, keywordsToValues);

        RuleBasedCollator collator = (RuleBasedCollator) RuleBasedCollator.getInstance(ulocale);
        checkKeywords(collator.getLocale(ULocale.VALID_LOCALE), keywordsToValues,
                scheme);

        if (shouldSetStrength()) {
            setCollatorStrength(collator, this);
        }
       
        return collator;
    }

    private static void checkKeywords(ULocale locale, Map<String, String> keywordsToValues, String scheme) {
        for (Entry<String, String> entry : keywordsToValues.entrySet()) {
            if (locale.getKeywordValue(entry.getKey()) == null ||
                    !locale.getKeywordValue(entry.getKey()).equalsIgnoreCase(entry.getValue())) {
                throw new InvalidCollationKeywordException(scheme, entry.getKey(), entry.getValue());
            }
        }
    }

    private static void setCollatorStrength(RuleBasedCollator collator, CollationSpecifier specifier) {
        if (specifier.caseSensitive() && specifier.accentSensitive()) {
            collator.setStrength(Collator.TERTIARY);
            collator.setCaseLevel(false);
        }
        else if (specifier.caseSensitive() && !specifier.accentSensitive()) {
            collator.setCaseLevel(true);
            collator.setStrength(Collator.PRIMARY);
        }
        else if (!specifier.caseSensitive() && specifier.accentSensitive()) {
            collator.setStrength(Collator.SECONDARY);
            collator.setCaseLevel(false);
        }
        else {
            collator.setStrength(Collator.PRIMARY);
            collator.setCaseLevel(false);
        }
    }

    private static ULocale setKeywords(ULocale locale, Map<String, String> keywordsToValues) {
        for (Entry<String, String> entry : keywordsToValues.entrySet()) {
            locale = locale.setKeywordValue(entry.getKey(), entry.getValue());
        }
        return locale;
    }

    private static void checkLocale(ULocale locale, String scheme) {
        if (!locales.contains(locale))
            throw new UnsupportedCollationException(scheme);
    }

    private static Boolean isCaseShortcut(String caseOrNot) {
        return caseOrNot.equalsIgnoreCase(CASE_INSENSITIVE) ||
               caseOrNot.equalsIgnoreCase(CASE_SENSITIVE);
    }

    private static Boolean isAccentShortcut(String accentOrNot) {
        return accentOrNot.equalsIgnoreCase(ACCENT_INSENSITIVE) ||
               accentOrNot.equalsIgnoreCase(ACCENT_SENSITIVE);
    }

    private void addKeyword(String keywordAndValue, String scheme) {
        String[] pieces = keywordAndValue.split("=");
        if (pieces.length != 2) {
            throw new InvalidCollationSchemeException(scheme, "keywords and values must be of the form `keyword=value`");
        }
        keywordsToValues.put(pieces[0], pieces[1]);
    }

    public boolean caseSensitive() {
        return caseSensitive;
    }

    public boolean accentSensitive() {
        return accentSensitive;
    }

    public HashMap<String, String> getKeywordsAndValues() {
        return keywordsToValues;
    }

    public Boolean shouldSetStrength() {
        return keywordsToValues.isEmpty();
    }

    public String toString() {
        StringBuilder builder = new StringBuilder().append(locale);
        if (!keywordsToValues.isEmpty()) {
            for (Entry<String, String> entry : keywordsToValues.entrySet()) {
                builder.append("_")
                       .append(entry.getKey())
                       .append("=")
                       .append(entry.getValue());
            }
        }
        else {
            builder.append("_")
                   .append(caseSensitive ? CASE_SENSITIVE : CASE_INSENSITIVE)
                   .append("_")
                   .append(accentSensitive ? ACCENT_SENSITIVE : ACCENT_INSENSITIVE);
        }
        return builder.toString();
    }
}
TOP

Related Classes of com.foundationdb.server.collation.CollationSpecifier

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.