Package com.multysite.util

Source Code of com.multysite.util.StringHelper

package com.multysite.util;

import java.io.UnsupportedEncodingException;
import java.util.Locale;
import java.util.regex.Pattern;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;

public class StringHelper {

  private static boolean isLoadLanguageProfile = false;

  public static boolean checkRemove(String str) {
    boolean boo = false;
    str = str.toLowerCase();

    String remove = "theo,Nhưng,Vì,Có,Các,Vì thế,Chính vì thế,Thậm chí,Tuy nhiên,Vì vậy,Nhiều,Đừng,Tùy,Hiện nay,Tuỳ theo,Tuy vậy,sau,Nay,Thông,Thế mà,Bữa nay,Kế đến,Người,Chẳng,Trong,Chuyện,Không,Những,Chiếc,trước,source,chuyến,thường,nguồn,chúng";

    String[] listRemove = remove.split(",");

    for (int i = 0; i < listRemove.length; i++) {
      if (str.startsWith(listRemove[i].toLowerCase()))
        boo = true;
    }
    if (str.equals("Source")) {
      boo = true;
    }

    return boo;
  }

  public static String keyword(String str) {
    String string = "";
    str = str.trim();
    str = str.replaceAll("[ ]*[.]+[ ]*", "daucham");
    str = str.replaceAll("[ ]+", "-");
    str = str.replaceAll("[\\,\\=\\+.!@#$%^&*()_]+", ".-")
        .replace("“", ".").replace("”", ".");
    str = str.replaceAll("daucham", ".");

    String[] words = str.split("-");
    int count = 0;
    String check = "";
    String[] check_arr = new String[5];
    String s_word = "";
    String s_check = "";

    for (int i = 0; i < (words.length - 1); i++) {
      if (Pattern
          .matches(
              "^[A-ZĐÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝĂĨŨƠƯẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰ]+[\\w\\W]+",
              words[i])
          && count <= 4
          && i != 0
          && !Pattern.matches("^[^.]+[.]+[\\w\\W]*", words[i])) {
        String wk = "";
        if (i < (words.length - 2)
            && Pattern
                .matches(
                    "^[A-ZĐÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝĂĨŨƠƯẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰ]+[\\w\\W]+",
                    words[i + 1])
            && !Pattern
                .matches("^[^.]+[.]+[\\w\\W]+", words[i + 1])
            && !Pattern.matches("^[^.]+[.]+[\\w\\W]*", words[i])) {
          words[i] = words[i] + " " + words[i + 1];
          wk = words[i];
          i = i + 1;
        }
        if (i < (words.length - 2)
            && Pattern
                .matches(
                    "^[A-ZĐÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝĂĨŨƠƯẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰ]+[\\w\\W]+",
                    words[i + 1])
            && !Pattern
                .matches("^[^.]+[.]+[\\w\\W]+", words[i + 1])
            && !Pattern.matches("^[^.]+[.]+[\\w\\W]*", words[i])) {
          words[i] = words[i - 1] + " " + words[i + 1];
          wk = words[i];
          i = i + 1;
        }

        if (i < (words.length - 2)
            && Pattern
                .matches(
                    "^[A-ZĐÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝĂĨŨƠƯẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰ]+[\\w\\W]+",
                    words[i + 1])
            && !Pattern
                .matches("^[^.]+[.]+[\\w\\W]+", words[i + 1])
            && !Pattern.matches("^[^.]+[.]+[\\w\\W]*", words[i])) {
          words[i] = words[i - 1] + " " + words[i + 1];
          wk = words[i];
          i = i + 1;
        }

        check = "";
        if (!wk.equals(""))
          s_word = wk;
        else
          s_word = words[i];
        for (int j = 0; j < check_arr.length; j++) {
          if (check_arr[j] != null)
            s_check = check_arr[j].toLowerCase();
          if (s_word.toLowerCase().equals(s_check)) {
            check = "1";
          }
        }
        if (checkRemove(s_word))
          check = "1";

        if (check.equals("") && s_word.length() >= 5) {
          if (count == 0) {
            string += s_word
                .replaceAll("[\\,\\=\\+ .!@#$%^&*()_-]+", " ")
                .replace("“", "").trim();
          } else {
            string += ","
                + s_word.replaceAll(
                    "[\\,\\=\\+ .!@#$%^&*()_-]+", " ")
                    .replace("“", "").trim();
          }
          check_arr[count] = s_word;
          count++;

        }
      }
      if (count >= 6) {
        break;
      }
    }

    return string;
  }

  public static String replace(String str) {
    str = str.replaceAll("[\\'\\/]+", "");
    str = str
        .replaceAll(
            "[-\\!\"\\@\\#\\,\\.\\$%\\^&\\*\\(\\)_\\+\\=\\?\\;\\:\\~\\`\\{\\}\\[\\]\\|\\\\]+",
            "-");
    str = str.replaceAll(" ", "-");
    str = str.replaceAll("[-]+", "-");
    str = str.replaceAll("[\\W]+", "-");
    str = str.replaceAll("^-", "");
    str = str.replaceAll("-$", "");
    str = str.toLowerCase();
    return str;
  }

  public static String cleanContent(String str) {
    str = str.replaceAll("width[ ]*:[ ]*[0-9a-zA-Z ]+[;]*", "");
    return str;
  }

  public static String cutDescription(String str) {
    String des = "";
    str = str.replaceAll("\\<.*?\\>", "").trim();
    if (str.length() > 250) {
      des = str.substring(0, 250);
      des = des.replaceAll("[ ]+[^ ]*$", " ...");
    } else {
      des = str;
    }
    return des;
  }

  public static String replaceSpace(String str) {
    str = str.replaceAll("[\\'\\/]+", "");
    str = str
        .replaceAll(
            "[-\\!\"\\@\\#\\,\\.\\$%\\^&\\*\\(\\)_\\+\\=\\?\\;\\:\\~\\`\\{\\}\\[\\]\\|\\\\]+",
            "-");
    str = str.replaceAll(" ", "-");
    str = str.replaceAll("[-]+", "-");
    str = str.replaceAll("^-", "");
    str = str.replaceAll("-$", "");
    str = str.toLowerCase();
    return str;
  }

  public static String replaceVietnamese(String str) {
    String[] aArray = { "ấ", "ầ", "ẩ", "ẫ", "ậ", "Ấ", "Ầ", "Ẩ", "Ẫ", "Ậ",
        "ắ", "ằ", "ẳ", "ẵ", "ặ", "Ắ", "Ằ", "Ẳ", "Ẵ", "Ặ", "á", "à",
        "ả", "ã", "ạ", "â", "ă", "Á", "À", "Ả", "Ã", "Ạ", "Â", "Ă" };
    String[] eArray = { "ế", "ề", "ể", "ễ", "ệ", "Ế", "Ề", "Ể", "Ễ", "Ệ",
        "é", "è", "ẻ", "ẽ", "ẹ", "ê", "É", "È", "Ẻ", "Ẽ", "Ẹ", "Ê" };
    String[] iArray = { "í", "ì", "ỉ", "ĩ", "ị", "Í", "Ì", "Ỉ", "Ĩ", "Ị" };
    String[] oArray = { "ố", "ồ", "ổ", "ỗ", "ộ", "Ố", "Ồ", "Ổ", "Ô", "Ộ",
        "ớ", "ờ", "ở", "ỡ", "ợ", "Ớ", "Ờ", "Ở", "Ỡ", "Ợ", "ó", "ò",
        "ỏ", "õ", "ọ", "ô", "ơ", "Ó", "Ò", "Ỏ", "Õ", "Ọ", "Ô", "Ơ" };
    String[] uArray = { "ứ", "ừ", "ử", "ữ", "ự", "Ứ", "Ừ", "Ử", "Ữ", "Ự",
        "ú", "ù", "ủ", "ũ", "ụ", "ư", "Ú", "Ù", "Ủ", "Ũ", "Ụ", "Ư" };
    String[] yArray = { "ý", "ỳ", "ỷ", "ỹ", "ỵ", "Ý", "Ỳ", "Ỷ", "Ỹ", "Ỵ" };
    String[] dArray = { "đ", "Đ" };
    for (String item : aArray) {
      str = str.replaceAll(item, "a");
    }
    for (String item : eArray) {
      str = str.replaceAll(item, "e");
    }
    for (String item : iArray) {
      str = str.replaceAll(item, "i");
    }
    for (String item : oArray) {
      str = str.replaceAll(item, "o");
    }
    for (String item : uArray) {
      str = str.replaceAll(item, "u");
    }
    for (String item : yArray) {
      str = str.replaceAll(item, "y");
    }
    for (String item : dArray) {
      str = str.replaceAll(item, "d");
    }
    str = str.replaceAll("[\\'\\/]+", "");
    str = str
        .replaceAll(
            "[-\\!\"\\@\\#\\,\\.\\$%\\^&\\*\\(\\)_\\+\\=\\?\\;\\:\\~\\`\\{\\}\\[\\]\\|\\\\]+",
            "-");
    str = str.replaceAll(" ", "-");
    str = str.replaceAll("[-]+", "-");
    // str = str.replaceAll("[\\W]+", "-");
    str = str.replaceAll("^-", "");
    str = str.replaceAll("-$", "");
    str = str.toLowerCase();
    return str;

  }

  public static String tag(String str) {
    str = str.replaceAll("\\#", "Sharp");
    str = str.replaceAll("\\+", "Plus");
    str = str.replaceAll("[^.\\w]+", "-");
    str = str.replaceAll("^-", "");
    str = str.replaceAll("-$", "");
    str = str.toLowerCase();
    return str;
  }

  public static String remove(String str) {
    str = str.replaceAll("[\'\"]+", "&quot;");
    return str;
  }

  public static boolean StringIsNullOrEmpty(String string) {
    if (string == null || string.equals("")) {
      return true;
    }
    return false;
  }

  public static String getAliasByLanguage(String title) {
    String alias = "";
    String lang = "";
    try {
      if (!isLoadLanguageProfile) {
        DetectorFactory.loadProfile("language_detect");
        isLoadLanguageProfile = true;
      }
      Detector detector = DetectorFactory.create();
      detector.append(title);
      lang = detector.detect();
    } catch (LangDetectException e) {
      e.printStackTrace();
    }
    int type = 1;
    if (lang.equalsIgnoreCase("en")) {
      type = 1;
    }
    if (lang.equalsIgnoreCase("es")) {
      type = 1;
    }
    if (lang.equalsIgnoreCase("zh-cn")) {
      type = 2;
    }
    if (lang.equalsIgnoreCase("zh-tw")) {
      type = 2;
    }
    if (lang.equalsIgnoreCase("ko")) {
      type = 2;
    }
    if (lang.equalsIgnoreCase("ja")) {
      type = 2;
    }
    if (lang.equalsIgnoreCase("vi")) {
      type = 3;
    }
    if (type == 1) {
      alias = replace(title);
    } else if (type == 2) {
      alias = replaceSpace(title);
    } else if (type == 3) {
      alias = replaceVietnamese(title);
    }
    if (alias.length() < 8) {
      return title.replaceAll(" ", "-").toLowerCase();
    } else {
      return alias.toLowerCase();
    }
  }

  public static String getUTF8FromString(String input) {
    String result = "";
    try {
      result = new String(input.getBytes(("ISO-8859-1")), "UTF-8");
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    }
    return result;
  }
}
TOP

Related Classes of com.multysite.util.StringHelper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.