Package qurtext.factory

Source Code of qurtext.factory.SectionFactory

/* Copyright (C) Abu Rizal, 2009.
*
* This file is part of QurText.
*
* QurText is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* QurText is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with QurText. If not, see <http://www.gnu.org/licenses/>.
*/
package qurtext.factory;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.jdo.PersistenceManager;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.json.JSONException;
import org.json.JSONObject;
import org.xml.sax.InputSource;

import qurtext.domain.Chapter;
import qurtext.domain.Section;
import qurtext.domain.Verse;

public class SectionFactory {
  private static final String TRANSLATOR = "Free_Minds";
  static final HashMap<Character,Character> buckwalter = new HashMap<Character, Character>();
  static {
    buckwalter.put('\u0621','\'');
    buckwalter.put('\u0622','|');
    buckwalter.put('\u0623','>');
    buckwalter.put('\u0624','&');
    buckwalter.put('\u0625','<');
    buckwalter.put('\u0626','}');
    buckwalter.put('\u0627','A');
    buckwalter.put('\u0628','b');
    buckwalter.put('\u0629','p');
    buckwalter.put('\u062A','t');
    buckwalter.put('\u062B','v');
    buckwalter.put('\u062C','j');
    buckwalter.put('\u062D','H');
    buckwalter.put('\u062E','x');
    buckwalter.put('\u062F','d');
    buckwalter.put('\u0630','*');
    buckwalter.put('\u0631','r');
    buckwalter.put('\u0632','z');
    buckwalter.put('\u0633','s');
    buckwalter.put('\u0634','$');
    buckwalter.put('\u0635','S');
    buckwalter.put('\u0636','D');
    buckwalter.put('\u0637','T');
    buckwalter.put('\u0638','Z');
    buckwalter.put('\u0639','E');
    buckwalter.put('\u063A','g');
    buckwalter.put('\u0640','_');
    buckwalter.put('\u0641','f');
    buckwalter.put('\u0642','q');
    buckwalter.put('\u0643','k');
    buckwalter.put('\u0644','l');
    buckwalter.put('\u0645','m');
    buckwalter.put('\u0646','n');
    buckwalter.put('\u0647','h');
    buckwalter.put('\u0648','w');
    buckwalter.put('\u0649','Y');
    buckwalter.put('\u064A','y');
    buckwalter.put('\u064B','F');
    buckwalter.put('\u064C','N');
    buckwalter.put('\u064D','K');
    buckwalter.put('\u064E','a');
    buckwalter.put('\u064F','u');
    buckwalter.put('\u0650','i');
    buckwalter.put('\u0651','~');
    buckwalter.put('\u0652','o');
    buckwalter.put('\u0670','`');
    buckwalter.put('\u0671','{');
    buckwalter.put('\u067E','P');
    buckwalter.put('\u0686','J');
    buckwalter.put('\u06A4','V');
    buckwalter.put('\u06AF','G');
  };
  static final String BAMA_URL_STRING = "http://www.xrce.xerox.com/cgi-bin/mltt/arabic/xarabic.pl?showglosses=YES&showscript=NO&showqpart=YES&showimpv=YES&show3pfplv=YES&show2pfsgv=YES&show1psgv=YES&show1psgposs=YES&showallcase=YES&showallmood=YES&itranslitname=Buckwalter&input=";
    static final String GOOGLE_TRANSLATE_URL_STRING = "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&langpair=ar%7Cen&q=";
  private static final int WORD_COUNT_PER_REQUEST = 10;

  private SourceFactory sourceFactory;
  private TranslationParser translationParser;
 
  public SectionFactory() {
    sourceFactory=new SourceFactory();
  }

  public void initSectionVerses(int chapterNo, int verseNo) {
    translationParser= new TranslationParser("WEB-INF/quran.txt");
    PersistenceManager pm = PMF.get().getPersistenceManager();
    try {
      Section section = getSection(chapterNo, verseNo, pm);

      JSONObject verses = getYaqbVerse(section);
          Verse previousVerse=null;
          String previousTransliteration="";;
      for (int i=section.getStartVerse();i<=section.getEndVerse();i++) {
        JSONObject versions=verses.getJSONObject("" + section.getChapterNo() + ":" + i);
        String arabic = getTanzilText(section.getChapterNo(), i);
        String transliteration = versions.getString("Transliteration");
        transliteration=transliteration.replaceAll("[A][A]", "'");
        transliteration=transliteration.replaceAll("[<][b][>]", "<s>");
        transliteration=transliteration.replaceAll("[ ][<][/][b][>]", "</s>");
        transliteration=transliteration.replaceAll("[<][/][b][>]", "</s>");
        transliteration=transliteration.replaceAll("[<][s][t][r][o][n][g][>]", "<s>");
        transliteration=transliteration.replaceAll("[<][/][s][t][r][o][n][g][>]", "</s>");
        String token="" + section.getChapterNo() + ":" + i;
        String translation = versions.getString(TRANSLATOR);
        if ("Free_Minds".equals(TRANSLATOR)) {
          translation=translationParser.getTranslations().get(token).trim();
        }
        TreeSet<String> topicList = translationParser.getVerseTopics().get(token);
        String topics = flattenStringCollection(topicList);
        transliteration = repairShiftedTransliteration(arabic,
            transliteration);
        Verse verse=section.getVerse(chapterNo,i);
        if (null==verse) {
          verse=new Verse();
          section.addVerse(verse);
        }
        verse.setChapterNo(chapterNo);
        verse.setVerseNo(i);
        verse.setText(arabic);
        verse.setUthmani(getTanzilUthmani(section.getChapterNo(), i));
        verse.setTransliteration(transliteration);
        verse.setTranslation(translation);
        verse.setTopics(topics);
        pm.makePersistent(verse);
        if (previousTransliteration.equals(transliteration)) {//suspicious;
          repairTransliteration(verse);
          repairTransliteration(previousVerse);
        }
        previousTransliteration = transliteration;
        previousVerse=verse;
      }
    } catch (JSONException e) {
      e.printStackTrace();
    } finally {
      pm.close();
    }
  }

  private String flattenStringCollection(Collection<String> texts) {
    StringBuffer result=new StringBuffer();
    if (null!=texts)
      for (String text:texts) {
        result.append(text);
        result.append(',');
      }
    if (result.length()>0) result.setLength(result.length()-1);
    return result.toString();
  }

  private JSONObject getYaqbVerse(Section section) throws JSONException {
    StringBuffer message = new StringBuffer();
    for (int i=section.getStartVerse();i<=section.getEndVerse();i++) {
      message.append(section.getChapterNo());
      message.append(":");
      message.append(i);
      message.append(" ");
    }

//      message=URLEncoder.encode(message, "UTF-8");
    String urlPath = "http://www.yaqb.org/lookupReferences";
    String params = "lookup_input=" + message;
     // http://www.yaqb.org/lookupReferences?lookup_input=1:5+1:1 (Post operation)
    String result = sourceFactory.getSourceContent(urlPath, params, true, "yaqb="+message);
    JSONObject jsRoot = new JSONObject(result);
    JSONObject verses=jsRoot.getJSONObject("verses");
    return verses;
  }

  private String getTanzilText(int chapterNo, int verseNo) {
    String filename = "WEB-INF/quran-simple.xml";
    return getTanzil(chapterNo, verseNo, filename);
  }

  private String getTanzilUthmani(int chapterNo, int verseNo) {
    String filename = "WEB-INF/quran-simple-enhanced.xml";
    return getTanzil(chapterNo, verseNo, filename);
  }

  /**
   * @param chapterNo
   * @param verseNo
   * @param filename
   * @return
   */
  private String getTanzil(int chapterNo, int verseNo, String filename) {
    File xmlDocument =
        new File(filename);
    XPathFactory factory = new org.apache.xpath.jaxp.XPathFactoryImpl();
    XPath xPath=factory.newXPath();
    try {
      XPathExpression  xPathExpression=
          xPath.compile("/quran/sura[@index='"+chapterNo+"']/aya[@index='"+verseNo+"']/@text");
      InputSource inputSource =
          new InputSource(new
               FileInputStream(xmlDocument));
      return xPathExpression.evaluate(inputSource);
    } catch (XPathExpressionException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    }
    return null;
  }

  @SuppressWarnings("unchecked")
  private Section getSection(int chapterNo, int verseNo, PersistenceManager pm) {
    String query = "select from " + Section.class.getName() + " where chapterNo==" + chapterNo + " && startVerse==" + verseNo;
    Section section1 = ((Collection<Section>) pm.newQuery(query)
        .execute()).iterator().next();
    return section1;
  }

  public Section getSection(int chapterNo, int verseNo) {
    PersistenceManager pm = PMF.get().getPersistenceManager();
    try {
      return getSection(chapterNo,verseNo,pm);
    } catch (NoSuchElementException e) {
      return null;
    } finally {
      pm.close();
    }
  }

  private String unicodeToBuckwalter(String arabic) {
    StringBuffer result=new StringBuffer(arabic.length());
    for (char c:arabic.toCharArray()) {
      Character newC=buckwalter.get(c);
      if (null==newC) newC=c;
      result.append(newC);
    }
    return result.toString();
  }

  private void repairTransliteration(Verse verse) {
    if (verse.getChapterNo()==2 && verse.getVerseNo()==16) {
      verse.setTransliteration("Ol<u>a</u>-ika alla<u>th</u>eena ishtarawoo a<s>l</s><u>dd</u>al<u>a</u>lata bi<s>a</s>lhud<u>a</u> fam<u>a</u> rabi<u>h</u>at tij<u>a</u>ratuhum wam<u>a</u> k<u>a</u>noo muhtadeen<s>a</s>");
      return;
    } else if (verse.getChapterNo()==2 && verse.getVerseNo()==12) {
      verse.setTransliteration("Al<u>a</u> innahum humu almufsidoona wal<u>a</u>kin l<u>a</u> yash'uroon<s>a</s>");
      return;
    } else if (verse.getChapterNo()==7 && verse.getVerseNo()==45) {
      verse.setTransliteration("Alla<u>th</u>eena ya<u>s</u>uddoona 'an sabeeli All<u>a</u>hi wayabghoonah<u>a</u> 'iwajan wahum bi<s>a</s>l-<u>a</u>khirati k<u>a</u>firoon<s>a</s>");
      return;
    } else if (verse.getChapterNo()==12 && verse.getVerseNo()==12) {
      verse.setTransliteration("Arsilhu ma'an<u>a</u> ghadan yarta' wayal'ab wa-inn<u>a</u> lahu la<u>ha</u>fi<i><u>th</u></i>oon<s>a</s>");
      return;
    }
    verse.setTransliteration(markTransliteration(verse.getText(), verse.getTransliteration()));
  }

  private String repairShiftedTransliteration(String arabic,
      String transliteration) {
    int arabicLength = arabic.split("[ ]").length;
    int translitLength = transliteration.split("[ ]").length;
    if (translitLength==arabicLength) return transliteration;
    if (translitLength>arabicLength && transliteration.startsWith("Awa ")) {
      transliteration=transliteration.replaceFirst("[ ]", "");
    } else if (translitLength>arabicLength && transliteration.indexOf(" awa ")>=0) {
      transliteration=transliteration.replaceAll("[ ][a][w][a][ ]", " awa");
    } else if (translitLength>arabicLength && transliteration.indexOf("ayna m<u>a</u>")>=0) { //33:61
      transliteration=transliteration.replaceAll("[a][y][n][a][ ][m][<][u][>][a][<][/][u][>]", "aynam<u>a</u>");
    } else if (translitLength>arabicLength && transliteration.indexOf(" ba'da m<u>a</u> ")>=0) {
      transliteration=transliteration.replaceAll("[ ][b][a]['][d][a][ ][m][<][u][>][a][<][/][u][>][ ]", " ba'dam<u>a</u> ");
    } else if (translitLength>arabicLength && transliteration.indexOf(" likay l<u>a</u> ")>=0) {
      transliteration=transliteration.replaceAll("[ ][l][i][k][a][y][ ][l][<][u][>][a][<][/][u][>][ ]", " likayl<u>a</u> ");
    } else if (translitLength<arabicLength && transliteration.indexOf("aynam<u>a</u>")>=0) {
      transliteration=transliteration.replaceAll("[a][y][n][a][m][<][u][>][a][<][/][u][>]", "ayna m<u>a</u>");
    } else if (translitLength<arabicLength && transliteration.startsWith("H<u>a</u>antum")) {
      transliteration=transliteration.replaceAll("[H][<][u][>][a][<][/][u][>][a][n][t][u][m]", "H<u>a</u> antum");
    } else if (translitLength<arabicLength && transliteration.indexOf("feem<u>a</u>")>=0) {
      transliteration=transliteration.replaceAll("[f][e][e][m][<][u][>][a][<][/][u][>]", "fee m<u>a</u>");
    } else if (translitLength<arabicLength && transliteration.startsWith("Arsilhu")) { //12:11
      transliteration="Q<u>a</u>loo y<u>a</u> ab<u>a</u>n<u>a</u> m<u>a</u> laka l<u>a</u> ta/mann<u>a</u> 'al<u>a</u> yoosufa wa-inn<u>a</u> lahu lan<u>as</u>i<u>h</u>oon<s>a</s>";
    } else if (translitLength<arabicLength && transliteration.indexOf("yabnaomma")>=0) { //20:94
      transliteration=transliteration.replaceAll("[y][a][b][n][a][o][m][m][a]", "ya bna omma");
    } else if (translitLength<arabicLength && transliteration.indexOf("mimm<u>a</u>")>=0) { //30:28
      transliteration=transliteration.replaceAll("[m][i][m][m][<][u][>][a][<][/][u][>]", "min m<u>a</u>");
    } else if (translitLength<arabicLength && transliteration.indexOf("m<u>a</u>lee")>=0) { //40:41
      transliteration=transliteration.replaceAll("[m][<][u][>][a][<][/][u][>][l][e][e]", "m<u>a</u> lee");
    } else if (translitLength<arabicLength && transliteration.indexOf("amman")>=0) { //41:40
      transliteration=transliteration.replaceAll("[a][m][m][a][n]", "am man");
    } else if (translitLength<arabicLength && transliteration.startsWith("Waallawi")) { //72:16
      transliteration=transliteration.replaceAll("[W][a][a][l][l][a][w][i]", "Waa llawi");
    }
    return transliteration;
  }

  private String markTransliteration(String arabic, String transliteration) {
    int arabicLength = arabic.split("[ ]").length;
    if (arabicLength!=transliteration.split("[ ]").length) {
      transliteration = "";
      for (int j=0;j<arabicLength;j++) {
        transliteration+="TODO ";
      }
      transliteration+="TODO";
    }
    return transliteration;
  }

  @SuppressWarnings("unchecked")
  public List<Section> getAllSectionList() {
    PersistenceManager pm = PMF.get().getPersistenceManager();
    try {
      String query = "select from " + Section.class.getName() + " order by sectionNo";
      List<Section> resultList = new ArrayList<Section>();
      resultList.addAll((Collection<Section>) pm.newQuery(query)
          .execute());
      return resultList;
    } finally {
      pm.close();
    }
  }

  @SuppressWarnings("unchecked")
  public String initSectionLiterals(String start) {
    PersistenceManager pm = PMF.get().getPersistenceManager();
    try {
      String[] location=start.split("[:]");
      int chapterNo=Integer.valueOf(location[0]);
      int verseNo=Integer.valueOf(location[1]);
      int wordNo=Integer.valueOf(location[2]);
      Chapter chapter = ((Collection<Chapter>) pm.newQuery("select from " + Chapter.class.getName() + " where chapterNo==" + chapterNo)
          .execute()).iterator().next();
      for (Section section:chapter.getSections()) {
        if (section.getStartVerse()>verseNo || section.getEndVerse()<verseNo) continue;
        for (Verse verse:section.getVerses()) {
          if (verse.getVerseNo()!=verseNo) continue;
          String[] texts = verse.getText().replaceAll("[\u0640]", "").split("[ ]");
          int maxWordPerRequest=wordNo+WORD_COUNT_PER_REQUEST;
          while (wordNo<texts.length && wordNo<maxWordPerRequest) {
            if (wordNo==0) verse.setLiteral(unicodeToBuckwalter(verse.getText().replaceAll("[\u0640]", "")));
            String[] words=verse.getLiteral().trim().split("[ ]");
            String bamaResult=updateLiteralBama(words[wordNo]);
            if (null==bamaResult) {
              String googleResult=updateLiteralGoogle(texts[wordNo]);
              if (null!=googleResult) {
                words[wordNo]=googleResult;
              }
            } else {
              if ( ! "SKIP".equals(bamaResult))
                words[wordNo]=bamaResult;
            }
            StringBuffer result=new StringBuffer();
            for (String word:words) {
              result.append(word);
              result.append(" ");
            }
            verse.setLiteral(result.toString().trim());
            wordNo++;
          }
          if (wordNo>=texts.length) {
            wordNo=0;
            verseNo++;
          }
          if (verseNo>section.getEndVerse()) return null;
          return ""+chapterNo+":"+verseNo+":"+wordNo;
        }
      }
    } finally {
      pm.close();
    }
    return null;
  }

  private String updateLiteralGoogle(String text) {
    String result=null;
        JSONObject jsRoot;
    try {
      String[] urlParts = (GOOGLE_TRANSLATE_URL_STRING + URLEncoder.encode(text,"UTF-8")).split("[?]");
      result=sourceFactory.getSourceContent(urlParts[0], urlParts[1],false, "google="+text);
      jsRoot = new JSONObject(result);
          JSONObject responseData=jsRoot.getJSONObject("responseData");
          result=responseData.getString("translatedText");
          if (result==null) return null;
          if ("".equals(result)) return null;
          result=result.replaceAll("[ ]", "-");
    } catch (JSONException e) {
      return null;
    } catch (UnsupportedEncodingException e) {
      return null;
    }

    return result;
  }

  //  static private final Pattern p = Pattern.compile("[<][T][R][ ][V][A][L][I][G][N][=][\"][t][o][p][\"][ ][A][L][I][G][N][=][\"][l][e][f][t][\"][>][<][T][D][>].*[<][/][T][D][>][<][/][T][R][>]");
  static private final Pattern p = Pattern.compile("[<][T][A][B][L][E].*[<][/][T][A][B][L][E][>]");

  //Input Word:</FONT>&nbsp;&nbsp;AloHamodu<HR>
  //<TR VALIGN="top" ALIGN="left"><TD>the</TD><TD>commendation<BR>praise</TD></TR>
  //<FONT SIZE=4>Solution 2:</FONT>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Aalr~aHom`n<PRE>{al-raHom`n Funcwa</PRE>
  private String updateLiteralBama(String buckString) {
    String[] urlParts = (BAMA_URL_STRING+buckString.replaceAll("&","%26")).split("[?]");
    String result=sourceFactory.getSourceContent(urlParts[0], urlParts[1],false, "bama="+buckString);
    Matcher m = p.matcher(result);
    if (m.find()) {
        result = m.group(0);
        result=result.replaceAll("[<][/][T][D][>][<][T][D][>]", "-");
        result=result.replaceAll("[<][B][R][>]", "/");
        result=result.replaceAll("[<F][O][N][T][ ][S][I][Z][E][=][4][>][S][o][l][u][t][i][o][n][ ].*?[<][/][P][R][E][>]", "");
        result=result.replaceAll("\\<.*?\\>", "");
        result=result.replaceAll("[ ]", "-");
    } else {
      return null;
    }
    if (result.length()>300) return "SKIP";
    return result;
  }
}
TOP

Related Classes of qurtext.factory.SectionFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.