/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.international.arabic.pipeline.Mapper;
import java.io.File;
import java.io.Serializable;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Normalizes Arabic lexical items (leaf tokens) for downstream processing.
 *
 * <p>A token that contains at least one character in the Unicode range U+0600 to U+06FF is
 * normalized with the Unicode rules; any other token is normalized with the ASCII rules that
 * the constants and helper names refer to as Buckwalter ("bw") transliteration. In both cases
 * the normalization removes diacritics, the tatweel (elongation) character and the characters
 * matched by the "Quran" patterns, collapses the alef variants to a single alef, and strips a
 * leading or trailing hyphen (clitic marker).
 *
 * <p>Tokens whose parent tag is {@code PUNC}, {@code LATIN} or {@code -NONE-} are returned
 * untouched (apart from trimming).
 */
public class DefaultLexicalMapper
implements Mapper,
Serializable {
    private static final long serialVersionUID = -197782849766133026L;

    /** Any single character in U+0600 to U+06FF; used to choose between the two rule sets. */
    private static final Pattern utf8ArabicChart = Pattern.compile("[\u0600-\u06ff]");

    /** Replacement used for every alef variant in the transliterated (ASCII) form. */
    private static final String bwAlefChar = "A";
    /** Characters removed as diacritics in the transliterated form: {@code F N K a u i ~ o}. */
    private static final Pattern bwDiacritics = Pattern.compile("F|N|K|a|u|i|\\~|o");
    /** Tatweel in the transliterated form (underscore). */
    private static final Pattern bwTatweel = Pattern.compile("_");
    /** Alef variants in the transliterated form; all are rewritten to {@link #bwAlefChar}. */
    private static final Pattern bwAlef = Pattern.compile("\\{|\\||>|<");
    /** Character removed by the "Quran" rule in the transliterated form (backtick). */
    private static final Pattern bwQuran = Pattern.compile("`");

    /** One or more ASCII / Latin-1 / general punctuation and currency characters. */
    private static final Pattern latinPunc = Pattern.compile("[!-/:-@\\u005B\\]^-`{-~\u00a1-\u00bf\u2010-\u2027\u2030-\u205e\u20a0-\u20b5]+");
    /** One or more punctuation characters from the Arabic Unicode block. */
    private static final Pattern arabicPunc = Pattern.compile("[\u0609-\u060d\u061b-\u061f\u066a\u066c-\u066d\u06d4]+");

    /** Diacritic code points removed from Unicode input (U+064B to U+0652). */
    private static final Pattern utf8Diacritics = Pattern.compile("\u064e|\u064b|\u064f|\u064c|\u0650|\u064d|\u0651|\u0652");
    /** Tatweel in Unicode input (U+0640). */
    private static final Pattern utf8Tatweel = Pattern.compile("\u0640");
    /** Alef variants in Unicode input; all are rewritten to U+0627. */
    private static final Pattern utf8Alef = Pattern.compile("\u0627|\u0625|\u0623|\u0622|\u0671");
    /** Code points removed by the "Quran" rule in Unicode input. */
    private static final Pattern utf8Quran = Pattern.compile("[\u0615-\u061a\u06d6-\u06e5]");

    /** A hyphen at the very start or the very end of a token. */
    private static final Pattern cliticMarker = Pattern.compile("^-|-$");
    /** Tokens of the form {@code -XXX-} (upper-case ASCII letters between hyphens). */
    private static final Pattern reservedSymbol = Pattern.compile("-[A-Z]+-");
    /** One or more decimal digits anywhere in a token. */
    private static final Pattern hasNum = Pattern.compile("\\d+");

    /**
     * Parent tags whose children are never normalized. Kept as an instance field (rather than a
     * static constant) so the serialized form of this class is unchanged.
     */
    private final Set<String> parentTagsToEscape = new HashSet<String>();

    /** Creates a mapper that leaves children of {@code PUNC}, {@code LATIN} and {@code -NONE-} alone. */
    public DefaultLexicalMapper() {
        this.parentTagsToEscape.add("PUNC");
        this.parentTagsToEscape.add("LATIN");
        this.parentTagsToEscape.add("-NONE-");
    }

    /**
     * Normalizes a token that contains Arabic-block characters.
     *
     * @param element the trimmed token; must not be null
     * @return the normalized token, or {@code element} itself when it consists solely of
     *         punctuation
     */
    private static String mapUtf8(String element) {
        // Pure punctuation (Latin or Arabic) is passed through untouched.
        Matcher latinPuncOnly = latinPunc.matcher(element);
        Matcher arbPuncOnly = arabicPunc.matcher(element);
        if (latinPuncOnly.matches() || arbPuncOnly.matches()) {
            return element;
        }

        // Remove diacritics.
        element = utf8Diacritics.matcher(element).replaceAll("");

        // Remove tatweel, unless that single character is all that is left.
        if (element.length() > 1) {
            element = utf8Tatweel.matcher(element).replaceAll("");
        }

        // Collapse every alef variant to the bare alef (U+0627).
        element = utf8Alef.matcher(element).replaceAll("\u0627");

        // Remove the characters covered by the "Quran" pattern.
        element = utf8Quran.matcher(element).replaceAll("");

        // Strip a leading/trailing hyphen, but keep a token that is just one character.
        if (element.length() > 1) {
            element = cliticMarker.matcher(element).replaceAll("");
        }
        return element;
    }

    /**
     * Normalizes a token that contains no Arabic-block characters, using the transliteration
     * ("bw") rules.
     *
     * @param element the trimmed token; must not be null
     * @return the normalized token, or {@code element} itself when it consists solely of
     *         Latin punctuation
     */
    private static String mapBuckwalter(String element) {
        // Pure punctuation is passed through untouched.
        Matcher puncOnly = latinPunc.matcher(element);
        if (puncOnly.matches()) {
            return element;
        }

        // Remove diacritics.
        element = bwDiacritics.matcher(element).replaceAll("");

        // Remove tatweel, unless that single character is all that is left.
        if (element.length() > 1) {
            element = bwTatweel.matcher(element).replaceAll("");
        }

        // Collapse every alef variant to "A".
        element = bwAlef.matcher(element).replaceAll(bwAlefChar);

        // Remove the character covered by the "Quran" pattern.
        element = bwQuran.matcher(element).replaceAll("");

        // Strip a leading/trailing hyphen unless the (already normalized) token is a single
        // character or looks like a reserved symbol such as -LRB-, whose hyphens must stay.
        if (element.length() > 1 && !reservedSymbol.matcher(element).matches()) {
            element = cliticMarker.matcher(element).replaceAll("");
        }
        return element;
    }

    /**
     * Normalizes one token.
     *
     * @param parent  tag of the token's parent node; may be null, in which case the token is
     *                always normalized
     * @param element the raw token; must not be null
     * @return the trimmed token if {@code parent} is one of the escaped tags, otherwise the
     *         trimmed and normalized token
     */
    public String map(String parent, String element) {
        String elem = element.trim();
        // The escape set never contains null, so a null parent simply falls through.
        if (this.parentTagsToEscape.contains(parent)) {
            return elem;
        }
        Matcher utf8Encoding = utf8ArabicChart.matcher(elem);
        return utf8Encoding.find() ? DefaultLexicalMapper.mapUtf8(elem) : DefaultLexicalMapper.mapBuckwalter(elem);
    }

    /**
     * No-op: this mapper needs no external resources.
     *
     * @param path ignored
     */
    public void setup(File path) {
    }

    /**
     * Tells whether the encoding of a token may be changed.
     *
     * <p>Returns {@code true} when the parent tag contains {@code NUMERIC_COMMA}, or contains
     * {@code PUNC} and the token is exactly {@code "r"}. Otherwise returns {@code false} for
     * tokens that contain a digit or whose parent is one of the escaped tags, and {@code true}
     * for everything else.
     *
     * @param parent  tag of the token's parent node; surrounding whitespace is ignored. May be
     *                null (as accepted by {@link #map(String, String)}), which is treated as
     *                an empty tag
     * @param element the raw token; must not be null
     * @return whether the token's encoding may be converted
     */
    public boolean canChangeEncoding(String parent, String element) {
        // map() tolerates a null parent, so do the same here instead of failing in trim().
        String tag = parent == null ? "" : parent.trim();
        String elem = element.trim();

        // These tokens are convertible even though PUNC is normally escaped.
        if (tag.contains("NUMERIC_COMMA") || (tag.contains("PUNC") && elem.equals("r"))) {
            return true;
        }

        Matcher numMatcher = hasNum.matcher(elem);
        return !numMatcher.find() && !this.parentTagsToEscape.contains(tag);
    }

    /**
     * Small smoke test: prints the normalized form of the token {@code FNKqq}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DefaultLexicalMapper m = new DefaultLexicalMapper();
        System.out.printf("< :-> %s\n", m.map(null, "FNKqq"));
    }
}

