package eus.ixa.ixa.pipe.tok;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:eus/ixa/ixa/pipe/tok/Normalizer.class */
/**
 * Static helpers that rewrite the values of {@link Token}s in place so that typographic
 * punctuation, vulgar fractions, currency signs and quotation marks are replaced by plain
 * ASCII forms.
 *
 * <p>Every method mutates the tokens of the list it receives and returns nothing. The language
 * argument is compared case-insensitively against two-letter codes; a {@code null} language is
 * treated like an unsupported one.
 */
public final class Normalizer {
    /** ASCII replacement for the horizontal ellipsis character. */
    public static final String THREE_DOTS = "...";

    /**
     * Matches the horizontal ellipsis (U+2026). The CJK ideograph U+8230 that used to be part of
     * this class was dropped: 8230 is the decimal entity number of the ellipsis, not a
     * hexadecimal code point, so ordinary Chinese text was being rewritten to "...".
     */
    public static final Pattern ellipsis = Pattern.compile("[…]");

    /**
     * Matches the en dash (U+2013) and the em dash (U+2014). The CJK ideograph U+8212 was
     * dropped for the same decimal-versus-hexadecimal reason as in {@link #ellipsis}.
     */
    public static final Pattern longDash = Pattern.compile("–|[—]");

    /** Matches the vulgar fraction one quarter. */
    public static final Pattern oneFourth = Pattern.compile("¼");
    /** Matches the vulgar fraction one third. */
    public static final Pattern oneThird = Pattern.compile("⅓");
    /** Matches the vulgar fraction one half. */
    public static final Pattern oneHalf = Pattern.compile("½");
    /** Matches the vulgar fraction two thirds. */
    public static final Pattern twoThirds = Pattern.compile("⅔");
    /** Matches the vulgar fraction three quarters. */
    public static final Pattern threeQuarters = Pattern.compile("¾");
    private static final Pattern cents = Pattern.compile("¢");
    private static final Pattern sterling = Pattern.compile("£");

    /** Matches the ASCII apostrophe, the control character U+0092 and the right single quote. */
    public static final Pattern apostrophe = Pattern.compile("[''\u0092’]");
    /** Matches opening single quotation marks, including the control character U+0091. */
    public static final Pattern leftSingleQuote = Pattern.compile("[\u0091‛‘‹]");
    /** Matches closing single quotation marks, including the ASCII apostrophe and U+0092. */
    public static final Pattern rightSingleQuote = Pattern.compile("['\u0092›’]");
    /** Matches opening double quotation marks, including the control character U+0093. */
    public static final Pattern leftDoubleQuote = Pattern.compile("[«\u0093“]");
    /** Matches closing double quotation marks, including the control character U+0094. */
    public static final Pattern rightDoubleQuote = Pattern.compile("[»\u0094”]");
    /** Matches the ASCII single quote. */
    public static final Pattern singleAsciiQuote = Pattern.compile("'|'");
    /** Matches a letter followed by a non-whitespace character, using Unicode character classes. */
    public static final Pattern invertSingleAsciiQuote =
            Pattern.compile("([\\p{Alpha}])([^\\p{Space}])", Pattern.UNICODE_CHARACTER_CLASS);
    /** Matches the ASCII double quote. */
    public static final Pattern doubleAsciiQuote = Pattern.compile("\"");
    /** Matches a letter, a digit or a dollar sign, using Unicode character classes. */
    public static final Pattern doubleAsciiQuoteAlphaNumeric =
            Pattern.compile("([\\p{Alpha}\\p{Digit}$])", Pattern.UNICODE_CHARACTER_CLASS);
    /** Character class of every single-quote variant that is folded to the ASCII apostrophe. */
    public static final String TO_ASCII_SINGLE_QUOTE = "['\u0091\u0092’‚‛›‘‹]";
    /** Compiled form of {@link #TO_ASCII_SINGLE_QUOTE}. */
    public static final Pattern toAsciiSingleQuote = Pattern.compile(TO_ASCII_SINGLE_QUOTE);
    /** Matches every double-quote variant that is folded to the ASCII double quote. */
    public static final Pattern toAsciiDoubleQuote = Pattern.compile("[«»\u0093\u0094“”„\"]");

    // Compiled once instead of calling String.matches with a literal regex on every iteration.
    /** A token that is exactly one ASCII letter. */
    private static final Pattern singleAsciiLetter = Pattern.compile("[A-Za-z]");
    /**
     * A token that is exactly one character other than space, tab, line feed, carriage return,
     * no-break space (U+00A0) or pilcrow (U+00B6). Escapes are used so the invisible no-break
     * space cannot be lost to an editor or a source-encoding mismatch.
     */
    private static final Pattern singleNonSpaceChar = Pattern.compile("[^ \t\n\r\u00A0\u00B6]");

    private Normalizer() {
    }

    /**
     * Replaces typographic apostrophes, ellipses, long dashes, vulgar fractions and the cent
     * sign in every token value with ASCII spellings.
     *
     * <p>For English ({@code "en"}) the fractions one quarter, one third, one half and three
     * quarters are written with an escaped slash (for example {@code 1\/4}) and the pound sign
     * becomes {@code #}. For every other language the fractions use a plain slash.
     *
     * @param list tokens whose values are rewritten in place
     * @param str two-letter language code, compared case-insensitively; {@code null} is treated
     *     as a non-English language
     */
    public static void convertNonCanonicalStrings(List<Token> list, String str) {
        final boolean english = "en".equalsIgnoreCase(str);
        for (Token token : list) {
            String value = token.getTokenValue();
            value = apostrophe.matcher(value).replaceAll("'");
            value = ellipsis.matcher(value).replaceAll(THREE_DOTS);
            value = longDash.matcher(value).replaceAll("--");
            if (english) {
                // In a replacement string "\\\\" denotes one literal backslash, so these
                // produce a backslash followed by the slash.
                value = oneFourth.matcher(value).replaceAll("1\\\\/4");
                value = oneThird.matcher(value).replaceAll("1\\\\/3");
                value = oneHalf.matcher(value).replaceAll("1\\\\/2");
                value = threeQuarters.matcher(value).replaceAll("3\\\\/4");
                value = sterling.matcher(value).replaceAll("#");
                // NOTE(review): two thirds has no escaped English form, so it falls through
                // to the plain "2/3" below -- confirm whether that asymmetry is intended.
            }
            // Language-independent pass. For English the fractions handled above are already
            // gone, so only two thirds and the cent sign can still match here.
            value = oneFourth.matcher(value).replaceAll("1/4");
            value = oneThird.matcher(value).replaceAll("1/3");
            value = oneHalf.matcher(value).replaceAll("1/2");
            value = twoThirds.matcher(value).replaceAll("2/3");
            value = threeQuarters.matcher(value).replaceAll("3/4");
            value = cents.matcher(value).replaceAll("cents");
            token.setTokenValue(value);
        }
    }

    /**
     * Normalizes typographic quotation marks in every token value.
     *
     * <p>For English, opening single quotes become a backtick, closing single quotes an
     * apostrophe, opening double quotes two backticks and closing double quotes two
     * apostrophes. For {@code de}, {@code es}, {@code eu}, {@code fr}, {@code gl}, {@code it}
     * and {@code nl} every quote variant is folded to the ASCII single or double quote. Tokens
     * are left untouched for any other language.
     *
     * @param list tokens whose values are rewritten in place
     * @param str two-letter language code, compared case-insensitively; {@code null} is treated
     *     as an unsupported language
     */
    public static void normalizeQuotes(List<Token> list, String str) {
        final boolean english = "en".equalsIgnoreCase(str);
        final boolean asciiQuotes = !english && foldsQuotesToAscii(str);
        for (Token token : list) {
            if (english) {
                String value = token.getTokenValue();
                value = leftSingleQuote.matcher(value).replaceAll("`");
                value = rightSingleQuote.matcher(value).replaceAll("'");
                value = leftDoubleQuote.matcher(value).replaceAll("``");
                value = rightDoubleQuote.matcher(value).replaceAll("''");
                token.setTokenValue(value);
            } else if (asciiQuotes) {
                String value = token.getTokenValue();
                value = toAsciiSingleQuote.matcher(value).replaceAll("'");
                value = toAsciiDoubleQuote.matcher(value).replaceAll("\"");
                token.setTokenValue(value);
            }
        }
    }

    /**
     * Rewrites ASCII quote tokens of an English sentence into directional quotes.
     *
     * <p>A token containing {@code "} becomes two backticks when an opening quote is expected
     * and the next token contains a letter, digit or dollar sign; it becomes two apostrophes
     * when a closing quote is expected. A token containing {@code '} becomes a backtick when
     * the next token is a single ASCII letter and the token after that is a single
     * non-whitespace character. Nothing is changed for languages other than English.
     *
     * @param list tokens of one sentence, in order; values are rewritten in place
     * @param str two-letter language code, compared case-insensitively; {@code null} is treated
     *     as a non-English language
     */
    public static void normalizeDoubleQuotes(List<Token> list, String str) {
        if (!"en".equalsIgnoreCase(str)) {
            return;
        }
        // True while the next double quote is expected to open a quotation.
        boolean expectOpening = true;
        final int size = list.size();
        for (int i = 0; i < size; i++) {
            final Token token = list.get(i);
            final String value = token.getTokenValue();
            if (doubleAsciiQuote.matcher(value).find()) {
                if (expectOpening
                        && i < size - 1
                        && doubleAsciiQuoteAlphaNumeric.matcher(list.get(i + 1).getTokenValue()).find()) {
                    token.setTokenValue("``");
                    expectOpening = false;
                } else if (!expectOpening) {
                    token.setTokenValue("''");
                    expectOpening = true;
                }
                // Otherwise: an opening quote was expected but no suitable token follows, so
                // the token and the state are both left as they are.
            } else if (singleAsciiQuote.matcher(value).find()
                    && i < size - 2
                    && singleAsciiLetter.matcher(list.get(i + 1).getTokenValue()).matches()
                    && singleNonSpaceChar.matcher(list.get(i + 2).getTokenValue()).matches()) {
                token.setTokenValue("`");
            }
        }
    }

    /** Tells whether quotes of the given language are folded to plain ASCII quotes. */
    private static boolean foldsQuotesToAscii(String language) {
        return "de".equalsIgnoreCase(language)
                || "es".equalsIgnoreCase(language)
                || "eu".equalsIgnoreCase(language)
                || "fr".equalsIgnoreCase(language)
                || "gl".equalsIgnoreCase(language)
                || "it".equalsIgnoreCase(language)
                || "nl".equalsIgnoreCase(language);
    }
}
