package org.mycore.mods.merger;

import java.text.Normalizer;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:org/mycore/mods/merger/MCRTextNormalizer.class */
/**
 * Reduces text to a simplified, lower-case form so that strings differing only
 * in case, accents, hyphenation, ligatures, punctuation or whitespace become equal.
 */
public class MCRTextNormalizer {
    private static final char LATIN_SMALL_LETTER_SHARP_S = '\u00DF';
    private static final char LATIN_SMALL_LIGATURE_FF = '\uFB00';
    private static final char LATIN_SMALL_LIGATURE_FFI = '\uFB03';
    private static final char LATIN_SMALL_LIGATURE_FFL = '\uFB04';
    private static final char LATIN_SMALL_LIGATURE_FI = '\uFB01';
    private static final char LATIN_SMALL_LIGATURE_FL = '\uFB02';
    private static final char LATIN_SMALL_LIGATURE_IJ = '\u0133';
    private static final char LATIN_SMALL_LIGATURE_ST = '\uFB06';
    private static final char LATIN_SMALL_LIGATURE_LONG_ST = '\uFB05';

    /**
     * Matches every character that is not a letter, digit or whitespace.
     * Without UNICODE_CHARACTER_CLASS these POSIX classes cover US-ASCII only,
     * so any remaining non-ASCII character is matched as well.
     */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^\\p{Alpha}\\p{Digit}\\p{Space}]");

    /** Matches a run of one or more whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\p{Space}+");

    /**
     * Instance-level convenience that delegates to {@link #normalizeText(String)}.
     *
     * @param str the text to normalize, must not be {@code null}
     * @return the normalized text
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String normalize(String str) {
        return normalizeText(str);
    }

    /**
     * Normalizes the given text. The steps, in order:
     * <ol>
     * <li>lower-case, independent of the JVM's default locale</li>
     * <li>pass through {@code MCRHyphenNormalizer.normalizeHyphen} with a space as replacement</li>
     * <li>decompose to NFD and strip accents</li>
     * <li>map sharp s to {@code "s"} and collapse {@code "ss"} to {@code "s"}</li>
     * <li>expand the ligatures ff, ffi, ffl, fi, fl, ij, st and long-s-t to plain letters</li>
     * <li>replace everything but ASCII letters, digits and whitespace by a space</li>
     * <li>collapse whitespace runs to a single space and trim</li>
     * </ol>
     *
     * @param str the text to normalize, must not be {@code null}
     * @return the normalized text, possibly empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String normalizeText(String str) {
        // Locale.ROOT keeps the result stable across machines: with a Turkish or Azeri
        // default locale "I" would lower-case to dotless i, which the ASCII-only filter
        // below would then turn into a space.
        String text = str.toLowerCase(Locale.ROOT);

        text = MCRHyphenNormalizer.normalizeHyphen(text, ' ');
        text = StringUtils.stripAccents(Normalizer.normalize(text, Normalizer.Form.NFD));

        // Sharp s must become "s" before "ss" is collapsed, so that both spellings
        // end up as a single "s".
        text = text.replace(Character.toString(LATIN_SMALL_LETTER_SHARP_S), "s").replace("ss", "s");

        text = text.replace(Character.toString(LATIN_SMALL_LIGATURE_FF), "ff")
            .replace(Character.toString(LATIN_SMALL_LIGATURE_FFI), "ffi")
            .replace(Character.toString(LATIN_SMALL_LIGATURE_FFL), "ffl")
            .replace(Character.toString(LATIN_SMALL_LIGATURE_FI), "fi")
            .replace(Character.toString(LATIN_SMALL_LIGATURE_FL), "fl")
            .replace(Character.toString(LATIN_SMALL_LIGATURE_IJ), "ij")
            .replace(Character.toString(LATIN_SMALL_LIGATURE_ST), "st")
            .replace(Character.toString(LATIN_SMALL_LIGATURE_LONG_ST), "st");

        text = NON_ALPHANUMERIC.matcher(text).replaceAll(" ");
        text = WHITESPACE_RUN.matcher(text).replaceAll(" ");
        return text.trim();
    }
}
