package com.yahoo.language.opennlp;

import com.yahoo.language.Language;
import com.yahoo.language.LinguisticsCase;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.TokenType;
import com.yahoo.language.process.Tokenizer;
import com.yahoo.language.process.Transformer;
import com.yahoo.language.simple.SimpleNormalizer;
import com.yahoo.language.simple.SimpleToken;
import com.yahoo.language.simple.SimpleTokenType;
import com.yahoo.language.simple.SimpleTokenizer;
import com.yahoo.language.simple.SimpleTransformer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import opennlp.tools.stemmer.Stemmer;
import opennlp.tools.stemmer.snowball.SnowballStemmer;

/**
 * A {@link Tokenizer} that stems tokens with the OpenNLP Snowball stemmers.
 *
 * <p>Input is split wherever {@link SimpleTokenType#valueOf(int)} reports a change between
 * indexable and non-indexable code points. Each token string is normalized, lower-cased,
 * optionally accent-dropped and finally stemmed. When no Snowball stemmer is selected for the
 * request (see {@link #getStemmerForLanguage}), the whole call is delegated to a
 * {@link SimpleTokenizer} built from the same normalizer and transformer.
 */
public class OpenNlpTokenizer implements Tokenizer {

    /** Code point substituted for "one past the end of input" while scanning. */
    private static final int SPACE_CODE = 32;

    private static final Logger log = Logger.getLogger(OpenNlpTokenizer.class.getName());

    private final Normalizer normalizer;
    private final Transformer transformer;
    private final SimpleTokenizer simpleTokenizer;

    /** Creates a tokenizer using a {@link SimpleNormalizer} and a {@link SimpleTransformer}. */
    public OpenNlpTokenizer() {
        this(new SimpleNormalizer(), new SimpleTransformer());
    }

    /**
     * Creates a tokenizer using the given normalizer and transformer.
     *
     * @param normalizer  applied to every token before lower-casing
     * @param transformer used for accent dropping when requested
     */
    public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer) {
        this.normalizer = normalizer;
        this.transformer = transformer;
        this.simpleTokenizer = new SimpleTokenizer(normalizer, transformer);
    }

    /**
     * Splits {@code input} into tokens and stems each token string.
     *
     * @param input         the text to tokenize; must not be {@code null}
     * @param language      the language used to select a stemmer and passed to accent dropping
     * @param stemMode      the stemming mode; {@link StemMode#NONE} disables Snowball stemming
     * @param removeAccents whether to run accent dropping on each token string
     * @return the tokens in input order; an empty list for empty input
     */
    @Override
    public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) {
        if (input.isEmpty()) {
            return Collections.emptyList();
        }
        Stemmer stemmer = getStemmerForLanguage(language, stemMode);
        if (stemmer == null) {
            // No Snowball stemmer applies to this request: let the simple tokenizer handle it.
            return simpleTokenizer.tokenize(input, language, stemMode, removeAccents);
        }

        List<Token> tokens = new ArrayList<>();
        int nextCode = input.codePointAt(0);
        TokenType prevType = SimpleTokenType.valueOf(nextCode);
        int prev = 0;                               // start offset of the token being built
        int next = Character.charCount(nextCode);   // offset of the code point being examined
        while (next <= input.length()) {
            // Past the end a space is substituted; presumably space is not indexable, so the
            // final run of indexable characters is closed here.
            nextCode = next < input.length() ? input.codePointAt(next) : SPACE_CODE;
            TokenType nextType = SimpleTokenType.valueOf(nextCode);
            // A token ends unless both the current run and the next code point are indexable,
            // so every non-indexable code point becomes a token of its own.
            if (!prevType.isIndexable() || !nextType.isIndexable()) {
                String original = input.substring(prev, next);
                tokens.add(new SimpleToken(original)
                        .setOffset(prev)
                        .setType(prevType)
                        .setTokenString(processToken(original, language, stemMode, removeAccents, stemmer)));
                prev = next;
                prevType = nextType;
            }
            next += Character.charCount(nextCode);
        }
        return tokens;
    }

    /**
     * Selects the Snowball stemmer to use for a request.
     *
     * @param language the requested language, may be {@code null}
     * @param stemMode the requested stemming mode
     * @return a new stemmer, or {@code null} when {@code language} is {@code null} or English,
     *         when {@code stemMode} is {@link StemMode#NONE}, or when the language has no
     *         Snowball algorithm mapped below
     */
    private Stemmer getStemmerForLanguage(Language language, StemMode stemMode) {
        log.log(Level.FINEST, () -> "getStemmerForLanguage '" + language + "' mode: " + stemMode);
        // English is excluded here, so English input is always handled by the simple tokenizer;
        // consequently there is no ENGLISH case in the switch below.
        if (language == null || Language.ENGLISH.equals(language) || StemMode.NONE.equals(stemMode)) {
            return null;
        }
        SnowballStemmer.ALGORITHM algorithm;
        switch (language) {
            case DANISH:
                algorithm = SnowballStemmer.ALGORITHM.DANISH;
                break;
            case DUTCH:
                algorithm = SnowballStemmer.ALGORITHM.DUTCH;
                break;
            case FINNISH:
                algorithm = SnowballStemmer.ALGORITHM.FINNISH;
                break;
            case FRENCH:
                algorithm = SnowballStemmer.ALGORITHM.FRENCH;
                break;
            case GERMAN:
                algorithm = SnowballStemmer.ALGORITHM.GERMAN;
                break;
            case HUNGARIAN:
                algorithm = SnowballStemmer.ALGORITHM.HUNGARIAN;
                break;
            case IRISH:
                algorithm = SnowballStemmer.ALGORITHM.IRISH;
                break;
            case ITALIAN:
                algorithm = SnowballStemmer.ALGORITHM.ITALIAN;
                break;
            case NORWEGIAN_BOKMAL:
            case NORWEGIAN_NYNORSK:
                // Both Norwegian written standards share the single Snowball Norwegian algorithm.
                algorithm = SnowballStemmer.ALGORITHM.NORWEGIAN;
                break;
            case PORTUGUESE:
                algorithm = SnowballStemmer.ALGORITHM.PORTUGUESE;
                break;
            case ROMANIAN:
                algorithm = SnowballStemmer.ALGORITHM.ROMANIAN;
                break;
            case RUSSIAN:
                algorithm = SnowballStemmer.ALGORITHM.RUSSIAN;
                break;
            case SPANISH:
                algorithm = SnowballStemmer.ALGORITHM.SPANISH;
                break;
            case SWEDISH:
                algorithm = SnowballStemmer.ALGORITHM.SWEDISH;
                break;
            case TURKISH:
                algorithm = SnowballStemmer.ALGORITHM.TURKISH;
                break;
            default:
                return null;
        }
        return new SnowballStemmer(algorithm);
    }

    /**
     * Produces the token string for one raw token: normalize, lower-case, optionally drop
     * accents, then stem unless {@code stemMode} is {@link StemMode#NONE}.
     *
     * @param token         the raw substring of the input
     * @param language      passed to accent dropping
     * @param stemMode      stemming is skipped when this is {@link StemMode#NONE}
     * @param removeAccents whether to run accent dropping
     * @param stemmer       the stemmer to apply
     * @return the processed token string
     */
    private String processToken(String token, Language language, StemMode stemMode, boolean removeAccents,
                                Stemmer stemmer) {
        log.log(Level.FINEST, () -> "processToken '" + token + "'");
        String lowered = LinguisticsCase.toLowerCase(normalizer.normalize(token));
        // Single-assignment locals: they are captured by the logging lambdas below, which
        // requires them to be (effectively) final.
        String unstemmed = removeAccents ? transformer.accentDrop(lowered, language) : lowered;
        final String processed;
        if (stemMode != StemMode.NONE) {
            processed = doStemming(unstemmed, stemmer);
            log.log(Level.FINEST, () -> "stem '" + unstemmed + "' to '" + processed + "'");
        } else {
            processed = unstemmed;
        }
        log.log(Level.FINEST, () -> "processed token is: " + processed);
        return processed;
    }

    /** Stems {@code token} with {@code stemmer} and returns the result as a string. */
    private String doStemming(String token, Stemmer stemmer) {
        return stemmer.stem(token).toString();
    }
}
