package com.yahoo.language.lucene;

import com.yahoo.component.provider.ComponentRegistry;
import com.yahoo.language.Language;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.TokenScript;
import com.yahoo.language.process.TokenType;
import com.yahoo.language.process.Tokenizer;
import com.yahoo.language.simple.SimpleToken;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

/* loaded from: input_file:com/yahoo/language/lucene/LuceneTokenizer.class */
/**
 * A {@link Tokenizer} that delegates tokenization to a Lucene {@link Analyzer}.
 *
 * <p>The analyzer used for each call is obtained from an {@link AnalyzerFactory}
 * built from the supplied configuration and component registry.
 */
class LuceneTokenizer implements Tokenizer {
    private static final Logger log = Logger.getLogger(LuceneTokenizer.class.getName());

    /** Field name handed to {@link Analyzer#tokenStream(String, String)}; the same constant is used for every call. */
    private static final String FIELD_NAME = "F";

    private final AnalyzerFactory analyzerFactory;

    /**
     * Creates a tokenizer from configuration only, using an empty analyzer component registry.
     *
     * @param luceneAnalysisConfig the analysis configuration passed on to the {@link AnalyzerFactory}
     */
    public LuceneTokenizer(LuceneAnalysisConfig luceneAnalysisConfig) {
        this(luceneAnalysisConfig, new ComponentRegistry<>());
    }

    /**
     * Creates a tokenizer from configuration and a registry of analyzer components.
     *
     * @param luceneAnalysisConfig the analysis configuration passed on to the {@link AnalyzerFactory}
     * @param componentRegistry    registry of {@link Analyzer} components passed on to the {@link AnalyzerFactory}
     */
    public LuceneTokenizer(LuceneAnalysisConfig luceneAnalysisConfig, ComponentRegistry<Analyzer> componentRegistry) {
        this.analyzerFactory = new AnalyzerFactory(luceneAnalysisConfig, componentRegistry);
    }

    /**
     * Tokenizes the given text with the analyzer selected for the given language and stem mode.
     *
     * @param str      the text to tokenize; an empty string yields an empty result without consulting the factory
     * @param language the language used to select the analyzer
     * @param stemMode the stem mode used to select the analyzer
     * @param z        forwarded unchanged to {@code AnalyzerFactory.getAnalyzer}
     *                 (NOTE(review): presumably the accent-removal flag of the Tokenizer contract - confirm)
     * @return the tokens produced by the analyzer, in stream order
     * @throws RuntimeException if the analyzer fails with an {@link IOException}
     */
    public Iterable<Token> tokenize(String str, Language language, StemMode stemMode, boolean z) {
        if (str.isEmpty()) {
            return List.of();
        }
        Analyzer analyzer = this.analyzerFactory.getAnalyzer(language, stemMode, z);
        List<Token> tokens = textToTokens(str, analyzer);
        // Supplier form so the message is only built when FINEST logging is enabled.
        log.log(Level.FINEST, () ->
                "Tokenized '" + language + "' text='" + str + "' into: n=" + tokens.size() + ", tokens=" + tokens);
        return tokens;
    }

    /**
     * Runs the analyzer over the text and converts each emitted term into a {@link SimpleToken}.
     *
     * <p>Each token carries the original substring (taken from the term's start/end offsets),
     * the analyzed term text, the start offset, type {@link TokenType#ALPHABETIC} and
     * script {@link TokenScript#UNKNOWN}.
     *
     * @param str      the text to analyze
     * @param analyzer the analyzer producing the token stream
     * @return a mutable list of the produced tokens
     * @throws RuntimeException wrapping any {@link IOException} raised while consuming the stream
     */
    private List<Token> textToTokens(String str, Analyzer analyzer) {
        List<Token> tokens = new ArrayList<>();
        // try-with-resources guarantees close() on every path, including failures in
        // reset()/incrementToken()/end(); an unclosed stream would otherwise leak.
        try (TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, str)) {
            CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                int start = offsetAttribute.startOffset();
                String original = str.substring(start, offsetAttribute.endOffset());
                String analyzed = termAttribute.toString();
                tokens.add(new SimpleToken(original, analyzed)
                        .setType(TokenType.ALPHABETIC)
                        .setOffset(start)
                        .setScript(TokenScript.UNKNOWN));
            }
            tokenStream.end();
            return tokens;
        } catch (IOException e) {
            throw new RuntimeException("Failed to analyze: " + str, e);
        }
    }
}
