package sklearn2pmml.feature_extraction.text;

import com.google.common.base.Joiner;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.dmg.pmml.TextIndex;
import sklearn.feature_extraction.text.Tokenizer;

/* loaded from: input_file:sklearn2pmml/feature_extraction/text/Matcher.class */
/**
 * A {@link Tokenizer} whose token pattern is a regular expression stored under the
 * {@code "word_re"} attribute.
 *
 * <p>The pattern is copied onto the PMML {@link TextIndex} by {@link #configure(TextIndex)}
 * and is also used by {@link #formatStopWordsRE(List)} to decide which stop words are
 * worth emitting.
 */
public class Matcher extends Tokenizer {
    /** Attribute key under which the word regular expression is stored. */
    private static final String WORD_RE_KEY = "word_re";

    /** Inline flag prefix that is carried over from the word RE to the stop words RE. */
    private static final String UNICODE_FLAG = "(?u)";

    /**
     * Creates a matcher using this class's own package and simple name as the two
     * identifying strings handed to the superclass.
     */
    public Matcher() {
        this("sklearn2pmml.feature_extraction.text", "Matcher");
    }

    /**
     * Creates a matcher, forwarding both arguments unchanged to the {@link Tokenizer}
     * constructor.
     *
     * @param str first identifying string (presumably the Python module name; confirm against {@code Tokenizer})
     * @param str2 second identifying string (presumably the Python class name; confirm against {@code Tokenizer})
     */
    public Matcher(String str, String str2) {
        super(str, str2);
    }

    /**
     * Enables tokenization on the given text index and sets its word RE to
     * {@link #getWordRE()}.
     *
     * @param textIndex the text index to configure
     * @return the value returned by the chained setter calls (presumably the same instance)
     */
    @Override // sklearn.feature_extraction.text.Tokenizer
    public TextIndex configure(TextIndex textIndex) {
        return textIndex.setTokenize(Boolean.TRUE).setWordRE(getWordRE());
    }

    /**
     * Builds a regular expression that matches any of the relevant stop words as a whole word.
     *
     * <p>Only stop words in which the word RE finds a match are kept. The survivors are joined
     * with {@code |} inside {@code \b(...)\b}. If the word RE starts with {@code (?u)}, the
     * same flag is prepended to the result.
     *
     * <p>NOTE(review): stop words are inserted verbatim, without regex escaping. A stop word
     * containing regex metacharacters would alter the resulting expression; confirm that
     * callers only pass plain words.
     *
     * @param list candidate stop words
     * @return the stop words regular expression, or {@code null} when no candidate matches the word RE
     */
    @Override // sklearn.feature_extraction.text.Tokenizer
    public String formatStopWordsRE(List<String> list) {
        String wordRE = getWordRE();
        Pattern wordPattern = Pattern.compile(wordRE);

        // A stop word that the tokenizer could never produce is pointless to filter on.
        List<String> matchingStopWords = list.stream()
            .filter(wordPattern.asPredicate())
            .collect(Collectors.toList());

        if (matchingStopWords.isEmpty()) {
            return null;
        }

        String flagPrefix = wordRE.startsWith(UNICODE_FLAG) ? UNICODE_FLAG : "";
        return flagPrefix + "\\b(" + Joiner.on("|").join(matchingStopWords) + ")\\b";
    }

    /**
     * Restores this object's state from a single string, which becomes the word RE.
     *
     * @param str the word regular expression
     */
    public void __setstate__(String str) {
        setWordRE(str);
    }

    /**
     * Returns the word regular expression.
     *
     * @return the string stored under the {@code "word_re"} attribute
     */
    public String getWordRE() {
        return getString(WORD_RE_KEY);
    }

    /**
     * Stores the word regular expression.
     *
     * @param str the word regular expression
     * @return this matcher, to allow call chaining
     */
    public Matcher setWordRE(String str) {
        put(WORD_RE_KEY, str);
        return this;
    }
}
