package org.datacleaner.beans.standardize;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.inject.Named;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.MatchingAndStandardizationCategory;
import org.datacleaner.util.HasGroupLiteral;
import org.datacleaner.util.NamedPattern;
import org.datacleaner.util.NamedPatternMatch;

/**
 * Transformer that splits a URL string into protocol, domain, port, path and
 * query string.
 *
 * <p>The input value is tested against each entry of {@link #PATTERNS} in
 * array order; the first pattern that matches supplies the output values.
 * Parts that the matching pattern does not contain, and all parts when no
 * pattern matches or the input is {@code null}, are reported as {@code null}.
 *
 * <p>{@link #init()} must have been called before {@link #transform(String)}
 * is used, because it builds the pattern list that {@code transform} iterates.
 */
@Categorized({MatchingAndStandardizationCategory.class})
@Named("URL standardizer")
@Description("Retrieve the individual parts of an URL, including protocol, domain, port, path and querystring.")
public class UrlStandardizerTransformer implements Transformer {

    /**
     * URL layouts that are tried, in this order, against each input value.
     * The upper-case tokens correspond to the constant names of
     * {@link UrlPart}; the remaining characters are handed to
     * {@link NamedPattern} unchanged (note the escaped {@code ?}).
     */
    public static final String[] PATTERNS = {
        "PROTOCOL://DOMAIN:PORTPATH\\?QUERYSTRING",
        "PROTOCOL://DOMAINPATH\\?QUERYSTRING",
        "PROTOCOL://DOMAIN:PORTPATH",
        "PROTOCOL://DOMAIN:PORT\\?QUERYSTRING",
        "PROTOCOL://DOMAIN\\?QUERYSTRING",
        "PROTOCOL://DOMAINPATH",
        "PROTOCOL://DOMAIN:PORT",
        "PROTOCOL://DOMAIN"
    };

    /** Column whose string values are parsed as URLs. */
    @Configured
    InputColumn<String> inputColumn;

    /** One {@link NamedPattern} per entry of {@link #PATTERNS}; built by {@link #init()}. */
    private List<NamedPattern<UrlPart>> namedPatterns;

    /**
     * The parts of a URL that can be extracted, each optionally supplying the
     * regular-expression group used to capture it.
     */
    public enum UrlPart implements HasGroupLiteral {
        PROTOCOL,
        DOMAIN,
        PORT,
        PATH,
        QUERYSTRING;

        /**
         * Returns the capturing-group regex for this part.
         *
         * @return the group literal for {@code DOMAIN}, {@code PORT},
         *         {@code PATH} and {@code QUERYSTRING}; {@code null} for
         *         {@code PROTOCOL} (presumably {@link NamedPattern} then
         *         applies its own default group - TODO confirm)
         */
        @Override // org.datacleaner.util.HasGroupLiteral
        public String getGroupLiteral() {
            switch (this) {
                case DOMAIN:
                    return "([a-zA-Z0-9\\._\\-@]+)";
                case PORT:
                    return "([0-9]+)";
                case PATH:
                    return "(/[a-zA-Z0-9\\._\\-/#:%]+)";
                case QUERYSTRING:
                    // NOTE(review): this character class has no '&'; confirm whether
                    // multi-parameter query strings are intentionally not matched.
                    return "([a-zA-Z0-9\\.=\\?_\\-/%]+)";
                default:
                    return null;
            }
        }
    }

    /**
     * Builds one {@link NamedPattern} for every entry of {@link #PATTERNS},
     * preserving the array order.
     */
    @Initialize
    public void init() {
        // Populate a local list first so the field never refers to a half-built list.
        final List<NamedPattern<UrlPart>> compiled = new ArrayList<>(PATTERNS.length);
        for (String pattern : PATTERNS) {
            compiled.add(new NamedPattern<>(pattern, UrlPart.class));
        }
        this.namedPatterns = compiled;
    }

    /**
     * Declares the five string output columns. Their order (Protocol, Domain,
     * Port, Path, Querystring) is the order of the array returned by
     * {@link #transform(String)}.
     *
     * @return the output column definition
     */
    public OutputColumns getOutputColumns() {
        return new OutputColumns(String.class, "Protocol", new String[]{"Domain", "Port", "Path", "Querystring"});
    }

    /**
     * Reads the configured input column from the row and parses its value.
     *
     * <p>The method name is a decompiler artifact: according to the decompiler
     * note it was originally named {@code transform} and was merged with its
     * bridge method. The name is kept so existing references keep resolving.
     *
     * @param inputRow the row to read the URL value from
     * @return the five URL parts, see {@link #transform(String)}
     */
    public String[] m12transform(InputRow inputRow) {
        final String value = (String) inputRow.getValue(this.inputColumn);
        return transform(value);
    }

    /**
     * Splits a URL string into its parts using the first matching pattern.
     *
     * @param str the URL to parse; may be {@code null}
     * @return a new five-element array holding protocol, domain, port, path
     *         and query string, in that order; every element is {@code null}
     *         when {@code str} is {@code null} or no pattern matches
     */
    public String[] transform(String str) {
        // Slots: 0 = protocol, 1 = domain, 2 = port, 3 = path, 4 = query string.
        final String[] parts = new String[5];
        if (str == null) {
            return parts;
        }
        for (NamedPattern<UrlPart> pattern : this.namedPatterns) {
            final NamedPatternMatch<UrlPart> match = pattern.match(str);
            if (match != null) {
                parts[0] = match.get(UrlPart.PROTOCOL);
                parts[1] = match.get(UrlPart.DOMAIN);
                parts[2] = match.get(UrlPart.PORT);
                parts[3] = match.get(UrlPart.PATH);
                parts[4] = match.get(UrlPart.QUERYSTRING);
                // First match wins; later, less specific patterns are not consulted.
                return parts;
            }
        }
        return parts;
    }
}
