package fr.pilato.elasticsearch.crawler.fs.tika;

import fr.pilato.elasticsearch.crawler.fs.beans.Doc;
import fr.pilato.elasticsearch.crawler.fs.beans.Meta;
import fr.pilato.elasticsearch.crawler.fs.framework.FSCrawlerLogger;
import fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil;
import fr.pilato.elasticsearch.crawler.fs.framework.SignTool;
import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.input.TeeInputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.tika.language.detect.LanguageResult;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;

/* loaded from: input_file:fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParser.class */
/**
 * Runs a document through Tika and copies the extracted text, file details and
 * metadata into a {@link Doc}.
 */
public class TikaDocParser {
    private static final Logger logger = LogManager.getLogger(TikaDocParser.class);

    /** Extraction limit used when the settings do not define indexed chars. */
    private static final int DEFAULT_INDEXED_CHARS = 100000;

    /** Size of the scratch buffer used to drain a stream that is stored but not parsed. */
    private static final int DRAIN_BUFFER_SIZE = 8192;

    /**
     * Fills {@code doc} from the content of {@code inputStream}.
     *
     * <p>When content indexing is enabled, the text is extracted with Tika and the content type,
     * indexed chars, file size, checksum, standard metadata and (optionally) raw metadata are set
     * on {@code doc}. An extraction failure is reported and logged but does not abort the method;
     * the document content is then {@code null}. When the source must be stored, every byte read
     * from the stream is also kept and set on {@code doc} as a Base64 attachment.
     *
     * @param fsSettings    settings driving the extraction (indexed chars, store source,
     *                      index content, raw metadata, language detection, ...)
     * @param inputStream   content of the document; this method does not close it
     * @param str           name given to Tika as {@code resourceName}; also used as the document id
     *                      in error reports when "filename as id" is enabled
     * @param str2          name used in log messages, to build the signed document id and the
     *                      virtual path in error reports (presumably the full path of the file)
     * @param doc           document receiving the extracted values
     * @param messageDigest digest used to compute the checksum, or {@code null} for no checksum;
     *                      the checksum is only stored when content indexing is enabled
     * @param j             value the indexed chars percentage is applied to
     *                      (presumably the file size in bytes)
     * @throws IOException if the stream can not be read while storing a source that is not indexed
     */
    public static void generate(FsSettings fsSettings, InputStream inputStream, String str, String str2, Doc doc, MessageDigest messageDigest, long j) throws IOException {
        logger.trace("Generating document [{}]", str2);
        int indexedChars = computeIndexedChars(fsSettings, j);

        Metadata metadata = new Metadata();
        metadata.set("resourceName", str);

        InputStream stream = inputStream;
        if (messageDigest != null) {
            logger.trace("Generating hash with [{}]", messageDigest.getAlgorithm());
            stream = new DigestInputStream(stream, messageDigest);
        }

        boolean storeSource = fsSettings.getFs().isStoreSource();
        ByteArrayOutputStream sourceCopy = null;
        if (storeSource) {
            logger.debug("Using a TeeInputStream as we need to store the source");
            sourceCopy = new ByteArrayOutputStream();
            stream = new TeeInputStream(stream, sourceCopy);
        }

        if (fsSettings.getFs().isIndexContent()) {
            String content = extractContent(fsSettings, indexedChars, stream, metadata, str, str2);

            doc.getFile().setContentType(metadata.get("Content-Type"));
            if (fsSettings.getFs().getIndexedChars() != null && fsSettings.getFs().getIndexedChars().value() != -1.0d) {
                doc.getFile().setIndexedChars(Integer.valueOf(indexedChars));
            }
            if (fsSettings.getFs().isAddFilesize() && metadata.get("Content-Length") != null) {
                doc.getFile().setFilesize(Long.valueOf(metadata.get("Content-Length")));
            }
            if (messageDigest != null) {
                doc.getFile().setChecksum(toHex(messageDigest.digest()));
            }

            Meta meta = Objects.requireNonNull(doc.getMeta());
            populateMeta(fsSettings, str2, metadata, meta, content);
            if (fsSettings.getFs().isRawMetadata()) {
                populateRawMetadata(metadata, meta);
            }
            doc.setContent(content);
        } else if (storeSource) {
            // Nothing parses the stream in this branch, so read it to the end ourselves.
            // The TeeInputStream already copies every byte it reads into sourceCopy: the bytes
            // must NOT be written to sourceCopy a second time or the attachment is duplicated.
            byte[] scratch = new byte[DRAIN_BUFFER_SIZE];
            while (stream.read(scratch) != -1) {
                // bytes are captured by the tee, nothing else to do
            }
        }

        if (storeSource) {
            doc.setAttachment(Base64.getEncoder().encodeToString(sourceCopy.toByteArray()));
        }
        logger.trace("End document generation");
    }

    /**
     * Computes the maximum number of characters to extract.
     *
     * @param fsSettings settings holding the optional indexed chars definition
     * @param base       value a percentage definition is applied to
     * @return the default limit when nothing is configured, the configured value otherwise
     *         ({@code -1} is logged as "disabled"); a percentage result is capped at
     *         {@link Integer#MAX_VALUE} instead of wrapping around
     */
    private static int computeIndexedChars(FsSettings fsSettings, long base) {
        if (fsSettings.getFs().getIndexedChars() == null) {
            return DEFAULT_INDEXED_CHARS;
        }
        if (fsSettings.getFs().getIndexedChars().percentage()) {
            long scaled = Math.round(base * fsSettings.getFs().getIndexedChars().asDouble());
            // A plain (int) cast would silently wrap for very large inputs.
            int indexedChars = (int) Math.min(Integer.MAX_VALUE, scaled);
            logger.trace("using percentage [{}] to define indexed chars: [{}]", fsSettings.getFs().getIndexedChars(), Integer.valueOf(indexedChars));
            return indexedChars;
        }
        int indexedChars = (int) fsSettings.getFs().getIndexedChars().value();
        logger.trace("indexed chars [{}]", indexedChars == -1 ? "has been disabled. All text will be extracted" : Integer.valueOf(indexedChars));
        return indexedChars;
    }

    /**
     * Extracts the text with Tika; any failure is reported and logged instead of being propagated.
     *
     * @return the extracted text, or {@code null} when the extraction failed
     */
    private static String extractContent(FsSettings fsSettings, int indexedChars, InputStream stream, Metadata metadata, String filename, String fullFilename) {
        try {
            logger.trace("Beginning Tika extraction");
            String content = TikaInstance.extractText(fsSettings, indexedChars, stream, metadata);
            logger.trace("End of Tika extraction");
            return content;
        } catch (Throwable failure) {
            String causes = describeCauseChain(failure);
            try {
                FSCrawlerLogger.documentError(fsSettings.getFs().isFilenameAsId() ? filename : SignTool.sign(fullFilename), FsCrawlerUtil.computeVirtualPathName(fsSettings.getFs().getUrl(), fullFilename), causes);
            } catch (NoSuchAlgorithmException e) {
                // The extraction failure itself is still logged below; only the error report is lost.
                logger.debug("Can not report the extraction error for [{}]", fullFilename, e);
            }
            logger.warn("Failed to extract [{}] characters of text for [{}]: {}", Integer.valueOf(indexedChars), fullFilename, causes);
            // The trailing Throwable is not bound to a placeholder: Log4j logs it as the exception.
            logger.debug("Failed to extract [{}] characters of text for [{}]", Integer.valueOf(indexedChars), fullFilename, failure);
            return null;
        }
    }

    /** Joins the messages of a throwable and all its causes with {@code " -> "}. */
    private static String describeCauseChain(Throwable failure) {
        StringBuilder chain = new StringBuilder();
        for (Throwable current = failure; current != null; current = current.getCause()) {
            chain.append(current.getMessage());
            if (current.getCause() != null) {
                chain.append(" -> ");
            }
        }
        return chain.toString();
    }

    /** Renders bytes as a lowercase hexadecimal string, two digits per byte. */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(Character.forDigit((b >> 4) & 0xF, 16));
            hex.append(Character.forDigit(b & 0xF, 16));
        }
        return hex.toString();
    }

    /**
     * Copies the standard Tika properties into {@code meta}.
     *
     * @param content extracted text, used to detect the language when Tika did not provide one
     */
    private static void populateMeta(FsSettings fsSettings, String fullFilename, Metadata metadata, Meta meta, String content) {
        setMeta(fullFilename, metadata, TikaCoreProperties.CREATOR, meta::setAuthor, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.TITLE, meta::setTitle, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.MODIFIED, meta::setDate, FsCrawlerUtil::localDateTimeToDate);
        setMeta(fullFilename, metadata, Office.KEYWORDS, meta::setKeywords, TikaDocParser::commaDelimitedListToStringArray);
        setMeta(fullFilename, metadata, TikaCoreProperties.FORMAT, meta::setFormat, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.IDENTIFIER, meta::setIdentifier, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.CONTRIBUTOR, meta::setContributor, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.COVERAGE, meta::setCoverage, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.MODIFIER, meta::setModifier, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.CREATOR_TOOL, meta::setCreatorTool, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.LANGUAGE, meta::setLanguage, declared -> resolveLanguage(fsSettings, declared, content));
        setMeta(fullFilename, metadata, TikaCoreProperties.PUBLISHER, meta::setPublisher, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.RELATION, meta::setRelation, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.RIGHTS, meta::setRights, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.SOURCE, meta::setSource, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.TYPE, meta::setType, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.DESCRIPTION, meta::setDescription, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.CREATED, meta::setCreated, FsCrawlerUtil::localDateTimeToDate);
        setMeta(fullFilename, metadata, TikaCoreProperties.PRINT_DATE, meta::setPrintDate, FsCrawlerUtil::localDateTimeToDate);
        setMeta(fullFilename, metadata, TikaCoreProperties.METADATA_DATE, meta::setMetadataDate, FsCrawlerUtil::localDateTimeToDate);
        setMeta(fullFilename, metadata, TikaCoreProperties.LATITUDE, meta::setLatitude, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.LONGITUDE, meta::setLongitude, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.ALTITUDE, meta::setAltitude, Function.identity());
        setMeta(fullFilename, metadata, TikaCoreProperties.RATING, meta::setRating, rating -> rating == null ? null : Integer.valueOf(rating));
        setMeta(fullFilename, metadata, TikaCoreProperties.COMMENTS, meta::setComments, Function.identity());
    }

    /**
     * Picks the language of the document.
     *
     * @param declared language found in the metadata, may be {@code null}
     * @param content  extracted text, may be {@code null}
     * @return {@code declared} when present; otherwise the first language returned by the detector
     *         when language detection is enabled and there is text to analyze; otherwise {@code null}
     */
    private static String resolveLanguage(FsSettings fsSettings, String declared, String content) {
        if (declared != null) {
            return declared;
        }
        if (!fsSettings.getFs().isLangDetect() || content == null) {
            return null;
        }
        List<LanguageResult> candidates = TikaInstance.langDetector().detectAll(content);
        if (candidates.isEmpty()) {
            return null;
        }
        LanguageResult best = candidates.get(0);
        logger.trace("Main detected language: [{}]", best);
        return best.getLanguage();
    }

    /** Logs every metadata entry and adds it to the raw metadata, with dots in names replaced by colons. */
    private static void populateRawMetadata(Metadata metadata, Meta meta) {
        FSCrawlerLogger.metadata("Listing all available metadata:", new Object[0]);
        FSCrawlerLogger.metadata("  assertThat(raw.entrySet(), iterableWithSize({}));", new Object[]{Integer.valueOf(metadata.size())});
        for (String name : metadata.names()) {
            String value = metadata.get(name);
            FSCrawlerLogger.metadata("  assertThat(raw, hasEntry(\"{}\", \"{}\"));", new Object[]{name, value});
            meta.addRaw(name.replace('.', ':'), value);
        }
    }

    /**
     * Reads one property from the metadata, converts it and hands it to a setter.
     * A conversion or setter failure is logged and the field is skipped.
     *
     * @param str       name of the document, only used in the warning message
     * @param metadata  metadata to read the property from
     * @param property  property to read
     * @param consumer  setter receiving the converted value
     * @param function  conversion applied to the raw value, which may be {@code null}
     * @param <T>       type expected by the setter
     */
    private static <T> void setMeta(String str, Metadata metadata, Property property, Consumer<T> consumer, Function<String, T> function) {
        String rawValue = metadata.get(property);
        try {
            consumer.accept(function.apply(rawValue));
        } catch (Exception e) {
            logger.warn("Can not parse meta [{}] for [{}]. Skipping [{}] field...", rawValue, str, property.getName());
        }
    }

    /**
     * Splits a string on commas. Tokens are not trimmed and empty tokens are kept.
     *
     * @param str text to split, may be {@code null}
     * @return {@code null} for a {@code null} input (so that the target field stays unset),
     *         an empty list for an empty string, the list of tokens otherwise
     */
    private static List<String> commaDelimitedListToStringArray(String str) {
        if (str == null) {
            return null;
        }
        List<String> tokens = new ArrayList<>();
        if (str.isEmpty()) {
            return tokens;
        }
        int start = 0;
        int comma;
        while ((comma = str.indexOf(',', start)) != -1) {
            tokens.add(str.substring(start, comma));
            start = comma + 1;
        }
        // Whatever follows the last comma (possibly an empty string) is the final token.
        tokens.add(str.substring(start));
        return tokens;
    }
}
