package ai.knowly.langtorch.preprocessing.splitter.text.word;

import ai.knowly.langtorch.preprocessing.splitter.text.TextSplitter;
import com.google.common.collect.ImmutableList;
import java.util.List;

/* loaded from: input_file:ai/knowly/langtorch/preprocessing/splitter/text/word/WordSplitter.class */
/**
 * Splits text into chunks made of whole, whitespace-delimited words.
 *
 * <p>Words are packed greedily: each chunk receives as many consecutive words, joined by a single
 * space, as fit within the configured maximum chunk length. The class declares no fields, so a
 * single instance can be reused for any number of calls.
 */
public class WordSplitter implements TextSplitter<WordSplitterOption> {
    /**
     * Creates a new splitter.
     *
     * @return a fresh {@code WordSplitter} instance
     */
    public static WordSplitter create() {
        return new WordSplitter();
    }

    /**
     * Splits the option's text on runs of whitespace and regroups the words into chunks.
     *
     * <p>Every returned chunk is non-empty and carries no leading or trailing separator. A chunk
     * holds at most {@code maxLengthPerChunk} characters, with one exception: a single word that
     * is itself longer than the limit is emitted as a chunk of its own, because the validation
     * below only rejects limits smaller than the <em>shortest</em> token.
     *
     * @param wordSplitterOption supplies the text and the maximum chunk length
     * @return an immutable list of chunks in original word order; empty when the text contains
     *     no words (empty or whitespace-only text)
     * @throws IllegalArgumentException if the maximum chunk length is less than 1, or smaller
     *     than the shortest token produced by the whitespace split
     */
    @Override
    public List<String> splitText(WordSplitterOption wordSplitterOption) {
        int maxLengthPerChunk = wordSplitterOption.getMaxLengthPerChunk();
        String text = wordSplitterOption.getText();
        if (maxLengthPerChunk < 1) {
            throw new IllegalArgumentException("maxLengthPerChunk should be greater than 0");
        }

        String[] words = text.split("\\s+");
        // String.split drops trailing empty tokens, so whitespace-only text yields a zero-length
        // array; return early instead of indexing words[0].
        if (words.length == 0) {
            return ImmutableList.of();
        }

        // Text that starts with whitespace yields one leading empty token; it takes part in this
        // minimum exactly as before, so the set of inputs that are rejected is unchanged.
        int shortestToken = words[0].length();
        for (String word : words) {
            shortestToken = Math.min(shortestToken, word.length());
        }
        if (maxLengthPerChunk < shortestToken) {
            throw new IllegalArgumentException("maxLengthPerChunk is smaller than the smallest word in the string");
        }

        ImmutableList.Builder<String> chunks = ImmutableList.builder();
        StringBuilder current = new StringBuilder();
        for (String word : words) {
            if (word.isEmpty()) {
                // The leading empty token carries no content and must not create a chunk.
                continue;
            }
            if (current.length() > 0) {
                // Only the separator between two words counts against the limit; no trailing
                // separator is ever stored, so a word that exactly fills the chunk still fits.
                if (current.length() + 1 + word.length() <= maxLengthPerChunk) {
                    current.append(' ').append(word);
                    continue;
                }
                chunks.add(current.toString());
                current.setLength(0);
            }
            // First word of a chunk is always accepted, even if it alone exceeds the limit.
            current.append(word);
        }
        if (current.length() > 0) {
            chunks.add(current.toString());
        }
        return chunks.build();
    }
}
