/*
 * Decompiled with CFR 0.152.
 */
package org.tribuo.data.text.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.config.Configurable;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import org.tribuo.Feature;
import org.tribuo.data.text.TextProcessingException;
import org.tribuo.data.text.TextProcessor;
import org.tribuo.util.tokens.Tokenizer;

/**
 * A {@link TextProcessor} that tokenizes text and emits one {@link Feature}
 * per contiguous window of {@code n} tokens (a token n-gram).
 *
 * <p>Feature names have the form {@code [tag-]n-N=tok1/tok2/.../tokn}: an
 * optional caller supplied tag, the n-gram size, the literal {@code -N=} and
 * the tokens of the window joined with {@code '/'}. Every feature carries the
 * same configured {@code value}.
 *
 * <p>Each thread works on its own clone of the configured tokenizer, obtained
 * lazily through a {@link ThreadLocal}.
 */
public class NgramProcessor
implements TextProcessor {
    private static final Logger logger = Logger.getLogger(NgramProcessor.class.getName());
    @Config(description="n in the n-gram to emit.")
    private int n = 2;
    @Config(description="Value to emit for each n-gram.")
    private double value = 1.0;
    @Config(mandatory=true, description="Tokenizer to use.")
    private Tokenizer tokenizer;
    private ThreadLocal<Tokenizer> tokenizerThreadLocal;

    /**
     * Creates a processor that emits token n-grams of size {@code n}.
     *
     * @param tokenizer the tokenizer used to split the text; it is cloned once per thread.
     * @param n the number of tokens in each emitted n-gram, must be at least 1.
     * @param value the value assigned to every emitted feature.
     * @throws IllegalArgumentException if {@code n} is less than 1.
     */
    public NgramProcessor(Tokenizer tokenizer, int n, double value) {
        NgramProcessor.checkNgramSize(n);
        this.n = n;
        this.value = value;
        this.tokenizer = tokenizer;
        this.tokenizerThreadLocal = this.newTokenizerThreadLocal();
    }

    /**
     * No-argument constructor for the configuration path; the fields keep their
     * defaults until they are populated externally, after which
     * {@link #postConfig()} finishes the initialisation.
     */
    private NgramProcessor() {
    }

    /**
     * Completes initialisation after the configured fields have been populated:
     * validates {@code n} and creates the per-thread tokenizer holder.
     *
     * @throws IllegalArgumentException if the configured {@code n} is less than 1.
     */
    public void postConfig() {
        // The public constructor rejects n < 1; the configuration path must do the
        // same, otherwise an empty window makes innerProcess emit malformed names.
        NgramProcessor.checkNgramSize(this.n);
        this.tokenizerThreadLocal = this.newTokenizerThreadLocal();
    }

    /**
     * Extracts n-gram features from {@code text} without a caller supplied tag.
     *
     * @param text the text to tokenize.
     * @return the n-gram features in token order; empty if the text has fewer than {@code n} tokens.
     * @throws TextProcessingException declared by the interface; not thrown directly by this method.
     */
    @Override
    public List<Feature> process(String text) throws TextProcessingException {
        return this.innerProcess(this.n + "-N=", text);
    }

    /**
     * Extracts n-gram features from {@code text}, prefixing each feature name
     * with {@code tag} followed by {@code '-'}.
     *
     * @param tag the prefix for the feature names; {@code null} or empty means no prefix.
     * @param text the text to tokenize.
     * @return the n-gram features in token order; empty if the text has fewer than {@code n} tokens.
     * @throws TextProcessingException declared by the interface; not thrown directly by this method.
     */
    @Override
    public List<Feature> process(String tag, String text) throws TextProcessingException {
        if (tag == null || tag.isEmpty()) {
            return this.innerProcess(this.n + "-N=", text);
        }
        return this.innerProcess(tag + "-" + this.n + "-N=", text);
    }

    /**
     * Tokenizes {@code text} and builds one feature for every window of
     * {@code n} consecutive tokens.
     *
     * @param tag the complete feature name prefix, including the {@code n-N=} part.
     * @param text the text to tokenize.
     * @return the features in token order; empty if there are fewer than {@code n} tokens.
     */
    private List<Feature> innerProcess(String tag, String text) {
        List<Feature> ret = new ArrayList<>();
        List<String> words = this.tokenizerThreadLocal.get().split(text);
        if (words.size() < this.n) {
            return ret;
        }
        StringBuilder ngram = new StringBuilder();
        // Slide a window [start, end) of exactly n tokens across the token list.
        for (int start = 0, end = this.n; end <= words.size(); ++start, ++end) {
            ngram.setLength(0);
            ngram.append(tag);
            for (int i = start; i < end; ++i) {
                if (i > start) {
                    ngram.append('/');
                }
                ngram.append(words.get(i));
            }
            // Every window is emitted. The name always starts with the tag prefix, so
            // filtering on the first character of the name would accept or reject all
            // n-grams of a document based solely on how the caller spelled the tag.
            ret.add(new Feature(ngram.toString(), this.value));
        }
        return ret;
    }

    /**
     * Returns the provenance describing this processor's configuration.
     *
     * @return a configured object provenance for this instance.
     */
    public ConfiguredObjectProvenance getProvenance() {
        return new ConfiguredObjectProvenanceImpl((Configurable)this, "TextProcessor");
    }

    /**
     * Validates the n-gram size.
     *
     * @param n the candidate n-gram size.
     * @throws IllegalArgumentException if {@code n} is less than 1.
     */
    private static void checkNgramSize(int n) {
        if (n < 1) {
            throw new IllegalArgumentException("n = " + n + ", must be a positive integer.");
        }
    }

    /**
     * Builds the holder that lazily clones the configured tokenizer the first
     * time each thread asks for it.
     *
     * @return a thread local whose initial value is a clone of {@code tokenizer}.
     */
    private ThreadLocal<Tokenizer> newTokenizerThreadLocal() {
        return ThreadLocal.withInitial(() -> {
            try {
                return this.tokenizer.clone();
            }
            catch (CloneNotSupportedException e) {
                throw new IllegalArgumentException("Tokenizer not cloneable", e);
            }
        });
    }
}

