/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.secondstring.tokens;

import com.wcohen.secondstring.tokens.SimpleTokenizer;
import com.wcohen.secondstring.tokens.Token;
import com.wcohen.secondstring.tokens.Tokenizer;
import java.util.ArrayList;

/**
 * Tokenizer that wraps another {@link Tokenizer} and expands each of its
 * tokens into character n-grams.
 *
 * <p>Each token produced by the inner tokenizer is first surrounded with the
 * boundary markers {@code ^} (start) and {@code $} (end). Every substring of
 * that marked string whose length lies between {@code minNGramSize} and
 * {@code maxNGramSize} (inclusive) is then emitted as a token, including the
 * n-grams that end on the {@code $} marker.
 */
public class NGramTokenizer
implements Tokenizer {
    /** Smallest n-gram length to emit (inclusive). */
    private final int minNGramSize;
    /** Largest n-gram length to emit (inclusive). */
    private final int maxNGramSize;
    /** When true, the marker-wrapped form of each inner token is emitted too. */
    private final boolean keepOldTokens;
    /** Tokenizer that splits the input and interns every emitted token. */
    private final Tokenizer innerTokenizer;
    /** N-grams of length 3 to 5 over {@code SimpleTokenizer.DEFAULT_TOKENIZER}, keeping the wrapped tokens. */
    public static NGramTokenizer DEFAULT_TOKENIZER = new NGramTokenizer(3, 5, true, SimpleTokenizer.DEFAULT_TOKENIZER);

    /**
     * Creates an n-gram tokenizer.
     *
     * @param minNGramSize   smallest n-gram length to emit (inclusive)
     * @param maxNGramSize   largest n-gram length to emit (inclusive)
     * @param keepOldTokens  if true, each inner token (wrapped in {@code ^...$})
     *                       is emitted before its n-grams
     * @param innerTokenizer tokenizer used to split the input and to intern tokens
     */
    public NGramTokenizer(int minNGramSize, int maxNGramSize, boolean keepOldTokens, Tokenizer innerTokenizer) {
        this.minNGramSize = minNGramSize;
        this.maxNGramSize = maxNGramSize;
        this.keepOldTokens = keepOldTokens;
        this.innerTokenizer = innerTokenizer;
    }

    /**
     * Splits {@code input} with the inner tokenizer and returns the n-grams of
     * every resulting token.
     *
     * @param input the string to tokenize
     * @return the tokens, grouped per inner token in input order; within a
     *         group the optional wrapped token comes first, followed by the
     *         n-grams ordered by start offset and then by length
     */
    public Token[] tokenize(String input) {
        Token[] initialTokens = this.innerTokenizer.tokenize(input);
        ArrayList<Token> tokens = new ArrayList<Token>();
        for (int i = 0; i < initialTokens.length; ++i) {
            Token tok = initialTokens[i];
            // Mark both token boundaries so that prefix and suffix n-grams
            // are distinguishable from n-grams in the middle of a token.
            String str = "^" + tok.getValue() + "$";
            int strLength = str.length();
            if (this.keepOldTokens) {
                tokens.add(this.intern(str));
            }
            for (int lo = 0; lo < strLength; ++lo) {
                for (int len = this.minNGramSize; len <= this.maxNGramSize; ++len) {
                    // substring's end index is exclusive, so an n-gram that
                    // ends exactly at strLength is valid and must be kept;
                    // otherwise the "$" marker would never appear in any n-gram.
                    if (lo + len > strLength) {
                        // Longer n-grams from this offset cannot fit either.
                        break;
                    }
                    tokens.add(this.innerTokenizer.intern(str.substring(lo, lo + len)));
                }
            }
        }
        return tokens.toArray(new Token[tokens.size()]);
    }

    /**
     * Interns {@code s} by delegating to the inner tokenizer.
     *
     * @param s the token text
     * @return the token the inner tokenizer returns for {@code s}
     */
    public Token intern(String s) {
        return this.innerTokenizer.intern(s);
    }

    /**
     * Command-line demo: tokenizes each argument with
     * {@link #DEFAULT_TOKENIZER} and prints the resulting tokens.
     *
     * @param argv strings to tokenize
     */
    public static void main(String[] argv) {
        NGramTokenizer tokenizer = DEFAULT_TOKENIZER;
        // Running token counter across all arguments.
        int n = 0;
        for (int i = 0; i < argv.length; ++i) {
            System.out.println("argument " + i + ": '" + argv[i] + "'");
            Token[] tokens = tokenizer.tokenize(argv[i]);
            for (int j = 0; j < tokens.length; ++j) {
                System.out.println("token " + ++n + ":" + " id=" + tokens[j].getIndex() + " value: '" + tokens[j].getValue() + "'");
            }
        }
    }
}

