/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.secondstring;

import com.wcohen.secondstring.AbstractStatisticalTokenDistance;
import com.wcohen.secondstring.BagOfTokens;
import com.wcohen.secondstring.PrintfFormat;
import com.wcohen.secondstring.StringWrapper;
import com.wcohen.secondstring.tokens.SimpleTokenizer;
import com.wcohen.secondstring.tokens.Token;
import com.wcohen.secondstring.tokens.Tokenizer;
import java.util.Iterator;

/**
 * Token-based string distance that rewards tokens shared by two strings and
 * penalizes tokens of the first string that are missing from the second.
 *
 * <p>Token weights are assigned in {@link #prepare(String)}: when collection
 * statistics are available ({@code collectionSize > 0}) a token's weight is
 * {@code -log(df / collectionSize)}, where {@code df} is its recorded document
 * frequency (treated as 1 for tokens with no recorded frequency); otherwise
 * every token gets weight 1.0.
 *
 * <p>NOTE(review): the class name suggests a simplified Fellegi-Sunter record
 * linkage score; confirm against the project documentation.
 */
public class TokenFelligiSunter extends AbstractStatisticalTokenDistance {
    /** Mismatch factor used by the no-argument constructor. */
    private static final double DEFAULT_MISMATCH_FACTOR = 0.5;

    /** Splits raw strings into the tokens placed in each bag. */
    private final Tokenizer tokenizer;

    /** Multiplier applied to the weight of a token of {@code s} that is absent from {@code t}. */
    private double mismatchFactor = DEFAULT_MISMATCH_FACTOR;

    /**
     * Creates a distance with an explicit tokenizer and mismatch factor.
     *
     * @param tokenizer tokenizer used by {@link #prepare(String)}
     * @param mismatchFactor multiplier for the penalty of unmatched tokens
     */
    public TokenFelligiSunter(Tokenizer tokenizer, double mismatchFactor) {
        this.tokenizer = tokenizer;
        this.mismatchFactor = mismatchFactor;
    }

    /** Creates a distance using {@code SimpleTokenizer.DEFAULT_TOKENIZER} and a mismatch factor of 0.5. */
    public TokenFelligiSunter() {
        this(SimpleTokenizer.DEFAULT_TOKENIZER, DEFAULT_MISMATCH_FACTOR);
    }

    /**
     * Sets the multiplier applied to the weight of unmatched tokens.
     *
     * @param d the new mismatch factor
     */
    public void setMismatchFactor(double d) {
        this.mismatchFactor = d;
    }

    /**
     * Boxed overload of {@link #setMismatchFactor(double)}.
     *
     * @param d the new mismatch factor; must not be {@code null}
     * @throws NullPointerException if {@code d} is {@code null}
     */
    public void setMismatchFactor(Double d) {
        // Unboxing a null Double would throw a message-less NullPointerException;
        // fail with the same exception type but say which argument was wrong.
        if (d == null) {
            throw new NullPointerException("mismatchFactor must not be null");
        }
        this.mismatchFactor = d.doubleValue();
    }

    /**
     * Scores {@code s} against {@code t}.
     *
     * <p>For every token of {@code s}: if {@code t} also contains it, the
     * token's weight in {@code t} is added; otherwise the token's weight in
     * {@code s}, multiplied by the mismatch factor, is subtracted. Only the
     * tokens of {@code s} are visited, so tokens that occur solely in
     * {@code t} do not affect the result and the score is not symmetric.
     *
     * @param s first string; must be a {@code BagOfTokens}, e.g. one returned by {@link #prepare(String)}
     * @param t second string; must be a {@code BagOfTokens}
     * @return the accumulated score
     * @throws ClassCastException if either argument is not a {@code BagOfTokens}
     */
    public double score(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        double sim = 0.0;
        for (Iterator i = sBag.tokenIterator(); i.hasNext(); ) {
            Token tok = (Token)i.next();
            if (tBag.contains(tok)) {
                sim += tBag.getWeight(tok);
            } else {
                sim -= sBag.getWeight(tok) * this.mismatchFactor;
            }
        }
        return sim;
    }

    /**
     * Tokenizes {@code s} and assigns a weight to each of its tokens.
     *
     * @param s the string to preprocess
     * @return a {@code BagOfTokens} holding the weighted tokens of {@code s}
     */
    public StringWrapper prepare(String s) {
        BagOfTokens bag = new BagOfTokens(s, this.tokenizer.tokenize(s));
        for (Iterator i = bag.tokenIterator(); i.hasNext(); ) {
            Token tok = (Token)i.next();
            if (this.collectionSize > 0) {
                Integer dfInteger = (Integer)this.documentFrequency.get(tok);
                // Tokens with no recorded frequency are treated as occurring once.
                double df = dfInteger == null ? 1.0 : (double)dfInteger.intValue();
                double w = -Math.log(df / (double)this.collectionSize);
                bag.setWeight(tok, w);
            } else {
                // No collection statistics available: weight all tokens equally.
                bag.setWeight(tok, 1.0);
            }
        }
        return bag;
    }

    /**
     * Builds a human-readable explanation of {@link #score(StringWrapper, StringWrapper)}:
     * the tokens common to both strings with their weights in {@code t},
     * followed by the final score.
     *
     * @param s first string; must be a {@code BagOfTokens}
     * @param t second string; must be a {@code BagOfTokens}
     * @return the explanation text
     * @throws ClassCastException if either argument is not a {@code BagOfTokens}
     */
    public String explainScore(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        // The buffer never leaves this method, so the unsynchronized builder suffices.
        StringBuilder buf = new StringBuilder();
        PrintfFormat fmt = new PrintfFormat("%.3f");
        buf.append("Common tokens: ");
        for (Iterator i = sBag.tokenIterator(); i.hasNext(); ) {
            Token tok = (Token)i.next();
            if (tBag.contains(tok)) {
                buf.append(" " + tok.getValue() + ": ");
                buf.append(fmt.sprintf(tBag.getWeight(tok)));
            }
        }
        buf.append("\nscore = " + this.score(s, t));
        return buf.toString();
    }

    /** Returns the fixed label {@code "[TokenFelligiSunter]"}. */
    public String toString() {
        return "[TokenFelligiSunter]";
    }

    /**
     * Command-line entry point; passes a default-configured instance and the
     * arguments to {@code doMain}.
     *
     * @param argv command-line arguments
     */
    public static void main(String[] argv) {
        TokenFelligiSunter.doMain(new TokenFelligiSunter(), argv);
    }
}

