/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.secondstring;

import com.wcohen.secondstring.AbstractStatisticalTokenDistance;
import com.wcohen.secondstring.BagOfTokens;
import com.wcohen.secondstring.PrintfFormat;
import com.wcohen.secondstring.StringWrapper;
import com.wcohen.secondstring.tokens.Token;
import com.wcohen.secondstring.tokens.Tokenizer;
import java.util.Iterator;

/**
 * TFIDF-weighted token similarity between two strings.
 *
 * <p>{@link #prepare(String)} turns a string into a {@link BagOfTokens} whose
 * token weights are scaled to unit Euclidean length; {@link #score} then sums
 * the products of the weights of the tokens the two bags share, i.e. the dot
 * product of the two weight vectors.
 *
 * <p>The fields {@code tokenizer}, {@code collectionSize} and
 * {@code documentFrequency} are not declared in this class; presumably they
 * are inherited from {@link AbstractStatisticalTokenDistance} and populated
 * by its training step (TODO confirm against the superclass).
 */
public class TFIDF
extends AbstractStatisticalTokenDistance {
    /**
     * Creates a TFIDF distance that passes the given tokenizer to the superclass.
     *
     * @param tokenizer tokenizer handed to the superclass constructor
     */
    public TFIDF(Tokenizer tokenizer) {
        super(tokenizer);
    }

    /** Creates a TFIDF distance using the superclass's no-argument constructor. */
    public TFIDF() {
    }

    /**
     * Sums, over every token of {@code s} that {@code t} also contains, the
     * product of the token's weight in each bag.
     *
     * <p>Both arguments are cast to {@link BagOfTokens}, so they are expected
     * to be values returned by {@link #prepare(String)}; any other
     * {@link StringWrapper} implementation fails the cast.
     *
     * @param s first prepared string
     * @param t second prepared string
     * @return the accumulated weight product; 0.0 when no token is shared
     */
    public double score(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        double sim = 0.0;
        for (Iterator i = sBag.tokenIterator(); i.hasNext(); ) {
            Token tok = (Token)i.next();
            if (tBag.contains(tok)) {
                sim += sBag.getWeight(tok) * tBag.getWeight(tok);
            }
        }
        return sim;
    }

    /**
     * Tokenizes {@code s} and assigns each token a unit-length-normalised weight.
     *
     * <p>When {@code collectionSize} is positive, a token's raw weight is
     * {@code log(w0 + 1) * log(collectionSize / df)}, where {@code w0} is the
     * weight the freshly built bag reports for the token (presumably its
     * occurrence count - TODO confirm in BagOfTokens) and {@code df} is the
     * value stored for the token in {@code documentFrequency}. Otherwise
     * every token gets a raw weight of 1. Raw weights are then divided by the
     * square root of the sum of their squares.
     *
     * @param s the string to prepare
     * @return a {@link BagOfTokens} carrying the normalised weights
     */
    public StringWrapper prepare(String s) {
        BagOfTokens bag = new BagOfTokens(s, this.tokenizer.tokenize(s));
        double sumOfSquares = 0.0;
        for (Iterator i = bag.tokenIterator(); i.hasNext(); ) {
            Token tok = (Token)i.next();
            if (this.collectionSize > 0) {
                Integer dfInteger = (Integer)this.documentFrequency.get(tok);
                // A token with no recorded frequency is treated as df == 1,
                // which gives it the largest possible log(collectionSize/df) factor.
                double df = dfInteger == null ? 1.0 : (double)dfInteger.intValue();
                double w = Math.log(bag.getWeight(tok) + 1.0) * Math.log((double)this.collectionSize / df);
                bag.setWeight(tok, w);
                sumOfSquares += w * w;
            } else {
                bag.setWeight(tok, 1.0);
                sumOfSquares += 1.0;
            }
        }
        double normalizer = Math.sqrt(sumOfSquares);
        // If every raw weight is zero (e.g. each token has df == collectionSize,
        // so log(collectionSize/df) == 0) the normalizer is zero as well.
        // Dividing would turn every weight into NaN (0.0 / 0.0) and poison all
        // later scores, so the all-zero vector is left untouched instead.
        if (normalizer > 0.0) {
            for (Iterator i = bag.tokenIterator(); i.hasNext(); ) {
                Token tok = (Token)i.next();
                bag.setWeight(tok, bag.getWeight(tok) / normalizer);
            }
        }
        return bag;
    }

    /**
     * Builds a human-readable breakdown of {@link #score}: each token shared
     * by the two bags with its weight in {@code s} and in {@code t} (formatted
     * with "%.3f"), followed by the total score on a new line.
     *
     * @param s first prepared string (must be a {@link BagOfTokens})
     * @param t second prepared string (must be a {@link BagOfTokens})
     * @return the explanation text
     */
    public String explainScore(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        StringBuffer buf = new StringBuffer("");
        PrintfFormat fmt = new PrintfFormat("%.3f");
        buf.append("Common tokens: ");
        for (Iterator i = sBag.tokenIterator(); i.hasNext(); ) {
            Token tok = (Token)i.next();
            if (tBag.contains(tok)) {
                buf.append(" " + tok.getValue() + ": ");
                buf.append(fmt.sprintf(sBag.getWeight(tok)));
                buf.append("*");
                buf.append(fmt.sprintf(tBag.getWeight(tok)));
            }
        }
        buf.append("\nscore = " + this.score(s, t));
        return buf.toString();
    }

    /** Returns the fixed label "[TFIDF]". */
    public String toString() {
        return "[TFIDF]";
    }

    /**
     * Command-line entry point: hands a fresh {@code TFIDF} and the arguments
     * to {@code doMain}, which is not declared in this class (presumably
     * inherited - TODO confirm).
     *
     * @param argv command-line arguments, forwarded unchanged
     */
    public static void main(String[] argv) {
        TFIDF.doMain(new TFIDF(), argv);
    }
}

