/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.secondstring.expt;

import com.wcohen.secondstring.expt.Blocker;
import com.wcohen.secondstring.expt.MatchData;
import com.wcohen.secondstring.tokens.SimpleTokenizer;
import com.wcohen.secondstring.tokens.Token;
import com.wcohen.secondstring.tokens.Tokenizer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class TokenBlocker
extends Blocker {
    private static double defaultMaxFraction = 1.0;
    private ArrayList pairList;
    private Tokenizer tokenizer;
    double maxFraction;
    private static final Set STOPWORD_TOKEN_MARKER;
    int numCorrectPairs;

    public TokenBlocker(Tokenizer tokenizer, double maxFraction) {
        this.tokenizer = tokenizer;
        this.maxFraction = maxFraction;
    }

    public TokenBlocker() {
        this(SimpleTokenizer.DEFAULT_TOKENIZER, defaultMaxFraction);
    }

    public void block(MatchData data) {
        String bigSource;
        this.numCorrectPairs = this.countCorrectPairs(data);
        this.pairList = new ArrayList();
        if (!this.clusterMode && data.numSources() != 2) {
            throw new IllegalArgumentException("need exactly two sources out of clusterMode");
        }
        if (this.clusterMode && data.numSources() != 1) {
            throw new IllegalArgumentException("need exactly one source in clusterMode");
        }
        String smallSource = data.getSource(0);
        String string = bigSource = this.clusterMode ? data.getSource(0) : data.getSource(1);
        if (data.numInstances(smallSource) > data.numInstances(bigSource)) {
            String tmp = smallSource;
            smallSource = bigSource;
            bigSource = tmp;
        }
        double maxSetSize = (double)data.numInstances(smallSource) * this.maxFraction;
        TreeMap<Token, Set> index = new TreeMap<Token, Set>();
        for (int i = 0; i < data.numInstances(smallSource); ++i) {
            Token[] tokens = this.tokenizer.tokenize(data.getInstance(smallSource, i).getText().unwrap());
            for (int j = 0; j < tokens.length; ++j) {
                TreeSet<Integer> containers = (TreeSet<Integer>)index.get(tokens[j]);
                if (containers != STOPWORD_TOKEN_MARKER && containers == null) {
                    containers = new TreeSet<Integer>();
                    index.put(tokens[j], containers);
                }
                containers.add(new Integer(i));
                if (!((double)containers.size() > maxSetSize)) continue;
                index.put(tokens[j], STOPWORD_TOKEN_MARKER);
            }
        }
        TreeSet<Integer> pairedUpInstances = new TreeSet<Integer>();
        for (int i = 0; i < data.numInstances(bigSource); ++i) {
            MatchData.Instance bigInst = data.getInstance(bigSource, i);
            pairedUpInstances.clear();
            Token[] tokens = this.tokenizer.tokenize(bigInst.getText().unwrap());
            for (int j = 0; j < tokens.length; ++j) {
                Set containers = (Set)index.get(tokens[j]);
                if (containers == null || containers == STOPWORD_TOKEN_MARKER) continue;
                Iterator k = containers.iterator();
                while (k.hasNext()) {
                    Integer smallIndexInteger = (Integer)k.next();
                    int smallIndex = smallIndexInteger;
                    if (pairedUpInstances.contains(smallIndexInteger) || smallSource == bigSource && smallIndex <= i) continue;
                    MatchData.Instance smallInst = data.getInstance(smallSource, smallIndex);
                    this.pairList.add(new Blocker.Pair(bigInst, smallInst));
                    pairedUpInstances.add(smallIndexInteger);
                }
            }
        }
    }

    public int size() {
        return this.pairList.size();
    }

    public Blocker.Pair getPair(int i) {
        return (Blocker.Pair)this.pairList.get(i);
    }

    public String toString() {
        return "[TokenBlocker:clusterMode=" + this.clusterMode + ",maxFraction=" + this.maxFraction + "]";
    }

    public int numCorrectPairs() {
        return this.numCorrectPairs;
    }

    private void showIndex(Map index) {
        Iterator i = index.keySet().iterator();
        while (i.hasNext()) {
            Token tok = (Token)i.next();
            System.out.print(tok.toString());
            Set containers = (Set)index.get(tok);
            Iterator j = containers.iterator();
            while (j.hasNext()) {
                Integer k = (Integer)j.next();
                System.out.print(" " + k);
            }
            System.out.println();
        }
    }

    static {
        try {
            String s = System.getProperty("blockerMaxFraction");
            if (s != null) {
                defaultMaxFraction = Double.parseDouble(s);
            }
        }
        catch (NumberFormatException numberFormatException) {
            // empty catch block
        }
        STOPWORD_TOKEN_MARKER = new HashSet();
    }
}

