/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.share.weili.ner;

import cc.mallet.types.Token;
import java.util.regex.Pattern;

/**
 * Converts a raw word string into a {@link Token}, optionally rewriting the
 * text first and optionally attaching word-ending features.
 *
 * <p>Three independent switches control the behaviour:
 * <ul>
 *   <li>{@code doDigitCollapses} - replace the word with a placeholder such as
 *       {@code <YEAR>} or {@code <DIGITS>} when it matches one of a fixed,
 *       ordered list of patterns;</li>
 *   <li>{@code doDowncasing} - lower-case the (possibly replaced) word;</li>
 *   <li>{@code doSpelling} - set a feature with value 1.0 on the token for
 *       every entry of {@link #endings} that the final word ends with.</li>
 * </ul>
 *
 * <p>The shared lookup tables are built once, when the class is initialized,
 * and are not modified by this class afterwards.
 */
public class WordTransformation {
    /** Word endings that are tested when spelling features are enabled. */
    static final String[] endings = new String[]{"ing", "ed", "ogy", "s", "ly", "ion", "tion", "ity", "ies"};
    /** {@code endingPatterns[i]} matches any word that ends with {@code endings[i]}. */
    static Pattern[] endingPatterns = new Pattern[endings.length];
    /**
     * Feature names indexed as {@code [k][j][i]}: {@code "W" + ("-" if k == 1) + j + "=<END" + endings[i] + ">"}.
     * Only {@code endingNames[0][0][i]} is read inside this class.
     */
    static final String[][][] endingNames = new String[2][3][endings.length];

    /**
     * Ordered (regex, replacement) pairs used by the digit-collapse step.
     * The first rule whose regex matches the whole word wins, so the order is
     * significant (e.g. "1999" must hit {@code <YEAR>} before {@code <DIGITS>}).
     */
    private static final String[][] COLLAPSE_RULES = new String[][]{
        {"19\\d\\d", "<YEAR>"},
        {"19\\d\\ds", "<YEARDECADE>"},
        {"19\\d\\d-\\d+", "<YEARSPAN>"},
        // Digits, a literal backslash, a slash, one digit (e.g. 1\/2).
        // NOTE(review): presumably targets input where '/' is escaped as "\/" - confirm.
        {"\\d+\\\\/\\d", "<FRACTION>"},
        {"\\d[\\d,\\.]*", "<DIGITS>"},
        // NOTE(review): "\\d--d" looks like a typo for "\\d\\d"; kept verbatim so
        // behaviour is unchanged. Ordinary dates are handled by the next rule.
        {"19\\d\\d-\\d\\d-\\d--d", "<DATELINEDATE>"},
        {"19\\d\\d-\\d\\d-\\d\\d", "<DATELINEDATE>"},
        {".*-led", "<LED>"},
        {".*-sponsored", "<LED>"},
    };
    /** Compiled form of the regex column of {@link #COLLAPSE_RULES}, same order. */
    private static final Pattern[] COLLAPSE_PATTERNS = new Pattern[COLLAPSE_RULES.length];

    static {
        // Build the shared tables exactly once instead of on every construction.
        for (int i = 0; i < endings.length; ++i) {
            endingPatterns[i] = Pattern.compile(".*" + endings[i] + "$");
            for (int j = 0; j < 3; ++j) {
                for (int k = 0; k < 2; ++k) {
                    endingNames[k][j][i] = "W" + (k == 1 ? "-" : "") + j + "=<END" + endings[i] + ">";
                }
            }
        }
        for (int r = 0; r < COLLAPSE_RULES.length; ++r) {
            COLLAPSE_PATTERNS[r] = Pattern.compile(COLLAPSE_RULES[r][0]);
        }
    }

    boolean doSpelling;
    boolean doDigitCollapses;
    boolean doDowncasing;

    /** Creates a transformation that only collapses digits (no spelling features, no downcasing). */
    public WordTransformation() {
        this(false, true, false);
    }

    /**
     * @param doSpelling       whether to add a feature for each matching word ending
     * @param doDigitCollapses whether to replace year/number/date-like words with placeholders
     * @param doDowncasing     whether to lower-case the word before building the token
     */
    public WordTransformation(boolean doSpelling, boolean doDigitCollapses, boolean doDowncasing) {
        this.doSpelling = doSpelling;
        this.doDigitCollapses = doDigitCollapses;
        this.doDowncasing = doDowncasing;
    }

    /**
     * Builds the token for {@code original} according to the configured switches.
     *
     * <p>Steps run in this order: digit collapsing, downcasing, token creation,
     * spelling features. Downcasing therefore also applies to a placeholder
     * produced by the collapse step, and ending features are tested against the
     * final (collapsed and/or lower-cased) text.
     *
     * @param original the raw word text
     * @return a new token whose text is the transformed word
     */
    public Token transformedToken(String original) {
        String word = original;
        if (this.doDigitCollapses) {
            word = collapse(word);
        }
        if (this.doDowncasing) {
            word = word.toLowerCase();
        }
        Token token = new Token(word);
        if (this.doSpelling) {
            for (int j = 0; j < endings.length; ++j) {
                if (endingPatterns[j].matcher(word).matches()) {
                    token.setFeatureValue(endingNames[0][0][j], 1.0);
                }
            }
        }
        return token;
    }

    /** Returns the replacement of the first collapse rule matching the whole word, or the word itself. */
    private static String collapse(String word) {
        for (int r = 0; r < COLLAPSE_PATTERNS.length; ++r) {
            if (COLLAPSE_PATTERNS[r].matcher(word).matches()) {
                return COLLAPSE_RULES[r][1];
            }
        }
        return word;
    }
}

