/*
 * Decompiled with CFR 0.152.
 */
package edu.umass.cs.dex.ie;

import edu.umass.cs.dex.types.People;
import edu.umass.cs.dex.types.PeopleInMessage;
import edu.umass.cs.dex.types.PersonInMessage;
import edu.umass.cs.mallet.base.util.MalletLogger;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class EmailPeopleExtractor {
    /** Logger for this class, obtained from {@code MalletLogger} under the class name. */
    private static Logger logger = MalletLogger.getLogger(EmailPeopleExtractor.class.getName());
    /** People accumulated from every processed message; also returned by {@link #getPeople()}. */
    public People people = new People();
    /** Words excluded from the context model; looked up with lower-cased tokens in {@link #getWordsFromLine}. */
    public HashSet stopWords;

    /**
     * Creates an extractor and immediately processes everything under {@code dir}.
     *
     * <p>If the class logger is enabled at {@code FINE}, the accumulated people are
     * additionally written to the file {@code disambig_log.txt} through
     * {@code People.writeToFile}.
     *
     * @param dir       directory handed to {@link #processDir(File)}
     * @param stopWords set stored in {@link #stopWords}
     */
    public EmailPeopleExtractor(File dir, HashSet stopWords) {
        this.stopWords = stopWords;
        processDir(dir);
        if (!logger.isLoggable(Level.FINE)) {
            return;
        }
        File disambiguationLog = new File("disambig_log.txt");
        people.writeToFile(disambiguationLog);
    }

    /**
     * Gives access to the people gathered so far.
     *
     * @return the {@link #people} field itself (no copy is made)
     */
    public People getPeople() {
        return people;
    }

    /** An e-mail-like token with one non-address character on each side of it. */
    private static final Pattern EMBEDDED_ADDRESS = Pattern.compile("[^\\w\\.\\-][\\w\\.\\-]+\\@[\\w\\.\\-]+[^\\w\\.\\-]");
    /** Any single character outside {@code \w}. */
    private static final Pattern NON_WORD_CHAR = Pattern.compile("\\W");
    /** Captures, in group 1, the text between leading and trailing spaces. */
    private static final Pattern SURROUNDING_SPACES = Pattern.compile("^ *([^ ](.*[^ ])*) *$");
    /** One or more consecutive spaces. */
    private static final Pattern SPACE_RUN = Pattern.compile(" +");
    /** The filler words "e mail" / "mailto" when preceded by a space. */
    private static final Pattern MAIL_KEYWORD = Pattern.compile(" (e mail|mailto)");

    /**
     * Normalises the free-text part of a correspondent entry into a plain name.
     *
     * <p>Steps, in order: e-mail-like tokens that are surrounded by non-address
     * characters are blanked out; every non-word character becomes a space;
     * leading/trailing spaces are dropped; runs of spaces collapse to one; and
     * the fragments {@code " e mail"} and {@code " mailto"} are removed.
     *
     * <p>Note: with the default regex flags {@code \w} covers only ASCII letters,
     * digits and underscore, so any other character acts as a separator.
     *
     * @param name raw name text; must not be {@code null}
     * @return the cleaned name; {@code null} when the text is non-empty but holds
     *         nothing except separators; the empty string when {@code name} is empty
     */
    public String cleanName(String name) {
        name = EMBEDDED_ADDRESS.matcher(name).replaceAll(" ");
        name = NON_WORD_CHAR.matcher(name).replaceAll(" ");

        Matcher trimmed = SURROUNDING_SPACES.matcher(name);
        if (trimmed.matches()) {
            name = trimmed.group(1);
        }

        // Splitting a string made only of spaces yields an empty array.
        String[] words = SPACE_RUN.split(name);
        if (words.length == 0) {
            return null;
        }

        StringBuilder joined = new StringBuilder(words[0]);
        for (int i = 1; i < words.length; ++i) {
            joined.append(' ').append(words[i]);
        }
        return MAIL_KEYWORD.matcher(joined.toString()).replaceAll("");
    }

    /** Characters that commonly wrap or separate addresses: {@code < > " ' ,}. */
    private static final Pattern ADDRESS_PUNCTUATION = Pattern.compile("[\\<\\>\\\"\\'\\,]");
    /**
     * Optional name text ending in a space (group 1), login (group 2) and domain (group 3).
     * The name group is optional-once rather than repeated: a repeated {@code (.* )*}
     * accepts the same strings and captures the same text, but is prone to
     * catastrophic backtracking on long inputs that contain no address.
     */
    private static final Pattern NAME_LOGIN_DOMAIN = Pattern.compile("^(.* )?([\\w\\.\\-]+)\\+*\\@([\\w\\.\\-]+) *$");

    /**
     * Parses one correspondent entry and, if it ends in an e-mail address, adds
     * the resulting person to {@code peopleInMessage}.
     *
     * <p>The entry is lower-cased (default locale) and the characters
     * {@code < > " ' ,} are replaced by spaces before matching. Text preceding the
     * address, when present, is passed through {@link #cleanName(String)}.
     * Entries that do not match are reported at {@code FINE} level and skipped.
     *
     * @param person          raw entry text, e.g. one comma-separated piece of a header line
     * @param peopleInMessage receiver of the parsed {@code PersonInMessage}
     * @param weight          weight passed to the {@code PersonInMessage} constructor
     */
    public void getLoginAndName(String person, PeopleInMessage peopleInMessage, double weight) {
        person = ADDRESS_PUNCTUATION.matcher(person.toLowerCase()).replaceAll(" ");

        Matcher address = NAME_LOGIN_DOMAIN.matcher(person);
        if (!address.matches()) {
            logger.fine("No match in person " + person + ".");
            return;
        }

        // Group 1 is null when nothing precedes the address.
        String name = address.group(1);
        String login = address.group(2);
        String domain = address.group(3);
        if (name != null) {
            name = this.cleanName(name);
        }
        peopleInMessage.addPerson(new PersonInMessage(name, login, domain, weight));
    }

    /**
     * Maps a lower-case correspondent header name to its weight.
     *
     * @param prefix header name; must not be {@code null}
     * @return 2.0 for {@code "from"}, 1.0 for {@code "to"}, 0.5 for {@code "cc"}
     *         and {@code "bcc"}, 0.0 for anything else
     */
    public double getWeight(String prefix) {
        if (prefix.equals("from")) {
            return 2.0;
        }
        if (prefix.equals("to")) {
            return 1.0;
        }
        boolean carbonCopy = prefix.equals("cc") || prefix.equals("bcc");
        return carbonCopy ? 0.5 : 0.0;
    }

    /**
     * Feeds the words of a body line into the message's context model.
     *
     * <p>A line with no space character that is longer than 40 characters is
     * ignored entirely. Otherwise the line is split on runs of non-word
     * characters; each piece is lower-cased and added unless it is empty or
     * contained in {@link #stopWords}.
     *
     * @param line            text line to tokenise
     * @param peopleInMessage receiver of the words via {@code addWordToContextModel}
     */
    public void getWordsFromLine(String line, PeopleInMessage peopleInMessage) {
        boolean hasSpace = line.indexOf(' ') >= 0;
        if (!hasSpace && line.length() > 40) {
            return;
        }
        for (String token : line.split("\\W+")) {
            String word = token.toLowerCase();
            boolean skipped = word.length() == 0 || stopWords.contains(word);
            if (!skipped) {
                peopleInMessage.addWordToContextModel(word);
            }
        }
    }

    /**
     * Classifies one line of a message and routes it accordingly.
     *
     * <ul>
     *   <li>Correspondent lines (from/to/cc/bcc, or a continuation of one): the
     *       prefix is stripped, commas inside quotes are neutralised, and each
     *       comma-separated piece goes to {@link #getLoginAndName} with the weight
     *       of the current prefix.</li>
     *   <li>Other header lines (or their continuations) are skipped.</li>
     *   <li>Everything else is treated as body text and passed to
     *       {@link #getWordsFromLine}.</li>
     * </ul>
     * Whenever the processor's nested-message level ends up higher than it was on
     * entry, {@code peopleInMessage.reduceWeights()} is called first.
     *
     * @param line            the line to process
     * @param peopleInMessage per-message accumulator
     * @param lineProcessor   stateful classifier carried across the lines of one message
     */
    public void processLine(String line, PeopleInMessage peopleInMessage, LineProcessor lineProcessor) {
        double levelOnEntry = lineProcessor.getNestedMessageLevel();

        // The classifier calls mutate lineProcessor, so their order must not change.
        boolean correspondent = lineProcessor.nextCorrespondentLine(line)
                || lineProcessor.isCorrespondentLine(line);

        if (!correspondent) {
            boolean otherHeader = lineProcessor.nextHeaderLine(line)
                    || lineProcessor.isHeaderLine(line);
            if (otherHeader) {
                return;
            }
            double levelNow = lineProcessor.updateNestedMessageLevel(line);
            if (levelNow > levelOnEntry) {
                peopleInMessage.reduceWeights();
            }
            getWordsFromLine(line, peopleInMessage);
            return;
        }

        if (lineProcessor.getNestedMessageLevel() > levelOnEntry) {
            peopleInMessage.reduceWeights();
        }
        String prefix = lineProcessor.getPrefix();
        String addresses = lineProcessor.removeCommasInBrackets(lineProcessor.getLineWithoutPrefix(line));
        for (String entry : addresses.split("\\,")) {
            getLoginAndName(entry, peopleInMessage, getWeight(prefix));
        }
    }

    /**
     * Reads one message file and merges the people found in it into {@link #people}.
     *
     * <p>Lines are passed to {@link #processLine} until the end of the file or
     * until {@code LineProcessor.isLastLine} accepts a line. Afterwards the
     * message's simple people are obtained, their e-mail links are built, and
     * they are added to {@link #people}. An empty file is processed like a
     * message with no lines.
     *
     * <p>An {@link IOException} while opening or reading is reported on
     * {@code System.err}; nothing from that file is added in that case.
     *
     * @param file the message file to read
     */
    public void processFile(File file) {
        try {
            PeopleInMessage peopleInMessage = new PeopleInMessage();
            LineProcessor lineProcessor = new LineProcessor();
            BufferedReader in = new BufferedReader(new FileReader(file));
            try {
                // readLine() returns null at end of stream, so an empty file skips the loop.
                String line = in.readLine();
                while (line != null && !lineProcessor.isLastLine(line)) {
                    this.processLine(line, peopleInMessage, lineProcessor);
                    line = in.readLine();
                }
            } finally {
                // Release the file handle even if processing a line throws.
                in.close();
            }
            People simplePeople = peopleInMessage.getSimplePeople();
            simplePeople.buildEmailLinks();
            this.people.addAll(simplePeople);
        }
        catch (IOException e) {
            System.err.print("Cannot open file ");
            System.err.println(file.getName());
        }
    }

    /**
     * Recursively processes every regular file beneath {@code dir}.
     *
     * <p>Regular files go to {@link #processFile(File)}; sub-directories are
     * descended into. If the entries of {@code dir} cannot be listed
     * ({@code File.listFiles()} returns {@code null}, e.g. because the path is
     * not a directory or an I/O error occurred) a warning is logged and the
     * directory is skipped.
     *
     * @param dir directory to walk; must not be {@code null}
     */
    public void processDir(File dir) {
        File[] files = dir.listFiles();
        if (files == null) {
            logger.warning("Cannot list directory " + dir + ".");
            return;
        }
        for (int i = 0; i < files.length; ++i) {
            if (files[i].isFile()) {
                this.processFile(files[i]);
            }
            if (!files[i].isDirectory()) continue;
            this.processDir(files[i]);
        }
    }

    /**
     * Stateful, line-by-line classifier for the text of one e-mail message.
     *
     * <p>It recognises correspondent headers (from/to/cc/bcc), other headers,
     * continuation lines (lines starting with whitespace) and quote markers
     * ({@code >}), and remembers between calls which kind of header is currently
     * open, the last header name seen, and the highest nested-message level.
     * The regular expressions are compiled once per instance.
     */
    public class LineProcessor {
        /** True while a non-Subject header recognised by {@link #isHeaderLine} may still be continued. */
        public boolean headerLine = false;
        /** True while a correspondent header recognised by {@link #isCorrespondentLine} may still be continued. */
        public boolean correspondentLine = false;
        /** Highest nesting level seen so far; starts at 0.5 and only ever grows. */
        public double nestedMessageLevel = 0.5;
        /** Lower-cased name of the most recently matched header. */
        public String prefix = "";

        private final Pattern emptyLinePattern = Pattern.compile("^\\s*$");
        private final Pattern lastLinePattern = Pattern.compile("^Content-Type\\: .*\\/html");
        private final Pattern headerPattern = Pattern.compile("^((\\> )*|(\\>+ ))([\\w\\-]+)\\: ");
        private final Pattern correspondentPattern =
                Pattern.compile("^((\\> )*|(\\>+ ))(from|to|cc|bcc)\\: ", Pattern.CASE_INSENSITIVE);
        private final Pattern leadingWhitespacePattern = Pattern.compile("^\\s");
        private final Pattern quotePattern = Pattern.compile("\\\"");
        private final Pattern quoteMarkersPattern = Pattern.compile("^([\\> ]+)[^\\> ]");

        /**
         * @param line line to test
         * @return true if the line is empty or consists of whitespace only
         */
        public boolean isEmptyLine(String line) {
            return emptyLinePattern.matcher(line).matches();
        }

        /**
         * @param line line to test
         * @return true if the line starts with a {@code Content-Type: } header whose value contains {@code /html}
         */
        public boolean isLastLine(String line) {
            return lastLinePattern.matcher(line).lookingAt();
        }

        /**
         * Tests whether the line starts a header ({@code Name: }, optionally behind quote markers).
         *
         * <p>On a match {@link #prefix} is set to the lower-cased header name.
         * {@code Subject} is deliberately reported as not-a-header and leaves
         * {@link #headerLine} untouched; any other header sets it to true.
         *
         * @param line line to test
         * @return true for any header other than Subject
         */
        public boolean isHeaderLine(String line) {
            Matcher header = headerPattern.matcher(line);
            if (!header.lookingAt()) {
                return false;
            }
            prefix = header.group(4).toLowerCase();
            if (prefix.equals("subject")) {
                return false;
            }
            headerLine = true;
            return true;
        }

        /**
         * Tests whether the line starts a from/to/cc/bcc header (case-insensitive,
         * optionally behind quote markers).
         *
         * <p>On a match {@link #correspondentLine} becomes true, {@link #prefix}
         * is set to the lower-cased header name, and a {@code from} header doubles
         * {@link #nestedMessageLevel}.
         *
         * @param line line to test
         * @return true if the line is a correspondent header
         */
        public boolean isCorrespondentLine(String line) {
            Matcher header = correspondentPattern.matcher(line);
            if (!header.lookingAt()) {
                return false;
            }
            correspondentLine = true;
            prefix = header.group(4).toLowerCase();
            if (prefix.equals("from")) {
                nestedMessageLevel = 2.0 * nestedMessageLevel;
            }
            return true;
        }

        /**
         * Tests whether the line continues an open non-correspondent header.
         * A line that does not start with whitespace closes the open header.
         *
         * @param line line to test
         * @return the current {@link #headerLine} flag if the line starts with whitespace, false otherwise
         */
        public boolean nextHeaderLine(String line) {
            if (leadingWhitespacePattern.matcher(line).lookingAt()) {
                return headerLine;
            }
            headerLine = false;
            return false;
        }

        /**
         * Tests whether the line continues an open correspondent header.
         * A line that does not start with whitespace closes the open header.
         *
         * @param line line to test
         * @return the current {@link #correspondentLine} flag if the line starts with whitespace, false otherwise
         */
        public boolean nextCorrespondentLine(String line) {
            if (leadingWhitespacePattern.matcher(line).lookingAt()) {
                return correspondentLine;
            }
            correspondentLine = false;
            return false;
        }

        /** @return the current {@link #headerLine} flag */
        public boolean wasHeaderLine() {
            return headerLine;
        }

        /** @return the current {@link #correspondentLine} flag */
        public boolean wasCorrespondentLine() {
            return correspondentLine;
        }

        /**
         * Strips a leading from/to/cc/bcc header (including quote markers and
         * the trailing {@code ": "}) from the line.
         *
         * @param line line to strip
         * @return the remainder after the header, or the line unchanged if it has no such header
         */
        public String getLineWithoutPrefix(String line) {
            Matcher header = correspondentPattern.matcher(line);
            // The pattern is anchored at the start, so dropping the matched prefix removes its only occurrence.
            return header.lookingAt() ? line.substring(header.end()) : line;
        }

        /** @return the lower-cased name of the most recently matched header */
        public String getPrefix() {
            return prefix;
        }

        /** @return the highest nesting level seen so far */
        public double getNestedMessageLevel() {
            return nestedMessageLevel;
        }

        /**
         * Replaces commas inside double-quoted sections with spaces so that a
         * later split on commas does not cut a quoted name in two.
         *
         * <p>Despite the method name, the delimiters handled are double quotes.
         * When the line contains at least one quote, the quotes themselves are
         * dropped and every piece between them is re-joined with a preceding
         * space (so the result also starts with a space).
         *
         * @param line line to rewrite
         * @return the rewritten line, or the line unchanged if it contains no double quote
         */
        public String removeCommasInBrackets(String line) {
            if (line.indexOf('"') < 0) {
                return line;
            }
            String[] chunks = quotePattern.split(line);
            StringBuilder rebuilt = new StringBuilder();
            for (int i = 0; i < chunks.length; ++i) {
                // Odd-numbered chunks are the ones that sat between a pair of quotes.
                String chunk = (i % 2 == 1) ? chunks[i].replace(',', ' ') : chunks[i];
                rebuilt.append(' ').append(chunk);
            }
            return rebuilt.toString();
        }

        /**
         * Raises {@link #nestedMessageLevel} according to the quote markers at
         * the start of the line.
         *
         * <p>For a line that begins with a run of {@code >} and spaces followed
         * by some other character, the candidate level is 2 to the power of the
         * number of {@code >} in that run; the stored level is replaced only if
         * the candidate is larger. The candidate is computed as a
         * {@code double}, so long runs of {@code >} cannot overflow.
         *
         * @param line line to inspect
         * @return the (possibly raised) nested-message level
         */
        public double updateNestedMessageLevel(String line) {
            Matcher markers = quoteMarkersPattern.matcher(line);
            if (markers.lookingAt()) {
                String run = markers.group(1);
                double level = 1.0;
                for (int i = 0; i < run.length(); ++i) {
                    if (run.charAt(i) == '>') {
                        level *= 2.0;
                    }
                }
                if (nestedMessageLevel < level) {
                    nestedMessageLevel = level;
                }
            }
            return nestedMessageLevel;
        }
    }
}

