/*
 * Decompiled with CFR 0.152.
 */
package edu.umass.cs.dex.web;

import com.google.soap.search.GoogleSearch;
import com.google.soap.search.GoogleSearchFault;
import com.google.soap.search.GoogleSearchResult;
import com.google.soap.search.GoogleSearchResultElement;
import edu.umass.cs.dex.types.People;
import edu.umass.cs.dex.types.Person;
import edu.umass.cs.dex.web.GoogleExceptionConversionUtility;
import edu.umass.cs.dex.web.GoogleQueryLimitExceededException;
import edu.umass.cs.dex.web.GoogleRuntimeException;
import edu.umass.cs.dex.web.LicenseKeyManager;
import edu.umass.cs.dex.web.TimedSocket;
import edu.umass.cs.dex.web.WebPage;
import edu.umass.cs.mallet.base.util.MalletLogger;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.Socket;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Runs a Google SOAP search for a {@link Person}, downloads the matching pages
 * (plus pages they link to under the same base URL) into numbered
 * {@code <outDirName>/<n>.html} files, and registers them on the person.
 *
 * <p>Collection-typed fields and parameters are deliberately left as raw types so
 * that existing callers in the package keep compiling unchanged.
 *
 * <p>NOTE(review): instances hold mutable per-crawl state ({@link #urls}) and are
 * presumably used by one thread each (see {@code threadName}) -- confirm with callers.
 */
public class Google {
    private static final Logger logger = MalletLogger.getLogger(Google.class.getName());

    /** URL suffixes that are never downloaded. */
    private static final Pattern BINARY_EXTENSION =
            Pattern.compile("\\.(pdf|ps|doc|ppt|gz|zip|avi|gif|jpg|jpeg|mpg|mpeg|cfm)$", Pattern.CASE_INSENSITIVE);
    /** Matches a {@code site:} restriction inside a query string. */
    private static final Pattern SITE_RESTRICTION = Pattern.compile(" site\\:.+");
    /** A path segment that looks like {@code name.ext}. */
    private static final Pattern FILE_SEGMENT = Pattern.compile("[^\\/]+\\.[^\\/]+");
    /** A double-quoted {@code href} attribute; group 1 is the link target. */
    private static final Pattern HREF = Pattern.compile("href\\=\\\"([^\\\"]+)\\\"");
    /** Any character that is not allowed in a relative link we are willing to follow. */
    private static final Pattern UNSAFE_LINK_CHAR = Pattern.compile("[^\\w\\-\\/\\.]");
    /** Separator used to tokenize a line into words. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");
    /** Extensions accepted by {@link #htmlFile(String)}. */
    private static final String[] HTML_EXTENSIONS = {"htm", "html", "shtml", "php", "cgi", "txt"};

    private static final int DEFAULT_HTTP_PORT = 80;
    /** Third argument of {@code TimedSocket.getSocket}; presumably milliseconds -- TODO confirm. */
    private static final int CONNECT_TIMEOUT = 4000;
    /** Socket read timeout in milliseconds ({@link Socket#setSoTimeout(int)}). */
    private static final int READ_TIMEOUT_MILLIS = 7000;
    /** Maximum number of response lines buffered per HTTP request. */
    private static final int MAX_RESPONSE_LINES = 500;
    /** Reading a page stops when this line count is reached. */
    private static final int MAX_LINES_PER_PAGE = 5000;
    /** A page set is kept only if its cosine with the context model exceeds this value. */
    private static final double MIN_CONTEXT_COSINE = 0.008;

    /** Upper bound on the number of entries in {@link #urls}. */
    private final int MAX_URL_COUNTER;
    private LicenseKeyManager.License licenseKey;
    String threadName;
    String outDirName;
    Person person;
    HashSet stopWords;
    GoogleSearch search;
    /** Maps URL (String) to Boolean: TRUE once a fetch has been attempted, FALSE while pending. */
    HashMap urls;
    int currentKeyIndex;
    People people;
    File peopleFile;

    /**
     * Creates a crawler bound to one person and one output directory and installs
     * the current license key on a fresh {@link GoogleSearch}.
     *
     * @param threadName label prefixed to log messages
     * @param outDirName directory that receives the downloaded pages
     * @param person     person whose pages are collected
     * @param stopWords  lower-case words dropped by {@link #getWordsFromLine(String)}
     * @param maxUrls    maximum number of URLs remembered for crawling
     * @param people     stored as-is; not used by this class
     * @param peopleFile stored as-is; not used by this class
     */
    public Google(String threadName, String outDirName, Person person, HashSet stopWords, int maxUrls, People people, File peopleFile) {
        this.MAX_URL_COUNTER = maxUrls;
        this.threadName = threadName;
        this.outDirName = outDirName;
        this.person = person;
        this.stopWords = stopWords;
        this.people = people;
        this.peopleFile = peopleFile;
        this.search = new GoogleSearch();
        this.urls = new HashMap(100);
        this.licenseKey = LicenseKeyManager.getInstance().getLicenseKey();
        this.search.setKey(this.licenseKey.getKey());
    }

    /**
     * Tells whether the URL ends with one of the skipped (binary) extensions,
     * ignoring case.
     *
     * @param url URL or link text to test
     * @return {@code true} if the URL must not be downloaded
     */
    public static boolean binaryFile(String url) {
        return BINARY_EXTENSION.matcher(url).find();
    }

    /**
     * Tells whether the URL may point to an HTML/text page. The text after the last
     * dot is treated as an extension unless it contains a slash; URLs without a
     * usable extension are accepted.
     *
     * @param url URL or link text to test
     * @return {@code false} only when an extension is present and is not one of
     *         htm, html, shtml, php, cgi, txt (case-insensitive)
     */
    public static boolean htmlFile(String url) {
        String[] parts = url.split("\\.");
        if (parts.length <= 1) {
            return true;
        }
        String extension = parts[parts.length - 1];
        if (extension.indexOf('/') != -1) {
            // The last dot belongs to a directory or host name, not to a file extension.
            return true;
        }
        for (int i = 0; i < HTML_EXTENSIONS.length; ++i) {
            if (extension.equalsIgnoreCase(HTML_EXTENSIONS[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the query contains a {@code " site:..."} restriction.
     *
     * @param query search query
     * @return {@code true} if a site restriction is present
     */
    public static boolean isDomainInQuery(String query) {
        return SITE_RESTRICTION.matcher(query).find();
    }

    /**
     * Returns the directory part of a URL, always ending in {@code '/'}.
     *
     * <p>A trailing {@code name.ext} path segment is stripped; anything else is
     * treated as a directory. The host part is never mistaken for a file name, so
     * {@code http://www.example.com} yields {@code http://www.example.com/}
     * (previously it yielded {@code http://}, which made every absolute http link
     * look like it belonged to the same site).
     *
     * @param url page URL
     * @return base URL used to resolve and filter links found on the page
     */
    public static String getBase(String url) {
        if (url.endsWith("/")) {
            return url;
        }
        int schemeEnd = url.indexOf("://");
        int pathStart = url.indexOf('/', schemeEnd < 0 ? 0 : schemeEnd + 3);
        if (pathStart < 0) {
            // No path at all: the whole URL is the host.
            return url + "/";
        }
        int lastSlash = url.lastIndexOf('/');
        String lastSegment = url.substring(lastSlash + 1);
        if (FILE_SEGMENT.matcher(lastSegment).matches()) {
            return url.substring(0, lastSlash + 1);
        }
        return url + "/";
    }

    /**
     * Builds the output file name for page number {@code num}.
     *
     * @param num page number
     * @return {@code outDirName + File.separator + num + ".html"}
     */
    public String getFileName(int num) {
        return this.outDirName + File.separator + num + ".html";
    }

    /**
     * Collects crawlable links from one line of HTML into {@link #urls} (marked as
     * not yet fetched). Only double-quoted {@code href="..."} attributes are seen.
     *
     * <p>A link is skipped if it contains {@code '#'}, looks binary, or is not an
     * HTML-like file. Links that do not start with {@code base} are accepted only
     * when they are plain relative paths (no colon, no leading {@code '/'} or
     * {@code ".."}, only word characters, {@code - / .}); those are resolved
     * against {@code base}.
     *
     * @param line one line of page content
     * @param base base URL as returned by {@link #getBase(String)}
     */
    public void extractURLs(String line, String base) {
        Matcher hrefs = HREF.matcher(line);
        while (hrefs.find()) {
            String url = hrefs.group(1);
            if (url.indexOf('#') >= 0 || Google.binaryFile(url)) {
                continue;
            }
            if (!Google.htmlFile(url)) {
                logger.fine("not html: " + url);
                continue;
            }
            if (!url.startsWith(base)) {
                boolean absoluteOrOutside = url.indexOf(':') >= 0 || url.startsWith("/") || url.startsWith("..");
                boolean currentDirOnly = url.equals(".") || url.equals("./");
                if (absoluteOrOutside || UNSAFE_LINK_CHAR.matcher(url).find() || currentDirOnly) {
                    continue;
                }
                if (url.startsWith("./")) {
                    url = url.substring(2);
                }
                url = base + url;
            }
            if (this.urls.get(url) != null || this.urls.size() >= this.MAX_URL_COUNTER) {
                continue;
            }
            this.urls.put(url, Boolean.FALSE);
            logger.fine(this.threadName + ") Found URL: " + url + " (false)");
        }
    }

    /**
     * Splits a line on runs of non-word characters, lower-cases the tokens and
     * drops empty tokens and stop words.
     *
     * @param line text to tokenize
     * @return list of {@code String} words, in order of appearance
     */
    public ArrayList getWordsFromLine(String line) {
        ArrayList<String> kept = new ArrayList<String>();
        String[] tokens = NON_WORD.split(line);
        for (int i = 0; i < tokens.length; ++i) {
            String word = tokens[i].toLowerCase();
            if (word.length() == 0 || this.stopWords.contains(word)) {
                continue;
            }
            kept.add(word);
        }
        return kept;
    }

    /**
     * Fills the person's context model from the words of every context page file,
     * unless the model already has content. Files that cannot be opened or read
     * are logged and skipped.
     */
    public void buildContextModelForPerson() {
        if (this.person.contextModel.numLocations() > 0) {
            return;
        }
        if (this.person.contextPages.size() == 0) {
            StringWriter sw = new StringWriter();
            PrintWriter pw = new PrintWriter(sw, true);
            pw.print("Cannot build context model for ");
            this.person.printPersonalInfo(pw);
            logger.info(sw.toString());
        }
        for (int i = 0; i < this.person.contextPages.size(); ++i) {
            String fileName = ((WebPage) this.person.contextPages.elementAt(i)).fileName;
            BufferedReader in = null;
            try {
                in = new BufferedReader(new FileReader(new File(fileName)));
                String inputLine;
                // Every line is consumed exactly once; the earlier version read a second
                // line inside the loop body and so silently dropped every other line.
                while ((inputLine = in.readLine()) != null) {
                    String[] words = (String[]) this.getWordsFromLine(inputLine).toArray(new String[0]);
                    this.person.addWordsToContextModel(words);
                }
            }
            catch (FileNotFoundException e) {
                logger.warning(this.threadName + ") Cannot open input file " + fileName + ": " + e);
            }
            catch (IOException e) {
                logger.warning(this.threadName + ") Cannot read input file " + fileName + ": " + e);
            }
            finally {
                Google.closeQuietly(in);
            }
        }
    }

    /**
     * Issues a plain HTTP/1.0 GET for the URL and returns the buffered response.
     *
     * <p>At most {@value #MAX_RESPONSE_LINES} lines of the raw response are read
     * (whatever the server sends first is included as-is) and returned with their
     * line breaks preserved. The socket is always closed before returning.
     *
     * @param stringURL URL to fetch
     * @return reader over the buffered response, or {@code null} if the URL is
     *         malformed or the connection fails
     */
    public BufferedReader establishConnection(String stringURL) {
        Socket connection = null;
        try {
            logger.fine("establishing connection for " + stringURL);
            URL url = new URL(stringURL);
            String address = url.getHost();
            // getFile() keeps the query string, which getPath() would drop.
            String resource = url.getFile();
            if (resource.length() == 0) {
                // A host-only URL has an empty path; the request line still needs a target.
                resource = "/";
            }
            int port = url.getPort();
            if (port == -1) {
                port = DEFAULT_HTTP_PORT;
            }
            connection = TimedSocket.getSocket(address, port, CONNECT_TIMEOUT);
            connection.setSoTimeout(READ_TIMEOUT_MILLIS);
            BufferedReader din = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            PrintStream pout = new PrintStream(connection.getOutputStream());
            // HTTP lines are terminated by CRLF; an empty line ends the request.
            pout.print("GET " + resource + " HTTP/1.0\r\n\r\n");
            pout.flush();
            StringBuffer buf = new StringBuffer();
            String line;
            for (int i = 0; i < MAX_RESPONSE_LINES && (line = din.readLine()) != null; ++i) {
                // readLine() strips the terminator; put one back so that words on
                // adjacent lines are not glued together downstream.
                buf.append(line).append('\n');
            }
            return new BufferedReader(new StringReader(buf.toString()));
        }
        catch (MalformedURLException u) {
            logger.warning(this.threadName + ") Malformed URL: " + stringURL);
        }
        catch (IOException e) {
            logger.warning(this.threadName + ") Failed to establish connection to " + stringURL + " error: " + e);
        }
        finally {
            if (connection != null) {
                try {
                    // Closing the socket also closes both of its streams.
                    connection.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return null;
    }

    /**
     * Downloads one page to {@code outFileName}, harvesting links into
     * {@link #urls} and words into {@code words} on the way. On success the page is
     * registered on the person.
     *
     * @param url         page to download
     * @param outFileName file that receives the page content
     * @param words       list that the page's words are appended to
     * @return {@code true} if the page was fetched and written completely
     */
    public boolean retrievePage(String url, String outFileName, ArrayList words) {
        String base = Google.getBase(url);
        logger.fine(this.threadName + ") Retrieving page: " + url);
        BufferedReader in = this.establishConnection(url);
        if (in == null) {
            return false;
        }
        logger.fine("established connection to " + url);
        BufferedWriter out = null;
        try {
            out = new BufferedWriter(new FileWriter(new File(outFileName)));
            int numberOfLines = 0;
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                if (++numberOfLines == MAX_LINES_PER_PAGE) {
                    logger.fine(this.threadName + ") Too long file - stop reading");
                    break;
                }
                this.extractURLs(inputLine, base);
                words.addAll(this.getWordsFromLine(inputLine));
                out.write(inputLine);
                out.newLine();
            }
            // Close explicitly so that a failing final flush is reported as a failure.
            out.close();
            out = null;
            logger.fine(this.threadName + ") Content is written");
            this.person.addPage(new WebPage(url, outFileName));
            return true;
        }
        catch (FileNotFoundException e) {
            logger.warning(this.threadName + ") Cannot open output file " + outFileName + ": " + e);
        }
        catch (IOException e) {
            logger.warning(this.threadName + ") Cannot write output file " + outFileName + ": " + e);
        }
        finally {
            Google.closeQuietly(out);
            Google.closeQuietly(in);
        }
        return false;
    }

    /**
     * Fetches every URL in {@link #urls} that has not been attempted yet, repeating
     * until a pass finds no pending URL (fetched pages may add new ones).
     *
     * @param fileNumber number to use for the next output file
     * @param words      list that the words of all fetched pages are appended to
     * @return the next unused file number
     */
    public int processUnseenURLs(int fileNumber, ArrayList words) {
        boolean foundPending;
        do {
            foundPending = false;
            // Iterate over a snapshot: retrievePage() adds entries to the map.
            Object[] snapshot = this.urls.keySet().toArray();
            for (int i = 0; i < snapshot.length; ++i) {
                String url = (String) snapshot[i];
                Boolean attempted = (Boolean) this.urls.get(url);
                if (attempted.booleanValue()) {
                    continue;
                }
                foundPending = true;
                this.urls.put(url, Boolean.TRUE);
                if (this.retrievePage(url, this.getFileName(fileNumber), words)) {
                    ++fileNumber;
                }
            }
        } while (foundPending);
        return fileNumber;
    }

    /**
     * Runs the search already configured on {@link #search}, switching to the next
     * license key and retrying whenever the query limit of the current key is
     * exceeded and the user is licensed.
     *
     * @param query query text, used only for the error message
     * @return the search result
     * @throws GoogleRuntimeException for any other search fault, or for an exceeded
     *         query limit when the user is not licensed
     */
    private GoogleSearchResult doSearch(String query) {
        for (;;) {
            try {
                logger.fine(this.threadName + ") About to call GoogleSearch.doSearch()");
                GoogleSearchResult result = this.search.doSearch();
                logger.fine(this.threadName + ") Returned from GoogleSearch.doSearch()");
                return result;
            }
            catch (GoogleSearchFault fault) {
                String error = "Google search fault on query: " + query;
                GoogleRuntimeException gre = GoogleExceptionConversionUtility.convert(fault, error);
                if (!(gre instanceof GoogleQueryLimitExceededException)
                        || !LicenseKeyManager.getInstance().isUserLicensed()) {
                    throw gre;
                }
                logger.fine(this.threadName + ": Changing Key after Google key expiration: " + fault);
                this.licenseKey = LicenseKeyManager.getInstance().getNextLicenseKey(this.licenseKey);
                logger.fine(this.threadName + ") Key changed ");
                this.search.setKey(this.licenseKey.getKey());
            }
        }
    }

    /**
     * Deletes the output files numbered {@code beginIndex} (inclusive) to
     * {@code endIndex} (exclusive) and unregisters them from the person.
     */
    private void removeFiles(int beginIndex, int endIndex) {
        for (int i = beginIndex; i < endIndex; ++i) {
            String outFileName = this.getFileName(i);
            File file = new File(outFileName);
            if (!file.delete() && file.exists()) {
                logger.warning(this.threadName + ") Cannot delete file " + outFileName);
            }
            this.person.removePage(outFileName);
        }
    }

    /**
     * Searches for {@code query} and downloads the first acceptable result together
     * with the pages it links to.
     *
     * <p>A result is tried only if it is not binary and the person's login or name
     * occurs in its URL. For queries with a {@code site:} restriction the first
     * successfully downloaded result is accepted; otherwise the downloaded words
     * must have a cosine above {@value #MIN_CONTEXT_COSINE} with the person's
     * context model, or the downloaded files are removed and the next result is
     * tried.
     *
     * @param query search query
     * @return {@code true} if a result was accepted and its pages were kept
     */
    public boolean responseToQuery(String query) {
        this.search.setQueryString(query);
        GoogleSearchResult r = this.doSearch(query);
        if (r == null) {
            return false;
        }
        GoogleSearchResultElement[] results = r.getResultElements();
        if (results == null || results.length == 0) {
            return false;
        }
        boolean domainRestricted = Google.isDomainInQuery(query);
        if (!domainRestricted) {
            this.buildContextModelForPerson();
        }
        int fileNumber = 1;
        for (int i = 0; i < results.length; ++i) {
            String url = results[i].getURL();
            logger.fine(this.threadName + ") Found URL: " + url + " (true)");
            if (Google.binaryFile(url)) {
                logger.fine(this.threadName + ") URL " + url + " is binary");
                continue;
            }
            if (!this.person.isLoginOrNameInURL(url, this.threadName)) {
                logger.fine(this.threadName + ") URL " + url + " is not related to person");
                continue;
            }
            logger.info("Fetching " + url);
            int firstFileNumber = fileNumber;
            this.urls.put(url, Boolean.TRUE);
            ArrayList wordsFromWeb = new ArrayList();
            if (!this.retrievePage(url, this.getFileName(fileNumber), wordsFromWeb)) {
                continue;
            }
            fileNumber = this.processUnseenURLs(fileNumber + 1, wordsFromWeb);
            if (domainRestricted) {
                return true;
            }
            double cosine = this.person.calculateCosineWithContextModel((String[]) wordsFromWeb.toArray(new String[0]));
            if (cosine > MIN_CONTEXT_COSINE) {
                return true;
            }
            logger.fine(this.threadName + ") URL: " + url + " has cosine " + cosine + " with true model...removing fileno " + firstFileNumber + " to " + fileNumber + " for " + this.person.getFirstName());
            // Roll back everything downloaded for this result and reuse its file numbers.
            this.removeFiles(firstFileNumber, fileNumber);
            fileNumber = firstFileNumber;
        }
        return false;
    }

    /** Closes the resource if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException ignored) {
            // Best effort: the primary outcome has already been decided by the caller.
        }
    }
}

