/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.examples.tagger.trainAndTest;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.uima.examples.tagger.NGram;
import org.apache.uima.examples.tagger.trainAndTest.CorpusReader;
import org.apache.uima.examples.tagger.trainAndTest.MappingInterface;
import org.apache.uima.examples.tagger.trainAndTest.SuffixTree;
import org.apache.uima.examples.tagger.trainAndTest.Token;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class ModelGeneration
implements Serializable {
    // Serialization version of the persisted model object (see write_to_file).
    private static final long serialVersionUID = 1L;
    // Suffix -> (POS tag -> accumulated value) for rare, non-capitalized words.
    // Assigned in init() from element 1 of get_word_probs(), i.e. the static map `sm`.
    public Map suffix_tree = new HashMap();
    // Same structure as suffix_tree but for capitalized words.
    // Assigned in init() from element 2 of get_word_probs(), i.e. the static map `sm2`.
    public Map suffix_tree_capitalized = new HashMap();
    // Word -> (POS tag -> count(word, tag) / count(tag)); assigned in init().
    public Map<String, Map<String, Double>> word_probs = new HashMap<String, Map<String, Double>>();
    // Union of the unigram, bigram and trigram tables returned by get_transition_probs(1..3).
    public Map<NGram, Double> transition_probs = new HashMap<NGram, Double>();
    // POS tag of every token passed through get_lexicon(), in corpus order.
    // Static: it keeps growing across calls and across instances.
    static List<String> posList = new ArrayList<String>();
    // Not read or written by any method of this class.
    int N;
    // Filled by calculate_lambda(2): [0] = share won by the unigram estimate, [1] = share won by the bigram estimate.
    public double[] lambdas2 = new double[2];
    // Filled by calculate_lambda(3): [0] = unigram share, [1] = bigram share, [2] = trigram share.
    public double[] lambdas3 = new double[3];
    // Value returned by get_theta() for the unigram tag probabilities; assigned in init().
    public double theta;
    // Path the model is serialized to; transient, so it is not part of the saved model.
    transient String OutputFile;
    // Training tokens handed to the constructor; transient, so it is not part of the saved model.
    transient List corpus;
    // Number of tokens seen by get_lexicon(); static, so it accumulates across calls.
    static int tokens_count_all_corpus = 0;
    // Working suffix map for non-capitalized words, mutated by get_word_probs().
    static Map sm = new HashMap();
    // Working suffix map for capitalized words, mutated by get_word_probs().
    static Map sm2 = new HashMap();
    // N-gram count tables; reassigned on every get_transition_probs() call and read by calculate_lambda().
    static Map<NGram, Double> unigrams;
    static Map<NGram, Double> bigrams;
    static Map<NGram, Double> trigrams;

    /**
     * Stores the training data and the destination of the serialized model.
     * No computation happens here; call {@link #init()} to train and write the model.
     *
     * @param corpus     tokens to train on
     * @param OutputFile path of the model file; may be {@code null}, in which case
     *                   {@code write_to_file} falls back to its default name
     */
    public ModelGeneration(List<Token> corpus, String OutputFile) {
        this.corpus = corpus;
        this.OutputFile = OutputFile;
    }

    /**
     * Trains the model from the corpus given to the constructor and serializes it.
     *
     * <p>Steps, in order: build the lexicon, derive word probabilities and the two
     * suffix maps, collect uni-/bi-/trigram transition probabilities, compute
     * {@code theta} and the two lambda arrays, then write this object to
     * {@code OutputFile}. Progress figures are printed to standard output.
     */
    public void init() {
        Map<String, Map<String, Double>> lexicon = get_lexicon(corpus);
        List<Map<String, Map<String, Double>>> tables = get_word_probs(lexicon);
        word_probs = tables.get(0);
        suffix_tree = tables.get(1);
        suffix_tree_capitalized = tables.get(2);

        System.out.println("Number of different words " + word_probs.size());
        System.out.println("Number of non-capitalized suffixes: " + suffix_tree.size());
        System.out.println("Number of capitalized suffixes: " + suffix_tree_capitalized.size());

        // The unigram table is kept in a local because theta is derived from it below.
        Map<NGram, Double> unigram_probs = get_transition_probs(1);
        transition_probs.putAll(unigram_probs);
        transition_probs.putAll(get_transition_probs(2));
        transition_probs.putAll(get_transition_probs(3));

        theta = get_theta(unigram_probs);
        System.out.println("theta= " + theta);

        lambdas2 = calculate_lambda(2);
        lambdas3 = calculate_lambda(3);

        write_to_file(OutputFile);
    }

    /**
     * Tells whether a word starts with a character that is unchanged by upper-casing.
     *
     * <p>Note that this is true not only for upper-case letters but also for digits,
     * punctuation and any other character without a distinct upper-case form.
     *
     * @param word the word to inspect; may be {@code null} or empty
     * @return {@code true} if the first character equals its upper-cased form;
     *         {@code false} for {@code null} or the empty string (the previous
     *         implementation threw on both)
     */
    public static boolean capitalized(String word) {
        if (word == null || word.length() == 0) {
            return false;
        }
        char first_letter = word.charAt(0);
        // Only the first character matters, so upper-case just that one instead of the whole word.
        char upper_first = word.substring(0, 1).toUpperCase().charAt(0);
        return first_letter == upper_first;
    }

    /**
     * Counts, for every word form in the corpus, how often it occurs with each POS tag.
     *
     * <p>Each inner map holds one entry per tag plus the reserved key {@code "count"},
     * which carries the total number of occurrences of the word.
     *
     * <p>Side effects: every token increments {@code tokens_count_all_corpus} and
     * appends its tag to {@code posList}; both are static and are never reset here.
     *
     * @param corpus the training tokens, in corpus order
     * @return word -> (tag -> frequency, "count" -> total frequency)
     */
    static Map<String, Map<String, Double>> get_lexicon(List<Token> corpus) {
        Map<String, Map<String, Double>> lexicon = new HashMap<String, Map<String, Double>>();
        for (Token token : corpus) {
            tokens_count_all_corpus++;
            posList.add(token.pos);

            Map<String, Double> tag_counts = lexicon.get(token.word);
            if (tag_counts == null) {
                // First sighting of this word form.
                tag_counts = new HashMap<String, Double>();
                tag_counts.put(token.pos, 1.0);
                tag_counts.put("count", 1.0);
                lexicon.put(token.word, tag_counts);
            } else {
                Double seen = tag_counts.get(token.pos);
                tag_counts.put(token.pos, seen == null ? 1.0 : seen + 1.0);
                tag_counts.put("count", tag_counts.get("count") + 1.0);
            }
        }
        return lexicon;
    }

    /*
     * WARNING - void declaration
     */
    static List<Map<String, Map<String, Double>>> get_word_probs(Map<String, Map<String, Double>> corpus) {
        Map<String, Map<String, Double>> word_counts = corpus;
        HashMap word_probs = new HashMap();
        int mapsize = word_counts.size();
        Iterator<Map.Entry<String, Map<String, Double>>> keyValuePairs = word_counts.entrySet().iterator();
        Map<NGram, Double> pos_counts = ModelGeneration.get_ngrams(1);
        for (int i = 0; i < mapsize; ++i) {
            HashSet<String> endings;
            Map.Entry<String, Map<String, Double>> entry = keyValuePairs.next();
            String key = entry.getKey();
            Map<String, Double> pos2 = word_counts.get(key);
            Iterator<Map.Entry<String, Double>> keyValuePairs_pos = pos2.entrySet().iterator();
            HashMap<String, Double> lokal = new HashMap<String, Double>();
            for (int u = 0; u < pos2.size(); ++u) {
                Map.Entry<String, Double> entry_pos = keyValuePairs_pos.next();
                String key2 = entry_pos.getKey();
                System.out.println(pos_counts.size());
                if (key2 != "count") {
                    Double value2 = entry_pos.getValue();
                    NGram ng = new NGram(key2);
                    double d = pos_counts.get(ng);
                    Double val2 = value2 / d;
                    lokal.put(key2, val2);
                    continue;
                }
                lokal.remove("count");
            }
            word_probs.put(key, lokal);
            HashSet<String> local_suffixes = new HashSet<String>();
            HashSet<String> local_suffixes_capitalized = new HashSet<String>();
            if (word_counts.get(key).get("count") < 10.0) {
                void var17_21;
                String word_end = key.toString().length() > 9 ? key.toString().substring(key.toString().length() - 9, key.toString().length()) : key;
                SuffixTree st = new SuffixTree(word_end);
                Iterator<Map.Entry<SuffixTree.EDGE_KEY, SuffixTree.Edge>> kv = st.edges.entrySet().iterator();
                boolean bl = false;
                while (var17_21 < st.edges.size()) {
                    Map.Entry<SuffixTree.EDGE_KEY, SuffixTree.Edge> m = kv.next();
                    SuffixTree.Edge e = m.getValue();
                    if (st.nodes.get((int)(e.end_node - 1)).suffix_node == -1) {
                        String suffix = st.text.substring(e.first_char_index, e.last_char_index + 1);
                        if (ModelGeneration.capitalized(key)) {
                            local_suffixes_capitalized.add(suffix);
                        } else {
                            local_suffixes.add(suffix);
                        }
                    }
                    ++var17_21;
                }
            }
            if (local_suffixes.isEmpty() && local_suffixes_capitalized.isEmpty()) continue;
            Map local_sm = new HashMap();
            if (local_suffixes.isEmpty()) {
                endings = local_suffixes_capitalized;
                local_sm = sm2;
            } else {
                endings = local_suffixes;
                local_sm = sm;
            }
            for (Object e : endings) {
                HashMap<String, Double> etwas = new HashMap<String, Double>();
                if (local_sm.containsKey(e)) {
                    Map pos_suffix = (Map)local_sm.get(e);
                    Iterator pos_suf = pos_suffix.entrySet().iterator();
                    for (int k = 0; k < pos_suffix.size(); ++k) {
                        Map.Entry entry3 = pos_suf.next();
                        Object key_pos = entry3.getKey();
                        Object value_pos = entry3.getValue();
                        if (((Map)word_probs.get(key)).containsKey(key_pos)) {
                            Double val_suffix = (Double)value_pos + (Double)((Map)word_probs.get(key)).get(key_pos);
                            etwas.put((String)key_pos, val_suffix);
                        } else {
                            etwas.put((String)key_pos, (Double)value_pos);
                        }
                        Set smth2 = ((Map)word_probs.get(key)).keySet();
                        Object[] smth = smth2.toArray();
                        for (int r = 0; r < smth.length; ++r) {
                            if (pos_suffix.containsKey(smth[r]) || smth[r].equals("count")) continue;
                            etwas.put((String)smth[r], pos2.get(smth[r]));
                        }
                    }
                } else {
                    etwas.putAll(lokal);
                }
                if (local_suffixes.isEmpty()) {
                    sm2.put(e, etwas);
                    continue;
                }
                sm.put(e, etwas);
            }
        }
        ArrayList<Map<String, Map<String, Double>>> l = new ArrayList<Map<String, Map<String, Double>>>();
        l.add(word_probs);
        l.add(sm);
        l.add(sm2);
        return l;
    }

    /**
     * Replaces every probability by its natural logarithm.
     *
     * <p>The inner maps of {@code probs} are modified in place; the returned outer map
     * is a new {@code HashMap} that refers to those same (now log-valued) inner maps.
     *
     * @param probs key -> (tag -> probability)
     * @return a new outer map with the same keys and the same, mutated, inner maps
     */
    static Map<String, Map<String, Double>> logify_probs(Map<String, Map<String, Double>> probs) {
        Map<String, Map<String, Double>> logs = new HashMap<String, Map<String, Double>>();
        for (Map.Entry<String, Map<String, Double>> outer : probs.entrySet()) {
            Map<String, Double> tag_values = outer.getValue();
            for (Map.Entry<String, Double> inner : tag_values.entrySet()) {
                // Overwriting the value of an existing key is not a structural change,
                // so it is safe while iterating.
                inner.setValue(Math.log(inner.getValue()));
            }
            logs.put(outer.getKey(), tag_values);
        }
        return logs;
    }

    /**
     * Counts the tag n-grams of order {@code N} in {@code posList}.
     *
     * <p>N-grams are formed from consecutive positions of {@code posList}, so the list
     * must have been filled beforehand.
     *
     * @param N n-gram order: 1, 2 or 3
     * @return n-gram -> number of occurrences
     * @throws IllegalArgumentException if {@code N} is not 1, 2 or 3
     */
    static Map<NGram, Double> get_ngrams(int N) throws IllegalArgumentException {
        if (N < 1 || N > 3) {
            throw new IllegalArgumentException("N=1, N=2 or N=3, no further N-grams are supported at the moment");
        }
        Map<NGram, Double> counts = new HashMap<NGram, Double>();
        // Last index at which a full window of N tags still fits.
        int last_start = posList.size() - N;
        for (int start = 0; start <= last_start; ++start) {
            NGram gram;
            if (N == 1) {
                gram = new NGram(posList.get(start));
            } else if (N == 2) {
                gram = new NGram(posList.get(start), posList.get(start + 1));
            } else {
                gram = new NGram(posList.get(start), posList.get(start + 1), posList.get(start + 2));
            }
            Double seen = counts.get(gram);
            counts.put(gram, seen == null ? 1.0 : seen + 1.0);
        }
        return counts;
    }

    /**
     * Computes relative-frequency probabilities for tag n-grams of order {@code N}.
     *
     * <ul>
     *   <li>N = 1: count(t1) / number of tokens</li>
     *   <li>N = 2: count(t1, t2) / count(t1)</li>
     *   <li>N = 3: count(t1, t2, t3) / count(t1, t2)</li>
     * </ul>
     *
     * <p>Side effect: the static tables {@code unigrams}, {@code bigrams} and
     * {@code trigrams} are recomputed on every call, before {@code N} is validated.
     *
     * @param N n-gram order: 1, 2 or 3
     * @return n-gram -> probability
     * @throws IllegalArgumentException if {@code N} is not 1, 2 or 3
     */
    static Map<NGram, Double> get_transition_probs(int N) throws IllegalArgumentException {
        unigrams = ModelGeneration.get_ngrams(1);
        bigrams = ModelGeneration.get_ngrams(2);
        trigrams = ModelGeneration.get_ngrams(3);

        Map<NGram, Double> probs = new HashMap<NGram, Double>();
        switch (N) {
            case 1:
                for (Map.Entry<NGram, Double> e : unigrams.entrySet()) {
                    double tag_count = e.getValue();
                    probs.put(e.getKey(), tag_count / (double)tokens_count_all_corpus);
                }
                break;
            case 2:
                for (Map.Entry<NGram, Double> e : bigrams.entrySet()) {
                    NGram gram = e.getKey();
                    double history_count = unigrams.get(new NGram(gram.tag1));
                    probs.put(gram, e.getValue() / history_count);
                }
                break;
            case 3:
                for (Map.Entry<NGram, Double> e : trigrams.entrySet()) {
                    NGram gram = e.getKey();
                    double history_count = bigrams.get(new NGram(gram.tag1, gram.tag2));
                    probs.put(gram, e.getValue() / history_count);
                }
                break;
            default:
                throw new IllegalArgumentException("only uni-, bi-, and trigramms are supported at the moment");
        }
        return probs;
    }

    /**
     * Computes the weights stored in {@code lambdas2} (N = 2) or {@code lambdas3} (N = 3).
     *
     * <p>For every n-gram of the requested order, the count-minus-one estimates of each
     * available order are compared; the n-gram's frequency is credited to the order whose
     * estimate equals the maximum (highest order wins ties, the unigram order is the
     * fallback, including when an estimate is NaN because of a 0/0 division). Each weight
     * is the credited mass divided by the total mass.
     *
     * <p>Reads the static tables {@code unigrams}, {@code bigrams}, {@code trigrams} and
     * {@code tokens_count_all_corpus}, so {@code get_transition_probs} must have run first.
     * The contents of {@code lambdas3} are printed to standard output regardless of N.
     *
     * @param N 2 or 3; any other value leaves both arrays untouched
     * @return {@code lambdas2} when N is 2, otherwise {@code lambdas3}
     */
    private double[] calculate_lambda(int N) {
        double corpus_denominator = (double)(tokens_count_all_corpus - 1);

        if (N == 2) {
            double unigram_mass = 0.0;
            double bigram_mass = 0.0;
            double total_mass = 0.0;
            for (Map.Entry<NGram, Double> e : bigrams.entrySet()) {
                NGram gram = e.getKey();
                double tag_count = unigrams.get(new NGram(gram.tag1));
                double bigram_count = e.getValue();

                double bigram_estimate = (bigram_count - 1.0) / (tag_count - 1.0);
                double unigram_estimate = (tag_count - 1.0) / corpus_denominator;
                double best = ModelGeneration.get_max(bigram_estimate, unigram_estimate, 0.0);

                if (best == bigram_estimate) {
                    bigram_mass += bigram_count;
                } else {
                    unigram_mass += bigram_count;
                }
                total_mass += bigram_count;
            }
            lambdas2[0] = unigram_mass / total_mass;
            lambdas2[1] = bigram_mass / total_mass;
        }

        if (N == 3) {
            double unigram_mass = 0.0;
            double bigram_mass = 0.0;
            double trigram_mass = 0.0;
            double total_mass = 0.0;
            for (Map.Entry<NGram, Double> e : trigrams.entrySet()) {
                NGram gram = e.getKey();
                NGram history = new NGram(gram.tag1, gram.tag2);
                NGram first_tag = new NGram(gram.tag1);
                double pair_count = bigrams.containsKey(history) ? bigrams.get(history) : 0.0;
                double trigram_count = e.getValue();
                double tag_count = unigrams.containsKey(first_tag) ? unigrams.get(first_tag) : 0.0;

                double trigram_estimate = (trigram_count - 1.0) / (pair_count - 1.0);
                double bigram_estimate = (pair_count - 1.0) / (tag_count - 1.0);
                double unigram_estimate = (tag_count - 1.0) / corpus_denominator;
                double best = ModelGeneration.get_max(trigram_estimate, bigram_estimate, unigram_estimate);

                if (best == trigram_estimate) {
                    trigram_mass += trigram_count;
                } else if (best == bigram_estimate) {
                    bigram_mass += trigram_count;
                } else {
                    unigram_mass += trigram_count;
                }
                total_mass += trigram_count;
            }
            lambdas3[0] = unigram_mass / total_mass;
            lambdas3[1] = bigram_mass / total_mass;
            lambdas3[2] = trigram_mass / total_mass;
        }

        System.out.println("lambdas: " + this.lambdas3[0] + " " + this.lambdas3[1] + " " + this.lambdas3[2]);
        if (N == 2) {
            return lambdas2;
        }
        return lambdas3;
    }

    /**
     * Returns the sample variance (divisor {@code n - 1}) of the probabilities in {@code m}.
     *
     * <p>The key type is irrelevant to the computation, so any map with {@code Double}
     * values is accepted; this also makes the method callable with the
     * {@code Map<NGram, Double>} that {@code init()} passes in.
     *
     * @param m probabilities to summarise; only the values are read
     * @return the sample variance, or {@code 0.0} when {@code m} has fewer than two
     *         entries (the variance is undefined there and the plain formula would
     *         yield NaN or -0.0)
     */
    private double get_theta(Map<?, Double> m) {
        int tagset_size = m.size();
        if (tagset_size < 2) {
            return 0.0;
        }

        double sum = 0.0;
        for (Double prob : m.values()) {
            sum += prob.doubleValue();
        }
        double prob_average = sum / tagset_size;

        double squared_deviations = 0.0;
        for (Double prob : m.values()) {
            squared_deviations += Math.pow(prob - prob_average, 2.0);
        }
        return squared_deviations / (tagset_size - 1.0);
    }

    /**
     * Returns the largest of three values using plain {@code >} comparisons.
     *
     * <p>Unlike {@code Math.max}, a NaN in {@code b} or {@code c} is ignored, whereas a
     * NaN in {@code a} is returned unchanged because nothing compares greater than it.
     * On ties the earlier argument is kept.
     *
     * @param a first candidate, also the initial maximum
     * @param b second candidate
     * @param c third candidate
     * @return the maximum as described above
     */
    static double get_max(double a, double b, double c) {
        double larger_of_first_two = b > a ? b : a;
        return c > larger_of_first_two ? c : larger_of_first_two;
    }

    /**
     * Serializes this model object to disk.
     *
     * <p>The object stream is flushed before the file is closed; without the flush,
     * bytes still held in the {@code ObjectOutputStream} buffer could be lost and the
     * model file left truncated. The file handle is released even when writing fails.
     * I/O errors are reported on standard error and not rethrown, as before.
     *
     * @param filename destination path, or {@code null} to write to {@code model.dat}
     *                 in the working directory
     */
    private void write_to_file(String filename) {
        File file;
        if (filename != null) {
            file = new File(filename);
        } else {
            System.out.println("Default: model.dat");
            file = new File("model.dat");
        }

        FileOutputStream file_output = null;
        try {
            file_output = new FileOutputStream(file);
            ObjectOutputStream o = new ObjectOutputStream(file_output);
            o.writeObject(this);
            // Push any buffered serialization data down to the file before it is closed.
            o.flush();
        }
        catch (IOException e) {
            System.err.println("IO exception = " + e);
        }
        finally {
            if (file_output != null) {
                try {
                    file_output.close();
                }
                catch (IOException e) {
                    System.err.println("IO exception = " + e);
                }
            }
        }
    }

    /**
     * Command-line entry point: trains a model as configured in {@code tagger.properties}
     * (read from the working directory).
     *
     * <p>Properties read: {@code FILE} (corpus path), {@code MODEL_FILE} (output path),
     * {@code DO_MAPPING} (boolean), {@code MAPPING} (class name of a
     * {@code MappingInterface}, required only when {@code DO_MAPPING} is true) and
     * {@code CORPUS_READER} (class name of a {@code CorpusReader}, always required).
     * Any failure is printed to standard error; nothing is rethrown.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            String paramFile = "tagger.properties";
            Properties defaultProps = new Properties();
            FileInputStream in = new FileInputStream(paramFile);
            try {
                defaultProps.load(in);
            }
            finally {
                // Release the handle even when the properties file is malformed.
                in.close();
            }

            String file = defaultProps.getProperty("FILE");
            String fileOutput = defaultProps.getProperty("MODEL_FILE");
            boolean doMapping = Boolean.valueOf(defaultProps.getProperty("DO_MAPPING"));

            MappingInterface mapping = null;
            if (doMapping) {
                String mappingClass = require_property(defaultProps, "MAPPING", paramFile);
                mapping = (MappingInterface)Class.forName(mappingClass).newInstance();
            }

            String readerClass = require_property(defaultProps, "CORPUS_READER", paramFile);
            CorpusReader reader = (CorpusReader)Class.forName(readerClass).newInstance();

            System.out.println("Input file: " + file);
            System.out.println("Output model file: " + fileOutput);
            ModelGeneration md = new ModelGeneration(reader.read_corpus(file, mapping), fileOutput);
            md.init();
        }
        catch (Exception e) {
            System.err.println(e);
        }
    }

    /**
     * Returns the value of a mandatory property, or fails with a message naming the
     * missing key instead of letting a later call fail with a bare NullPointerException.
     */
    private static String require_property(Properties props, String key, String source) {
        String value = props.getProperty(key);
        if (value == null) {
            throw new IllegalArgumentException("Missing required property " + key + " in " + source);
        }
        return value;
    }
}

