/*
 * Decompiled with CFR 0.152.
 */
package opennlp.grok.preprocess.mwe;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.zip.GZIPOutputStream;
import opennlp.common.util.PerlHelp;

/**
 * Command-line tool that reads {@code index.<type>} files from a directory and
 * writes gzip-compressed word lists next to them.
 *
 * <p>For each type two kinds of list can be produced:
 * <ul>
 *   <li>{@code compound.<type>s.gz} - every token containing an underscore,
 *       with underscores replaced by spaces;</li>
 *   <li>{@code nonCompound.<type>s.gz} - the first token of every line that does
 *       not start with a space and whose first token has no underscore.</li>
 * </ul>
 * Every output file starts with the WordNet licence text as {@code #} comment lines.
 */
public class WordNetExtractor {
    /** Directory used when no command-line argument is supplied. */
    private static final String DEFAULT_DIR = "D:\\WN17\\dict\\";

    /** Licence text written, one line each, at the top of every generated file. */
    private static final String[] LICENSE_HEADER = {
        "# This file was extracted from WordNet data, the following copyright notice",
        "# from WordNet is attached.",
        "#",
        "#  This software and database is being provided to you, the LICENSEE, by  ",
        "#  Princeton University under the following license.  By obtaining, using  ",
        "#  and/or copying this software and database, you agree that you have  ",
        "#  read, understood, and will comply with these terms and conditions.:  ",
        "#  ",
        "#  Permission to use, copy, modify and distribute this software and  ",
        "#  database and its documentation for any purpose and without fee or  ",
        "#  royalty is hereby granted, provided that you agree to comply with  ",
        "#  the following copyright notice and statements, including the disclaimer,  ",
        "#  and that the same appear on ALL copies of the software, database and  ",
        "#  documentation, including modifications that you make for internal  ",
        "#  use or for distribution.  ",
        "#  ",
        "#  WordNet 1.7 Copyright 2001 by Princeton University.  All rights reserved. ",
        "#  ",
        "#  THIS SOFTWARE AND DATABASE IS PROVIDED \"AS IS\" AND PRINCETON  ",
        "#  UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR  ",
        "#  IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON  ",
        "#  UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-  ",
        "#  ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE  ",
        "#  OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT  ",
        "#  INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR ",
        "#  OTHER RIGHTS. ",
        "#  ",
        "#  The name of Princeton University or Princeton may not be used in",
        "#  advertising or publicity pertaining to distribution of the software",
        "#  and/or database.  Title to copyright in this software, database and",
        "#  any associated documentation shall at all times remain with",
        "#  Princeton University and LICENSEE agrees to preserve same.  "
    };

    /**
     * Generates the compound lists for noun, verb, adv and adj, plus the
     * non-compound list for verb.
     *
     * @param argv optional; {@code argv[0]} is the directory holding the
     *             {@code index.*} files (defaults to {@link #DEFAULT_DIR})
     */
    public static void main(String[] argv) {
        String dir = argv.length > 0 ? argv[0] : DEFAULT_DIR;
        WordNetExtractor.createCompoundData(dir, "noun");
        WordNetExtractor.createCompoundData(dir, "verb");
        WordNetExtractor.createNonCompoundData(dir, "verb");
        WordNetExtractor.createCompoundData(dir, "adv");
        WordNetExtractor.createCompoundData(dir, "adj");
    }

    /**
     * Extracts every underscore-containing token from {@code index.<type>} and
     * writes them, underscores turned into spaces, to {@code compound.<type>s.gz}.
     * Failures are reported on stderr and do not propagate, so the remaining
     * types are still processed by {@link #main}.
     *
     * @param dir  directory containing the index file; also receives the output
     * @param type index suffix such as {@code "noun"}
     */
    private static void createCompoundData(String dir, String type) {
        try {
            HashSet<String> entries = collectEntries(resolve(dir, "index." + type), true);
            System.out.println(type + " size=" + entries.size());
            writeEntries(resolve(dir, "compound." + type + "s.gz"), entries);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the first token of each eligible line of {@code index.<type>}
     * (line does not start with a space, token has no underscore) and writes
     * them to {@code nonCompound.<type>s.gz}. Failures are reported on stderr
     * and do not propagate.
     *
     * @param dir  directory containing the index file; also receives the output
     * @param type index suffix such as {@code "verb"}
     */
    private static void createNonCompoundData(String dir, String type) {
        try {
            HashSet<String> entries = collectEntries(resolve(dir, "index." + type), false);
            System.out.println(type + " size=" + entries.size());
            writeEntries(resolve(dir, "nonCompound." + type + "s.gz"), entries);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a file reference inside {@code dir}, whether or not {@code dir}
     * ends with a path separator. An empty {@code dir} keeps the name relative
     * to the working directory, as plain string concatenation would.
     */
    private static File resolve(String dir, String name) {
        return dir.length() == 0 ? new File(name) : new File(dir, name);
    }

    /**
     * Reads {@code index} line by line and gathers the requested entries.
     * The final line is processed even when it has no trailing line terminator,
     * and the input stream is always closed.
     *
     * @param index     file to read (decoded with the platform default charset)
     * @param compounds {@code true} to collect compound tokens, {@code false}
     *                  to collect non-compound head tokens
     * @return the distinct entries found
     * @throws IOException if the file cannot be opened or read
     */
    private static HashSet<String> collectEntries(File index, boolean compounds) throws IOException {
        HashSet<String> entries = new HashSet<String>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(index)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() == 0) {
                    continue;
                }
                if (compounds) {
                    addCompounds(line, entries);
                } else {
                    addNonCompound(line, entries);
                }
            }
        }
        finally {
            reader.close();
        }
        return entries;
    }

    /** Adds every token of {@code line} that contains '_' to {@code entries}, with '_' replaced by ' '. */
    private static void addCompounds(String line, HashSet<String> entries) {
        // PerlHelp.split is presumably a whitespace split - confirm against its implementation.
        String[] tokens = PerlHelp.split(line);
        for (String token : tokens) {
            if (token.indexOf('_') >= 0) {
                entries.add(token.replace('_', ' '));
            }
        }
    }

    /** Adds the first token of {@code line} to {@code entries} unless the line starts with a space or the token contains '_'. */
    private static void addNonCompound(String line, HashSet<String> entries) {
        // Lines beginning with a space are skipped; presumably these are the
        // licence preamble of the index file - confirm against the data.
        if (line.charAt(0) == ' ') {
            return;
        }
        String[] tokens = PerlHelp.split(line);
        // Guard against a line that yields no tokens at all.
        if (tokens.length > 0 && tokens[0].indexOf('_') < 0) {
            entries.add(tokens[0]);
        }
    }

    /**
     * Writes the licence header followed by one entry per line to a
     * gzip-compressed file.
     *
     * @param output  destination file (overwritten if present)
     * @param entries lines to write after the header, in the set's iteration order
     * @throws IOException if the file cannot be created or any write fails
     */
    private static void writeEntries(File output, HashSet<String> entries) throws IOException {
        FileOutputStream fos = new FileOutputStream(output);
        try {
            PrintWriter writer = new PrintWriter(new GZIPOutputStream(new BufferedOutputStream(fos)));
            for (String headerLine : LICENSE_HEADER) {
                writer.println(headerLine);
            }
            for (String entry : entries) {
                writer.println(entry);
            }
            // close() flushes and finishes the gzip stream; PrintWriter hides
            // IOExceptions, so the error flag has to be checked explicitly.
            writer.close();
            if (writer.checkError()) {
                throw new IOException("Failed writing " + output);
            }
        }
        finally {
            // No-op when the writer already closed the chain; releases the file otherwise.
            fos.close();
        }
    }
}

