/*
 * Decompiled with CFR 0.152.
 */
package opennlp.grok.preprocess.chunk;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import opennlp.common.util.Pair;
import opennlp.grok.preprocess.chunk.ChunkerContextGenerator;
import opennlp.maxent.ContextGenerator;
import opennlp.maxent.Counter;
import opennlp.maxent.Event;
import opennlp.maxent.EventCollector;

/**
 * Builds maxent training {@link Event}s from tagged text.
 *
 * <p>The input is read line by line. Each whitespace-separated token is split
 * twice on its <em>last</em> {@code '/'}: the trailing component becomes the
 * event outcome, the middle component is handed to the context generator as
 * the token's tag, and whatever remains is the token text
 * (e.g. {@code Rockwell/NNP/B-NP}). Tokens without a {@code '/'} are reported
 * on standard output and given the placeholder value {@code "UNKNOWN"}.
 */
public class ChunkerEventCollector
implements EventCollector {
    /** Cutoff handed to {@code Counter.passesCutoff} when selecting frequent tokens. */
    private static final int FREQUENCY_CUTOFF = 5;

    /** Source of the tagged text; replaced by an in-memory copy in non-eval mode. */
    private BufferedReader br;

    /** Produces the context (feature strings) for each token position. */
    private ContextGenerator cg = new ChunkerContextGenerator();

    /**
     * Creates a collector that reads from {@code data} and uses the default
     * {@link ChunkerContextGenerator}.
     *
     * @param data reader over the tagged text; it is not closed by this class
     */
    public ChunkerEventCollector(Reader data) {
        this.br = new BufferedReader(data);
    }

    /**
     * Creates a collector that reads from {@code data} and uses the supplied
     * context generator.
     *
     * @param data reader over the tagged text; it is not closed by this class
     * @param gen  context generator used to build each event's context
     */
    public ChunkerEventCollector(Reader data, ContextGenerator gen) {
        this.br = new BufferedReader(data);
        this.cg = gen;
    }

    /**
     * Collects events in training (non-eval) mode.
     *
     * @return the events read from the input, equivalent to {@code getEvents(false)}
     */
    public Event[] getEvents() {
        return this.getEvents(false);
    }

    /**
     * Counts every token text in the given input and returns those whose
     * counter passes {@link #FREQUENCY_CUTOFF}.
     *
     * <p>An {@link IOException} while reading is printed and the tokens counted
     * so far are still evaluated.
     *
     * @param reader input to scan until end of stream
     * @return the token texts whose counter passed the cutoff; never {@code null}
     */
    private Set<String> getFrequent(BufferedReader reader) {
        HashMap<String, Counter> counts = new HashMap<String, Counter>();
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                StringTokenizer st = new StringTokenizer(line);
                while (st.hasMoreTokens()) {
                    // Strip the trailing outcome first, then the tag, leaving the token text.
                    String tagged = (String)ChunkerEventCollector.split(st.nextToken()).a;
                    String tok = (String)ChunkerEventCollector.split(tagged).a;
                    Counter c = counts.get(tok);
                    if (c != null) {
                        c.increment();
                    } else {
                        counts.put(tok, new Counter());
                    }
                }
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        HashSet<String> frequent = new HashSet<String>();
        for (Map.Entry<String, Counter> entry : counts.entrySet()) {
            if (entry.getValue().passesCutoff(FREQUENCY_CUTOFF)) {
                frequent.add(entry.getKey());
            }
        }
        return frequent;
    }

    /**
     * Splits {@code s} at its last {@code '/'}.
     *
     * @param s string expected to contain at least one {@code '/'}
     * @return a pair holding the text before the last slash in {@code a} and the
     *         text after it in {@code b}; if there is no slash a warning is
     *         printed and the pair is {@code (s, "UNKNOWN")}
     */
    private static Pair split(String s) {
        int slash = s.lastIndexOf('/');
        if (slash == -1) {
            System.out.println("There is a problem in your training data: " + s + " does not conform to the format WORD/TAG.");
            // Casts are kept so the same (Object, Object) constructor is selected as before.
            return new Pair((Object)s, (Object)"UNKNOWN");
        }
        return new Pair((Object)s.substring(0, slash), (Object)s.substring(slash + 1));
    }

    /**
     * Reads the remaining input and turns every token into an {@link Event}.
     *
     * <p>When {@code evalMode} is {@code false} the whole input is first buffered
     * in memory and scanned once for frequent tokens before events are built.
     * Any exception raised while building events is printed, and the events
     * collected up to that point are returned.
     *
     * @param evalMode {@code true} to skip the buffering and frequency pre-pass
     * @return the collected events, in input order; never {@code null}
     */
    public Event[] getEvents(boolean evalMode) {
        ArrayList<Event> elist = new ArrayList<Event>();
        if (!evalMode) {
            this.bufferInputAndScanFrequencies();
        }
        try {
            for (String line = this.br.readLine(); line != null; line = this.br.readLine()) {
                this.addEventsForLine(line, elist);
            }
        }
        catch (Exception e) {
            // Deliberately broad: a failure (I/O or inside the context generator)
            // stops collection but still returns what was gathered so far.
            e.printStackTrace();
        }
        return elist.toArray(new Event[elist.size()]);
    }

    /**
     * Reads all remaining input into memory, runs the frequent-token scan over
     * it, and re-points {@link #br} at the buffered copy so it can be read again.
     * If reading fails the error is printed and {@link #br} is left unchanged.
     */
    private void bufferInputAndScanFrequencies() {
        System.out.println("Reading in all the data");
        try {
            StringBuilder sb = new StringBuilder();
            for (String line = this.br.readLine(); line != null; line = this.br.readLine()) {
                sb.append(line).append('\n');
            }
            // Materialise the text once and share it between both readers.
            String data = sb.toString();
            System.out.println("Getting most frequent words");
            // NOTE(review): the returned set is not consumed anywhere in this class.
            // The call is kept because it still reports malformed tokens on stdout.
            this.getFrequent(new BufferedReader(new StringReader(data)));
            this.br = new BufferedReader(new StringReader(data));
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses one input line and appends one event per token to {@code elist}.
     *
     * @param line  a single line of whitespace-separated tagged tokens
     * @param elist destination list for the generated events
     */
    private void addEventsForLine(String line, ArrayList<Event> elist) {
        ArrayList<Object> tokens = new ArrayList<Object>();
        ArrayList<Object> first = new ArrayList<Object>();
        ArrayList<Object> outcomes = new ArrayList<Object>();
        StringTokenizer st = new StringTokenizer(line);
        while (st.hasMoreTokens()) {
            Pair p = ChunkerEventCollector.split(st.nextToken());
            Pair pt = ChunkerEventCollector.split((String)p.a);
            tokens.add(pt.a);
            first.add(pt.b);
            outcomes.add(p.b);
        }
        // Outcomes already assigned to earlier positions; grows as we move right
        // so the context generator only ever sees the preceding outcomes.
        ArrayList<Object> tags = new ArrayList<Object>();
        for (int i = 0; i < tokens.size(); ++i) {
            Object[] params = new Object[]{tokens, first, tags, Integer.valueOf(i)};
            // Passed as a single Object argument, exactly as before.
            String[] context = this.cg.getContext((Object)params);
            elist.add(new Event((String)outcomes.get(i), context));
            tags.add(outcomes.get(i));
        }
    }

    /**
     * Small demonstration: collects events from one hard-coded sentence and
     * prints each event's outcome on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Every token must be WORD/TAG/OUTCOME with no embedded whitespace.
        String data = "Rockwell/NNP/B-NP said/VBD/B-VP the/DT/B-NP agreement/NN/I-NP calls/VBZ/B-VP for/IN/B-SBAR it/PRP/B-NP to/TO/B-VP supply/VB/I-VP 200/CD/B-NP additional/JJ/I-NP so-called/JJ/I-NP shipsets/NNS/I-NP for/IN/B-PP the/DT/B-NP planes/NNS/I-NP ././O";
        ChunkerEventCollector ec = new ChunkerEventCollector(new StringReader(data));
        for (Event event : ec.getEvents()) {
            System.out.println(event.getOutcome());
        }
    }
}

