/*
 * Decompiled with CFR 0.152.
 */
package core.text;

import core.Tag;
import core.TransformableItem;
import core.text.TextualItem;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import weka.classifiers.Classifier;
import weka.classifiers.evaluation.Prediction;
import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.Kernel;
import weka.classifiers.functions.supportVector.RBFKernel;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * A {@link Tag} whose examples are {@link TextualItem}s.
 *
 * <p>Training turns the positive and negative examples into a two-class Weka
 * data set, converts the item text to a word vector with
 * {@link StringToWordVector} and fits an {@link SMO} classifier (RBF kernel,
 * logistic models enabled) on the result. The filter created during training
 * is kept so that items to classify are vectorized with the same dictionary.
 */
public class TextualTag extends Tag {
    private static final long serialVersionUID = 6806104955762214807L;

    /** Number of words the word-vector filter is asked to keep when training. */
    private static final int DICTIONARY_SIZE = 100;

    /** Prefix that turns a tag name into the label of its negative class. */
    private static final String NEGATIVE_PREFIX = "not ";

    /** Filter created by the last call to {@link #wordFilter(Instances, int)}. */
    private StringToWordVector filter;

    /**
     * Returns the word-vector filter created by the most recent call to
     * {@link #wordFilter(Instances, int)}.
     *
     * @return the current filter, or {@code null} if none has been created yet
     */
    public StringToWordVector getFilter() {
        return this.filter;
    }

    /**
     * Returns the tag registered under the given name, creating a new
     * {@code TextualTag} when {@link Tag#getTagNamed(String)} returns
     * {@code null}.
     *
     * <p>NOTE(review): the looked-up tag is cast unconditionally, so a tag of
     * another kind registered under the same name results in a
     * {@link ClassCastException} - confirm this is the intended contract.
     *
     * @param n the tag name
     * @return the existing or newly created tag
     */
    public static TextualTag named(String n) {
        TextualTag existing = (TextualTag) Tag.getTagNamed(n);
        if (existing != null) {
            return existing;
        }
        return new TextualTag(n);
    }

    /**
     * Creates a textual tag with the given name.
     *
     * @param n the tag name
     */
    protected TextualTag(String n) {
        super(n);
    }

    /**
     * Retrains this tag from its positive and negative examples.
     *
     * <p>The classifier is cleared first. It is only set again once a model
     * has been built successfully, so after a rejected training set or a
     * failed training run {@link #classifyScalar(TransformableItem)} returns
     * {@code 0.0} instead of querying an untrained model.
     */
    @Override
    public void train() {
        this.classifier = null;
        if (!this.isTrainingSetOk()) {
            System.out.println("tag " + this + " has bad training set, cannot be trained");
            return;
        }

        List<TextualItem> positives = new ArrayList<TextualItem>();
        for (TransformableItem example : this.positiveExamples) {
            positives.add((TextualItem) example);
        }
        List<TextualItem> negatives = new ArrayList<TextualItem>();
        for (TransformableItem example : this.negativeExamples) {
            negatives.add((TextualItem) example);
        }

        // One entry per class label: the tag name and its "not ..." counterpart.
        Hashtable<String, List<TextualItem>> fileDico = new Hashtable<String, List<TextualItem>>();
        fileDico.put(this.name, positives);
        fileDico.put(NEGATIVE_PREFIX + this.name, negatives);

        Classifier model = this.createLearningModel();
        try {
            this.instances = this.buildInstances(fileDico);
            Instances wordInstances = this.wordFilter(this.instances, DICTIONARY_SIZE);
            model.buildClassifier(wordInstances);
            // Publish the model only after it has been built successfully.
            this.classifier = model;
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a data set with three attributes - "filename" (string),
     * "contents" (string) and "class" (nominal, one value per key of
     * {@code fileDico}) - holding one instance per item.
     *
     * @param fileDico items grouped by class label
     * @return the data set, with the class index set to the "class" attribute
     * @throws Exception if the data set cannot be built
     */
    private Instances buildInstances(Hashtable<String, List<TextualItem>> fileDico) throws Exception {
        FastVector classes = new FastVector(fileDico.size());
        Enumeration<String> labels = fileDico.keys();
        while (labels.hasMoreElements()) {
            classes.addElement(labels.nextElement());
        }

        FastVector atts = new FastVector(3);
        // A null value list declares a string attribute; the cast selects the
        // (String, FastVector) constructor among the two-argument overloads.
        atts.addElement(new Attribute("filename", (FastVector) null));
        atts.addElement(new Attribute("contents", (FastVector) null));
        atts.addElement(new Attribute("class", classes));

        Instances data = new Instances("Files: rootDir?", atts, 0);
        data.setClassIndex(2);

        // Walk the labels in the order they were registered above so that the
        // position in 'classes' is the nominal class value of each instance.
        for (int classInd = 0; classInd < classes.size(); classInd++) {
            String label = (String) classes.elementAt(classInd);
            for (TextualItem txtFile : fileDico.get(label)) {
                double[] values = new double[]{
                    data.attribute(0).addStringValue(txtFile.getName()),
                    data.attribute(1).addStringValue(txtFile.getText()),
                    classInd};
                data.add(new Instance(1.0, values));
            }
        }
        return data;
    }

    /**
     * Converts an item into an instance in the word-vector space of the
     * current filter, with a missing class value.
     *
     * <p>Only the new instance is passed through the filter: a header-only
     * copy of {@code insts} supplies the attribute structure, so the cost of
     * a classification does not grow with the size of the training set.
     *
     * @param insts data set whose structure the new instance must follow
     * @param item the item to convert
     * @return the filtered instance, or {@code null} if the conversion failed
     *         (the failure is printed to standard error)
     */
    private Instance getInstanceFor(Instances insts, TextualItem item) {
        try {
            Instances batch = new Instances(insts, 0);
            double[] values = new double[]{
                batch.attribute(0).addStringValue(item.getName()),
                batch.attribute(1).addStringValue(item.getText()),
                Prediction.MISSING_VALUE};
            Instance inst = new Instance(1.0, values);
            inst.setDataset(batch);
            batch.add(inst);

            Instances res = Filter.useFilter(batch, this.filter);
            // Same post-processing as wordFilter: drop the first attribute.
            res.deleteAttributeAt(0);
            return res.instance(res.numInstances() - 1);
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a new {@link StringToWordVector} filter, stores it as this
     * tag's current filter and applies it to the given data.
     *
     * <p>The filter is configured with the options {@code -R 2} (attribute
     * range, 1-based: the second attribute) and the requested number of words
     * to keep. The first attribute of the filtered data is removed before it
     * is returned.
     *
     * @param instances the data to filter
     * @param dictionarySize number of words the filter should keep
     * @return the filtered data without its first attribute
     * @throws Exception if the filter cannot be configured or applied
     */
    public Instances wordFilter(Instances instances, int dictionarySize) throws Exception {
        StringToWordVector wordVector = new StringToWordVector();
        // setOptions must come first: the explicit setter below is applied on
        // top of whatever the option parsing leaves in place.
        wordVector.setOptions(new String[]{"-R", "2"});
        wordVector.setWordsToKeep(dictionarySize);
        wordVector.setInputFormat(instances);
        this.filter = wordVector;

        Instances filtered = Filter.useFilter(instances, wordVector);
        filtered.deleteAttributeAt(0);
        return filtered;
    }

    /**
     * Creates the untrained model used by {@link #train()}: an {@link SMO}
     * with an {@link RBFKernel} and logistic models enabled.
     *
     * @return a new, untrained classifier
     */
    @Override
    protected Classifier createLearningModel() {
        SMO smo = new SMO();
        smo.setKernel(new RBFKernel());
        smo.setBuildLogisticModels(true);
        return smo;
    }

    /**
     * Returns the classifier's probability that the item belongs to this tag.
     *
     * @param m the item to classify; must be a {@link TextualItem}
     * @return the probability of the positive class, or {@code 0.0} when the
     *         tag is not trained or the item cannot be classified
     */
    @Override
    public double classifyScalar(TransformableItem m) {
        if (this.classifier == null) {
            return 0.0;
        }
        TextualItem textualItem = (TextualItem) m;
        try {
            Instance inst = this.getInstanceFor(this.instances, textualItem);
            if (inst == null) {
                // The conversion failure has already been reported.
                return 0.0;
            }
            double[] distrib = this.classifier.distributionForInstance(inst);
            Attribute classAttribute = inst.dataset().classAttribute();
            String first = classAttribute.value(0);
            String second = classAttribute.value(1);
            // The negative label is "not " + the positive label. Comparing the
            // two labels, instead of testing a prefix, stays correct for tags
            // whose own name starts with "not ".
            if (first.equals(NEGATIVE_PREFIX + second)) {
                return distrib[1];
            }
            return distrib[0];
        }
        catch (Exception e1) {
            e1.printStackTrace();
            return 0.0;
        }
    }
}

