/*
 * Decompiled with CFR 0.152.
 */
package core.text;

import csl.tools.files.FileTools;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Random;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.Kernel;
import weka.classifiers.functions.supportVector.RBFKernel;
import weka.classifiers.trees.J48;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Builds Weka datasets from collections of text files, converts the file
 * contents into word-vector attributes and evaluates an SMO classifier on the
 * result.
 *
 * <p>Every dataset produced by the {@code instances(...)} methods has three
 * attributes: {@code filename} (string), {@code contents} (string) and
 * {@code class} (nominal, index 2, set as the class attribute).
 */
public class TextMining {
    /** Corpus directory used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_ROOT = "D:/workspace/FPTextMining/Mail";

    /** Size of the char buffer used when reading a document into memory. */
    private static final int READ_BUFFER_SIZE = 8192;

    /** Filter created by the most recent call to {@link #wordFilter(Instances, int)}. */
    private StringToWordVector filter;

    /**
     * Returns the filter built by the last {@link #wordFilter(Instances, int)}
     * call, or {@code null} if that method has not been called yet.
     *
     * @return the last word-vector filter, possibly {@code null}
     */
    public StringToWordVector getFilter() {
        return this.filter;
    }

    /**
     * Builds a dataset from a directory tree: each immediate sub-directory of
     * {@code rootDir} becomes a class label, and each file in it accepted by
     * {@code FileTools.txtFF()} becomes one instance.
     *
     * @param rootDir directory whose sub-directories name the classes
     * @return a dataset with one instance per accepted file
     * @throws IllegalArgumentException if {@code rootDir} cannot be listed
     *         (for example, it is not a directory)
     * @throws IOException if a sub-directory cannot be listed or a file
     *         cannot be read
     * @throws Exception declared for backward compatibility with callers
     */
    public Instances instances(File rootDir) throws Exception {
        File[] subDirs = rootDir.listFiles(new FileFilter(){

            public boolean accept(File pathname) {
                return pathname.isDirectory();
            }
        });
        // File.listFiles returns null (not an empty array) when the path is not
        // a directory or an I/O error occurs.
        if (subDirs == null) {
            throw new IllegalArgumentException("Not a readable directory: " + rootDir);
        }
        FastVector classes = new FastVector(subDirs.length);
        for (File subDir : subDirs) {
            classes.addElement((Object)subDir.getName());
        }
        Instances data = TextMining.newDataset("Files: " + rootDir.getName(), classes);
        for (File subDir : subDirs) {
            File[] txtFiles = subDir.listFiles(FileTools.txtFF());
            if (txtFiles == null) {
                throw new IOException("Cannot list files in " + subDir);
            }
            int classInd = classes.indexOf((Object)subDir.getName());
            for (File txtFile : txtFiles) {
                TextMining.addDocument(data, txtFile, classInd);
            }
        }
        return data;
    }

    /**
     * Builds a dataset from an explicit mapping of class label to files.
     * Each key of {@code fileDico} becomes a class label and each file in the
     * associated list becomes one instance of that class.
     *
     * @param fileDico class label mapped to the files belonging to that class
     * @return a dataset with one instance per listed file
     * @throws IOException if a file cannot be read
     * @throws Exception declared for backward compatibility with callers
     */
    public Instances instances(Hashtable<String, List<File>> fileDico) throws Exception {
        FastVector classes = new FastVector(fileDico.size());
        Enumeration<String> keys = fileDico.keys();
        while (keys.hasMoreElements()) {
            classes.addElement((Object)keys.nextElement());
        }
        Instances data = TextMining.newDataset("Files: rootDir?", classes);
        keys = fileDico.keys();
        while (keys.hasMoreElements()) {
            String tag = keys.nextElement();
            int classInd = classes.indexOf((Object)tag);
            for (File txtFile : fileDico.get(tag)) {
                TextMining.addDocument(data, txtFile, classInd);
            }
        }
        return data;
    }

    /**
     * Creates an empty dataset with the {@code filename}, {@code contents} and
     * {@code class} attributes, with the class index set to 2.
     *
     * @param relationName name given to the new dataset
     * @param classes nominal values of the {@code class} attribute
     * @return the empty dataset
     */
    private static Instances newDataset(String relationName, FastVector classes) {
        FastVector atts = new FastVector(3);
        // The cast selects the (String, FastVector) constructor; a null value
        // list is how Weka declares a string attribute. A bare null would be
        // ambiguous with the (String, String) date-format constructor.
        atts.addElement((Object)new Attribute("filename", (FastVector)null));
        atts.addElement((Object)new Attribute("contents", (FastVector)null));
        atts.addElement((Object)new Attribute("class", classes));
        Instances data = new Instances(relationName, atts, 0);
        data.setClassIndex(2);
        return data;
    }

    /**
     * Appends one instance describing {@code txtFile} to {@code data}: its
     * path, its full text and the given class index, with weight 1.0.
     *
     * @param data dataset created by {@link #newDataset(String, FastVector)}
     * @param txtFile file to read
     * @param classInd index of the file's label in the class attribute
     * @throws IOException if the file cannot be read
     */
    private static void addDocument(Instances data, File txtFile, int classInd) throws IOException {
        double[] newInst = new double[3];
        newInst[0] = data.attribute(0).addStringValue(txtFile.getPath());
        newInst[1] = data.attribute(1).addStringValue(TextMining.readFile(txtFile));
        newInst[2] = classInd;
        data.add(new Instance(1.0, newInst));
    }

    /**
     * Reads a whole file into a string, decoding with the platform default
     * charset, and always closes the underlying stream.
     *
     * @param txtFile file to read
     * @return the decoded file contents
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFile(File txtFile) throws IOException {
        InputStreamReader reader = new InputStreamReader(new FileInputStream(txtFile));
        try {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[READ_BUFFER_SIZE];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                text.append(buffer, 0, read);
            }
            return text.toString();
        }
        finally {
            // Closing the reader also closes the wrapped FileInputStream.
            reader.close();
        }
    }

    /**
     * Converts string attributes into word attributes with a new
     * {@link StringToWordVector}, which is stored and can be retrieved with
     * {@link #getFilter()}. The filter is configured with the options
     * {@code -R 2} (the attribute range to convert; for datasets built by this
     * class, attribute 2 is {@code contents}). Attribute 0 of the filtered
     * result is then deleted.
     *
     * @param instances dataset to filter
     * @param dictionarySize value passed to the filter's {@code setWordsToKeep}
     * @return the filtered dataset with its first attribute removed
     * @throws Exception if the filter cannot be configured or applied
     */
    public Instances wordFilter(Instances instances, int dictionarySize) throws Exception {
        this.filter = new StringToWordVector();
        String[] options = new String[]{"-R", "2"};
        this.filter.setOptions(options);
        this.filter.setWordsToKeep(dictionarySize);
        this.filter.setInputFormat(instances);
        Instances inst = Filter.useFilter((Instances)instances, (Filter)this.filter);
        inst.deleteAttributeAt(0);
        return inst;
    }

    /**
     * Trains an SMO classifier with an RBF kernel on {@code instances}, runs a
     * 10-fold cross-validation of it with an unseeded {@link Random} (so the
     * fold assignment differs between runs) and prints the trained model to
     * standard output.
     *
     * @param instances dataset with its class index set
     * @return the cross-validation results
     * @throws Exception if training or evaluation fails
     */
    public Evaluation classify(Instances instances) throws Exception {
        SMO smo = new SMO();
        smo.setKernel((Kernel)new RBFKernel());
        smo.setNumFolds(10);
        // Built on the full dataset before the evaluation, so the model printed
        // below is the one trained here.
        smo.buildClassifier(instances);
        Evaluation eval = new Evaluation(instances);
        eval.crossValidateModel((Classifier)smo, instances, 10, new Random());
        System.out.println(smo);
        return eval;
    }

    /**
     * Runs the whole pipeline on a corpus directory: builds the dataset,
     * applies the word filter with a dictionary size of 500, writes
     * {@code instances.arff} and {@code word-instances.arff} into the corpus
     * directory, and prints the evaluation reports. Any failure is reported on
     * standard error.
     *
     * @param args optional; {@code args[0]} overrides the default corpus directory
     */
    public static void main(String[] args) {
        int dictionarySize = 500;
        TextMining tm = new TextMining();
        try {
            String rootPath = args != null && args.length > 0 ? args[0] : DEFAULT_ROOT;
            File root = new File(rootPath);
            Instances instances = tm.instances(root);
            System.out.println(instances);
            Instances wordInstances = tm.wordFilter(instances, dictionarySize);
            System.out.println(wordInstances);
            TextMining.writeArff(new File(root, "instances.arff"), instances);
            TextMining.writeArff(new File(root, "word-instances.arff"), wordInstances);
            Evaluation ev = tm.classify(wordInstances);
            System.out.println(ev.toClassDetailsString());
            System.out.println(ev.toMatrixString());
            System.out.println(ev.toSummaryString(true));
        }
        catch (Exception e) {
            System.err.println(e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Writes the textual form of {@code data} to {@code target}, closing the
     * writer even when the write fails.
     *
     * @param target file to write
     * @param data dataset whose {@code toString()} output is written
     * @throws Exception if the writer cannot be opened
     */
    private static void writeArff(File target, Instances data) throws Exception {
        PrintWriter w = FileTools.printWriterOn((File)target);
        try {
            w.write(data.toString());
        }
        finally {
            w.close();
        }
    }
}

