package csl.tools.weka;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;
import weka.core.converters.CSVLoader;
import weka.core.converters.CSVSaver;
import csl.tools.files.FileTools;

/**
 * Static helpers for loading, saving, merging, concatenating and splitting
 * Weka data sets (ARFF and CSV files).
 */
public class Weka {

	/**
	 * Saves a data set as an ARFF file.
	 * 
	 * @param data
	 *            the data set to save
	 * @param output
	 *            the path of the ARFF file to write
	 * @throws IOException
	 *             if the file cannot be written
	 */
	public static void saveARFF(Instances data, String output)
			throws IOException {

		saveARFF(data, new File(output));
	}

	/**
	 * Merges two ARFF files assuming the instances in both files are the same,
	 * although they can be in any order: rows are paired using the values of
	 * the reference columns. The merged data set is a copy of the first file
	 * in which every attribute of the second file that is neither a reference
	 * column nor the class attribute is inserted, starting at the position of
	 * the first reference column.
	 * <p>
	 * If the two files do not have a class attribute with the same name, or
	 * do not contain the same number of instances, a message is printed on
	 * the standard error stream and nothing is written.
	 * <p>
	 * A row of the first file with no counterpart in the second file keeps the
	 * values given to the inserted attributes by
	 * {@code Instances.insertAttributeAt} (missing values, according to the
	 * Weka documentation). If several rows of the second file share the same
	 * reference values, the first one is used.
	 * 
	 * @param arff1
	 *            the first input file
	 * @param arff2
	 *            the second input file
	 * @param arff
	 *            the output file receiving the merged data set
	 * @param refColumnNames
	 *            the names of the attributes, present in both files, whose
	 *            values identify the same row in the two files; must contain
	 *            at least one name
	 * @param prefix1
	 *            currently unused
	 * @param prefix2
	 *            currently unused
	 * @throws IOException
	 *             if an input file cannot be loaded or the output file cannot
	 *             be written
	 * @throws IllegalArgumentException
	 *             if no reference column is given, or if a reference column
	 *             does not exist in one of the files
	 */
	public static void mergeARFF(File arff1, File arff2, File arff,
			String[] refColumnNames, String prefix1, String prefix2)
			throws IOException {

		if (refColumnNames == null || refColumnNames.length == 0) {
			throw new IllegalArgumentException(
					"At least one reference column name is required");
		}
		/*
		 * loads the two data-sets as Weka Instances
		 */
		Instances instances1 = loadOrFail(arff1);
		Instances instances2 = loadOrFail(arff2);
		/*
		 * Checks that the two class attributes are the same
		 */
		Attribute classAttr1 = instances1.classAttribute();
		Attribute classAttr2 = instances2.classAttribute();
		if (!classAttr1.name().equals(classAttr2.name())) {
			System.err
					.println("ARFF files not compatible: different class attributes");
			return;
		}
		/*
		 * Checks that the instances sizes are the same
		 */
		int s1 = instances1.numInstances();
		int s2 = instances2.numInstances();
		if (s1 != s2) {
			System.err
					.println("ARFF files not compatible: different numbers of instances");
			return;
		}

		/*
		 * Builds the reference column index list for instances1 and instances2
		 */
		List<Integer> c1 = new ArrayList<Integer>(refColumnNames.length);
		List<Integer> c2 = new ArrayList<Integer>(refColumnNames.length);
		for (String refColName : refColumnNames) {
			c1.add(referenceIndex(instances1, refColName, arff1));
			c2.add(referenceIndex(instances2, refColName, arff2));
		}

		/*
		 * Adds empty attributes in mergedInstances. sourceIndices.get(k) is
		 * the index of an attribute in instances2 and insertionIndices.get(k)
		 * the index of its copy in mergedInstances.
		 */
		List<Integer> sourceIndices = new ArrayList<Integer>();
		List<Integer> insertionIndices = new ArrayList<Integer>();
		Instances mergedInstances = new Instances(instances1);
		int insertIndex = c1.get(0);
		for (int i = 0; i < instances2.numAttributes(); i++) {
			if (!c2.contains(i) && instances2.classIndex() != i) {
				mergedInstances.insertAttributeAt((Attribute) instances2
						.attribute(i).copy(), insertIndex);
				sourceIndices.add(i);
				insertionIndices.add(insertIndex);
				insertIndex++;
			}
		}

		/*
		 * Indexes the rows of instances2 by their reference values. Only the
		 * first row is kept for a given key, so that the result is the same as
		 * a top-down linear search.
		 */
		Map<List<Object>, Integer> rowByKey = new HashMap<List<Object>, Integer>();
		for (int j = 0; j < s2; j++) {
			List<Object> key = referenceKey(instances2, j, c2);
			if (!rowByKey.containsKey(key)) {
				rowByKey.put(key, j);
			}
		}

		/*
		 * Performs the merge
		 */
		for (int i = 0; i < s1; i++) {
			Integer match = rowByKey.get(referenceKey(instances1, i, c1));
			if (match == null) {
				// no counterpart in arff2: the inserted attributes are left
				// untouched for this row
				continue;
			}
			Instance inst2 = instances2.instance(match);
			Instance mergedInstance = mergedInstances.instance(i);
			for (int k = 0; k < sourceIndices.size(); k++) {
				int src = sourceIndices.get(k);
				int dst = insertionIndices.get(k);
				Attribute att = instances2.attribute(src);
				if (inst2.isMissing(src)) {
					mergedInstance.setMissing(dst);
				} else if (att.isNominal() || att.isString()) {
					// copies the label of this row, not the internal index
					mergedInstance.setValue(dst, inst2.stringValue(src));
				} else {
					mergedInstance.setValue(dst, inst2.value(src));
				}
			}
		}
		Weka.saveARFF(mergedInstances, arff);
	}

	/**
	 * Loads a data set and fails with an exception, rather than returning
	 * null, when it cannot be loaded.
	 */
	private static Instances loadOrFail(File input) throws IOException {

		Instances data = instances(input);
		if (data == null) {
			throw new IOException("Could not load instances from " + input);
		}
		return data;
	}

	/**
	 * Returns the index of the named attribute in the data set, or throws an
	 * IllegalArgumentException when there is no such attribute.
	 */
	private static int referenceIndex(Instances data, String name, File origin) {

		Attribute att = data.attribute(name);
		if (att == null) {
			throw new IllegalArgumentException("Reference column \"" + name
					+ "\" not found in " + origin);
		}
		return att.index();
	}

	/**
	 * Builds the list of reference values of one row: a Double for numeric
	 * attributes, the label for the other attributes, null for a missing
	 * value.
	 */
	private static List<Object> referenceKey(Instances data, int row,
			List<Integer> refIndices) {

		Instance inst = data.instance(row);
		List<Object> key = new ArrayList<Object>(refIndices.size());
		for (int index : refIndices) {
			if (inst.isMissing(index)) {
				key.add(null);
			} else if (data.attribute(index).isNumeric()) {
				key.add(Double.valueOf(inst.value(index)));
			} else {
				key.add(inst.stringValue(index));
			}
		}
		return key;
	}

	/**
	 * Saves a data set as an ARFF file, using the textual representation
	 * returned by {@code data.toString()}.
	 * 
	 * @param data
	 *            the data set to save
	 * @param outputARFF
	 *            the ARFF file to write
	 * @throws IOException
	 *             if the file cannot be written
	 */
	public static void saveARFF(Instances data, File outputARFF)
			throws IOException {

		BufferedWriter writer = new BufferedWriter(new FileWriter(outputARFF));
		try {
			writer.write(data.toString());
			writer.flush();
		} finally {
			// releases the file handle even if the write fails
			writer.close();
		}
	}

	/**
	 * Saves a data set as a CSV file using Weka's CSVSaver.
	 * 
	 * @param data
	 *            the data set to save
	 * @param outputCSV
	 *            the CSV file to write
	 * @throws IOException
	 *             if the file cannot be written
	 */
	public static void saveCSV(Instances data, File outputCSV)
			throws IOException {

		CSVSaver csv = new CSVSaver();
		FileOutputStream output = new FileOutputStream(outputCSV);
		try {
			csv.setDestination(output);
			csv.setInstances(data);
			csv.writeBatch();
			// NOTE(review): appends the bytes 10 (line feed) and 20 (a
			// control character) after the CSV content; 20 looks unintended
			// -- confirm what trailer, if any, is expected. Also confirm
			// that writeBatch() leaves the stream open in the Weka version
			// in use.
			output.write(10);
			output.write(20);
		} finally {
			output.close();
		}
	}

	/**
	 * Randomly splits a data set into a training set and a test set.
	 * 
	 * @param all
	 *            the data set to split
	 * @param percentage
	 *            the percentage (0 to 100) of instances put in the training
	 *            set; the number of training instances is rounded down
	 * @return an array of two data sets: the training set at index 0 and the
	 *         test set at index 1
	 * @throws IllegalArgumentException
	 *             if percentage is not between 0 and 100
	 */
	public static Instances[] split(Instances all, int percentage) {

		if (percentage < 0 || percentage > 100) {
			throw new IllegalArgumentException(
					"percentage must be between 0 and 100: " + percentage);
		}
		int total = all.numInstances();
		Instances train = new Instances(all, total);
		Instances test = new Instances(all, total);
		List<Integer> indices = new ArrayList<Integer>(total);
		for (int i = 0; i < total; i++) {
			indices.add(i);
		}
		Collections.shuffle(indices);
		// long arithmetic: percentage * total may not fit in an int
		int stop = (int) (((long) percentage * total) / 100);
		for (int i = 0; i < total; i++) {
			// goes through the shuffled indices so the split is random
			Instance picked = all.instance(indices.get(i));
			if (i < stop) {
				train.add(picked);
			} else {
				test.add(picked);
			}
		}
		return new Instances[] { train, test };
	}

	/**
	 * Loads a data set from a CSV file. If the data set has an attribute named
	 * "class", it is set as the class attribute.
	 * 
	 * @param inputCsv
	 *            the CSV file to read
	 * @return the loaded data set
	 * @throws IOException
	 *             if the file cannot be read
	 */
	public static Instances instancesFromCSV(File inputCsv) throws IOException {

		CSVLoader loader = new CSVLoader();
		loader.setFile(inputCsv);
		loader.setSource(inputCsv);
		Instances dataSet = loader.getDataSet();
		Attribute clAttr = dataSet.attribute("class");
		if (clAttr != null)
			dataSet.setClass(clAttr);
		return dataSet;
	}

	/**
	 * Concatenates data files into a single ARFF file. The first input file is
	 * written entirely (header and data); only the instances of the following
	 * files are appended. The headers of the input files are not compared.
	 * 
	 * @param outputArff
	 *            the file to write
	 * @param inputArff
	 *            the files to concatenate, in order
	 * @throws IOException
	 *             if an input file cannot be loaded or the output cannot be
	 *             written
	 */
	public static void smartConcat(File outputArff, File... inputArff)
			throws IOException {

		PrintWriter pw = FileTools.printWriterOn(outputArff, false);
		try {
			boolean header = true;
			for (File f : inputArff) {
				Instances inst_i = loadOrFail(f);
				if (header) {
					pw.append(inst_i.toString()).append("\n");
					header = false;
				} else {
					for (int j = 0; j < inst_i.numInstances(); j++)
						pw.append(inst_i.instance(j).toString()).append("\n");
				}
			}
			pw.flush();
		} finally {
			pw.close();
		}
	}

	/**
	 * Concatenates ARFF files line by line, without parsing them. The header
	 * of the first file (every line up to and including its {@code @data}
	 * line) is copied, followed by the lines located after the {@code @data}
	 * line of each input file. Assumes the input files have the very same
	 * header.
	 * 
	 * @param outputArff
	 *            the file to write
	 * @param inputArff
	 *            the files to concatenate, in order
	 * @throws IOException
	 *             if a file cannot be read or written, or if an input file
	 *             has no {@code @data} line
	 */
	public static void rawConcat(File outputArff, File... inputArff)
			throws IOException {

		PrintWriter pw = FileTools.printWriterOn(outputArff, false);
		try {
			boolean header = true;
			for (File f : inputArff) {
				BufferedReader br = new BufferedReader(new FileReader(f));
				try {
					/*
					 * Reads the header, copying it for the first file only
					 */
					String line;
					boolean dataSeen = false;
					while (!dataSeen && (line = br.readLine()) != null) {
						if (header) {
							pw.write(line);
							pw.write("\n");
						}
						dataSeen = isDataMarker(line);
					}
					if (!dataSeen) {
						throw new IOException("No @data section found in " + f);
					}
					header = false;
					/*
					 * Copies the data lines
					 */
					while ((line = br.readLine()) != null) {
						pw.write(line);
						pw.write("\n");
					}
				} finally {
					br.close();
				}
			}
		} finally {
			pw.close();
		}
	}

	/**
	 * Tells whether a line starts the data section of an ARFF file, ignoring
	 * case and leading white spaces.
	 */
	private static boolean isDataMarker(String line) {

		return line.trim().regionMatches(true, 0, "@data", 0, 5);
	}

	/**
	 * Loads a data set from a file, choosing the loader from the file
	 * extension (".arff" or ".csv", case-insensitive).
	 * 
	 * @param input
	 *            the file to read
	 * @return the loaded data set, or null if the extension is not supported
	 *         or if the ARFF loader returned null
	 * @throws IOException
	 *             if a CSV file cannot be read
	 */
	public static Instances instances(File input) throws IOException {

		String name = input.getName().toLowerCase();
		if (name.endsWith(".arff"))
			return instancesFromARFF(input);
		if (name.endsWith(".csv"))
			return instancesFromCSV(input);
		return null;
	}

	/**
	 * Loads a data set from an ARFF file. If the data set has an attribute
	 * named "class", it is set as the class attribute.
	 * 
	 * @param inputArff
	 *            the ARFF file to read
	 * @return the loaded data set, or null if the file does not exist or
	 *         cannot be read (a message is then printed on the standard error
	 *         stream)
	 * @throws IOException
	 *             if the loader cannot be set up on the file
	 */
	public static Instances instancesFromARFF(File inputArff)
			throws IOException {

		if (!inputArff.exists()) {
			System.err.println("Could not find file " + inputArff);
			return null;
		}
		ArffLoader loader = new ArffLoader();
		loader.setFile(inputArff);
		loader.setSource(inputArff);
		Instances dataSet = null;
		try {
			dataSet = loader.getDataSet();
		} catch (IOException e) {
			// still returns null, but no longer hides the cause
			System.err.println("Could not read file " + inputArff + ": "
					+ e.getMessage());
			return null;
		}
		Attribute clAttr = dataSet.attribute("class");
		if (clAttr != null)
			dataSet.setClass(clAttr);
		return dataSet;
	}

	/**
	 * Same as {@link #rawCsv(File, int, int)}, the file being given by its
	 * path.
	 */
	public static String[][] rawCsv(String csvFile, int numRowsToSkip,
			int numColsToSkip) throws IOException {

		return rawCsv(new File(csvFile), numRowsToSkip, numColsToSkip);
	}

	/**
	 * Same as {@link #csv(File, int, int)}, the file being given by its path.
	 */
	public static double[][] csv(String csvFile, int numRowsToSkip,
			int numColsToSkip) throws IOException {

		return csv(new File(csvFile), numRowsToSkip, numColsToSkip);
	}

	/**
	 * Reads the cells of a comma-separated file as strings. Lines are split
	 * on "," only: quoted cells are not supported. The number of columns of
	 * the result is given by the first line read.
	 * 
	 * @param csvFile
	 *            the file to read
	 * @param numRowsToSkip
	 *            the number of leading rows to skip (presumably; the value
	 *            numRowsToSkip + 1 is passed to FileTools.readLines as its
	 *            second argument -- confirm against FileTools)
	 * @param numColsToSkip
	 *            the number of leading columns to skip in every row
	 * @return the cells, indexed by row then by column; an empty array if no
	 *         line was read
	 * @throws IOException
	 *             if the file cannot be read
	 * @throws IllegalArgumentException
	 *             if numColsToSkip is greater than the number of columns of
	 *             the first line read
	 */
	public static String[][] rawCsv(File csvFile, int numRowsToSkip,
			int numColsToSkip) throws IOException {

		List<String> lines = FileTools.readLines(csvFile, numRowsToSkip + 1,
				Integer.MAX_VALUE);
		if (lines == null || lines.isEmpty()) {
			return new String[0][0];
		}
		int numCols = lines.get(0).split(",").length - numColsToSkip;
		if (numCols < 0) {
			throw new IllegalArgumentException("Cannot skip " + numColsToSkip
					+ " columns in " + csvFile);
		}
		String[][] contents = new String[lines.size()][numCols];
		int row = 0;
		for (String line : lines) {
			String[] cells = line.split(",");
			String[] target = contents[row];
			for (int i = numColsToSkip; i < cells.length; i++) {
				target[i - numColsToSkip] = cells[i];
			}
			row++;
		}
		return contents;
	}

	/**
	 * Reads the cells of a comma-separated file as numbers.
	 * 
	 * @param csvFile
	 *            the file to read
	 * @param numRowsToSkip
	 *            forwarded to {@link #rawCsv(File, int, int)}
	 * @param numColsToSkip
	 *            the number of leading columns to skip in every row
	 * @return the values, indexed by row then by column; an empty array if no
	 *         line was read
	 * @throws IOException
	 *             if the file cannot be read
	 * @throws NumberFormatException
	 *             if a cell is not a valid number
	 */
	public static double[][] csv(File csvFile, int numRowsToSkip,
			int numColsToSkip) throws IOException {

		String[][] contents = rawCsv(csvFile, numRowsToSkip, numColsToSkip);
		if (contents.length == 0) {
			return new double[0][0];
		}
		double[][] doubleContents = new double[contents.length][contents[0].length];
		for (int row = 0; row < contents.length; row++)
			for (int col = 0; col < contents[row].length; col++)
				doubleContents[row][col] = Double
						.parseDouble(contents[row][col]);
		return doubleContents;
	}

	/**
	 * Checks that this instances contains at least "min" true and false class
	 * values. An instance counts as false when its class value is 0, and as
	 * true otherwise.
	 * 
	 * @param inst
	 *            the data set to check
	 * @param min
	 *            the minimum number of instances required for each of the two
	 *            groups
	 * @return true as soon as both counts have reached min, false if the end
	 *         of the data set is reached before
	 */
	public static boolean checkInstances(Instances inst, int min) {

		int numTrue = 0;
		int numFalse = 0;
		for (int i = 0; i < inst.numInstances(); i++) {
			if (inst.instance(i).classValue() == 0)
				numFalse++;
			else
				numTrue++;
			if (numFalse >= min && numTrue >= min)
				return true;
		}
		return false;
	}

	/**
	 * Sorts, in place, the elements of a FastVector in their natural String
	 * order. Every element is cast to String.
	 * 
	 * @param classes
	 *            the vector to sort
	 */
	public static void sortFastVector(FastVector classes) {

		List<String> cl = new ArrayList<String>(classes.size());
		for (int i = 0; i < classes.size(); i++) {
			cl.add((String) classes.elementAt(i));
		}
		Collections.sort(cl);
		int index = 0;
		for (String s : cl)
			classes.setElementAt(s, index++);
	}

	/**
	 * Returns the list of IDs in the ARFF file. It is assumed that IDs are in
	 * the attribute at idIndex in the ARFF file.
	 * 
	 * @param arff
	 *            the input file
	 * @param idIndex
	 *            the index of the ID attribute in the ARFF file
	 * 
	 * @return the list of IDs in this file, each value being truncated to an
	 *         int
	 * @throws IOException
	 *             if the file cannot be loaded
	 */
	public static List<Integer> ids(File arff, int idIndex) throws IOException {

		Instances instances = Weka.instancesFromARFF(arff);
		if (instances == null) {
			throw new IOException("Could not load instances from " + arff);
		}
		List<Integer> ids = new ArrayList<Integer>(instances.numInstances());
		for (int i = 0; i < instances.numInstances(); i++) {
			Instance inst = instances.instance(i);
			int id = (int) inst.value(idIndex);
			ids.add(id);
		}
		return ids;
	}

	/**
	 * Merges two hard-coded ARFF files, using the "source", "start" and "end"
	 * attributes to pair the rows. The command line arguments are ignored.
	 */
	public static void main(String[] args) throws IOException {
		File root = new File(
				"D:/users/roy/sony/papers/submitted/Animal Cognition -- Parrot Vocalizations/Experiment #2 (IB1)");
		File a1 = new File(root, "Parrot 5 cat 22 samples (IB1).arff");
		File a2 = new File(root, "Parrots AW 22 samples (IB1).arff");
		File fusion = new File(root, "all.arff");
		String[] ref = { "source", "start", "end" };
		mergeARFF(a1, a2, fusion, ref, "ALL_", "AW_");
	}
}
