#!/usr/bin/env python

import glob
import os
import sys

import numpy as np
import scipy.stats


def _load_hyp_labels(filename):
    """Return the first column of the hyp file *filename* as int32 labels.

    The file name is printed before loading, so progress is visible on stdout.
    """
    print(filename)
    # ndmin=2 keeps a rows-by-columns layout even for single-column or
    # single-row files, so column 0 is always the label column.
    hyp_matrix = np.loadtxt(filename, ndmin=2)
    return hyp_matrix[:, 0].astype(np.int32)


def _majority_vote(hyp_files):
    """Return the per-data-point mode of the labels in *hyp_files*.

    The result is an (n, 1) column so it can be concatenated with the
    feature columns. ``np.vstack`` raises ``ValueError`` if the files do not
    all contain the same number of labels.
    """
    label_matrix = np.vstack([_load_hyp_labels(name) for name in hyp_files])
    vote = scipy.stats.mode(label_matrix, axis=0)[0]
    # Older SciPy returns the mode with a leading length-1 axis, newer
    # releases drop it; reshaping yields a column vector either way.
    return np.asarray(vote).reshape(-1, 1)


def _ref_file_name(dim, split):
    """Return the reference file name for the given dimension and split."""
    # The 5d evaluation set is the only one stored under a different name.
    if (dim, split) == ('5d', 'eval'):
        return 'eval_anonymized.txt'
    return split + '.txt'


def main(argv):
    """Write majority-vote hyp files for the 2d and 5d train/dev/eval sets.

    Args:
        argv: sequence ``[hyp_dir, ref_dir, out_dir]``.
            ``hyp_dir`` holds the ``*.hyp`` files to vote over; a file is
            assigned to a dataset when its base name contains both the
            dimension tag (``2d``/``5d``) and the split tag
            (``train``/``dev``/``eval``).
            ``ref_dir`` holds ``<dim>/<split>.txt`` reference files whose
            first column is dropped and whose remaining columns are copied
            to the output as features.
            ``out_dir`` receives ``majority_vote_<dim>_maj_<split>.hyp``.

    Raises:
        ValueError: if a dataset has no matching hyp files, or if the label
            and feature row counts do not line up.
    """
    hyp_dir, ref_dir, out_dir = argv[0], argv[1], argv[2]
    # Sorted so the files are loaded (and printed) in a reproducible order.
    hyp_files = sorted(glob.glob(os.path.join(hyp_dir, '*.hyp')))

    # Build every output matrix before writing anything, so a failure on a
    # later dataset does not leave a partial set of output files behind.
    outputs = []
    for dim in ('2d', '5d'):
        for split in ('train', 'dev', 'eval'):
            # Match on the base name only: tags appearing in a directory
            # name must not pull files into the wrong dataset.
            voters = [
                name for name in hyp_files
                if dim in os.path.basename(name)
                and split in os.path.basename(name)
            ]
            if not voters:
                raise ValueError(
                    "no %s %s hyp files found in %s" % (dim, split, hyp_dir))
            vote = _majority_vote(voters)

            ref_path = os.path.join(ref_dir, dim, _ref_file_name(dim, split))
            feats = np.loadtxt(ref_path, ndmin=2)[:, 1:]

            out_name = 'majority_vote_%s_maj_%s.hyp' % (dim, split)
            outputs.append((os.path.join(out_dir, out_name),
                            np.concatenate([vote, feats], axis=1)))

    for out_path, hyp in outputs:
        # Integer-looking label followed by one float column per feature.
        fmt = ['%1.0f'] + ['%.6f'] * (hyp.shape[1] - 1)
        np.savetxt(out_path, hyp, fmt=fmt)

if __name__ == "__main__":
    # Script entry point: exactly three positional arguments are required
    # after the script name; anything else prints the usage line and exits.
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        sys.exit("usage: python majority_vote.py hyp_dir ref_dir out_dir")
    main(cli_args)