#!/usr/local/bin/perl
#
# file: compute_consensus_hypotheses.pl
#
# help text for this program: name, synopsis, description, an example
# invocation and the supported options. the variable is assigned
# before "use strict" takes effect and is therefore a package global
# rather than a lexical.
# NOTE(review): presumably the ISIP command_line package prints this
# text when help is requested - confirm before renaming or scoping it
#
$ISIP_HELP_FILE = <<__ISIP_HELP_HERE_FILE__;
name: compute_consensus_hypotheses.pl

synopsis: compute_consensus_hypotheses.pl -input_lattice_list <input_file> -output_lattice_list <output_file> -output_list <output_file>

descr: this program reads the lattice(s) in ISIP and BBN (HTK style) format, generates confusion networks as well as concensus hypotheses

example: compute_consensus_hypotheses.pl -input_lattice_list input_lattice.list -output_lattice_list output_lattice.list -output_list output_hypo.list

options:
 -input_lattice_list <input_file> : this option tells the script the location and names of the input lattices list

 -output_lattice_list <output_file> : this option tells the script the location and names of the output lattices list

 -output_list <output_file> : this option tells the script the location and names of the output hypotheses list

arguments: 

man page: none
__ISIP_HELP_HERE_FILE__

# system packages
#
use File::Basename;
use File::Path;
use File::Copy;
use FileHandle;
use strict;
autoflush STDOUT;

# CPAN modules
#
use Graph::Directed;

# ISIP packages
#
use command_line;
use lattice_subs;

#----------------------------------------------------------------------------
#
# constant and variable definitions
#
#----------------------------------------------------------------------------

# program name: the last "/"-separated component of the path this
# script was invoked with
#
my @tmp = split(m{/}, $0);
my $ISIP_PROG = pop @tmp;

# banner line that brackets this program's console output
#
my $TI_LINE_SEP = "-------------------  $ISIP_PROG  ----------------";

# names of the command-line options
#
my $TI_OPT_INPUT_LAT_LIST_TYPE = "-input_lattice_list";
my $TI_OPT_OUTPUT_LAT_LIST_TYPE = "-output_lattice_list";
my $TI_OPT_OUTPUT_LIST_TYPE = "-output_list";

# attribute names attached to lattice vertices
#
my @ATTR_NAME_VERTEX = qw(time utterance_id);

# attribute names attached to lattice edges
#
my @ATTR_NAME_EDGE = qw(
    number
    word
    pronunciation_version
    acoustic_score
    language_model_score
    posterior_score
);

#----------------------------------------------------------------------------
#
# command line processing
#
#----------------------------------------------------------------------------

# print a debugging message
#
print "$TI_LINE_SEP\n";
print "INFO: compute_consensus_hypotheses - checking command-line arguments\n\n";

# parse the command line. the three values come back in the order the
# options are listed in the call: input lattice list, output lattice
# list, output hypotheses list. the variables are declared exactly once
# here; declaring them a second time in the same scope would mask the
# first declaration.
# NOTE(review): the numeric arguments are presumably the number of
# values each option takes - confirm against the command_line package
#
my ($input_lat_list, $output_lat_list, $output_list) =
    command_line(0,
                 $TI_OPT_INPUT_LAT_LIST_TYPE, 1,
                 $TI_OPT_OUTPUT_LAT_LIST_TYPE, 1,
                 $TI_OPT_OUTPUT_LIST_TYPE, 1);

# verify the required options and arguments. an option that was not
# supplied may come back undefined or empty; both are rejected
#
if (!defined($input_lat_list) || $input_lat_list eq "") {
    isip_die ("ERROR: input lattice file list not provided\n ERROR: provide the input lattice file list through $TI_OPT_INPUT_LAT_LIST_TYPE option");
}

if (!defined($output_lat_list) || $output_lat_list eq "") {
    isip_die ("ERROR: output lattice file list not provided\n ERROR: provide the output lattice file list through $TI_OPT_OUTPUT_LAT_LIST_TYPE option");
}

if (!defined($output_list) || $output_list eq "") {
    isip_die ("ERROR: output hypotheses file list not provided\n ERROR: provide the output hypotheses file list through $TI_OPT_OUTPUT_LIST_TYPE option");
}

# echo the effective parameters
#
print "INFO: compute_consensus_hypotheses - running with parameters: \n";
my $out_line = join (" ", $TI_OPT_INPUT_LAT_LIST_TYPE, $input_lat_list, $TI_OPT_OUTPUT_LAT_LIST_TYPE,  $output_lat_list, $TI_OPT_OUTPUT_LIST_TYPE, $output_list);
print "$out_line\n";

#----------------------------------------------------------------------------
#
# lattice processing
#
#----------------------------------------------------------------------------

# try to open the input and output lists. all three are opened for
# reading: the "output" lists are text files that name the output
# files, they are not the outputs themselves. the three-argument form
# of open is used so that characters such as ">" or "|" in a
# user-supplied path can never be interpreted as an open mode
#
open(my $fp_input_lat_list, "<", $input_lat_list) or
    isip_die("ERROR: can't open file $input_lat_list") ;
open(my $fp_output_lat_list, "<", $output_lat_list) or
    isip_die("ERROR: can't open file $output_lat_list") ;
open(my $fp_output_list, "<", $output_list) or
    isip_die("ERROR: can't open file $output_list") ;

# cache the input and output lists for later use. each element is one
# line of the list file, still carrying its line terminator
#
my @input_lat_files = <$fp_input_lat_list>;
my @output_lat_files = <$fp_output_lat_list>;
my @output_files = <$fp_output_list>;

# close the lists
#
close($fp_input_lat_list) or isip_die("ERROR: compute_consensus_hypotheses  - can't close file $input_lat_list");
close($fp_output_lat_list) or isip_die("ERROR: compute_consensus_hypotheses  - can't close file $output_lat_list");
close($fp_output_list) or isip_die("ERROR: compute_consensus_hypotheses  - can't close file $output_list");

# error checking and warning mechanism: the three lists are consumed
# in parallel, so they must all have the same number of entries
#
if ($#input_lat_files != $#output_lat_files) {
    isip_die ("ERROR: the number of files in the input lattice file list $input_lat_list does not match with the number of files in the output lattice file list $output_lat_list");
}
elsif ($#input_lat_files != $#output_files) {
    isip_die ("ERROR: the number of files in the input lattice file list $input_lat_list does not match with the number of files in the output list $output_list");
}

# loop over all the lattice files. entry $i of each of the three lists
# belongs to the same job: one input lattice file, one output lattice
# file and one output hypotheses file
#
for my $i (0 .. $#input_lat_files) {

    # fetch the file names for this entry and strip the line
    # terminator. chomp removes only a trailing newline, so the last
    # line of a list file that does not end in a newline keeps its
    # final character
    #
    my $input_lat_file = $input_lat_files[$i];
    my $output_lat_file = $output_lat_files[$i];
    my $output_file = $output_files[$i];
    chomp($input_lat_file);
    chomp($output_lat_file);
    chomp($output_file);

    # delete the output files if they already exist. this must use the
    # stripped names: the raw list lines still end in a newline and
    # therefore never name the real files
    #
    (delete_file($output_lat_file) == 1)
        or isip_die("ERROR: failed delete the file $output_lat_file");
    (delete_file($output_file) == 1)
        or isip_die("ERROR: failed delete the file $output_file");

    # local variables
    #
    my $g = Graph::Directed->new;
    my $g_cn = Graph::Directed->new;
    my $id;
    my $lm_scale = 1.0;
    my $wd_penalty = 0.0;
    my $cur_pos = 0;
    my $next_pos = 0;
    my @hypo;

    # process every lattice stored in the input file. read_lattice
    # returns 1 while more lattices follow and a different value once
    # it hits EOF, but the lattice it read on that final call still has
    # to be processed. the status is therefore tested only after the
    # lattice has been handled.
    # NOTE(review): $g_cn and @hypo are not cleared between lattices;
    # presumably generate_confusion_network resets them - confirm
    #
    while (1) {

        # read the next lattice starting at the current position
        #
        my $read_status = read_lattice($input_lat_file, \$g,
                                       \@ATTR_NAME_VERTEX,
                                       \@ATTR_NAME_EDGE, $cur_pos,
                                       \$next_pos, \$id,
                                       \$lm_scale, \$wd_penalty);

        # generate confusion network and consensus hypothesis
        #
        (generate_confusion_network(\$g, \@ATTR_NAME_VERTEX,
                                    \@ATTR_NAME_EDGE, \$g_cn, \@hypo,
                                    \$id) == 1)
            or isip_die("ERROR: compute_consensus_hypotheses - failed to generate the confusion network");

        # write out the confusion network and consensus hypotheses
        #
        (write_lattice($output_lat_file, \$g_cn, \@ATTR_NAME_VERTEX,
                       \@ATTR_NAME_EDGE, \$id, \$lm_scale,
                       \$wd_penalty) == 1)
            or isip_die("ERROR: failed to write the output lattice");
        (write_file_as_lines($output_file, \@hypo) == 1)
            or isip_die("ERROR: failed to write the output consensus hypotheses");

        # stop once the last lattice of the file has been handled
        #
        last unless ($read_status == 1);

        # update the positions and restore the per-lattice defaults
        #
        $cur_pos = $next_pos;
        $id = "";
        $lm_scale = 1.0;
        $wd_penalty = 0.0;

        # start the next lattice from an empty graph
        #
        $g = Graph::Directed->new;
    }
}

#----------------------------------------------------------------------------
#
# finished
#
#----------------------------------------------------------------------------

# report completion and close the output banner
#
print "\nINFO: compute_consensus_hypotheses - generation of confusion networks and consensus hypotheses complete\n",
      "$TI_LINE_SEP\n";

# exit with a success status
#
exit(0);
