# file: lib/lattice_subs.pm
#

# this file contains the subroutines for lattice manipulation. Note
# that all functions in this package return 0 if there is an error and
# 1 otherwise. This is chosen to coincide with the notion of true and
# false. Note that this is different than the return values for system
# calls where 1 is an error and 0 is no error 
#

# system libraries
#
use FileHandle;
use strict;
autoflush STDOUT;

# CPAN modules
#
use Graph::Directed;

# ISIP packages
#
use command_line;
use Link;

#----------------------------------------------------------------------------
#
# global constants
#
#----------------------------------------------------------------------------
use constant MIN_LOG_VALUE => -23.02585093;
use constant CORRS_COST => 0;
use constant SUBS_COST => -4;
use constant DELS_COST => -3;
use constant INS_COST => -3;
use constant ALIGN_SYMBOL => "-";
use constant MAX_POSITIVE_INTEGER => 9999999999;
use constant MIN_NEGATIVE_INTEGER => -9999999999;
use constant UNSET => 0;
use constant SET => 1;

#----------------------------------------------------------------------------
#
# subroutines
#
#----------------------------------------------------------------------------

# subroutine: read_lattice
#
# arguments:
#  @_ : ($input_lat_file, \$g, \@attr_name_vertex, \@attr_name_edge, $cur_pos,
#        \$next_pos, \$id, \$lm_scale, \$wd_penalty)
#
# description: this routine reads a lattice in BBN format
#
# return: 0 if error 1 if no error
#
sub read_lattice ($$$$$$$$$) {

    # parse the argument list
    #
    my $input_lat_file = shift @_;
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $cur_pos = shift @_;
    my $next_pos = shift @_;
    my $id_ref = shift @_;
    my $lm_scale_ref = shift @_;
    my $wd_penalty_ref = shift @_;

    if (($input_lat_file eq "") || ($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq "") || ($cur_pos eq "") ||
	($$next_pos eq ""))  {
	print "ERROR: read_lattice - bad argument list\n";
	return 0;
    }
    
    # clear the graph
    #
    if ($$g_ref ne "") {
	undef $$g_ref;
	$$g_ref = new Graph::Directed->new;
    }
    
    # local variables
    #
    my @line;
    my @node_no;
    my @time;
    my @arc_no;
    my @start;
    my @end;
    my @word;
    my @pron_ver;
    my @ac_score;
    my @lm_score;
    my @prob_score;
    my $old_ac_score;
    my $old_lm_score;
    my $old_prob_score;
    my $sum_ac_score = 0;
    my $sum_lm_score = 0;
    my $sum_prob_score = 0;
    my $count_utt = 0;

    # try to open the input file
    #    
    open(fp_lat_file, "$input_lat_file") or 
	isip_die("ERROR: read_lattice - can't open file $input_lat_file");
    
    # move to the current position
    #
    seek(fp_lat_file, $cur_pos, 0);

    # read in the lattice
    #
    while(<fp_lat_file>) {
	
	chop;
	
	# read in utterance id
	#
	if (/UTTERANCE/o) {
	    @line = split('=');
	    $count_utt++;

	    # display information
	    #
	    if ($count_utt <= 1) {
		$$id_ref = $line[1];
		print "\nINFO: lattice_subs - reading the lattice for utterance $$id_ref\n";
	    }
	    
	    # check if we have encountered next lattice
	    #
	    if ($count_utt > 1) {
		my $len = length();
		$$next_pos = tell(fp_lat_file) - ($len + 1);
		return 1;
	    }
	}
	
	# read the lm scale and wor insertion penalty
	#
	elsif (/lmscale/o) {
	    @line = split('=');
	    $$lm_scale_ref = $line[1];
	}
	elsif (/wdpenalty/o) {
	    @line = split('=');
	    $$wd_penalty_ref = $line[1];
	}
	
	# read the vertices
	#
	elsif ($_ =~ /^I=(\d+)\s+t=(\d+)/o) {

	    # read the node number and time
	    #
	    @line = split(/\s+/);	    
	    @node_no = split('=', $line[0]);
	    @time = split('=', $line[1]);
	    
	    $$g_ref->set_vertex_attribute($node_no[1], $attr_name_vertex[0],
				      $time[1]);
	}
	
	# read the edges
	#
	elsif ($_ =~ /^J=(\d+)\s+S=(\d+)\s+E=(\d+)\s+W=/o) {

	    # read and set the edge parameters
	    #
	    @line = split(/\s+/);	    
	    @arc_no = split('=', $line[0]);
	    @start = split('=', $line[1]);
	    @end = split('=', $line[2]);
	    @word = split('=', $line[3]);
	    @pron_ver = split('=', $line[4]);
	    @ac_score = split('=', $line[5]);
	    @lm_score = split('=', $line[6]);
	    @prob_score = split('=', $line[7]);

	    # add the edge if does not exist
	    #
	    if (!($$g_ref->has_edge($start[1], $end[1]))) {
		$$g_ref->set_edge_attribute($start[1], $end[1],
					$attr_name_edge[0], $arc_no[1]);
		
		$$g_ref->set_edge_attribute($start[1], $end[1], $attr_name_edge[1],
					$word[1]);
		
		$$g_ref->set_edge_attribute($start[1], $end[1], $attr_name_edge[2],
					$pron_ver[1]);
		
		$$g_ref->set_edge_attribute($start[1], $end[1], $attr_name_edge[3],
					$ac_score[1]);
		
		$$g_ref->set_edge_attribute($start[1], $end[1], $attr_name_edge[4],
					$lm_score[1]);
		if ($prob_score[0] eq "p") {
		    $$g_ref->set_edge_attribute($start[1], $end[1],
					    $attr_name_edge[5], $prob_score[1]);
		}
	    }   

	    # otherwise add the probabilites on the existing and this
	    # arc
	    #
	    else {
		
		# get old acoustic, language model, prob scores
		#
		$old_ac_score = $$g_ref->get_edge_attribute($start[1], $end[1], 
							$attr_name_edge[3]);
		$old_lm_score = $$g_ref->get_edge_attribute($start[1], $end[1],
							$attr_name_edge[4]);
	
		if ($$g_ref->has_edge_attribute($start[1], $end[1],
					    $attr_name_edge[5])) {
		    
		    $old_prob_score = $$g_ref->get_edge_attribute($start[1],
							      $end[1],
							      $attr_name_edge[5]);
		}
		
		# sum the probabilities of old arc and new arc
		#
		(log_add_log($old_ac_score, $ac_score[1], \$sum_ac_score) == 1)
		    or isip_die ("ERROR: read_lattice - failed to add log probabilities");
		(log_add_log($old_lm_score, $lm_score[1], \$sum_lm_score) == 1)
		    or isip_die ("ERROR: read_lattice - failed to add log probabilities");
		if ($$g_ref->has_edge_attribute($start[1], $end[1],
					    $attr_name_edge[5])) {
		    
		    (log_add_log($old_prob_score, $prob_score[1], 
				 \$sum_prob_score) == 1)
			or isip_die ("ERROR: read_lattice - failed to add log probabilities");
		    
		}
		
		# set back the summed probabilites
		#
		$$g_ref->set_edge_attribute($start[1], $end[1], $attr_name_edge[3],
					$sum_ac_score);		
		$$g_ref->set_edge_attribute($start[1], $end[1], $attr_name_edge[4],
					$sum_lm_score);		

		if ($$g_ref->has_edge_attribute($start[1], $end[1],
					    $attr_name_edge[5])) {
		    $$g_ref->set_edge_attribute($start[1], $end[1], 
					    $attr_name_edge[5],
					    $sum_prob_score);
		}
	    }
	}
	
	# return 0 is EOF found
	#
	if (eof(fp_lat_file)) {
	    return 0;
	}
    }
	
    # close the file
    #
    close(fp_lat_file) or isip_die("ERROR: read_lattice - can't close file $input_lat_file") ;    
    
    # exit gracefully
    #
    return 1;
}

# subroutine: write_lattice
#
# arguments:
#  @_ : ($output_lat_file, \$g, \@attr_name_vertex, \@attr_name_edge, \$id,
#        \$lm_scale, \$wd_penalty)
#
# description: this routine writes a lattice in BBN format
#
# return: 0 if error 1 if no error
#
sub write_lattice ($$$$$$$) {
    
    # parse the argument list
    #
    my $output_lat_file = shift @_;
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $id_ref = shift @_;
    my $lm_scale_ref = shift @_;
    my $wd_penalty_ref = shift @_;
    
    if (($output_lat_file eq "") || ($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq ""))  {
	print "ERROR: write_lattice - bad argument list\n";
	return 0;
    }
    
    # local variables
    #
    my @V;
    my @E;
    my @v;
    my $V_num;
    my $E_num;

    my $name;
    my $value;
    my @line;
    my @node_no;
    my @time;
    my @arc_no;
    my @start;
    my @end;
    my @word;
    my @pron_ver;
    my @ac_score;
    my @lm_score;
    
    # try to open the output file
    #    
    open(fp, ">>$output_lat_file")  or return 0;
    
    # get vertices and edges
    #
    @V = $$g_ref->vertices;
    @E = $$g_ref->edges;
    
    # print the utterance id
    #
    $value = $$id_ref;    
    print fp "UTTERANCE=$value\n";    
    print "\nINFO: lattice_subs - writing the lattice for utterance $value\n";
    
    # print the lm scale
    #
    $value = $$lm_scale_ref;    
    print fp "lmscale=$value\n";

    # print the word insertion penalty
    #
    $value = $$wd_penalty_ref;
    print fp "wdpenalty=$value\n";
    
    # print the number of vertices and edges
    #
    $V_num = $#V+1;
    $E_num = $#E+1;
    print fp "N=$V_num L=$E_num\n";
    
    # get and print the node information
    #
    for (my $i = 0; $i <= $#V; $i++) {
	
	$value = $$g_ref->get_vertex_attribute($V[$i], $attr_name_vertex[0]);
	
	print fp "I=$V[$i]\tt=$value\n";	
    }
    
    # get and print the edge information
    #
    for (my $i = 0; $i <= $#E; $i++) {
	
	my @edge = @{@E->[$i]};
	
	$value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					 $attr_name_edge[0]);
	print fp "J=$value\tS=$edge[0]\tE=$edge[1]\t";
	
	$value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					 $attr_name_edge[1]);	
	print fp "W=$value\t";
	
	$value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					 $attr_name_edge[2]);	
	print fp "v=$value\t";
	
	$value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					 $attr_name_edge[3]);
	print fp "a=$value\t";
	
	$value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					 $attr_name_edge[4]);
	print fp "l=$value\t";
	
	$value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					 $attr_name_edge[5]);
	print fp "p=$value\n";
    }
    
    # close the file
    #
    close(fp) or return 0;    
    
    # exit gracefully
    #
    return 1;
}

# subroutine: log_add_log
#
# arguments:
#  @_ : ($x, $y, \$output_value)
#
# description: this routine reads a lattice in BBN format
#
# return: 0 if error 1 if no error
#
# reference IFC Integral::logAddLog
#
sub log_add_log($$$) {

    # parse the argument list
    #
    my $x = shift @_;
    my $y = shift @_;
    my $output_value_ref = shift @_;

    if (($x eq "") || ($y eq ""))  {
	print "ERROR: log_add_log - bad argument list\n";
	return 0;
    }
    
    # define the variables to hold the gaussian score
    #
    my $tmp = 0;
    my $tmp_value = $y;
    $$output_value_ref = $x;

    # find the minimum component so that the sign of tmp is negative below
    #
    if ($$output_value_ref < $tmp_value) {
	$tmp = $$output_value_ref;
	$$output_value_ref = $tmp_value;
	$tmp_value = $tmp;
    }

    # find the difference between the two values. the output of this will
    # always be negative since output_value >= tmp_value
    #
    $tmp = $tmp_value - $$output_value_ref;
    
    # do not allow the score to underflow.
    #
    if ($tmp >= MIN_LOG_VALUE) {
	$$output_value_ref += log1p(exp($tmp));
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: log1p
#
# arguments:
#  @_ : ($x)
#
# description: this routine returns log1p
#
#
sub log1p ($) {

    # parse the argument list
    #
    my $x = shift @_;
    
    if ($x eq "")  {
	print "ERROR: log1p - bad argument list\n";
	return 0;
    }
    
    # This "trick" comes from the comments in the source code for FDLIBM,
    # SunSoft's Freely Distributable Math Library, www.netlib.org/fdlibm.
    # They reference the HP-15C Advanced Functions Handbook.  The idea
    # probably comes from Prof. William Kahan, U. C. Berkeley.
    #
    my $u = 1 + $x;

    return $x if $u == 1;
    return log($u) * ($x / ($u - 1));
}

# subroutine: delete_file
#
# arguments:
#  @_ : ($output_lat_file)
#
# description: this routine deletes a file if it exists
#
# return: 0 if error 1 if no error
#
sub delete_file ($) {
    
    # parse the argument list
    #
    my $output_lat_file = shift @_;
    
    if (($output_lat_file eq ""))  {
	print "ERROR: delete_file - bad argument list\n";
	return 0;
    }

    if ( -e $output_lat_file) {
	system("rm -rf $output_lat_file");
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: compute_alpha
#
# arguments:
#  @_ : (\$g, \@attr_name_vertex, \@attr_name_edge, \$id, \lm_scale, 
#        \wd_penalty)
#
# description: this routine writes a lattice in BBN format
#
# return: 0 if error 1 if no error
#
sub compute_alpha ($$$$$$) {
    
    # parse the argument list
    #
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $id_ref = shift @_;
    my $lm_scale_ref = shift @_;
    my $wd_penalty_ref = shift @_;
    
    if (($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq ""))  {
	print "ERROR: compute_alpha - bad argument list\n";
	return 0;
    }
    
    # local variables
    #
    my @V;
    my @E;
    my $V_num;
    my $E_num;
    my $alpha_curr;
    my $alpha_next;
    my $alpha;
    my $lm_score;
    my $ac_score;
    my $sum_alpha;
    my $value;
    
    # get vertices and edges
    #
    @V = $$g_ref->vertices;
    @E = $$g_ref->edges;
    
    # display information
    #
    print "\nINFO: lattice_subs - making forward computations on the lattice for utterance $$id_ref\n";

    # get the number of vertices and edges
    #
    $V_num = $#V+1;
    $E_num = $#E+1;
    
    # loop through the nodes from the start to the end and compute the 
    # alpha on every every node
    #
    for (my $i = 0; $i <= $V_num - 1 ; $i++) {
	
	# if this is the first node, set the alpha as "0"
	#
	if ($i == 0) {
	    $$g_ref->set_vertex_attribute($i, $attr_name_vertex[1],0);
	}
	
	# check if this node has any successors
	#
	if ($$g_ref->is_successorful_vertex($i) == 1) {
	    
	    # obtain the immediate successors of this node
	    #
	    my @S = $$g_ref->successors($i);
	    my $S_num = $#S+1;
	    
	    # loop through all the successors and update their alpha values
	    #
	    for (my $j = 0; $j <= $S_num - 1; $j++) {
		
		# get the attributes on the arc between the two nodes
		#
		$ac_score = $$g_ref->get_edge_attribute($i, $S[$j],
						    $attr_name_edge[3]);
		$lm_score = $$g_ref->get_edge_attribute($i, $S[$j],
						    $attr_name_edge[4]);
		
		# get the attributes on the two nodes
		#
		$alpha_curr = $$g_ref->get_vertex_attribute($i, 
							$attr_name_vertex[1]);
		$alpha_next = $$g_ref->get_vertex_attribute($S[$j], 
							    $attr_name_vertex[1]);
		$alpha = $alpha_curr + 
		    ($ac_score/$$lm_scale_ref) + $lm_score + $$wd_penalty_ref;
		
		# check if the next node has an alpha value on it, else just
		# put the computed alpha value to the next node. But if alpha
		# attribute exists on the next node then call the log_add_log
		# function to add the two values
		#
		if ($$g_ref->has_vertex_attribute($S[$j], $attr_name_vertex[1])) {
		    (log_add_log($alpha_next, $alpha, \$sum_alpha) == 1)
			or isip_die ("ERROR: compute_alpha - failed to add log probabilities");
		    
		    # set the alpha for the next node
		    #
		    $$g_ref->set_vertex_attribute($S[$j], $attr_name_vertex[1],
						  $sum_alpha);
		}
		else {
		    
		    # set the alpha for the next node
		    #
		    $$g_ref->set_vertex_attribute($S[$j], $attr_name_vertex[1],
						  $alpha);
		}
	    }
	}
    }
    
    # exit gracefully
    #
    return 1;
}   

# subroutine: compute_beta
#
# arguments:
#  @_ : (\$g, \@attr_name_vertex, \@attr_name_edge, \$id, \lm_scale, 
#        \wd_penalty)
#
# description: this routine writes a lattice in BBN format
#
# return: 0 if error 1 if no error
#
sub compute_beta ($$$$$$) {
    
    # parse the argument list
    #
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $id_ref = shift @_;
    my $lm_scale_ref = shift @_;
    my $wd_penalty_ref = shift @_;
    
    if (($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq ""))  {
	print "ERROR: compute_beta - bad argument list\n";
	return 0;
    }
    
    # local variables
    #
    my @V;
    my @E;
    my $V_num;
    my $E_num;
    my $beta_curr;
    my $beta_prev;
    my $beta;
    my $lm_score;
    my $ac_score;
    my $sum_beta;
    my $value;
    
    # get vertices and edges
    #
    @V = $$g_ref->vertices;
    @E = $$g_ref->edges;
    
    # display information
    #
    print "\nINFO: lattice_subs - making backward computations on the lattice for utterance $$id_ref\n";
    
    # get the number of vertices and edges
    #
    $V_num = $#V+1;
    $E_num = $#E+1;
    
    # set all the alphas on the nodes to zero
    #
    # $$g_ref->set_graph_attribute($attr_name_vertex[1], 0);
    
    # loop through the nodes from the start to the end and compute the 
    # alpha on every every node
    #
    for (my $i = $V_num - 1; $i >= 0; $i--) {
	
	# if this is the last node, set the beta as "0"
	#
	if ($i == ($V_num -1)) {
	    $$g_ref->set_vertex_attribute($i, $attr_name_vertex[2],0);
	}
	
	# check if this node has any predecessors
	#
	if ($$g_ref->is_predecessorful_vertex($i) == 1) {
	    
	    # obtain the immediate predecessors of this node
	    #
	    my @P = $$g_ref->predecessors($i);
	    my $P_num = $#P+1;
	    
	    # loop through all the predecessors and update their alpha values
	    #
	    for (my $j = 0; $j <= $P_num - 1; $j++) {
		
		# get the attributes on the arc between the two nodes
		#
		$ac_score = $$g_ref->get_edge_attribute($P[$j], $i,
							$attr_name_edge[3]);
		$lm_score = $$g_ref->get_edge_attribute($P[$j], $i,
							$attr_name_edge[4]);
		
		# get the attributes on the two nodes
		#
		$beta_curr = $$g_ref->get_vertex_attribute($i, 
							   $attr_name_vertex[2]);
		$beta_prev = $$g_ref->get_vertex_attribute($P[$j], 
							   $attr_name_vertex[2]);
		$beta = $beta_curr +
		    ($ac_score/$$lm_scale_ref) + $lm_score + $$wd_penalty_ref;
		
		# check if the prev node has a beta value on it, else just
		# put the computed beta value to the prev node. But if beta
		# attribute exists on the next node then call the log_add_log
		# function to add the two values
		#
		if ($$g_ref->has_vertex_attribute($P[$j], $attr_name_vertex[2])) {
		    (log_add_log($beta_prev, $beta, \$sum_beta) == 1)
			or isip_die ("ERROR: compute_beta - failed to add log probabilities");
		    
		    # set the beta for the next node
		    #
		    $$g_ref->set_vertex_attribute($P[$j], $attr_name_vertex[2],
						  $sum_beta);
		}
		else {
		    
		    # set the beta for the next node
		    #
		    $$g_ref->set_vertex_attribute($P[$j], $attr_name_vertex[2],
						  $beta);
		}
	    }	
	}
    }   
    
    # exit gracefully
    #
    return 1;
}   

# subroutine: compute_posterior
#
# arguments:
#  @_ : (\$g, \@attr_name_vertex, \@attr_name_edge, \$id, \lm_scale, 
#        \wd_penalty)
#
# description: this routine writes a lattice in BBN format
#
# return: 0 if error 1 if no error
#
sub compute_posterior ($$$$$$) {
    
    # parse the argument list
    #
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $id_ref = shift @_;
    my $lm_scale_ref = shift @_;
    my $wd_penalty_ref = shift @_;

    if (($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq ""))  {
	print "ERROR: compute_posterior - bad argument list\n";
	return 0;
    }
    
    # local variables
    #
    my @V;
    my @E;
    my $V_num;
    my $E_num;
    my $alpha;
    my $beta;
    my $gamma;
    my $start_time;
    my $end_time;
    my $lm_score;
    my $ac_score;
    my $posterior;
    my $value;
    
    # get vertices and edges
    #
    @V = $$g_ref->vertices;
    @E = $$g_ref->edges;
    
    # display information
    #
    print "\nINFO: lattice_subs - computing posteriors on the lattice for utterance $$id_ref\n";

    # get the number of vertices and edges
    #
    $V_num = $#V+1;
    $E_num = $#E+1;
    
    # loop through the nodes from the start to the end and compute the 
    # posterior on every link that originages from the node
    #
    for (my $i = 0; $i <= $V_num - 1 ; $i++) {
	
	# check if this node has any successors
	#
	if ($$g_ref->is_successorful_vertex($i) == 1) {
	    
	    # obtain the immediate successors of this node
	    #
	    my @S = $$g_ref->successors($i);
	    my $S_num = $#S+1;
	    
	    # loop through all the successors and update their alpha values
	    #
	    for (my $j = 0; $j <= $S_num -1 ; $j++) {
		
		# get the attributes on the arc between the two nodes
		#
		$ac_score = $$g_ref->get_edge_attribute($i, $S[$j],
							$attr_name_edge[3]);
		$lm_score = $$g_ref->get_edge_attribute($i, $S[$j],
							$attr_name_edge[4]);
		
		# get the attributes on the two nodes
		#
		$alpha = $$g_ref->get_vertex_attribute($i, 
						       $attr_name_vertex[1]);
		$beta = $$g_ref->get_vertex_attribute($S[$j], 
						      $attr_name_vertex[2]);
		$gamma = $alpha + $beta +
		    ($ac_score/$$lm_scale_ref) + $lm_score + $$wd_penalty_ref;
		
		# normalize the posterior by dividing the gamma value by the
		# probability of sum of all paths in the graph. this values is
		# a by product of the alpha beta computation. The value is 
		# either the alpha in the last node or the beta in the first 
		# node
		#
		$posterior = $gamma - 
		    ($$g_ref->get_vertex_attribute($V_num - 1, 
						   $attr_name_vertex[1]));

		my $temp =
 		    ($$g_ref->get_vertex_attribute($V_num - 1, 
						   $attr_name_vertex[1]));
		
		# the posteriors can be obtained per-frame by dividing it with
		# the time duration of the arc
		#
		$start_time = $$g_ref->get_vertex_attribute($i, 
							    $attr_name_vertex[0]);
		$end_time = $$g_ref->get_vertex_attribute($S[$j], 
							  $attr_name_vertex[0]);
		
#		if ($start_time != $end_time) {
#		    $posterior = $posterior/($end_time - $start_time);
#		}
		
		# add the posterior value to the data structure
		# the posterior value is added to the arc or the edges
		#
		$$g_ref->set_edge_attribute($i, $S[$j], $attr_name_edge[5],
					    $posterior);
	    }
	}
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: read_file_as_lines
#
# arguments:
#  @_ : ($input_file, \@lines)
#
# description: this routine reads a file as lines
#
# return: 0 if error 1 if no error
#
sub read_file_as_lines ($$) {
    
    # parse the argument list
    #
    my $input_file = shift @_;
    my $lines_ref = shift @_;
    
    if (($input_file eq ""))  {
	print "ERROR: read_file_as_lines - bad argument list\n";
	return 0;
    }
    
    # try to open the input file
    #    
    open(fp, "$input_file") or 
	isip_die("ERROR: read_file_as_lines - can't open file $input_file") ;
    
    # read in the file as lines
    #
    while(<fp>) {	
	chop;	
	push(@{$lines_ref}, $_);
    }
    
    # close the file
    #
    close(fp) or isip_die("ERROR: read_file_as_lines - can't close file $input_file") ;
    
    # exit gracefully
    #
    return 1;
}

# subroutine: compute_error_rate
#
# arguments:
#  @_ : ($input_lat_file_list), \@trans, \@attr_name_vertex, \@attr_name_edge,
#       \@aligns, \@corrs, \@subs, \@dels, \@ins, \@file_ids)
#
# description: this routine computes the error rate of lattices
#
# return: 0 if error 1 if no error
#
sub compute_error_rate ($$$$$$$$$$) {
    
    # parse the argument list
    #
    my $input_lat_file_list = shift @_;
    my $trans_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $aligns_ref = shift @_;
    my $corrs_ref = shift @_;
    my $subs_ref = shift @_;
    my $dels_ref = shift @_;
    my $ins_ref = shift @_;
    my $file_ids_ref = shift @_;

    if (($input_lat_file_list eq "") || (@{$trans_ref} eq "") || 
	($attr_name_vertex[0] eq "") || ($attr_name_edge[0] eq ""))  {
	print "ERROR: compute_error_rate - bad argument list\n";
	return 0;
    }
    
    # local variables
    #
    my $i = 0;

    # try to open the input lattice file list
    #    
    open(fp_lat_file_list, "$input_lat_file_list") or 
	isip_die("ERROR: compute_error_rate - can't open file $input_lat_file_list") ;
    
    # loop over all the lattices and accumulate statistics for each
    #
    while (<fp_lat_file_list>) {
	
	# local variables
	#
	my $g = new Graph::Directed->new;
	my $cur_pos = 0;
	my $next_pos = 0;
	my $id = "";
	my $lm_scale = 0.00;
	my $wd_penalty = 0.00;
	my $trans;
	my $align;
	my $corrs = 0;
	my $subs = 0;
	my $dels = 0;
	my $ins = 0;
	my $file;
	my $flag = 0;

	# read the lattice
	#
	chop;
	$file = $_;
	
	while (read_lattice($file, \$g, \@attr_name_vertex, \@attr_name_edge, 
			    $cur_pos, \$next_pos, \$id, \$lm_scale, 
			    \$wd_penalty) == 1) {
	    
	    # compute statistics by using the principle of dynammic
	    # programming for string alignments
	    #
	    $trans = ${$trans_ref}[$i];
	    (align_lat_dp(\$g, \$trans, \@attr_name_edge, \$align, \$corrs, 
			  \$subs, \$dels, \$ins) == 1) or
		isip_die("ERROR: compute_error_rate - failed to align $_ lattice with the reference transcription") ;
	    
	    # updates
	    #
	    $i++;
	    push(@{$aligns_ref}, $align);
	    push(@{$corrs_ref}, $corrs);
	    push(@{$subs_ref}, $subs);
	    push(@{$dels_ref}, $dels);
	    push(@{$ins_ref}, $ins);
	    $flag = 1;
	    
	    # update the positions
	    #
	    $cur_pos = $next_pos;
	    $id = "";
	    $lm_scale = 0.00;
	    $wd_penalty = 0.00;
	    	    
	    # save the file ids
	    #
	    my @V = $g->vertices;
	    my $id_new = join (": ", $file, $id);
	    push (@{$file_ids_ref}, $id_new);

	    # display information
	    #
	    print "\nINFO: compute_error_rate - computing word error rate for the lattice for utterance $id_new\n";
	    
	    # clear the graph
	    #
	    for (my $j = 0; $j <= $#V; $j++) {
		$g->delete_vertex($V[$j]);
	    }
	}
	
	# processing for the last lattice. note that the while loop above
	# quits as soon as it finishes reading the last lattice because the
	# read_lattice function returns 0 as soon as it sees an EOF
	#
	
	# compute statistics by using the principle of dynammic
	# programming for string alignments
	#
	$trans = ${$trans_ref}[$i];
	
	(align_lat_dp(\$g, \$trans, \@attr_name_edge, \$align, \$corrs, 
		      \$subs, \$dels, \$ins) == 1) or
			  isip_die("ERROR: compute_error_rate - failed to align $_ lattice with the reference transcription") ;
	
	# updates
	#
	$i++;
	push(@{$aligns_ref}, $align);
	push(@{$corrs_ref}, $corrs);
	push(@{$subs_ref}, $subs);
	push(@{$dels_ref}, $dels);
	push(@{$ins_ref}, $ins);

	# save the file ids
	#
	if ($flag == 0) {
	    push (@{$file_ids_ref}, $file);
	}
	else {
	    my @V = $g->vertices;
	    my $id_new = join (": ", $file, $id);
	    push (@{$file_ids_ref}, $id_new);
	}
	
    }

    # close the file
    #
    close(fp_lat_file_list) or 
	isip_die("ERROR: compute_error_rate - can't close file $input_lat_file_list") ;
    
    # exit gracefully
    #
    return 1;
}

# subroutine: align_lat_dp
#
# arguments:
#  @_ : (\$g, \$trans, \$align, \@attr_name_edge, \$corrs, \$subs, \$dels, \$ins)
#
# description: this routine aligns lattice with the reference transcriptions 
#              using the principle of dynamic programming
#
# return: 0 if error 1 if no error
#
sub align_lat_dp ($$$$$$$$) {
    
    # parse the argument list
    #
    my $g_ref = shift @_;
    my $trans_ref = shift @_;
    my @attr_name_edge = @{shift @_};
    my $align_ref = shift @_;
    my $corrs_ref = shift @_;
    my $subs_ref = shift @_;
    my $dels_ref = shift @_;
    my $ins_ref = shift @_;

    if (($$trans_ref eq "") || ($$g_ref eq "") || 
	(@attr_name_edge[0] eq ""))  {
	print "ERROR: align_lat_dp - bad argument list\n";
	return 0;
    }
    
    # traverse the lattice in a depth first fashion - O(b*d) space 
    # complexity. process each hypothesis sequentially
    #

    # get the root vertex, we have only one source vertex in the lattices
    #
    my @source_vertices = $$g_ref->source_vertices();
    $$align_ref = "";
    $$corrs_ref = MAX_POSITIVE_INTEGER;
    $$subs_ref = MAX_POSITIVE_INTEGER;
    $$dels_ref = MAX_POSITIVE_INTEGER;
    $$ins_ref = MAX_POSITIVE_INTEGER;
    my @hypo = "";
    my $level = 0;
    
    (align_lat_dp_recursive($source_vertices[0], $g_ref, $trans_ref, \@attr_name_edge, $align_ref, $corrs_ref, $subs_ref, $dels_ref, $ins_ref, \@hypo, \$level) == 1) or
	isip_die("ERROR: align_lat_dp - failed to align lattice with the reference transcription") ;
    
    # exit gracefully
    #
    return 1;
}   

# subroutine: align_lat_dp_recursive
#
# arguments:
#  @_ : ($vertex, $g_ref, $trans_ref, \@attr_name_edge, $align_ref, 
#       $corrs_ref, $subs_ref, $dels_ref, $ins_ref, \@hypo, \$level)
#
# description: this routine aligns lattice with the reference transcriptions.
#              each of the hypothesis in lattice is processed in in depth
#              first fashion - O(b*d) space complexity
#
# return: 0 if error 1 if no error
#
sub align_lat_dp_recursive ($$$$$$$$$$) {
    
    # parse the argument list
    #
    my $vertex = shift @_;
    my $g_ref = shift @_;
    my $trans_ref = shift @_;
    my @attr_name_edge = @{shift @_};
    my $align_ref = shift @_;
    my $corrs_ref = shift @_;
    my $subs_ref = shift @_;
    my $dels_ref = shift @_;
    my $ins_ref = shift @_;
    my $hypo_ref = shift @_;
    my $level_ref = shift @_;

    if (($vertex eq "") || ($$trans_ref eq "") || ($$g_ref eq "") ||
	(@attr_name_edge[0] eq ""))  {
	print "ERROR: align_lat_dp_recursive - bad argument list\n";
	return 0;
    }    
    
    # if the current vertex has children
    #
    if ($$g_ref->is_successorful_vertex($vertex)) {	
	
	# get all the current vertex
	#
	my @children = $$g_ref->successors($vertex);
	
	# loop over all the children vertices
	#
	for (my $i = 0; $i <= $#children; $i++) {
	    
	    
	    # get the word on the arc between the parent and the
	    # current child and append it to the current hypothesis
	    #
	    my $word = $$g_ref->get_edge_attribute($vertex, $children[$i], $attr_name_edge[1]);
	    my @new_hypo;
	    for (my $j = 0; $j <= $$level_ref; $j++) {
		$new_hypo[$j] = @{$hypo_ref}[$j];
	    }	    
	    push (@new_hypo, $word);
		
	    # update the level
	    #
	    $$level_ref++;
	    
	    # call the function itself
	    #
	    if (align_lat_dp_recursive($children[$i], $g_ref, $trans_ref, \@attr_name_edge, $align_ref, $corrs_ref, $subs_ref, $dels_ref, $ins_ref, \@new_hypo, $level_ref) != 1) {
		
		# we have already reached end of lattice
		# 
		# local variables
		#
		my $total_score = 0;
		my $corrs = 0;
		my $subs = 0;
		my $dels = 0;
		my $ins = 0;
		
		# remove !SENT_START and !SENT_END from the hypothesis
		#
		my @hypo_temp = "";
		for (my $k = 0; $k <= $#new_hypo; $k++) {		    
		    if (($new_hypo[$k] ne "!SENT_START") &&
			($new_hypo[$k] ne "!SENT_END")) {
			push (@hypo_temp, $new_hypo[$k]);
		    }
		}
		my $hypo = join (' ', @hypo_temp);		

		# do a dynammic programming based string alignment on words
		#
		(align_dp($trans_ref, \$hypo, \$total_score, \$corrs, \$subs, \$dels, \$ins) == 1) or
		    isip_die("ERROR: align_lat_dp_recursive - failed to align reference hypothesis: $$trans_ref\n with hypothesis: $$hypo_ref");
		
		# update the best hypothesis if the current is better than the 
		# previous best
		#
		my $best_score = $$corrs_ref * CORRS_COST 
		    + $$subs_ref * SUBS_COST + 
		    $$dels_ref * DELS_COST + 
		    $$ins_ref * INS_COST;		

		if ($total_score > $best_score) {
		    $$align_ref = $hypo;
		    $$corrs_ref = $corrs;
		    $$subs_ref = $subs;
		    $$dels_ref = $dels;
		    $$ins_ref = $ins;
		}
		
		# return 1
		#
		return 1;		
	    } 
	}
    }
    
    # else we reach end of the lattice
    #
    else {
	
	# return 0
	#
	return 0;	
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: align_dp
#
# arguments:
#  @_ : ($trans_ref, \$hypo, \$total_score, \$corrs, \$subs, \$dels, 
#        \$ins)
#
# description: this routine aligns two word hypothesis in a dynammic 
#              programming framework
#
# return: 0 if error 1 if no error
#
sub align_dp ($$$$$$$) {
    
    # parse the argument list
    #
    my $trans_ref = shift @_;
    my $hypo_ref = shift @_;
    my $total_score_ref = shift @_;
    my $corrs_ref = shift @_;
    my $subs_ref = shift @_;
    my $dels_ref = shift @_;
    my $ins_ref = shift @_;

    if (($$trans_ref eq "") || ($$hypo_ref eq ""))  {
	print "ERROR: align_dp - bad argument list\n";
	return 0;
    }    

    # create the two sequences of words. prepend each hypothesis by
    # "-" character
    #
    my $temp = join(" ", ALIGN_SYMBOL, $$trans_ref);
    my @seq1 = split(/\s+/, $temp);
    $temp = join(" ", ALIGN_SYMBOL, $$hypo_ref);
    my @seq2 = split(/\s+/, $temp);
    
    # create the DP table row-wise, x-axis has seq1 and y-axis has
    # seq2
    #
    my @dp_table;
    my @dp_table_bp_x;
    my @dp_table_bp_y;
    
    (create_dp_table(\@seq1, \@seq2, \@dp_table, \@dp_table_bp_x, \@dp_table_bp_y) == 1) or
	isip_die("ERROR: align_lat_dp - failed to create DP table for sequence @seq1 and @seq2");

    # update total score - best path score through the DP table
    #
    $$total_score_ref = $dp_table[$#seq1][$#seq2];
    
    # compute the total_score, corrs, subs, dels and ins
    #
    (score_dp_table(\@seq1, \@seq2, \@dp_table, \@dp_table_bp_x, \@dp_table_bp_y, $corrs_ref, $subs_ref, $dels_ref, $ins_ref) == 1) or
	isip_die("ERROR: align_lat_dp - failed to score DP table");    
    
    # exit gracefully
    #
    return 1;
}

# subroutine: cost_match
#
# arguments:
#  @_ : (\$word1, \$word2, \$match)
#
# description: this routine aligns computes the indel cost for two words
#              Levenshtein Distance:
#              cost(word1, word1) = 0 (CORRS_COST)
#              cost(word1, word2) = -4, for word1 != word2 (SUBS_COST)
#
# return: 0 if error 1 if no error
#
sub cost_match ($$$) {
    
    # parse the argument list
    #
    my $word1_ref = shift @_;
    my $word2_ref = shift @_;
    my $match_ref = shift @_;
    
    if (($$word1_ref eq "") || ($$word2_ref eq ""))  {
	print "ERROR: cost_match - bad argument list\n";
	return 0;
    }    
    
    if ($$word1_ref eq $$word2_ref) {
	$$match_ref = CORRS_COST;
    }
    else {
	$$match_ref = SUBS_COST;
    }       
    
    # exit gracefully
    #
    return 1;
}

# subroutine: create_dp_table
#
# arguments:
#  @_ : (\$seq1, \$seq2, \@dp_table, \@dp_table_bp_x, \@dp_table_bp_y)
#
# description: this routine create the DP table corresponding to the
#              two input sequence
#
# return: 0 if error 1 if no error
#
sub create_dp_table ($$$$$) {
    
    # parse the argument list
    #
    my @seq1 = @{shift @_};
    my @seq2 = @{shift @_};
    my $dp_table_ref = shift @_;
    my $dp_table_bp_x_ref = shift @_;
    my $dp_table_bp_y_ref = shift @_;
    
    if ((@seq1 eq "") || (@seq2 eq ""))  {
	print "ERROR: create_dp_table - bad argument list\n";
	return 0;
    }
    
    # initialize the table and the backpointers
    #
    for (my $j = 0; $j <= $#seq2; $j++) {	
	my @temp;
	for (my $i = 0; $i <= $#seq1; $i++) {	    
	    push @temp, 0;	    
	}
	push @{$dp_table_ref}, [@temp];
	push @{$dp_table_bp_x_ref}, [@temp];
	push @{$dp_table_bp_y_ref}, [@temp];
    }    
    
    for (my $j = 0; $j <= $#seq2; $j++) {
	
	for (my $i = 0; $i <= $#seq1; $i++) {
	    
	    # find the miminum cost (max positive number) from getting
	    # into the current location from all the three ways
	    #
	    my $max;
	    my $temp1 = MIN_NEGATIVE_INTEGER;
	    my $temp2 = MIN_NEGATIVE_INTEGER;
	    
	    # take care of the first entry in the table
	    # 
	    if (($i == 0) && ($j== 0)) {	       
		$max = 0;
	    }
	    
	    else {
		
		# D(i,j) = D(i-1, j) + deletion cost
		#
		if ($i > 0) {
		    $temp1 = ${$dp_table_ref}[$i-1][$j] + DELS_COST;
		    ${$dp_table_bp_x_ref}[$i][$j] = $i-1;
		    ${$dp_table_bp_y_ref}[$i][$j] = $j;
		}
		
		# D(i,j) = D(i, j-1) + insertion cost
		#
		if ($j > 0) {
		    $temp2 = ${$dp_table_ref}[$i][$j-1] + INS_COST;
		}
		$max = $temp1;	    
		if($temp2 > $max) {
		    $max = $temp2;
		    ${$dp_table_bp_x_ref}[$i][$j] = $i;
		    ${$dp_table_bp_y_ref}[$i][$j] = $j-1;		    
		}
		
		# D(i,j) = D(i-1, j-1) + match cost
		#
		if (($i > 0) && ($j > 0)) {
		    my $match_cost = 0;
		    (cost_match(\$seq1[$i], \$seq2[$j], \$match_cost) == 1) or
			isip_die("ERROR: align_dp - failed to compute match cost");
		    $temp1 = ${$dp_table_ref}[$i-1][$j-1] + $match_cost;
		}
		if($temp1 > $max) {
		    $max = $temp1;
		    ${$dp_table_bp_x_ref}[$i][$j] = $i-1;
		    ${$dp_table_bp_y_ref}[$i][$j] = $j-1;		    
		}
	    }
	    
	    # set the cost in the table
	    #
	    ${$dp_table_ref}[$i][$j] = $max;
	}
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: score_dp_table
#
# arguments:
#  @_ : (\@seq1, \@seq2, \@dp_table, \@dp_table_p_x, \@dp_table_bp_y, 
#       $corrs_ref, $subs_ref, $dels_ref, $ins_ref)
#
# description: this routine scores the DP table by finding the alignment
#
# return: 0 if error 1 if no error
#
sub score_dp_table ($$$$$$$$$) {
    
    # parse the argument list
    #
    my $seq1_ref = shift @_;
    my $seq2_ref = shift @_;
    my $dp_table_ref = shift @_;
    my $dp_table_bp_x_ref = shift @_;
    my $dp_table_bp_y_ref = shift @_;
    my $corrs_ref = shift @_;
    my $subs_ref = shift @_;
    my $dels_ref = shift @_;
    my $ins_ref = shift @_;
    
    if ((@{$seq1_ref} eq "") || (@{$seq2_ref} eq ""))  {
	print "ERROR: score_dp_table - bad argument list\n";
	return 0;
    }
    
    # backtrace and compute the statistics
    #
    (backtrace($#{$seq1_ref}, $#{$seq2_ref}, $seq1_ref, $seq2_ref, $dp_table_ref, $dp_table_bp_x_ref, $dp_table_bp_y_ref, $corrs_ref, $subs_ref, $dels_ref, $ins_ref) == 1) or
	isip_die("ERROR: score_dp_table - failed to backtrace the DP table") ;
    
    # exit gracefully
    #
    return 1;
}

# subroutine: backtrace
#
# arguments:
#  @_ : (i, j, \@seq1, \@seq2, \@dp_table, \@dp_table_p_x, \@dp_table_bp_y, 
#       $corrs_ref, $subs_ref, $dels_ref, $ins_ref)
#
# description: this routine scores the DP table by finding the alignment
#
# return: 0 if error 1 if no error
#
sub backtrace ($$$$$$$$$$$) {
    
    # parse the argument list
    #
    my $i = shift @_;
    my $j = shift @_;
    my $seq1_ref = shift @_;
    my $seq2_ref = shift @_;
    my $dp_table_ref = shift @_;
    my $dp_table_bp_x_ref = shift @_;
    my $dp_table_bp_y_ref = shift @_;
    my $corrs_ref = shift @_;
    my $subs_ref = shift @_;
    my $dels_ref = shift @_;
    my $ins_ref = shift @_;
    
    if ((@{$seq1_ref} eq "") || (@{$seq2_ref} eq ""))  {
	print "ERROR: backtrace - bad argument list\n";
	return 0;
    }
    
    # we reach the beginning if both $i and $j are 0, break the recursion
    #
    if (($i == 0) && ($j == 0)) {
	return 1;
    }
    
    # get the backpointer (table entry) to the current entry in the table
    #
    my $i_bp = ${$dp_table_bp_x_ref}[$i][$j];
    my $j_bp = ${$dp_table_bp_y_ref}[$i][$j];
    
    # update statistics
    #
    if (($i_bp == ($i-1)) && ($j_bp == $j)) {
	$$dels_ref++;
    }
    elsif (($i_bp == $i) && ($j_bp == ($j-1))) {
	$$ins_ref++;
    }
    elsif (($i_bp == ($i-1)) && ($j_bp == ($j-1))) {
	
	if (!(($i == 0) && ($j == 0))) {
	    if (${$seq1_ref}[$i] eq ${$seq2_ref}[$j]) {
		$$corrs_ref++;
	    }
	    else {
		$$subs_ref++;
	    }
	}
    }

    # backtrace further
    #
    (backtrace($i_bp, $j_bp, $seq1_ref, $seq2_ref, $dp_table_ref, $dp_table_bp_x_ref, $dp_table_bp_y_ref, $corrs_ref, $subs_ref, $dels_ref, $ins_ref) == 1);
    
    # exit gracefully
    #
    return 1;
}

# subroutine: write_error_rate
#
# arguments:
#  @_ : ($output_error_rate_file, \@transcriptions, \@alignments, 
#        \@corrs, \@subs, \@ins, \@file_ids)
#
# description: this routine write the error rate report
#
# return: 0 if error 1 if no error
#
sub write_error_rate ($$$$$$$$) {
    
    # parse the argument list
    #
    my $output_error_rate_file = shift @_;
    my $trans_ref = shift @_;
    my $align_ref = shift @_;
    my $corrs_ref = shift @_;
    my $subs_ref = shift @_;
    my $dels_ref = shift @_;
    my $ins_ref = shift @_;
    my $file_ids_ref = shift @_;
    
    if (($output_error_rate_file eq "") || (${$trans_ref}[0] eq "") || 
	(${$align_ref}[0] eq "") || (${$corrs_ref}[0] eq "") || 
	(${$subs_ref}[0] eq "") || (${$dels_ref}[0] eq "") || 
	(${$ins_ref}[0] eq "") || (${$file_ids_ref}[0] eq ""))  {
	print "ERROR:  write_error_rate - bad argument list\n";
	return 0;
    }    
    
    # local variables
    #
    my $total_nref = 0;
    my $total_corrs = 0;
    my $total_subs = 0;
    my $total_dels = 0;
    my $total_ins = 0;
    
    # try to open the input file
    #    
    open(fp_error_rate, ">$output_error_rate_file") or 
	isip_die("ERROR: write_error_rate - can't open file $output_error_rate_file") ;

    # loop over all the references
    #
    for (my $i = 0; $i <= $#{$trans_ref}; $i++) {
	
	# print the reference and hypothesis to the output file
	#
	printf fp_error_rate "id: %s\n", ${$file_ids_ref}[$i];
	printf fp_error_rate "Scores: (#C #S #D #I) %d %d %d %d\n", ${$corrs_ref}[$i], ${$subs_ref}[$i], ${$dels_ref}[$i], ${$ins_ref}[$i];
	printf fp_error_rate "REF: %s\n", ${$trans_ref}[$i];
	printf fp_error_rate "HYP: %s\n\n", ${$align_ref}[$i];
	
	# update statistics
	#
	my @temp = split(/\s+/,${$trans_ref}[$i]);
	$total_nref += $#temp + 1;
	$total_corrs += ${$corrs_ref}[$i];
	$total_subs += ${$subs_ref}[$i];
	$total_dels += ${$dels_ref}[$i];
	$total_ins += ${$ins_ref}[$i];
    }

    # print final statistics to output file
    #
    if ( $#{$trans_ref} > 0) {
	
	my $total_err = $total_subs + $total_dels + $total_ins;

	printf fp_error_rate  "WORD RECOGNITION PERFORMANCE\n\n";

	my $temp = $total_err * 100 / $total_nref;
	printf fp_error_rate "Percent Total Error              = %2.1f%   (%d)\n\n", $temp, $total_err;
	
	$temp = $total_corrs * 100 / $total_nref;
	printf fp_error_rate "Percent Correct                  = %2.1f%   (%d)\n\n", $temp, $total_corrs;
	
	$temp = $total_subs * 100 / $total_nref;
	printf fp_error_rate "Percent Substitutions            = %2.1f%   (%d)\n", $temp, $total_subs;
	
	$temp = $total_dels * 100 / $total_nref,;
	printf fp_error_rate "Percent Deletions                = %2.1f%   (%d)\n", $temp, $total_dels;

	$temp = $total_ins * 100 / $total_nref;
	printf fp_error_rate "Percent Insertions               = %2.1f%   (%d)\n", $temp, $total_ins;
	
	$temp = ($total_err - $total_ins) * 100 / $total_nref;
	printf fp_error_rate "Percent Word Accuracy            = %2.1f%\n\n", $temp;
	
	printf fp_error_rate "Ref. words                       =        (%d)\n", $total_nref;
	
	$temp = $total_corrs + $total_subs + $total_ins;
	printf fp_error_rate "Hyp. words                       =        (%d)", $temp;
    }
    else {
	print "No files processed\n";
    }
    
    # close the file
    #
    close(fp_error_rate) or isip_die("ERROR: write_error_rate - can't close file $output_error_rate_file") ;    
    
    # exit gracefully
    #
    return 1;
}

# subroutine: generate_confusion_network
#
# arguments:
#  @_ : (\$g, \@attr_name_vertex, \@attr_name_edge, \$g_cn, \@hypo, \$id)
#
# description: this routine generated a confusion network from a given
# lattice.
#
# reference: L. Mangu, E. Brill and A. Stolcke (2000), "Finding
# Consensus in Speech Recognition: Word Error Minimization and Other
# Applications of Confusion Networks",  in Computer, Speech and
# Language, 14(4):373-400, 2000.
#
# return: 0 if error 1 if no error
#
sub generate_confusion_network ($$$$$$) {
    
    # parse the argument list
    #
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $g_cn_ref = shift @_;
    my $hypo_ref = shift @_;
    my $id_ref = shift @_;

    if (($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq ""))  {
	print "ERROR: generate_confusion_network - bad argument list\n";
	return 0;
    }
    
    # display information
    #
    print "\nINFO: lattice_subs - generating confusion network corresponding to the lattice for utterance $$id_ref\n";

    # convert the lattice into equivalence classes. generate one 
    # equivalence class per link. note that the initialization step given in 
    # reference paper is not necessary because we merge the same links within
    # the same time stamps by adding the scores in the read_lattice function.
    # also set the partial order between equivalence classes in an table. 
    # ignore !SENT_START and !SENT_END
    #
    my @equiv_classes;
    my @partial_order_table;
    
    (convert_lattice_to_equiv_classes($g_ref, \@attr_name_vertex, 
				      \@attr_name_edge, \@equiv_classes, 
				      \@partial_order_table) == 1) or
					  isip_die("ERROR: generate_confusion_network - failed to convert lattice to equivalence classes");
    undef $$g_ref;
    #print "\nINFO: lattice_subs - finished converting to equavalence class\n";
    
    # intra-word clustering
    #
    my @equiv_classes_temp;
    my @partial_order_table_temp;
    (intra_word_clustering(\@equiv_classes, \@partial_order_table, 
			   \@equiv_classes_temp, 
			   \@partial_order_table_temp) == 1) or
			       isip_die("ERROR: generate_confusion_network - failed intra-word clusteing");    
    undef @equiv_classes;
    undef @partial_order_table;
    #print "\nINFO: lattice_subs - finished intra-word clustering\n";

    # inter-word clustering
    #
    (inter_word_clustering(\@equiv_classes_temp, \@partial_order_table_temp, 
			   \@equiv_classes, \@partial_order_table) == 1) or
			       isip_die("ERROR: generate_confusion_network - failed inter-word clusteing");
    undef @equiv_classes_temp;
    undef @partial_order_table_temp;
    #print "\nINFO: lattice_subs - finished inter-word clustering\n";

    # also order the classses in time
    #
    (order_classes(\@equiv_classes, \@equiv_classes_temp) == 1) or
	isip_die("ERROR: generate_confusion_network - failed to order equivalence classes in time");    
    undef @equiv_classes;
    print "\nINFO: lattice_subs - ordered classes in time\n";
    
    # merge same words in an class to form confusion network
    #
    (merge_same_words(\@equiv_classes_temp, \@equiv_classes,) == 1) or
	isip_die("ERROR: generate_confusion_network - failed to merge words");
    undef @equiv_classes_temp;
    #print "\nINFO: lattice_subs - finished merging same words to form confusion network\n";

    # add a "-" to each equivalence class if the probability sum of all 
    # links is less than unity, at the end of this step we get confusion n/w
    #
    (add_null_arcs(\@equiv_classes, \@equiv_classes_temp) == 1) or
	isip_die("ERROR: generate_confusion_network - failed to add null arcs");
    undef @equiv_classes;
    #print "\nINFO: lattice_subs - finished adding null-arcs\n";
    
    # pick best hypotheses from confusion n/w
    #
    (pick_best_hypo(\@equiv_classes_temp, $hypo_ref) == 1) or
	isip_die("ERROR: generate_confusion_network - failed to convert confusion network into lattice");
    #print "\nINFO: lattice_subs - finished picking best hypo from the confusion network\n";
    
    # convert the confusion n/w into equivalent lattice
    #
    (convert_confusion_network_to_lattice(\@equiv_classes_temp, 
					  \@attr_name_vertex, 
					  \@attr_name_edge, $g_cn_ref) == 1) or
					      isip_die("ERROR: generate_confusion_network - failed to convert confusion network into lattice");
    
    # exit gracefully
    #
    return 1;
}

# subroutine: convert_lattice_to_equiv_classes
#
# arguments:
#  @_ : ($g_ref, \@attr_name_vertex, \@attr_name_edge, \@equiv_classes, 
#        \@partial_order_table)
#
# description: this routine converts a lattice into a set of equivalence 
#              classes. each link is a an equivalence class. also set the 
#              partial order between all the equivalence classes
#
# return: 0 if error 1 if no error
#
sub convert_lattice_to_equiv_classes ($$$$$) {

    # parse the argument list
    #
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $equiv_classes_ref = shift @_;
    my $po_table_ref = shift @_;

    if (($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq ""))  {
	print "ERROR: convert_lattice_to_equiv_classes - bad argument list\n";
	return 0;
    }

    # local variables
    #
    my @E;
    my $value;

    # get vertices and edges
    #
    @E = $$g_ref->edges;

    # get the edge information
    #
    for (my $i = 0; $i <= $#E; $i++) {
	
	my @edge = @{@E->[$i]};
	my $temp_link = new Link;
	
	$value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					     $attr_name_edge[1]);
	# ignore !SENT_START and !SENT_END
	#
	if (($value ne "!SENT_START") && ($value ne "!SENT_END")) {
	    
	    $temp_link->word($value);
	    
	    $value = $$g_ref->get_vertex_attribute($edge[0], 
						   $attr_name_vertex[0]);
	    $temp_link->start_time($value);
	    
	    $value = $$g_ref->get_vertex_attribute($edge[1], 
						   $attr_name_vertex[0]);
	    $temp_link->end_time($value);
	
	    $value = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
						 $attr_name_edge[5]);
	    $temp_link->prob($value);

	    # set the equivalence class
	    #
	    my @equiv_class;
	    push @equiv_class, $temp_link;
	    push @{$equiv_classes_ref}, [@equiv_class];
	}
    }
    
    # set the partial order of all the classes, each class has only one link
    #
    for (my $j = 0; $j <= $#{$equiv_classes_ref}; $j++) {
	
	for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {
	    
	    # set the partial order for itself. condition e=f in 
	    # reference paper
	    #
	    if ($i == $j) {		
		${$po_table_ref}[$j][$i] = SET;
	    }
	    
	    # set the partial order for second and third condition in the
	    # reference paper
	    #
	    elsif (${$equiv_classes_ref}[$j][0]->end_time <=  ${$equiv_classes_ref}[$i][0]->start_time) {
		${$po_table_ref}[$j][$i] = SET;
	    }

	    # else unset the partial order
	    #
	    else {
		${$po_table_ref}[$j][$i] = UNSET;
	    }
	}
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: intra_word_clustering
#
# arguments:
#  @_ : (\@equiv_classes, \@partial_order_table, 
#        \@equiv_classes_cluster, \@po_cluster)
#
# description: this routine performs intra-word clustering
#
# return: 0 if error 1 if no error
#
sub intra_word_clustering ($$$$) {

    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my $po_table_ref = shift @_;
    my $equiv_classes_cluster_ref = shift @_;
    my $po_table_cluster_ref = shift @_;
    
    if ((${$equiv_classes_ref}[0] eq "") || (${$po_table_ref}[0] eq ""))  {
	print "ERROR: intra_word_clustering - bad argument list\n";
	return 0;
    }
    
    # local variables
    #    
    my $MaxSim = 0;
    my $bestSet1_ref;
    my $bestSet2_ref;
    my @equiv_class_new;
    my @equiv_classes_new;
    my @po_table_new;
    my $i =0;

    # core intra word algorithm
    #
    do {
	$MaxSim = MIN_NEGATIVE_INTEGER;
	
	for (my $l1 = 0; $l1 <= $#{$equiv_classes_ref}; $l1++) {
	    for (my $l2 = 0; $l2 <= $#{$equiv_classes_ref}; $l2++) {
		
		if ((${$po_table_ref}[$l1][$l2] != 1) && 
		    (${$po_table_ref}[$l2][$l1] != 1)) {
		    
		    my $equiv_class1_ref = ${$equiv_classes_ref}[$l1];
		    my $equiv_class2_ref = ${$equiv_classes_ref}[$l2];
		    
		    if ((same_words($equiv_class1_ref) == 1) 
			&& (same_words($equiv_class2_ref) == 1)) {
			
			if (${$equiv_classes_ref}[$l1][0]->word eq 
			    ${$equiv_classes_ref}[$l2][0]->word) {
			    
			    my $sim = MIN_NEGATIVE_INTEGER;
			    (sim_intra_word($equiv_class1_ref,
					    $equiv_class2_ref, \$sim) == 1) 
				or isip_die("ERROR: intra_word_clustering - failed to compute similarity measure");
			    if ($sim > $MaxSim) {
				$MaxSim = $sim;
				$bestSet1_ref = ${$equiv_classes_ref}[$l1];
				$bestSet2_ref = ${$equiv_classes_ref}[$l2];
			    }
			}
		    }
		}
	    }
	}    

	if ($MaxSim > MIN_NEGATIVE_INTEGER) {
	    (union($bestSet1_ref,
		   $bestSet2_ref, \@equiv_class_new) == 1) 
		or isip_die("ERROR: intra_word_clustering - failed to union two classes");
	    
	    (create_equiv_classes($bestSet1_ref, $bestSet2_ref,
				  \@equiv_class_new, $equiv_classes_ref, 
				  $po_table_ref,
				  \@equiv_classes_new, \@po_table_new) == 1) 
		or isip_die("ERROR: intra_word_clustering - failed to create a new set oof equivalence classes");
	    
	    # update the references for the next loop
	    #
	    undef @{$equiv_classes_ref};
	    undef @{$po_table_ref};
	    copy_equiv_classes(\@equiv_classes_new,  $equiv_classes_ref);
	    copy_partial_order_table(\@po_table_new,  $po_table_ref);
	    undef @equiv_class_new;
	    undef @equiv_classes_new;
	    undef @po_table_new;
	    $i++;
	}
    } while ($MaxSim > MIN_NEGATIVE_INTEGER);    

    # update the output arguments
    #
    (copy_equiv_classes($equiv_classes_ref,  $equiv_classes_cluster_ref) == 1) 
	or isip_die("ERROR: intra_word_clustering - failed to copy set of equivalence lasses");
    (copy_partial_order_table($po_table_ref, $po_table_cluster_ref) == 1) 
	or isip_die("ERROR: intra_word_clustering - failed to copy set of equivalence lasses");

    # exit gracefully
    #
    return 1;
}

# subroutine: same_words
#
# arguments:
#  @_ : ($equiv_class_ref)
#
# description: this routine returns 1 if the all the links in this 
#              equivalence has same words, else return 0
#
sub same_words ($) {

    # parse the argument list
    #
    my $equiv_class_ref = shift @_;

    if ((${$equiv_class_ref}[0] eq ""))  {
	isip_die ("ERROR: same_words - bad argument list\n");
	return 0;
    }
	
    # check if the words on the links in this class are same
    #
    for (my $i = 0; $i < $#{$equiv_class_ref}; $i++) {	
	
	my $link1 = ${$equiv_class_ref}[$i];
	my $link2 = ${$equiv_class_ref}[$i+1];

	if (($link1->word) ne ($link1->word)) {
	    return 0;
	}
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: sim_intra_word
#
# arguments:
#  @_ : ($equiv_class1_ref, $equiv_class2_ref, \$sim)
#
# description: this routine computes the similarity measure for intra-word
#              clustering
#
# return: 0 if error 1 if no error
#
sub sim_intra_word ($$$) {

    # parse the argument list
    #
    my $equiv_class1_ref = shift @_;
    my $equiv_class2_ref = shift @_;
    my $sim_ref = shift @_;
    
    if ((${$equiv_class1_ref}[0] eq "") || (${$equiv_class2_ref}[0] eq ""))  {
	print "ERROR: sim_intra_word - bad argument list\n";
	return 0;
    }
    
    # find the pair of words for which the similarity measure is largest
    #
    my $max = MIN_NEGATIVE_INTEGER;
    my $temp = MIN_NEGATIVE_INTEGER;
    for (my $i = 0; $i <= $#{$equiv_class1_ref}; $i++) {
	for (my $j = 0; $j <= $#{$equiv_class2_ref}; $j++) {
	    my $time_overlap = 0;

	    (overlap(${$equiv_class1_ref}[$i], ${$equiv_class2_ref}[$j],
		     \$time_overlap) == 1) or 
			 isip_die("ERROR: sim_intra_word - failed to compute overlap");
	
	    $temp =  log($time_overlap) + ${$equiv_class1_ref}[$i]->prob + 
		${$equiv_class2_ref}[$j]->prob;

	    if ($temp > $max) {
		$max = $temp;
	    }
	}
    }
    $$sim_ref = $max;
    
    # exit gracefully
    #
    return 1;
}

# subroutine: overlap
#
# arguments:
#  @_ : ($link1, $link2, \$time_overlap)
#
# description: this routine computes the normalized overlap between two links
#
# return: 0 if error 1 if no error
#
sub overlap ($$$) {

    # parse the argument list
    #
    my $link1 = shift @_;
    my $link2 = shift @_;
    my $time_overlap_ref = shift @_;
    
    # local variables
    #
    my $start = $link1->start_time;
    my $end = $link1->end_time;
    
    # find the start and end of overlap
    #
    if ($link2->start_time > $start) {
	$start = $link2->start_time;
    }
    if ($link2->end_time < $end) {
	$end = $link2->end_time;
    }

    # find normalized time
    #
    $$time_overlap_ref = 0;
    my $temp = ($link1->end_time - $link1->start_time)
	+ ($link2->end_time - $link2->start_time);    

    if ($temp > 0) {
	$$time_overlap_ref = ($end - $start) / $temp;
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: union
#
# arguments:
#  @_ : ($equiv_class1_ref, $equiv_class2_ref, \$equiv_class_new)
#
# description: this routine creates union of two equivalence classes
#
# return: 0 if error 1 if no error
#
sub union ($$$) {

    # parse the argument list
    #
    my $equiv_class1_ref = shift @_;
    my $equiv_class2_ref = shift @_;
    my $equiv_class_new_ref = shift @_;
    
    if ((${$equiv_class1_ref}[0] eq "") || (${$equiv_class2_ref}[0] eq ""))  {
	print "ERROR: union - bad argument list\n";
	return 0;
    }
    
    # union the two classes
    #
    for (my $i = 0; $i <= $#{$equiv_class1_ref}; $i++) {

	my $temp_link = new Link;
	my $value = ${$equiv_class1_ref}[$i]->word;
	$temp_link->word($value);
	$value = ${$equiv_class1_ref}[$i]->start_time;
	$temp_link->start_time($value);
	$value = ${$equiv_class1_ref}[$i]->end_time;
	$temp_link->end_time($value);
	$value = ${$equiv_class1_ref}[$i]->prob;
	$temp_link->prob($value);
	
	# set the equivalence class
	#
	push @{$equiv_class_new_ref}, $temp_link;	
    }

    for (my $i = 0; $i <= $#{$equiv_class2_ref}; $i++) {

	my $temp_link = new Link;
	my $value = ${$equiv_class2_ref}[$i]->word;
	$temp_link->word($value);
	$value = ${$equiv_class2_ref}[$i]->start_time;
	$temp_link->start_time($value);
	$value = ${$equiv_class2_ref}[$i]->end_time;
	$temp_link->end_time($value);
	$value = ${$equiv_class2_ref}[$i]->prob;
	$temp_link->prob($value);
	
	# set the equivalence class
	#
	push @{$equiv_class_new_ref}, $temp_link;	
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: create_equiv_classes
#
# arguments:
#  @_ : ($equiv_class1_ref, $equiv_class2_ref, \@equiv_class_new,
#        $equiv_classes_ref, $po_table_ref, \@new_equiv_classes, 
#        \@new_po_table)
#
# description: this routine creates a new set of equivalence classes by 
#              removing two old classes and adding a new one at the end
#
# return: 0 if error 1 if no error
#
sub create_equiv_classes ($$$$$$$) {
    
    # parse the argument list
    #
    my $equiv_class1_ref = shift @_;
    my $equiv_class2_ref = shift @_;
    my $equiv_class_new_ref = shift @_;
    my $equiv_classes_ref = shift @_;
    my $po_table_ref = shift @_;
    my $equiv_classes_new_ref = shift @_;
    my $po_table_new_ref = shift @_;
    
    if ((${$equiv_class1_ref}[0] eq "") || 
	(${$equiv_class2_ref}[0] eq "") || 
	(${$equiv_class_new_ref}[0] eq "") ||
	(${$equiv_classes_ref}[0][0] eq "") ||
	(${$po_table_ref}[0][0] eq ""))  {
	print "ERROR: create_equiv_classes - bad argument list\n";
	return 0;
    }
    
    # loop over all set of classes and create a new set of classes, ignore 
    # the two old classes
    #
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {	
	
	if ((${$equiv_classes_ref}[$i] != $equiv_class1_ref) &&
	    (${$equiv_classes_ref}[$i] != $equiv_class2_ref)) {
	    
	    my @equiv_class;
	    my $temp_equiv_class_ref = ${$equiv_classes_ref}[$i];
	    for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {
		
		my $temp_link = new Link;
		
		my $value = ${$equiv_classes_ref}[$i][$j]->word;
		$temp_link->word($value);
		
		$value = ${$equiv_classes_ref}[$i][$j]->start_time;
		$temp_link->start_time($value);
	
		$value = ${$equiv_classes_ref}[$i][$j]->end_time;
		$temp_link->end_time($value);
		
		$value = ${$equiv_classes_ref}[$i][$j]->prob;
		$temp_link->prob($value);
		
		# set the equivalence class
		#
		push @equiv_class, $temp_link;
	    }
	    push @{$equiv_classes_new_ref}, [@equiv_class];
	}
    }

    # add the new class
    #
    my @equiv_class;

    for (my $j = 0; $j <= $#{$equiv_class_new_ref}; $j++) {
	
	my $temp_link = new Link;
	
	my $value = ${$equiv_class_new_ref}[$j]->word;
	$temp_link->word($value);
	
	$value = ${$equiv_class_new_ref}[$j]->start_time;
	$temp_link->start_time($value);
	
	$value = ${$equiv_class_new_ref}[$j]->end_time;
	$temp_link->end_time($value);
	
	$value = ${$equiv_class_new_ref}[$j]->prob;
	$temp_link->prob($value);
	
	# set the equivalence class
	#
	push @equiv_class, $temp_link;
    }
    push @{$equiv_classes_new_ref}, [@equiv_class];
    
    # set the partial order of all the classes, each class has only one link
    #
    for (my $j = 0; $j <= $#{$equiv_classes_new_ref}; $j++) {
	
	for (my $i = 0; $i <= $#{$equiv_classes_new_ref}; $i++) {
	    
	    my $equiv_class1_ref = ${$equiv_classes_new_ref}[$j];
	    my $equiv_class2_ref = ${$equiv_classes_new_ref}[$i];	    
	    
	    # set the partial order for itself. condition e=f in 
	    # reference paper
	    #
	    if ($i == $j) {		
		${$po_table_new_ref}[$j][$i] = SET;
	    }
	    
	    # set the partial order for second and third condition in the
	    # reference paper
	    #
	    elsif (partial_order($equiv_class1_ref, $equiv_class2_ref) == 1) {
		${$po_table_new_ref}[$j][$i] = SET;
	    }
	    
	    # else unset the partial order
	    #
	    else {
		${$po_table_new_ref}[$j][$i] = UNSET;
	    }
	}
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: partial_order
#
# arguments:
#  @_ : ($equiv_class1_ref, $equiv_class2_ref)
#
# description: this routine returns 1 if 
#              $equiv_class1_ref <= (parital_order) $equiv_class2_ref
#              otherwise it returns 0
#
sub partial_order ($$) {

    # parse the argument list
    #
    my $equiv_class1_ref = shift @_;
    my $equiv_class2_ref = shift @_;
    
    if ((${$equiv_class1_ref}[0] eq "") || (${$equiv_class2_ref}[0] eq ""))  {
	print "ERROR: partial_order - bad argument list\n";
	return 0;
    }
    
    # make sure any link in class1 <= every link in class2
    #
    for (my $i = 0; $i <= $#{$equiv_class1_ref}; $i++) {
	for (my $j = 0; $j <= $#{$equiv_class2_ref}; $j++) {

	    my $link1_end = ${$equiv_class1_ref}[$i]->end_time;
	    my $link2_start = ${$equiv_class2_ref}[$j]->start_time;
	    
	    if ($link1_end <= $link2_start) {
		return 1;
	    }
	}
    }

    # exit gracefully
    #
    return 0;
}

# subroutine: inter_word_clustering
#
# arguments:
#  @_ : (\@equiv_classes, \@partial_order_table, 
#        \@equiv_classes_cluster, \@po_cluster)
#
# description: this routine performs inter-word clustering
#
# return: 0 if error 1 if no error
#
sub inter_word_clustering ($$$$) {

    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my $po_table_ref = shift @_;
    my $equiv_classes_cluster_ref = shift @_;
    my $po_table_cluster_ref = shift @_;
    
    if ((${$equiv_classes_ref}[0] eq "") || (${$po_table_ref}[0] eq ""))  {
	print "ERROR: inter_word_clustering - bad argument list\n";
	return 0;
    }
    
    # local variables
    #    
    my $MaxSim = 0;
    my $bestSet1_ref;
    my $bestSet2_ref;
    my @equiv_class_new;
    my @equiv_classes_new;
    my @po_table_new;
    my $i = 0;
    
    # core inter word algorithm
    #
    do {
	$MaxSim = MIN_NEGATIVE_INTEGER;
	
	for (my $l1 = 0; $l1 <= $#{$equiv_classes_ref}; $l1++) {
	    for (my $l2 = 0; $l2 <= $#{$equiv_classes_ref}; $l2++) {
		
		if ((${$po_table_ref}[$l1][$l2] != 1) && 
		    (${$po_table_ref}[$l2][$l1] != 1)) {
		    
		    my $equiv_class1_ref = ${$equiv_classes_ref}[$l1];
		    my $equiv_class2_ref = ${$equiv_classes_ref}[$l2];
		    
		    my $sim = MIN_NEGATIVE_INTEGER;
		    (sim_inter_word($equiv_class1_ref,
				    $equiv_class2_ref, \$sim) == 1) 
			or isip_die("ERROR: inter_word_clustering - failed to compute similarity measure");
		    if ($sim > $MaxSim) {
			$MaxSim = $sim;
			$bestSet1_ref = ${$equiv_classes_ref}[$l1];
			$bestSet2_ref = ${$equiv_classes_ref}[$l2];
		    }
		}
	    }
	}    
	
	if ($MaxSim > MIN_NEGATIVE_INTEGER) {	    
	    (union($bestSet1_ref,
		   $bestSet2_ref, \@equiv_class_new) == 1) 
		or isip_die("ERROR: inter_word_clustering - failed to union two classes");
	    
	    my @equiv_classes_new;
	    my @po_table_new;    
	    (create_equiv_classes($bestSet1_ref, $bestSet2_ref, 
				  \@equiv_class_new, $equiv_classes_ref, 
				  $po_table_ref,
				  \@equiv_classes_new, \@po_table_new) == 1) 
		or isip_die("ERROR: inter_word_clustering - failed to create a new set oof equivalence classes");
	    
	    # update the references for the next loop
	    #
	    undef @{$equiv_classes_ref};
	    undef @{$po_table_ref};
	    copy_equiv_classes(\@equiv_classes_new,  $equiv_classes_ref);
	    copy_partial_order_table(\@po_table_new,  $po_table_ref);
	    undef @equiv_class_new;
	    undef @equiv_classes_new;
	    undef @po_table_new;
	    $i++;
	}	
    } while ($MaxSim > MIN_NEGATIVE_INTEGER);    
    
    # update the output arguments
    #
    (copy_equiv_classes($equiv_classes_ref,  $equiv_classes_cluster_ref) == 1) 
	or isip_die("ERROR: inter_word_clustering - failed to copy set of equivalence lasses");
    (copy_partial_order_table($po_table_ref, $po_table_cluster_ref) == 1) 
	or isip_die("ERROR: inter_word_clustering - failed to copy set of equivalence lasses");

    # exit gracefully
    #
    return 1;
}

# subroutine: sim_inter_word
#
# arguments:
#  @_ : ($equiv_class1_ref, $equiv_class2_ref, \$sim)
#
# description: this routine computes the similarity measure for inter-word
#              clustering. note that the phonetic measure given in the 
#              reference paper does not increase the performance when the time
#              marks of the links are provided and hence, we ignore the 
#              phonetic similarity in this implementation. However, the
#              phonetic measure can easily be added to the current
#              implementation
#
# return: 0 if error 1 if no error
#
sub sim_inter_word ($$$) {

    # parse the argument list
    #
    my $equiv_class1_ref = shift @_;
    my $equiv_class2_ref = shift @_;
    my $sim_ref = shift @_;
    
    if ((${$equiv_class1_ref}[0] eq "") || (${$equiv_class2_ref}[0] eq ""))  {
	print "ERROR: sim_inter_word - bad argument list\n";
	return 0;
    }
    
    # find the pair of words for which the similarity measure is largest
    #
    my $max = MIN_NEGATIVE_INTEGER;
    my $temp = MIN_NEGATIVE_INTEGER;
    for (my $i = 0; $i <= $#{$equiv_class1_ref}; $i++) {
	for (my $j = 0; $j <= $#{$equiv_class2_ref}; $j++) {

	    $temp =  ${$equiv_class1_ref}[$i]->prob + 
		${$equiv_class1_ref}[$i]->prob;
	    
	    if ($temp > $max) {
		$max = $temp;
	    }
	}
    }
    $$sim_ref = $max;
    
    # exit gracefully
    #
    return 1;
}

# subroutine: copy_equiv_classes
#
# arguments:
#  @_ : ($equiv_classes_ref, $copy_equiv_classes_ref)
#
# description: this routine cpopies a set of equivalence classes
#
# return: 0 if error 1 if no error
#
sub copy_equiv_classes ($$) {
    
    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my $equiv_classes_merge_ref = shift @_;
    
    if (${$equiv_classes_ref}[0][0] eq "")  {
	print "ERROR: copy_equiv_classes - bad argument list\n";
	return 0;
    }
    
    # loop over all set of classes and create a new set of classes
    #
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {
	
	my @equiv_class;
	my $temp_equiv_class_ref = ${$equiv_classes_ref}[$i];
	
	for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {
	    
	    my $temp_link = new Link;
	    
	    my $value = ${$equiv_classes_ref}[$i][$j]->word;
	    $temp_link->word($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->start_time;
	    $temp_link->start_time($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->end_time;
	    $temp_link->end_time($value);
		    
	    $value = ${$equiv_classes_ref}[$i][$j]->prob;
	    $temp_link->prob($value);

	    # set the equivalence class
	    #
	    push @equiv_class, $temp_link;
	}
	push @{$equiv_classes_merge_ref}, [@equiv_class];
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: copy_partial_order_table
#
# arguments:
#  @_ : ($po_table_ref, $copy_po_table_ref)
#
# description: this routine creates a copy of partial order table
#
# return: 0 if error 1 if no error
#
sub copy_partial_order_table ($$) {
    
    # parse the argument list
    #
    my $po_table_ref = shift @_;
    my $copy_po_table_ref = shift @_;
    
    if (${$po_table_ref}[0][0] eq "")  {
	print "ERROR: copy_partial_order_table - bad argument list\n";
	return 0;
    }
    
    # set the partial order of all the classes, each class has only one link
    #
    for (my $j = 0; $j <= $#{$po_table_ref}; $j++) {
	for (my $i = 0; $i <= $#{$po_table_ref}; $i++) {	    
	    ${$copy_po_table_ref}[$j][$i] =  ${$po_table_ref}[$j][$i];
	    
	}
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: order_classes
#
# arguments:
#  @_ : (\@equiv_classes, \@equiv_classes_cluster)
#
# description: this routine orders (indexes) the equivalence classes in time
#
# return: 0 if error 1 if no error
#
sub order_classes ($$) {

    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my $equiv_classes_cluster_ref = shift @_;
    
    if ((${$equiv_classes_ref}[0] eq ""))  {
	print "ERROR: order_classes - bad argument list\n";
	return 0;
    }
    
    # local variables
    #
    my @times;
    
    # get the start time for all the classes
    #
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {	
	
	my $temp_equiv_class_ref = ${$equiv_classes_ref}[$i];
	my $temp_time = MAX_POSITIVE_INTEGER;
	$times[$i] = MAX_POSITIVE_INTEGER;
	
	for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {	    
	    $temp_time = ${$equiv_classes_ref}[$i][$j]->start_time;
	    if ($temp_time < $times[$i]) {
		$times[$i] = $temp_time;
	    }
	}
    }


    # sort the start times
    #
    my @sorted_times = sort {$a <=> $b} @times;

    # create a hash mapping from new class indices to old class indices
    #
    my %new_old_map;
    my @flag;
    for (my $j = 0; $j <= $#times; $j++) {
	$flag[$j] = 0;
    }    
    for (my $i = 0; $i <= $#sorted_times; $i++) {	
	for (my $j = 0; $j <= $#times; $j++) {
	    if (($sorted_times[$i] == $times[$j]) && ($flag[$j] == 0)) {
		$new_old_map{$i} = $j;
		$flag[$j] = 1;
		last;
	    }
	}
    }
    
    # create time sorted classes
    #
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {	
	
	my @equiv_class;
	my $old_index = $new_old_map{$i};

	my $temp_equiv_class_ref = ${$equiv_classes_ref}[$old_index];
	
	for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {
	    
	    my $temp_link = new Link;
	    
	    my $value = ${$equiv_classes_ref}[$old_index][$j]->word;
	    $temp_link->word($value);
	    
	    $value = ${$equiv_classes_ref}[$old_index][$j]->start_time;
	    $temp_link->start_time($value);
	    
	    $value = ${$equiv_classes_ref}[$old_index][$j]->end_time;
	    $temp_link->end_time($value);
	    
	    $value = ${$equiv_classes_ref}[$old_index][$j]->prob;
	    $temp_link->prob($value);
	    
	    # set the equivalence class
	    #
	    push @equiv_class, $temp_link;
	}
	push @{$equiv_classes_cluster_ref}, [@equiv_class];
    }    

    # exit gracefully
    #
    return 1;
}

# subroutine: merge_same_words
#
# arguments:
#  @_ : (\@equiv_classes, \@equiv_classes_cluster)
#
# description: this routine merges same words in each class
#
# return: 0 if error 1 if no error
#
sub merge_same_words ($$) {

    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my $equiv_classes_cluster_ref = shift @_;
    
    if ((${$equiv_classes_ref}[0] eq ""))  {
	print "ERROR: merge_same_words - bad argument list\n";
	return 0;
    }
    
    # merge same words within a class
    #
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {	
	
	my @equiv_class;
	my $temp_equiv_class_ref = ${$equiv_classes_ref}[$i];
	
	for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {
	    
	    my $temp_link = new Link;
	    
	    my $value = ${$equiv_classes_ref}[$i][$j]->word;
	    $temp_link->word($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->start_time;
	    $temp_link->start_time($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->end_time;
	    $temp_link->end_time($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->prob;
	    $temp_link->prob($value);
	    
	    # set the equivalence class if not present
	    #
	    (merge_links(\@equiv_class, \$temp_link) == 1)
		or isip_die ("ERROR: merge_same_words  - failed to add log probabilities");
	}
	push @{$equiv_classes_cluster_ref}, [@equiv_class];
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: merge_links
#
# arguments:
#  @_ : (\@equiv_class, \$link)
#
# description: this routine returns merges the link to one present
#
# return: 0 if error 1 if no error
#
sub merge_links ($$) {

    # parse the argument list
    #
    my $equiv_class_ref = shift @_;
    my $link_ref = shift @_;
    
    if (($$link_ref eq ""))  {
	print "ERROR: merge_links - bad argument list\n";
	return 0;
    }
    
    # local variables
    #
    my $word1 = $$link_ref->word;
    my $prob1 = $$link_ref->prob;
    
    # loop over all links in the class
    #
    my $flag = 0;    
    for (my $j = 0; $j <= $#{$equiv_class_ref}; $j++) {	    

	my $word2 = ${$equiv_class_ref}[$j]->word;    
	my $prob2 = ${$equiv_class_ref}[$j]->prob;
	
	if ($word1 eq $word2) {
	    my $prob;
	    (log_add_log($prob1, $prob2, \$prob) == 1)
		or isip_die ("ERROR: merge_links - failed to add log probabilities");
	    ${$equiv_class_ref}[$j]->prob($prob);
	    $flag = 1;
	}
    }
    
    if ($flag == 0) {
	push @{$equiv_class_ref}, $$link_ref;
    }

    # exit gracefully
    #
    return 1;
}

# subroutine: add_null_arcs
#
# arguments:
#  @_ : (\@equiv_classes, \@equiv_classes_cn)
#
# description: this routine adds a null arc to each equvalence class if
#              the sum of links in each class is less than unity
#
# return: 0 if error 1 if no error
#
sub add_null_arcs ($$) {

    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my $equiv_classes_cn_ref = shift @_;
    
    if ((${$equiv_classes_ref}[0] eq ""))  {
	print "ERROR: add_null_arcs - bad argument list\n";
	return 0;
    }
    
    # merge same words within a class
    #
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {	
	
	my @equiv_class;
	my $temp_equiv_class_ref = ${$equiv_classes_ref}[$i];
	my $total_prob = MIN_NEGATIVE_INTEGER ;
	
	for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {
	    
	    my $temp_link = new Link;
	    
	    my $value = ${$equiv_classes_ref}[$i][$j]->word;
	    $temp_link->word($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->start_time;
	    $temp_link->start_time($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->end_time;
	    $temp_link->end_time($value);
	    
	    $value = ${$equiv_classes_ref}[$i][$j]->prob;
	    $temp_link->prob($value);
	    
	    my $tmp = $total_prob;
	    (log_add_log($value, $tmp, \$total_prob) == 1)
		or isip_die ("ERROR: add_null_arcs - failed to add log probabilities");

	    # set the equivalence class
	    #
	    push @equiv_class, $temp_link;
	}

	if ($total_prob < 0) {
	    my $temp_link = new Link;	    
	    $temp_link->word(ALIGN_SYMBOL);
	    $temp_link->start_time(0);	   
	    $temp_link->end_time(0);
	    my $temp = log(1 - exp($total_prob));
	    $temp_link->prob($temp);	    
	    push @equiv_class, $temp_link;
	}
	
	push @{$equiv_classes_cn_ref}, [@equiv_class];
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: convert_confusion_network_to_lattice
#
# arguments:
#  @_ : (\@equiv_classes, \@attr_name_vertex, \@attr_name_edge, 
#        \$g_ref)
#
# description: this routine converts confusion n/w into a lattice
#
# return: 0 if error 1 if no error
#
sub convert_confusion_network_to_lattice ($$$$) {

    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $g_ref = shift @_;
    
    if ((${$equiv_classes_ref}[0] eq "") || ($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq ""))  {
	print "ERROR: convert_confusion_network_to_lattice - bad argument list\n";
	return 0;
    }
    
    # loop over all set of classes and create a new set of classes
    #
    my $arc_no = 0;
    my $start_node_no = 0;
    my $end_node_no = 0;
    
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {
	
	my $temp_equiv_class_ref = ${$equiv_classes_ref}[$i];
	$end_node_no = $start_node_no + 1;
	
	for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {
	    
	    my $word = ${$equiv_classes_ref}[$i][$j]->word;
	    my $prob = ${$equiv_classes_ref}[$i][$j]->prob;
	    
	    # read and set the edge parameters
	    #
	    $$g_ref->set_edge_attribute($start_node_no, $end_node_no, 
					$attr_name_edge[0], $arc_no);
	    $$g_ref->set_edge_attribute($start_node_no, $end_node_no, 
					$attr_name_edge[1], $word);
	    $$g_ref->set_edge_attribute($start_node_no, $end_node_no, 
					$attr_name_edge[5], $prob);
	
	    # set the node time (sequence of equivalence classes) as 
	    # positive integer
	    #
	    $$g_ref->set_vertex_attribute($start_node_no, $attr_name_vertex[0],
					  $i);
	    $$g_ref->set_vertex_attribute($end_node_no, $attr_name_vertex[0],
					  $i+1);
	
	    $arc_no++;
	    $end_node_no++;
	}
	$start_node_no = $end_node_no;
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: pick_best_hypo
#
# arguments:
#  @_ : (\@equiv_classes, $hypo_ref)
#
# description: this routine picks the best hypothesis from a confusion n/w
#
# return: 0 if error 1 if no error
#
sub pick_best_hypo ($$) {

    # parse the argument list
    #
    my $equiv_classes_ref = shift @_;
    my $hypo_ref = shift @_;
    
    if ((${$equiv_classes_ref}[0] eq ""))  {
	print "ERROR: pick_best_hypo - bad argument list\n";
	return 0;
    }

    # pick one best hypothesis
    #
    for (my $i = 0; $i <= $#{$equiv_classes_ref}; $i++) {	
	
	# local variables
	#
	my @equiv_class;
	my $temp_equiv_class_ref = ${$equiv_classes_ref}[$i];
	my $best_word;
	my $best_start_time;
	my $best_end_time;
	my $best_prob = MIN_NEGATIVE_INTEGER;
	
	for (my $j = 0; $j <= $#{$temp_equiv_class_ref}; $j++) {
	    
	    my $temp_word = ${$equiv_classes_ref}[$i][$j]->word;
	    my $temp_start_time = ${$equiv_classes_ref}[$i][$j]->start_time;
	    my $temp_end_time = ${$equiv_classes_ref}[$i][$j]->end_time;
	    my $temp_prob = ${$equiv_classes_ref}[$i][$j]->prob;
	    
	    if ($temp_prob > $best_prob) {
		$best_word  = $temp_word;
		$best_start_time = $temp_start_time;
		$best_end_time = $temp_end_time;
		$best_prob = $temp_prob;		
	    }	    
	}
	
	# add !SENT_START
	#
	if ($i == 0) {
	    my @tmp;
	    push (@tmp, 0);
	    push (@tmp, $best_start_time);
	    push (@tmp, "!SENT_START");
	    push (@tmp, 0);
    
	    my $hy = join(" ", @tmp);
	    push (@{$hypo_ref}, $hy);
	}	

	# add the best word in the equivalence class into the hypothesis
	#
	if ($best_word ne ALIGN_SYMBOL) {
	    my @tmp;
	    push (@tmp, $best_start_time);
	    push (@tmp, $best_end_time);
	    push (@tmp, $best_word);
	    push (@tmp, $best_prob);
	    
	    my $hy = join(" ", @tmp);
	    push (@{$hypo_ref}, $hy);
	}
	
	# add !SENT_END
	#
	if ($i == $#{$equiv_classes_ref}) {
	    my @tmp;
	    push (@tmp, $best_end_time);
	    push (@tmp, $best_end_time);
	    push (@tmp, "!SENT_END");
	    push (@tmp, 0);
	    
	    my $hy = join(" ", @tmp);
	    push (@{$hypo_ref}, $hy);
	}	
    }
    
    # exit gracefully
    #
    return 1;
}

# subroutine: write_file_as_lines
#
# arguments:
#  @_ : ($input_file, \@lines)
#
# description: this routine reads a file as lines
#
# return: 0 if error 1 if no error
#
sub write_file_as_lines ($$) {
    
    # parse the argument list
    #
    my $input_file = shift @_;
    my $lines_ref = shift @_;
    
    if (($input_file eq ""))  {
	print "ERROR: write_file_as_lines - bad argument list\n";
	return 0;
    }
    
    # try to open the input file
    #    
    open(fp_write, ">$input_file") or 
	isip_die("ERROR: write_file_as_lines - can't open file $input_file") ;
    
    # write the lines in array ro the file
    #
    print fp_write "\n";
    for (my $i = 0; $i <= $#{$lines_ref}; $i++) {
	print fp_write "${$lines_ref}[$i]\n";
    }

    # close the file
    #
    close(fp_write) or isip_die("ERROR: write_file_as_lines - can't close file $input_file") ;
    
    # exit gracefully
    #
    return 1;
}

# subroutine: prune_lattices
#
# arguments:
#  @_ : ($g_ref, \@attr_name_vertex, \@attr_name_edge, $thresh, \$id)
#
# description: this routine prunes all the links whose posterior score is
#              less than the threshold
#
# return: 0 if error 1 if no error
#
sub prune_lattices ($$$$$) {

    # parse the argument list
    #
    my $g_ref = shift @_;
    my @attr_name_vertex = @{shift @_};
    my @attr_name_edge = @{shift @_};
    my $thresh = shift @_;
    my $id_ref = shift @_;

    if (($$g_ref eq "") || ($attr_name_vertex[0] eq "") || 
	($attr_name_edge[0] eq "") || ($thresh eq ""))  {
	print "ERROR: prune_lattices - bad argument list\n";
	return 0;
    }
    
    # get vertices and edges
    #
    my @E = $$g_ref->edges;
    my @V = $$g_ref->vertices;
    
    # display information
    #
    print "\nINFO: lattice_subs - pruning the lattice for utterance $$id_ref\n";

    # get the edge information
    #
    for (my $i = 0; $i <= $#E; $i++) {
	
	my @edge = @{@E->[$i]};	
	my $prob = $$g_ref->get_edge_attribute($edge[0], $edge[1], 
					       $attr_name_edge[5]);
	if ($prob < $thresh) {
	    $$g_ref->delete_edge($edge[0], $edge[1]);		
	}
    }
    
    # exit gracefully
    #
    return 1;
}

# return 1 to make perl happy
#
return 1;

#
# end of file
