/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
Ronald Rosenfeld and Philip Clarkson.

All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/


/* Patched together from other header files. Ugly (and not used in
   compiling the toolkit), but makes using toolkit functions in other
   code simpler. */

#include <stdio.h>
#include <stdlib.h>

#ifndef _TOOLKIT_H_
#define _TOOLKIT_H_

#define DEFAULT_N 3
#define DEFAULT_VERBOSITY 2
#define MAX_VOCAB_SIZE 65535

/* The following gives the amount of memory (in MB) which the toolkit
   will assign when allocating big chunks of memory for buffers. Note
   that the more memory that can be allocated, the faster things will
   run, so if you are running these tools on machines with 400 MB of
   RAM, then you could safely triple this figure. */

#define STD_MEM 100
#define DEFAULT_TEMP "/usr/tmp/"

#define VERSION 2.0

typedef unsigned short flag;

#endif
/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
Ronald Rosenfeld and Philip Clarkson.

All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/


/* Function prototypes for pc library */

#ifndef _PCGEN_H_
#define _PCGEN_H_

int pc_flagarg(int *argc, char **argv, char *flag);

char *pc_stringarg(int *argc, char **argv, char *flag, char *value);

int pc_intarg(int *argc, char **argv, char *flag, int value);

double pc_doublearg(int *argc, char **argv, char *flag, double value);

short *pc_shortarrayarg(int *argc, char **argv, char *flag, int elements, 
			int size);

int *pc_intarrayarg(int *argc, char **argv, char *flag, int elements, 
		    int size);

void pc_message(unsigned short verbosity, 
	       unsigned short priority, 
	       char *msg, ...);

void pc_report_unk_args(int *argc, char **argv, int verbosity);

void report_version(int *argc, char **argv);

#endif


/* GENERAL.H  */
/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1994, Carnegie Mellon University and Ronald Rosenfeld.
All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/

#ifndef _GENERAL_H_
#define _GENERAL_H_


#define CMU_SLM_VERSION  "CMU SLM Toolkit, Version for internal CMU use"

/* the following should be made machine-dependent */
typedef int   int32;
typedef short int16;

FILE *rr_fopen(char *filename, char *mode);
void *rr_fseek(FILE *fp, int offset, int mode, char *description);
void *rr_fread();
void *rr_fwrite();
char *rr_malloc(size_t n_bytes);
char *rr_calloc(size_t nelem, size_t elsize);
int  rr_filesize(int fd);
int  rr_feof(FILE *fp);
char *salloc(char *str);
int  rr_fexists(char *path);
FILE *rr_iopen(char *path);
void *rr_iclose(FILE *fp);
FILE *rr_oopen(char *path);
void *rr_oclose(FILE *fp);
void parse_line(char *line, int mwords, int canonize,
    char **pword_begin, char **pword_end, int *p_nwords, int *p_overflow);
int quit(int rc, char *msg, ...);

typedef char Boolean;
typedef unsigned short wordid_t;
typedef int    cluster_t;

#ifndef MIN
#define MIN(X,Y)  ( ((X)<(Y)) ? (X) : (Y))
#endif
#ifndef MAX
#define MAX(X,Y)  ( ((X)>(Y)) ? (X) : (Y))
#endif

#define LOG_BASE	9.9995e-5
#define MIN_LOG		-690810000
#define LOG(x) ((x == 0.0) ? MIN_LOG : ((x > 1.0) ?			    \
				 	(int) ((log (x) / LOG_BASE) + 0.5) :\
				 	(int) ((log (x) / LOG_BASE) - 0.5)))
#define EXP(x)  (exp ((double) (x) * LOG_BASE))

#ifdef __alpha
#define SLM_SWAP_BYTES  1    /* reverse byteorder */
#endif

/* the following are for the benefit of vararg-less environments */

#define quit0(rc,msg) {fprintf(stderr,msg); exit(rc);}
#define quit1(rc,msg,i1) {fprintf(stderr,msg,i1); exit(rc);}
#define quit2(rc,msg,i1,i2) {fprintf(stderr,msg,i1,i2); exit(rc);}
#define quit3(rc,msg,i1,i2,i3) {fprintf(stderr,msg,i1,i2,i3); exit(rc);}
#define quit4(rc,msg,i1,i2,i3,i4) {fprintf(stderr,msg,i1,i2,i3,i4); exit(rc);}

#define  MAX_WORDS_PER_DOC 65534

#endif  
/* MIPS_SWAP.H  */
/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1994, Carnegie Mellon University and Ronald Rosenfeld.
All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/

#ifndef _MIPS_SWAP_H_
#define _MIPS_SWAP_H_


#ifdef SLM_SWAP_BYTES    /* reverse byteorder */

/* the following works even for badly aligned pointers */

#define SWAPFIELD(x) {if     (sizeof(*(x))==sizeof(short)) {SWAPHALF((x))}  \
		      else if (sizeof(*(x))==sizeof(int))   {SWAPWORD((x))}  \
		      else if (sizeof(*(x))==sizeof(double)){SWAPDOUBLE((x))}\
		     }

#define SWAPHALF(x) {char tmp_byte; 			   \
			       tmp_byte = *((char*)(x)+0); \
			*((char*)(x)+0) = *((char*)(x)+1); \
			*((char*)(x)+1) = tmp_byte;        \
		    }
#define SWAPWORD(x) {char tmp_byte; 			   \
			       tmp_byte = *((char*)(x)+0); \
			*((char*)(x)+0) = *((char*)(x)+3); \
			*((char*)(x)+3) = tmp_byte;  	   \
			       tmp_byte = *((char*)(x)+1); \
			*((char*)(x)+1) = *((char*)(x)+2); \
			*((char*)(x)+2) = tmp_byte;        \
		    }

#define SWAPDOUBLE(x) {char tmp_byte; 			   \
			       tmp_byte = *((char*)(x)+0); \
			*((char*)(x)+0) = *((char*)(x)+7); \
			*((char*)(x)+7) = tmp_byte;        \
			       tmp_byte = *((char*)(x)+1); \
			*((char*)(x)+1) = *((char*)(x)+6); \
			*((char*)(x)+6) = tmp_byte;        \
			       tmp_byte = *((char*)(x)+2); \
			*((char*)(x)+2) = *((char*)(x)+5); \
			*((char*)(x)+5) = tmp_byte;        \
			       tmp_byte = *((char*)(x)+3); \
			*((char*)(x)+3) = *((char*)(x)+4); \
			*((char*)(x)+4) = tmp_byte;        \
		    }

#if 0 /* old */
#define SWAPHALF(x) *(short*)(x) = ((0xff   & (*(short*)(x)) >> 8) | \
				    (0xff00 & (*(short*)(x)) << 8))
#define SWAPWORD(x) *(int*)  (x) = ((0xff       & (*(int*)(x)) >> 24) | \
				    (0xff00     & (*(int*)(x)) >>  8) | \
                        	    (0xff0000   & (*(int*)(x)) <<  8) | \
				    (0xff000000 & (*(int*)(x)) << 24))
#define SWAPDOUBLE(x) { int *low  = (int *) (x), \
		            *high = (int *) (x) + 1, temp;\
                        SWAPWORD(low);  SWAPWORD(high);\
                        temp = *low; *low = *high; *high = temp;}
#endif /* old */
#else

#define SWAPFIELD(x)
#define SWAPHALF(x)
#define SWAPWORD(x)
#define SWAPDOUBLE(x)

#endif


#define ALWAYS_SWAPFIELD(x) {\
		      if      (sizeof(*(x))==sizeof(short)) {SWAPHALF((x))}  \
		      else if (sizeof(*(x))==sizeof(int))   {SWAPWORD((x))}  \
		      else if (sizeof(*(x))==sizeof(double)){SWAPDOUBLE((x))}\
		     }

#define ALWAYS_SWAPHALF(x) {char tmp_byte; 			   \
			       tmp_byte = *((char*)(x)+0); \
			*((char*)(x)+0) = *((char*)(x)+1); \
			*((char*)(x)+1) = tmp_byte;        \
		    }
#define ALWAYS_SWAPWORD(x) {char tmp_byte; 			   \
			       tmp_byte = *((char*)(x)+0); \
			*((char*)(x)+0) = *((char*)(x)+3); \
			*((char*)(x)+3) = tmp_byte;  	   \
			       tmp_byte = *((char*)(x)+1); \
			*((char*)(x)+1) = *((char*)(x)+2); \
			*((char*)(x)+2) = tmp_byte;        \
		    }

#define ALWAYS_SWAPDOUBLE(x) {char tmp_byte; 			   \
			       tmp_byte = *((char*)(x)+0); \
			*((char*)(x)+0) = *((char*)(x)+7); \
			*((char*)(x)+7) = tmp_byte;        \
			       tmp_byte = *((char*)(x)+1); \
			*((char*)(x)+1) = *((char*)(x)+6); \
			*((char*)(x)+6) = tmp_byte;        \
			       tmp_byte = *((char*)(x)+2); \
			*((char*)(x)+2) = *((char*)(x)+5); \
			*((char*)(x)+5) = tmp_byte;        \
			       tmp_byte = *((char*)(x)+3); \
			*((char*)(x)+3) = *((char*)(x)+4); \
			*((char*)(x)+4) = tmp_byte;        \
		    }

#endif

/* SIH.H : String-to-Integer Hashing */
/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1994, Carnegie Mellon University and Ronald Rosenfeld.
All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/

#ifndef _SIH_H_
#define _SIH_H_


typedef struct {
	char *string;	   /* string (input to hash function) */
	int32 intval;	   /* Associated int32 value (output of hash function) */
} sih_slot_t;


typedef struct {
	double  max_occupancy;  /* max. allowed occupancy rate */
	double  growth_ratio;   /* ratio of expansion when above is violated */
	int     warn_on_update; /* print warning if same string is hashed again */
	int	nslots;    	/* # of slots in the hash table */
	int	nentries;	/* # of actual entries */
	sih_slot_t *slots;	/* array of (string,intval) pairs */
} sih_t;


sih_t *sih_create(int initial_size, double max_occupancy, double growth_ratio, int warn_on_update);

void    sih_add(sih_t *ht, char *string, int32 intval);

char sih_lookup(sih_t *ht, char *string, int32 *p_intval);

void *sih_val_write_to_file(sih_t *ht, FILE *fp, char *filename, int verbosity);

void *sih_val_read_from_file(sih_t *ht, FILE *fp, char *filename, int verbosity);

/* Moved to here from read_voc.c by Philip Clarkson March 4, 1997 */

void get_vocab_from_vocab_ht(sih_t *ht, int vocab_size, int verbosity, char ***p_vocab);

/* Added by Philip Clarkson March 4, 1997 */

void read_wlist_into_siht(char *wlist_filename, int verbosity,  
			  sih_t *p_word_id_ht, int *p_n_wlist);

void read_wlist_into_array(char *wlist_filename, int verbosity, 
			   char ***p_wlist, int *p_n_wlist);

#endif 
/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
Ronald Rosenfeld and Philip Clarkson.

All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/


/* Type and function definitions for general n_gram models */

#ifndef _NGRAM_H_
#define _NGRAM_H_


#define DEFAULT_COUNT_TABLE_SIZE 65535
#define DEFAULT_OOV_FRACTION 0.5
#define DEFAULT_DISC_RANGE_1 1
#define DEFAULT_DISC_RANGE_REST 7
#define DEFAULT_MIN_ALPHA -3.2
#define DEFAULT_MAX_ALPHA 2.5
#define DEFAULT_OUT_OF_RANGE_ALPHAS 10000

#define GOOD_TURING 1
#define ABSOLUTE 2
#define LINEAR 3
#define WITTEN_BELL 4

#define SPECIFIED 1
#define BUFFER 2
#define TWO_PASSES 3

#define KEY 65000

#define CLOSED_VOCAB 0
#define OPEN_VOCAB_1 1
#define OPEN_VOCAB_2 2

typedef unsigned short id__t; /* Double underscore, since id_t is
				 already defined on some platforms */
typedef int count_t;   /* The count as read in, rather than its index 
			  in the count table. */
typedef unsigned short count_ind_t; /* The count's index in the count 
				       table. */
typedef unsigned short bo_weight_t;
typedef unsigned short cutoff_t;
typedef int table_size_t;
typedef unsigned short index__t;
typedef double disc_val_t;
typedef double uni_probs_t;
typedef int ptr_tab_t;
typedef float four_byte_t;


typedef struct {
  unsigned short n;
  id__t          *id_array;
  count_t        count;
} ngram;

typedef struct {
  unsigned short count_table_size;
  int            *counts_array;
} count_table_t;

typedef struct {

  /* Language model type */

  unsigned short n;                /* n=3 for trigram, n=4 for 4-gram etc. */
  int            version;

  /* Vocabulary stuff */

  sih_t          *vocab_ht;      /* Vocabulary hash table */
  unsigned short vocab_size;     /* Vocabulary size */
  char           **vocab;        /* Array of vocabulary words */
  unsigned short no_of_ccs;      /* Number of context cues */

  /* Tree */

  table_size_t   *table_sizes;   /* Pointer to table size array */
  id__t          **word_id;      /* Pointer to array of id lists */
  count_ind_t    **count;        /* Pointer to array of count lists 
				    (actually indices in a count table) */
  count_ind_t    *marg_counts;   /* Array of marginal counts for the 
				    unigrams. The normal unigram counts
				    differ in that context cues have
				    zero counts there, but not here */
  int            **count4;       /* Alternative method of storing the counts,
				    using 4 bytes. Not normally allocated */
  int            *marg_counts4;  /* Ditto */
  bo_weight_t    **bo_weight;    /* Pointer to array of back-off weights */
  four_byte_t    **bo_weight4;   /* Pointer to array of 4 byte
				    back_off weights. Only one of
				    these arrays will be allocated */
  index__t       **ind;          /* Pointer to array of index lists */
  

  /* Two-byte alpha stuff */

  double         min_alpha;      /* The minimum alpha in the table */
  double         max_alpha;      /* The maximum alpha in the table */
  unsigned short out_of_range_alphas;  /* The maximum number of out of range 
					  alphas that we are going to allow. */
  double         *alpha_array;
  unsigned short size_of_alpha_array;

  /* Count table */

  count_ind_t    count_table_size; /* Have same size for each count table */
  count_t        **count_table;    /* Pointer to array of count tables */

  /* Index lookup tables */

  ptr_tab_t      **ptr_table;     /* Pointer to the tables used for compact 
				     representation of the indices */
  unsigned short *ptr_table_size; /* Pointer to array of pointer tables */

  /* Discounting and cutoffs - note: some of these may not used,
     depending on the discounting techinque used. */

  unsigned short discounting_method;     /* See #define stuff at the top of 
					    this file */
  cutoff_t       *cutoffs;               /* Array of cutoffs */
  int            **freq_of_freq;         /* Array of frequency of frequency 
					    information  */
  unsigned short *fof_size;              /* The sizes of the above arrays */
  unsigned short *disc_range;            /* Pointer to array of discounting 
					    ranges - typically will be 
					    fof_size - 1, but can be reduced
					    further if stats are anomolous */
  disc_val_t     **gt_disc_ratio;        /* The discounted values of the 
					    counts */
  disc_val_t     *lin_disc_ratio;        /* The linear discounting ratio */
  double         *abs_disc_const;        /* The constant required for
					    absolute discounting */

  /* Unigram statistics */

  uni_probs_t    *uni_probs;             /* Probs for each unigram */
  uni_probs_t    *uni_log_probs;         /* Log probs for each unigram */
  flag           *context_cue;           /* True if word with this id is
					    a context cue */
  int            n_unigrams;             /* Total number of unigrams in
					    the training data */
  int            min_unicount;           /* Count to which infrequent unigrams
					    will be bumped up */
  /* Input files */

  char           *id_gram_filename;  /* The filename of the id-gram file */
  FILE           *id_gram_fp;        /* The file pointer of the id-gram file */
  char           *vocab_filename;    /* The filename of the vocabulary file */
  char           *context_cues_filename; /* The filename of the context cues 
					    file */
  FILE           *context_cues_fp;       /* The file pointer of the context 
					    cues file */

  /* Output files */

  flag           write_arpa;      /* True if the language model is to be 
				     written out in arpa format */
  char           *arpa_filename;  /* The filaname of the arpa format LM */
  FILE           *arpa_fp;        /* The file of the arpa format LM */
  flag           write_bin;       /* True if the language model is to be 
				     written out in binary format */
  char           *bin_filename;   /* The filaname of the bin format LM */
  FILE           *bin_fp;         /* The file of the bin format LM */

  /* Misc */

  int            *num_kgrams;     /* Array indicating how many 
				     2-grams, ... ,n-grams, have been 
				     processed so far */

  unsigned short vocab_type;      /* see #define stuff at the top */

  unsigned short first_id;        /* 0 if we have open vocab, 1 if we have
				     a closed vocab. */

  /* Once the tree has been constructed, the tables are indexed from 0
     to (num_kgrams[i]-1). */

  /* 1-gram tables are indexed from 0 to ng.vocab_size. */

  double         zeroton_fraction; /* cap on prob(zeroton) as fraction of 
				      P(singleton) */
  double         oov_fraction;
  flag           four_byte_alphas;
  flag           four_byte_counts;

} ng_t;

#endif


/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
Ronald Rosenfeld and Philip Clarkson.

All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/

/* Function prototypes for evallm */

#ifndef _EVALLM_PROTS_
#define _EVALLM_PROTS_


/* Type specification for forced back-off list */

typedef struct {
  flag backed_off;
  flag inclusive;
} fb_info;

typedef float bo_t;
typedef float prob_t;

/* Type specification for arpa_lm type */

typedef struct {

  unsigned short n;                /* n=3 for trigram, n=4 for 4-gram etc. */

  /* Vocabulary stuff */

  sih_t          *vocab_ht;      /* Vocabulary hash table */
  unsigned short vocab_size;     /* Vocabulary size */
  char           **vocab;        /* Array of vocabulary words */
  flag           *context_cue;   /* True if word with this id is
				    a context cue */
  int            no_of_ccs;      /* The number of context cues in the LM */

  /* Tree */

  table_size_t   *table_sizes;   /* Pointer to table size array */
  id__t          **word_id;      /* Pointer to array of id lists */

  bo_t           **bo_weight;    /* Pointer to array of back-off weights */
  prob_t         **probs;        /* Pointer to array of probabilities */
  index__t       **ind;          /* Pointer to array of index lists */

  /* Index lookup tables */

  ptr_tab_t      **ptr_table;     /* Pointer to the tables used for compact 
				     representation of the indices */
  unsigned short *ptr_table_size; /* Pointer to array of pointer tables */

  /* Misc */

  int            *num_kgrams;     /* Array indicating how many 
				     2-grams, ... ,n-grams, have been 
				     processed so far */

  unsigned short vocab_type;      /* see #define stuff at the top */

  unsigned short first_id;        /* 0 if we have open vocab, 1 if we have
				     a closed vocab. */

} arpa_lm_t;

/* Function prototypes */

unsigned short num_of_types(int k,
			    int ind,
			    ng_t *ng);
void decode_bo_case(int bo_case,
		    int context_length,
		    FILE *annotation_fp);

void display_stats(ng_t *ng);

void display_arpa_stats(arpa_lm_t *arpa_ng);

void load_lm(ng_t *ng,
	     char *lm_filename);

void load_arpa_lm(arpa_lm_t *arpa_lm,
		  char *lm_filename);
		  

void parse_comline(char *input_line,
		  int *num_of_args,
		  char **args);

void compute_perplexity(ng_t *ng,
			arpa_lm_t *arpa_ng,
			char *text_stream_filename,
			char *probs_stream_filename,
			char *annotation_filename,
			char *oov_filename,
			char *fb_list_filename,
			flag backoff_from_unk_inc,
			flag backoff_from_unk_exc,
			flag backoff_from_ccs_inc,
			flag backoff_from_ccs_exc,
			flag arpa_lm,
			flag include_unks,
			double log_base);

fb_info *gen_fb_list(sih_t *vocab_ht,
		     int vocab_size,
		     char **vocab,
		     flag *context_cue,
		     flag backoff_from_unk_inc,
		     flag backoff_from_unk_exc,
		     flag backoff_from_ccs_inc,
		     flag backoff_from_ccs_exc,
		     char *fb_list_filename);

void validate(ng_t *ng,
	      arpa_lm_t *arpa_ng,
	      char **words,
	      flag backoff_from_unk_inc,
	      flag backoff_from_unk_exc,
	      flag backoff_from_ccs_inc,
	      flag backoff_from_ccs_exc,
	      flag arpa_lm,
	      char *fb_list_filename);

double calc_prob_of(id__t sought_word,
		    id__t *context,
		    int context_length,
		    ng_t *ng,
		    arpa_lm_t *arpa_ng,
		    fb_info *fb_list,
		    int *bo_case,
		    int *actual_context_length,
		    flag arpa_lm);

void arpa_bo_ng_prob(int context_length,
		     id__t *sought_ngram,
		     arpa_lm_t *arpa_ng,
		     int verbosity,
		     double *p_prob,
		     int *bo_case);


#endif
/*=====================================================================
                =======   COPYRIGHT NOTICE   =======
Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
Ronald Rosenfeld and Philip Clarkson.

All rights reserved.

This software is made available for research purposes only.  It may be
redistributed freely for this purpose, in full or in part, provided
that this entire copyright notice is included on any copies of this
software and applications and derivations thereof.

This software is provided on an "as is" basis, without warranty of any
kind, either expressed or implied, as to any matter including, but not
limited to warranty of fitness of purpose, or merchantability, or
results obtained from use of this software.
======================================================================*/


/* Function prototypes */

#ifndef _IDNGRAM2LM_H_
#define _IDNGRAM2LM_H_

unsigned short num_of_types(int k,
			    int ind,
			    ng_t *ng);
int get_ngram(FILE *id_ngram_fp,ngram *ng,flag is_ascii);
void calc_mem_req(ng_t *ng,flag is_ascii);
void write_arpa_lm(ng_t *ng,int verbosity);
void write_bin_lm(ng_t *ng,int verbosity);
unsigned short new_index(int full_index,
			 int *ind_table,
			 unsigned short *ind_table_size,
			 int position_in_list);
int get_full_index(unsigned short short_index,
		   int *ind_table,
		   int ind_table_size,
		   int position_in_list);
void compute_gt_discount(int            n,
			 int            *freq_of_freq,
			 int            fof_size,
			 unsigned short *disc_range,
			 int cutoff,
			 int verbosity,
			 disc_val_t **discounted_values);
int lookup_index_of(int *lookup_table, 
		    int lookup_table_size, 
		    int intintval);
void compute_unigram(ng_t *ng,int verbosity);
void compute_back_off(ng_t *ng,int n,int verbosity);
void bo_ng_prob(int context_length,
		id__t *sought_ngram,
		ng_t *ng,
		int verbosity,
		double *p_prob,
		int *bo_case);
void increment_context(ng_t *ng, int k, int verbosity);
unsigned short short_alpha(double long_alpha,
			   double *alpha_array,
			   unsigned short *size_of_alpha_array,
			   int elements_in_range,
			   double min_range,
			   double max_range);

double double_alpha(unsigned short short_alpha,
		    double *alpha_array,
		    int size_of_alpha_array,
		    int elements_in_range,
		    double min_range,
		    double max_range);

void guess_mem(int total_mem,
	       int middle_size,
	       int end_size,
	       int n,
	       table_size_t *table_sizes,
	       int verbosity);

void read_voc(char *filename, int verbosity,   
	      sih_t *p_vocab_ht, char ***p_vocab, 
	      unsigned short *p_vocab_size);

void store_count(flag four_byte_counts,
		 int *count_table,
		 int count_table_size,
		 unsigned short *short_counts,
		 int *long_counts,
		 int position,
		 int count);

int return_count(flag four_byte_counts,
		 int *count_table,
		 unsigned short *short_counts,
		 int *long_counts,
		 int position);

#endif