#ifndef SLM_H
#define SLM_H

#ifdef __cplusplus
extern "C" {
#endif

/* Build-time switch: while this is defined, the `#ifdef ENABLE_LONGID`
   section of this header selects uint4 (instead of uint2) as SLMWordID. */
#define ENABLE_LONGID 1
#ifndef INTSIZE_H
#define INTSIZE_H
/* Integer aliases used throughout this header.  The digit in the names
   presumably denotes the intended width in bytes; that only holds on
   targets where short is 2 bytes and int is 4 bytes -- TODO confirm for
   every supported ABI. */
typedef unsigned short uint2;
typedef unsigned int uint4;
typedef int int4;
/* Integer types used to carry values through void* slots (see the
   (void*)(ptr_int) cast in SLMIntHashInsert).
   NOTE(review): long is narrower than a pointer on LLP64 targets such as
   64-bit Windows -- confirm the supported platforms. */
typedef long ptr_int;
typedef unsigned long u_ptr_int;
#endif
#ifndef LIBS_H
#define LIBS_H

/*
 * Allocation entry points used by the rest of the library.
 *
 *   Malloc(size)        allocate `size` bytes
 *   Realloc(ptr,size)   resize the block at `ptr` to `size` bytes
 *   Free(ptr)           release the block at `ptr`
 *
 * Every macro forwards the call site (__FILE__, __LINE__) to the backend.
 */
#ifdef DEBUG_MALLOC
/* Debug build: use the GC_debug_* functions declared by <gc.h>. */
#include <gc.h>
#define Malloc(size) GC_debug_malloc(size,__FILE__,__LINE__)
#define Free(ptr)   GC_debug_free(ptr)
#define Realloc(ptr,size) GC_debug_realloc(ptr,size,__FILE__,__LINE__)
#else
/* The prototypes below use size_t; include its definition here so that this
   header can be the first include of a translation unit. */
#include <stddef.h>
/* Library-provided wrappers, defined outside this header.  The trailing
   (char*, int) pair receives the caller's file name and line number. */
void* SLM_my_malloc(size_t,char*,int);
void* SLM_my_realloc(void*,size_t,char*,int);
void SLM_my_free(void*,char*,int);
#define Malloc(size) SLM_my_malloc(size,__FILE__,__LINE__)
#define Realloc(ptr,size) SLM_my_realloc(ptr,size,__FILE__,__LINE__)
#define Free(ptr) SLM_my_free(ptr,__FILE__,__LINE__)
#endif

/*
 * Typed allocation helpers layered on Malloc/Realloc.
 *
 *   New(T)               one object of type T
 *   New_N(T,count)       an array of `count` objects of type T
 *   New_Reuse(T,old,count)
 *                        resize `old` to hold `count` objects of type T
 *
 * The *Atom* variants expand to exactly the same allocation as their plain
 * counterparts.
 */
#define New(T)                  ((T*)Malloc(sizeof(T)))
#define NewAtom(T)              New(T)
#define New_N(T,count)          ((T*)Malloc((count)*sizeof(T)))
#define NewAtom_N(T,count)      New_N(T,count)
#define New_Reuse(T,old,count)  ((T*)Realloc((old),(count)*sizeof(T)))

/* String duplication is delegated to strdup(). */
#define GC_strdup(str) strdup(str)

#endif
#ifndef SLM_IO_H
#define SLM_IO_H

#include <stdio.h>

#ifdef HAVE_ZLIB
#include <zlib.h>

/* Capacity of the per-handle push-back buffer used by z_ungetc/z_getc. */
#define FH_UNGETC_BUFSIZE 16
/* zlib-backed file handle.  gzFile has no multi-character ungetc in this
   interface, so pushed-back characters are kept in a small LIFO stack. */
typedef struct filehandle {
    gzFile f;                            /* underlying zlib stream */
    char ungetc_buf[FH_UNGETC_BUFSIZE];  /* pushed-back characters (stack) */
    unsigned int ugbuf_ptr;              /* number of characters on the stack */
} *FILEHANDLE;

FILEHANDLE z_open(const char* file, const char* mode);
void z_close(FILEHANDLE gzf);
/* Write elsize*nelem bytes from ptr.  The result is gzwrite's return value
   (a byte count), not an element count as with fwrite. */
#define z_write(ptr,elsize,nelem,stream) gzwrite((stream)->f,ptr,(elsize)*(nelem))
/* Read elsize*nelem bytes into ptr.  This used to expand to gzwrite, which
   wrote the caller's buffer to the stream instead of filling it.  The result
   is gzread's return value (a byte count).  Characters held in the push-back
   buffer are not consulted by this macro. */
#define z_read(ptr,elsize,nelem,stream) gzread((stream)->f,ptr,(elsize)*(nelem))
#define z_eof(gzf) gzeof((gzf)->f)
#define z_gets(ptr, len, stream)  gzgets((stream)->f,ptr,len)
/* Return the most recently pushed-back character if there is one, otherwise
   the next character from the stream.  The buffered char is converted through
   unsigned char so that bytes >= 0x80 are returned as non-negative values,
   as gzgetc does, and cannot be mistaken for an end-of-file result.
   Note: the argument is evaluated more than once. */
#define z_getc(gzf) (((gzf)->ugbuf_ptr > 0)?(int)(unsigned char)(gzf)->ungetc_buf[--(gzf)->ugbuf_ptr]:gzgetc((gzf)->f))
/* Push c back onto the handle.  When the buffer is already full the
   character is dropped and the expression still evaluates to c.
   Note: both arguments may be evaluated more than once. */
#define z_ungetc(c,f) (((f)->ugbuf_ptr<FH_UNGETC_BUFSIZE)?(f)->ungetc_buf[(f)->ugbuf_ptr++] = (c):(c))

#else
/* Plain-stdio file handle, used when zlib support is not compiled in. */
typedef struct filehandle {
    unsigned int flag;  /* presumably a combination of the FH_* values below
                           -- TODO confirm against z_open */
    FILE *f;            /* underlying stdio stream */
} *FILEHANDLE;

/* Stream kind. */
#define FH_FILE 0
#define FH_PIPE 1

/* Access mode. */
#define FH_READ     0
#define FH_WRITE    2
#define FH_APPEND   4

/* Handle state. */
#define FH_OPEN     8

FILEHANDLE z_open(char *file,char *mode);
void z_close(FILEHANDLE fh);

/* Thin wrappers that forward to the stdio call of the same shape, applied
   to the FILE* stored in the handle. */
#define z_read(buf,elsize,nelem,fh)   fread(buf,elsize,nelem,(fh)->f)
#define z_write(buf,elsize,nelem,fh)  fwrite(buf,elsize,nelem,(fh)->f)
#define z_gets(buf, len, fh)          fgets(buf,len,(fh)->f)
#define z_getc(fh)                    fgetc((fh)->f)
#define z_ungetc(ch,fh)               ungetc(ch,(fh)->f)
#define z_eof(fh)                     feof((fh)->f)

#endif /* not HAVE_ZLIB */

/* common subroutines */

/* Handles for the three standard streams.  Declared with (void) so that the
   compiler rejects calls that pass arguments; an empty parameter list in C
   leaves the parameters unspecified. */
FILEHANDLE FILEIO_stdin(void);
FILEHANDLE FILEIO_stdout(void);
FILEHANDLE FILEIO_stderr(void);
/* printf-style formatted output to a handle. */
void z_printf(FILEHANDLE f, char *fmt,...);
/* Typed readers: each takes a handle and a pointer that receives one value
   of the named type.  NOTE(review): the meaning of the int result and the
   on-disk encoding are defined by the implementation, which is not visible
   in this header -- confirm before relying on them.
   z_getlong/z_getulong operate on int4/uint4, not on C long. */
int z_getint(FILEHANDLE f, int *iptr);
int z_getlong(FILEHANDLE f, int4 *iptr);
int z_getulong(FILEHANDLE f, uint4 *iptr);
int z_getushort(FILEHANDLE f, unsigned short *iptr);
int z_getfloat(FILEHANDLE f, float *iptr);
int z_getdouble(FILEHANDLE f, double *iptr);
/* Read a string into ptr; `limit` is presumably the capacity of ptr
   -- TODO confirm whether it includes the terminating NUL. */
int z_getstr(FILEHANDLE f, char *ptr, int limit);

#endif
#ifndef SLM_HASH_H
#define SLM_HASH_H

/*
 * A Hash Table using closed hash technique.
 *
 * by A. Ito
 * August 30, 2000
 */
#include <string.h>

/* One key/value slot of the table; both sides are opaque pointers. */
typedef struct {
    void *keyptr;
    void *valueptr;
} SLMHashTableElement;

/* Hash callback: maps a key pointer to an unsigned integer. */
typedef u_ptr_int (*SLMHashFuncT)(void*);
/* Key comparison callback.  SLMHashCreateSI supplies strcmp here, so a
   result of 0 presumably means "equal" -- TODO confirm. */
typedef int (*SLMCompFuncT)(void*,void*);

/* Hash table object handed to the SLMHash* functions. */
typedef struct {
    int size;                  /* presumably the number of slots in body
                                  -- TODO confirm */
    int nelem;                 /* presumably the number of stored entries
                                  -- TODO confirm */
    short key_incr;            /* NOTE(review): looks like a probe step used
                                  on collision -- confirm in the
                                  implementation */
    SLMHashFuncT hashfunc;     /* hash callback for keys */
    SLMCompFuncT compar;       /* comparison callback for keys */
    SLMHashTableElement *body; /* slot array */
} SLMHashTable;

/* Table life cycle: create a table with the given size and callbacks, and
   destroy it.  NOTE(review): whether destroy also releases keys/values is
   not visible in this header -- confirm. */
SLMHashTable *SLMHashCreate(int size, SLMHashFuncT hashfunc, SLMCompFuncT comp);
void SLMHashDestroy(SLMHashTable *ht);
/* Look up `key`; the result is a pointer to a table element.  The value
   returned for a missing key is not specified here -- TODO confirm. */
SLMHashTableElement *SLMHashSearch(SLMHashTable *hash, void *key);
/* Store the pair (key, value) in the table. */
void SLMHashInsert(SLMHashTable *hash, void *key, void *value);

/* Hash function intended for C-string keys (used by SLMHashCreateSI). */
u_ptr_int SLMHash4String(void*);

/* Create a table keyed by C strings.
   NOTE(review): strcmp is cast to SLMCompFuncT, i.e. it is later called
   through a pointer of a different function type.  ISO C leaves such a call
   undefined, although it works on common ABIs. */
#define SLMHashCreateSI(size) SLMHashCreate(size,SLMHash4String,(SLMCompFuncT)strcmp)

/* Look up a string key in a table whose values are integers stored in the
   value pointer (see SLMIntHashInsert).  The result for a missing key is
   not specified here -- TODO confirm. */
ptr_int SLMIntHashSearch(SLMHashTable *ht, char *key);

/* Insert an integer value by storing it in the element's value pointer.
   `value` is parenthesized so that an expression argument such as `a * b`
   or `a + b` is converted as a whole; without the parentheses the cast
   would bind to the first operand only. */
#define SLMIntHashInsert(ht, key,value) SLMHashInsert(ht,key,(void*)(ptr_int)(value))

#endif
#ifndef SLM_NGRAM_H
#define SLM_NGRAM_H

/* NG_CACHE (a cache meant to expedite n-gram search) is forced off here, so
   the SLMNgramSearchHist type and the `hist` member of SLMNgram are compiled
   out even if the build defined NG_CACHE. */
#undef NG_CACHE

#define MAX_GRAM 20 /* 20-gram max */
#define SLM_DEFAULT_DELIMITER '+'

/*
 * Word ID configuration.
 *
 *   SLMWordID       integer type holding a word ID
 *   SLMhtonID/ntohID  byte-order conversion matching the width of the ID
 *   SLM_NONWORD     reserved value meaning "not a word id"
 *   SLMWordID_FMT   printf/scanf conversion for one SLMWordID
 *
 * NOTE(review): no header declaring htonl/htons/ntohl/ntohs is included in
 * the visible part of this file; users of the byte-order macros presumably
 * have to include it themselves -- confirm.
 */
#ifdef ENABLE_LONGID
typedef uint4 SLMWordID;
#define SLMhtonID(x) htonl(x)
#define SLMntohID(x) ntohl(x)
#define SLM_NONWORD 0xffffffff   /* not a word id */
/* uint4 is `unsigned int`, so the matching conversion is %u.  The previous
   "%lu" expects `unsigned long`, which mismatches the argument in printf and
   makes scanf store an unsigned long through a pointer to unsigned int. */
#define SLMWordID_FMT "%u"
#else
typedef uint2 SLMWordID;
#define SLMhtonID(x) htons(x)
#define SLMntohID(x) ntohs(x)
#define SLM_NONWORD 0xffff   /* not a word id */
#define SLMWordID_FMT "%hu"
#endif
/* A word tuple is a plain array of word IDs; its length is carried
   separately by the functions that take one. */
typedef SLMWordID *SLMWordTuple;


/* One n-gram together with a count.  The number of IDs behind word_id is
   not stored here; the functions operating on this type take an ngram_len
   argument. */
typedef struct {
    uint4 count;        /* count associated with the n-gram */
    SLMWordID *word_id; /* word IDs of the n-gram */
} SLMNgramCount;

/* N-gram tree leaf: an entry of the `leaf' array, which holds the
   probabilities for the longest N (see the SLMNgramNode description). */
typedef struct {
    SLMWordID id; /* word ID */
    float prob;   /* probability */
} SLMNgramLeaf;

/* N-gram tree node */
/* An n-gram tree is expressed as N arrays: node[0], node[1], ...
   Unigram probabilities are stored in node[0], bigram probabilities in
   node[1], and so on.  The probabilities for the longest N are stored in
   the array `leaf', not in `node'.

   The members id, prob and alpha hold the word ID, the probability and the
   back-off weight respectively.  nextpos is the index of the node's first
   child in the next-level node/leaf array, and nelem is the number of
   children of the node.
*/
typedef struct {
    SLMWordID id;
    float prob;
    float alpha;
    int nextpos;
    unsigned int nelem;
} SLMNgramNode;


#ifdef NG_CACHE
/* N-gram search history: pairs a word ID with a tree node.  Only compiled
   when NG_CACHE is defined; this header #undefs NG_CACHE earlier, so the
   type is currently disabled. */
typedef struct {
    SLMWordID id;       /* word ID */
    SLMNgramNode *node; /* tree node associated with that ID */
} SLMNgramSearchHist;
#endif

/* N-gram structure: one language model.  Models can be chained through
   next_lm to form a combined model.
   NOTE: the layout depends on NG_CACHE (the `hist' member), so all code
   sharing this structure must agree on that setting. */
typedef struct SLM_Ngram {
    short type;                      /* type of N-gram */
    unsigned char first_id;        /* closed:1 open:0 */
    unsigned char first_class_id;  /* closed:1 open:0 */
    int ngram_len;                 /* unigram=1 bigram=2 trigram=3...*/
    int context_len;               /* length of the context words.  Usually
                                      it is ngram_len-1, but it varies
                                      in the case of a distant-bigram. */
    int n_unigram;                 /* # of words for a word n-gram, */
                                   /* # of classes for a class n-gram */
    char **vocab;                  /* array of vocabulary words */
    SLMHashTable *vocab_ht;        /* hash table of vocab */
    char delimiter;                /* delimiter between word and class */
    char **class_sym;              /* array of class symbols */
    SLMHashTable *class_ht;        /* hash table of classes */
    int n_word;                    /* # of words: used for class n-gram */
    float *c_uniprob;              /* array of P(w|c) for each w      */
    SLMWordID *class_id;           /* class number of each word  */
    SLMNgramNode **node;           /* N-gram tree nodes */
    SLMNgramLeaf *leaf;            /* N-gram tree leaves */
    /* the following members are used for a combined model */
    char *filename;                /* filename of the LM */
    float weight;                  /* weight of this model */
    struct SLM_Ngram *next_lm;     /* next LM */
    struct SLM_Ngram *delegate;    /* If this model is a mirror of another */
                                   /* model, this member points to that model */
#ifdef NG_CACHE
    SLMNgramSearchHist *hist;      /* previously searched n-gram */
#endif
    int socket;                    /* socket for a remote model */
} SLMNgram;

/* Back-off status filled in through the `status' argument of SLMGetBOProb. */
typedef struct {
    unsigned char len;  /* length of the evaluated n-gram */
    char hit[MAX_GRAM]; /* hit status; presumably one SLM_STAT_* value per
                           position -- TODO confirm */
    float ng_prob;      /* P(w|w') for word n-gram, P(c|c') for class n-gram */
    float ug_prob;      /* P(w|c) for class n-gram */
} SLMBOStatus;

/* Allocate an array of `len' word IDs (one tuple). */
#define SLMNewWordTuple(len) New_N(SLMWordID,len)
/* Allocate one array of (len)*(n) word IDs, i.e. room for `n' tuples of
   `len' IDs each. */
#define SLMNewWordTuple_N(n,len) New_N(SLMWordID,(len)*(n))
/* Allocate one n-gram count record, or `n' of them, for n-grams of length
   `len'.  NOTE(review): how word_id storage is laid out and who frees it is
   decided by the implementation -- confirm before freeing. */
SLMNgramCount *SLMNewNgramCount(int len);
SLMNgramCount *SLMNewNgramCount_N(int n,int len);

/* Duplicate a tuple of `len' word IDs. */
SLMWordTuple SLMDupTuple(SLMWordTuple t, int len);
/* Read one n-gram count of length ngram_len from `inf'.  ascii_in presumably
   selects text versus binary input; the role of `base' and the end-of-input
   result are not visible here -- TODO confirm. */
SLMNgramCount *SLMReadNgramCount(int ngram_len, FILEHANDLE inf,SLMNgramCount *base,int ascii_in);
/* Compare two n-gram counts of length ngram_len; presumably orders by word
   IDs with a strcmp-like result -- TODO confirm. */
int SLMCompareNgramCount(int ngram_len, SLMNgramCount *p1, SLMNgramCount *p2);
/* Write one n-gram count to `outf'; ascii_out presumably selects text versus
   binary output. */
void SLMPrintNgramCount(int ngram_len,SLMNgramCount *ngc, FILEHANDLE outf,int ascii_out);
/* Combine the `nlist' ID n-gram files named in `list' into `outf'.  The Mix
   variant additionally takes one weight per input file. */
void SLMMergeIDNgram(int ngram_len, char **list, int nlist, FILEHANDLE outf, int ascii_in, int ascii_out);
void SLMMixIDNgram(int ngram_len, char **list, double *weight, int nlist, FILEHANDLE outf, int ascii_in, int ascii_out);

/* Language model file formats; presumably the values accepted by the
   `format' argument of SLMReadLM/SLMAddLM -- TODO confirm. */
#define SLM_LM_ARPA      0
#define SLM_LM_BINGRAM   1
#define SLM_LM_BINARY    2

/* Load a language model from `filename', and release one. */
SLMNgram *SLMReadLM(char *filename,int format,int verbosity);
void SLMFreeLM(SLMNgram *ng);
/* Add another model, with a weight, to `ng'.
   NOTE: the parameter list depends on USE_WHEN_APPEARED, so the library and
   all of its callers must be compiled with the same setting. */
#ifdef USE_WHEN_APPEARED
void SLMAddLM(SLMNgram *ng, int len, double weight, int use_when_appeared, char *filename,int format,int verbosity);
#else
void SLMAddLM(SLMNgram *ng, int len, double weight, char *filename,int format,int verbosity);
#endif
/* Map a word string to its ID.  The result for an unknown word is not
   visible here (possibly SLM_NONWORD) -- TODO confirm. */
SLMWordID SLMWord2ID(SLMNgram *ng, char *word);
/* Back-off probability for the `len' word IDs in idarray; details of the
   evaluation are reported through `status'.  NOTE(review): whether `status'
   may be NULL and whether the result is a probability or a log value is not
   visible here -- confirm. */
double SLMGetBOProb(SLMNgram *ng, int len, SLMWordID *idarray, SLMBOStatus *status);
/* Render `status' as text into `buf'.  No buffer size is passed, so the
   caller must supply a buffer large enough for the implementation. */
void SLMBOStatusString(SLMBOStatus *status, char *buf);
/* Accessors for properties of a loaded model. */
int SLMVocabSize(SLMNgram *ng);
int SLMContextLength(SLMNgram *ng);
int SLMNgramLength(SLMNgram *ng);
/* Map a word ID back to its string. */
const char *SLMID2Word(SLMNgram *ng, SLMWordID id);
/* Conversions between an int4 representation and double; the encoding is
   defined by the implementation -- TODO confirm. */
double SLMl2d(int4 x);
int4 SLMd2l(double x);

/* Back-off status codes; presumably the values stored in SLMBOStatus.hit
   -- TODO confirm. */
#define SLM_STAT_HIT           3
#define SLM_STAT_BO_WITH_ALPHA 1
#define SLM_STAT_BO            0

/* Header sizes of the binary LM format, by version.
   NOTE(review): V2 is 510 while V1 and the unversioned size are 512
   -- confirm that the difference is intentional. */
#define SLM_BINLM_HEADER_SIZE 512
#define SLM_BINLM_HEADER_SIZE_V1 512
#define SLM_BINLM_HEADER_SIZE_V2 510

/* Identification strings of the binary LM format, by version. */
#define SLM_BINLM_HEADER_MSG_V1 "Palmkit binary format v.1"
#define SLM_BINLM_HEADER_MSG_V2 "Palmkit binary format v.2"
#endif

/* Read context cues for model `ng' from `file' and return them as a hash
   table.  NOTE(review): the file format, the key/value types of the table
   and the result on failure are not visible in this header -- confirm.
   This declaration sits after the SLM_NGRAM_H guard has closed, so it is
   protected only by the outer SLM_H guard. */
SLMHashTable *readContextCues(SLMNgram *ng, char *file);


#ifdef __cplusplus
}
#endif
#endif /* SLM_H */
