Phonetisaurus
1.0
FST-based Grapheme-to-Phoneme conversion
|
#include <include/M2MFstAligner.h>
#include <include/LatticePruner.h>
#include <include/util.h>
#include <include/PhonetisaurusRex.h>
Go to the source code of this file.
Functions | |
int | load_input_file (M2MFstAligner *aligner, string input_file, string delim, string s1_char_delim, string s2_char_delim, bool init=false) |
void | write_alignments (M2MFstAligner *aligner, string ofile_name, StdArc::Weight threshold, int nbest, bool fb, bool penalize) |
void | compileNBestFarArchive (M2MFstAligner *aligner, vector< VectorFst< LogArc > > *fsts, string far_name, StdArc::Weight threshold, int nbest, bool fb, bool penalize) |
DEFINE_bool (seq1_del, true,"Allow deletions in sequence one.") | |
DEFINE_bool (seq2_del, true,"Allow deletions in sequence two.") | |
DEFINE_bool (penalize, true,"Penalize scores.") | |
DEFINE_bool (penalize_em, false,"Penalize links during EM training.") | |
DEFINE_bool (load_model, false,"Load a pre-trained model for use.") | |
DEFINE_bool (lattice, false,"Write out the alignment lattices as an fst archive (.far).") | |
DEFINE_bool (restrict, true,"Restrict links to M-1, 1-N during initialization.") | |
DEFINE_bool (mbr, false,"Use the LMBR decoder (not yet implemented).") | |
DEFINE_bool (fb, false,"Use forward-backward pruning for the alignment lattices.") | |
DEFINE_int32 (seq1_max, 2,"Maximum subsequence length for sequence one.") | |
DEFINE_int32 (seq2_max, 2,"Maximum subsequence length for sequence two.") | |
DEFINE_int32 (iter, 11,"Maximum number of EM iterations to perform.") | |
DEFINE_int32 (nbest, 1,"Output the N-best alignments given the model.") | |
DEFINE_string (input,"","Two-column input file to align.") | |
DEFINE_string (seq1_sep,"|","Multi-token separator for input tokens.") | |
DEFINE_string (seq2_sep,"|","Multi-token separator for output tokens.") | |
DEFINE_string (s1s2_sep,"}","Token used to separate input-output subsequences in the g2p model.") | |
DEFINE_string (delim,"\t","Delimiter separating entry one and entry two in the input file.") | |
DEFINE_string (eps,"<eps>","Epsilon symbol.") | |
DEFINE_string (skip,"_","Skip token used to represent null transitions. Distinct from epsilon.") | |
DEFINE_string (ofile,"","Output file to write the aligned dictionary to.") | |
DEFINE_string (s1_char_delim,"","Sequence one input delimeter.") | |
DEFINE_string (s2_char_delim," ","Sequence two input delimeter.") | |
DEFINE_string (model_file,"","FST-format alignment model to load.") | |
DEFINE_string (write_model,"","Write out the alignment model in OpenFst format to filename.") | |
DEFINE_double (thresh, 1e-10,"Delta threshold for EM training termination.") | |
DEFINE_double (pthresh,-99,"Pruning threshold. Use to prune unlikely N-best candidates when using multiple alignments.") | |
int | main (int argc, char *argv[]) |
void compileNBestFarArchive | ( | M2MFstAligner * | aligner, |
vector< VectorFst< LogArc > > * | fsts, | ||
string | far_name, | ||
StdArc::Weight | threshold, | ||
int | nbest, | ||
bool | fb, | ||
bool | penalize | ||
) |
Definition at line 146 of file phonetisaurus-align.cc.
DEFINE_bool | ( | seq1_del | , |
true | , | ||
"Allow deletions in sequence one." | |||
) |
DEFINE_bool | ( | seq2_del | , |
true | , | ||
"Allow deletions in sequence two." | |||
) |
DEFINE_bool | ( | penalize | , |
true | , | ||
"Penalize scores." | |||
) |
DEFINE_bool | ( | penalize_em | , |
false | , | ||
"Penalize links during EM training." | |||
) |
DEFINE_bool | ( | load_model | , |
false | , | ||
"Load a pre-trained model for use." | |||
) |
DEFINE_bool | ( | lattice | , |
false | , | ||
"Write out the alignment lattices as an fst archive (.far)." | |||
) |
DEFINE_bool | ( | restrict | , |
true | , | ||
"Restrict links to M-1, 1-N during initialization." | |||
) |
DEFINE_bool | ( | mbr | , |
false | , | ||
"Use the LMBR decoder (not yet implemented)." | |||
) |
DEFINE_bool | ( | fb | , |
false | , | ||
"Use forward-backward pruning for the alignment lattices." | |||
) |
DEFINE_double | ( | thresh | , |
1e-10 | , | ||
"Delta threshold for EM training termination." | |||
) |
DEFINE_double | ( | pthresh | , |
-99 | , | ||
"Pruning threshold. Use to prune unlikely N-best candidates when using multiple alignments." | |||
) |
DEFINE_int32 | ( | seq1_max | , |
2 | , | ||
"Maximum subsequence length for sequence one." | |||
) |
DEFINE_int32 | ( | seq2_max | , |
2 | , | ||
"Maximum subsequence length for sequence two." | |||
) |
DEFINE_int32 | ( | iter | , |
11 | , | ||
"Maximum number of EM iterations to perform." | |||
) |
DEFINE_int32 | ( | nbest | , |
1 | , | ||
"Output the N-best alignments given the model." | |||
) |
DEFINE_string | ( | input | , |
"" | , | ||
"Two-column input file to align." | |||
) |
DEFINE_string | ( | seq1_sep | , |
"|" | , | ||
"Multi-token separator for input tokens." | |||
) |
DEFINE_string | ( | seq2_sep | , |
"|" | , | ||
"Multi-token separator for output tokens." | |||
) |
DEFINE_string | ( | s1s2_sep | , |
"}" | , | ||
"Token used to separate input-output subsequences in the g2p model." | |||
) |
DEFINE_string | ( | delim | , |
"\t" | , | ||
"Delimiter separating entry one and entry two in the input file." | |||
) |
DEFINE_string | ( | eps | , |
"<eps>" | , | ||
"Epsilon symbol." | |||
) |
DEFINE_string | ( | skip | , |
"_" | , | ||
"Skip token used to represent null transitions. Distinct from epsilon." | |||
) |
DEFINE_string | ( | ofile | , |
"" | , | ||
"Output file to write the aligned dictionary to." | |||
) |
DEFINE_string | ( | s1_char_delim | , |
"" | , | ||
"Sequence one input delimeter." | |||
) |
DEFINE_string | ( | s2_char_delim | , |
" " | , | ||
"Sequence two input delimeter." | |||
) |
DEFINE_string | ( | model_file | , |
"" | , | ||
"FST-format alignment model to load." | |||
) |
DEFINE_string | ( | write_model | , |
"" | , | ||
"Write out the alignment model in OpenFst format to filename." | |||
) |
int load_input_file | ( | M2MFstAligner * | aligner, |
string | input_file, | ||
string | delim, | ||
string | s1_char_delim, | ||
string | s2_char_delim, | ||
bool | init = false | ||
) |
Definition at line 39 of file phonetisaurus-align.cc.
int main | ( | int | argc, |
char * | argv[] | ||
) |
Definition at line 263 of file phonetisaurus-align.cc.
void write_alignments | ( | M2MFstAligner * | aligner, |
string | ofile_name, | ||
StdArc::Weight | threshold, | ||
int | nbest, | ||
bool | fb, | ||
bool | penalize | ||
) |
Definition at line 75 of file phonetisaurus-align.cc.