|
Phonetisaurus
1.0
FST-based Grapheme-to-Phoneme conversion
|
#include <include/M2MFstAligner.h>
#include <include/LatticePruner.h>
#include <include/util.h>
#include <include/PhonetisaurusRex.h>
Go to the source code of this file.
Functions | |
| int | load_input_file (M2MFstAligner *aligner, string input_file, string delim, string s1_char_delim, string s2_char_delim, bool init=false) |
| void | write_alignments (M2MFstAligner *aligner, string ofile_name, StdArc::Weight threshold, int nbest, bool fb, bool penalize) |
| void | compileNBestFarArchive (M2MFstAligner *aligner, vector< VectorFst< LogArc > > *fsts, string far_name, StdArc::Weight threshold, int nbest, bool fb, bool penalize) |
| DEFINE_bool (seq1_del, true,"Allow deletions in sequence one.") | |
| DEFINE_bool (seq2_del, true,"Allow deletions in sequence two.") | |
| DEFINE_bool (penalize, true,"Penalize scores.") | |
| DEFINE_bool (penalize_em, false,"Penalize links during EM training.") | |
| DEFINE_bool (load_model, false,"Load a pre-trained model for use.") | |
| DEFINE_bool (lattice, false,"Write out the alignment lattices as an fst archive (.far).") | |
| DEFINE_bool (restrict, true,"Restrict links to M-1, 1-N during initialization.") | |
| DEFINE_bool (mbr, false,"Use the LMBR decoder (not yet implemented).") | |
| DEFINE_bool (fb, false,"Use forward-backward pruning for the alignment lattices.") | |
| DEFINE_int32 (seq1_max, 2,"Maximum subsequence length for sequence one.") | |
| DEFINE_int32 (seq2_max, 2,"Maximum subsequence length for sequence two.") | |
| DEFINE_int32 (iter, 11,"Maximum number of EM iterations to perform.") | |
| DEFINE_int32 (nbest, 1,"Output the N-best alignments given the model.") | |
| DEFINE_string (input,"","Two-column input file to align.") | |
| DEFINE_string (seq1_sep,"|","Multi-token separator for input tokens.") | |
| DEFINE_string (seq2_sep,"|","Multi-token separator for output tokens.") | |
| DEFINE_string (s1s2_sep,"}","Token used to separate input-output subsequences in the g2p model.") | |
| DEFINE_string (delim,"\t","Delimiter separating entry one and entry two in the input file.") | |
| DEFINE_string (eps,"<eps>","Epsilon symbol.") | |
| DEFINE_string (skip,"_","Skip token used to represent null transitions. Distinct from epsilon.") | |
| DEFINE_string (ofile,"","Output file to write the aligned dictionary to.") | |
| DEFINE_string (s1_char_delim,"","Sequence one input delimeter.") | |
| DEFINE_string (s2_char_delim," ","Sequence two input delimeter.") | |
| DEFINE_string (model_file,"","FST-format alignment model to load.") | |
| DEFINE_string (write_model,"","Write out the alignment model in OpenFst format to filename.") | |
| DEFINE_double (thresh, 1e-10,"Delta threshold for EM training termination.") | |
| DEFINE_double (pthresh,-99,"Pruning threshold. Use to prune unlikely N-best candidates when using multiple alignments.") | |
| int | main (int argc, char *argv[]) |
| void compileNBestFarArchive | ( | M2MFstAligner * | aligner, |
| vector< VectorFst< LogArc > > * | fsts, | ||
| string | far_name, | ||
| StdArc::Weight | threshold, | ||
| int | nbest, | ||
| bool | fb, | ||
| bool | penalize | ||
| ) |
Definition at line 146 of file phonetisaurus-align.cc.
| DEFINE_bool | ( | seq1_del | , |
| true | , | ||
| "Allow deletions in sequence one." | |||
| ) |
| DEFINE_bool | ( | seq2_del | , |
| true | , | ||
| "Allow deletions in sequence two." | |||
| ) |
| DEFINE_bool | ( | penalize | , |
| true | , | ||
| "Penalize scores." | |||
| ) |
| DEFINE_bool | ( | penalize_em | , |
| false | , | ||
| "Penalize links during EM training." | |||
| ) |
| DEFINE_bool | ( | load_model | , |
| false | , | ||
| "Load a pre-trained model for use." | |||
| ) |
| DEFINE_bool | ( | lattice | , |
| false | , | ||
| "Write out the alignment lattices as an fst archive (.far)." | |||
| ) |
| DEFINE_bool | ( | restrict | , |
| true | , | ||
| "Restrict links to M-1, 1-N during initialization." | |||
| ) |
| DEFINE_bool | ( | mbr | , |
| false | , | ||
| "Use the LMBR decoder (not yet implemented)." | |||
| ) |
| DEFINE_bool | ( | fb | , |
| false | , | ||
| "Use forward-backward pruning for the alignment lattices." | |||
| ) |
| DEFINE_double | ( | thresh | , |
| 1e-10 | , | ||
| "Delta threshold for EM training termination." | |||
| ) |
| DEFINE_double | ( | pthresh | , |
| -99 | , | ||
| "Pruning threshold. Use to prune unlikely N-best candidates when using multiple alignments." | |||
| ) |
| DEFINE_int32 | ( | seq1_max | , |
| 2 | , | ||
| "Maximum subsequence length for sequence one." | |||
| ) |
| DEFINE_int32 | ( | seq2_max | , |
| 2 | , | ||
| "Maximum subsequence length for sequence two." | |||
| ) |
| DEFINE_int32 | ( | iter | , |
| 11 | , | ||
| "Maximum number of EM iterations to perform." | |||
| ) |
| DEFINE_int32 | ( | nbest | , |
| 1 | , | ||
| "Output the N-best alignments given the model." | |||
| ) |
| DEFINE_string | ( | input | , |
| "" | , | ||
| "Two-column input file to align." | |||
| ) |
| DEFINE_string | ( | seq1_sep | , |
| "|" | , | ||
| "Multi-token separator for input tokens." | |||
| ) |
| DEFINE_string | ( | seq2_sep | , |
| "|" | , | ||
| "Multi-token separator for output tokens." | |||
| ) |
| DEFINE_string | ( | s1s2_sep | , |
| "}" | , | ||
| "Token used to separate input-output subsequences in the g2p model." | |||
| ) |
| DEFINE_string | ( | delim | , |
| "\t" | , | ||
| "Delimiter separating entry one and entry two in the input file." | |||
| ) |
| DEFINE_string | ( | eps | , |
| "<eps>" | , | ||
| "Epsilon symbol." | |||
| ) |
| DEFINE_string | ( | skip | , |
| "_" | , | ||
| "Skip token used to represent null transitions. Distinct from epsilon." | |||
| ) |
| DEFINE_string | ( | ofile | , |
| "" | , | ||
| "Output file to write the aligned dictionary to." | |||
| ) |
| DEFINE_string | ( | s1_char_delim | , |
| "" | , | ||
| "Sequence one input delimeter." | |||
| ) |
| DEFINE_string | ( | s2_char_delim | , |
| " " | , | ||
| "Sequence two input delimeter." | |||
| ) |
| DEFINE_string | ( | model_file | , |
| "" | , | ||
| "FST-format alignment model to load." | |||
| ) |
| DEFINE_string | ( | write_model | , |
| "" | , | ||
| "Write out the alignment model in OpenFst format to filename." | |||
| ) |
| int load_input_file | ( | M2MFstAligner * | aligner, |
| string | input_file, | ||
| string | delim, | ||
| string | s1_char_delim, | ||
| string | s2_char_delim, | ||
| bool | init = false | ||
| ) |
Definition at line 39 of file phonetisaurus-align.cc.
| int main | ( | int | argc, |
| char * | argv[] | ||
| ) |
Definition at line 263 of file phonetisaurus-align.cc.
| void write_alignments | ( | M2MFstAligner * | aligner, |
| string | ofile_name, | ||
| StdArc::Weight | threshold, | ||
| int | nbest, | ||
| bool | fb, | ||
| bool | penalize | ||
| ) |
Definition at line 75 of file phonetisaurus-align.cc.
1.8.11