32 #include <fst/fstlib.h> 40 typedef unordered_map<int, vector<PathData> >
RMAP;  // Alias for a hash map from int keys to vector<PathData> values.
// NOTE(review): presumably the type of the `rmap` that the threaded evaluator
// indexes with the corpus position (`rmap [i]`) - confirm against the full file.
// NOTE(review): the leading "32" / "40" tokens look like listing line numbers
// left over from an export, not code - confirm and strip.
43 const SymbolTable* osyms) {
44 for (
int i = 0; i < results.size (); i++) {
45 cout << FLAGS_word <<
"\t";
46 cout << results [i].PathWeight <<
"\t";
47 for (
int j = 0; j < results [i].Uniques.size (); j++) {
48 cout << osyms->Find (results [i].Uniques [j]);
49 if (j < results [i].Uniques.size () - 1)
57 int FLAGS_beam,
int FLAGS_nbest,
bool FLAGS_reverse,
58 string FLAGS_skip,
double FLAGS_thresh,
string FLAGS_gsep,
59 bool FLAGS_write_fsts) {
60 for (
int i = 0; i < corpus.size (); i++) {
61 vector<PathData> results = decoder.
Phoneticize (corpus [i], FLAGS_nbest,
62 FLAGS_beam, FLAGS_thresh,
69 int FLAGS_beam,
int FLAGS_nbest,
70 bool FLAGS_reverse,
string FLAGS_skip,
71 double FLAGS_thresh,
string FLAGS_gsep,
72 bool FLAGS_write_fsts,
int FLAGS_threads) {
73 int csize = corpus.size ();
77 #pragma omp parallel for 78 for (
int x = 0; x < FLAGS_threads; x++) {
82 int start = x * (csize / FLAGS_threads);
83 int end = (x == FLAGS_threads - 1) ? csize \
84 : start + (csize / FLAGS_threads);
85 for (
int i = start; i < end; i++) {
86 vector<PathData> results = decoder.
Phoneticize (corpus [i], FLAGS_nbest,
87 FLAGS_beam, FLAGS_thresh,
93 for (
int i = 0; i < csize; i++) {
94 const vector<PathData> results = rmap [i];
// Command-line flag definitions. main reads each value back as FLAGS_<name>
// (FLAGS_wordlist, FLAGS_nbest, FLAGS_threads, FLAGS_thresh, FLAGS_write_fsts,
// FLAGS_reverse).
// NOTE(review): the leading "101", "104", ... tokens on these lines look like
// listing line numbers left over from an export, not code - confirm and strip.
// NOTE(review): main also reads FLAGS_model, FLAGS_word, FLAGS_beam, FLAGS_skip
// and FLAGS_gsep; their definitions are not part of this run of lines.
// NOTE(review): presumably these are the OpenFst flag macros (the file includes
// <fst/fstlib.h>) - confirm.
//
// --wordlist: path of a word list file, empty by default. When non-empty, main
// tries to open it with std::ifstream and reports a failure to open; main also
// rejects the case where both --wordlist and --word are empty.
101 DEFINE_string (wordlist,
"",
"Input wordlist to phoneticize");
// --nbest: number of hypotheses to output, default 1. main forwards it as the
// second argument of decoder.Phoneticize.
104 DEFINE_int32 (nbest, 1,
"N-best hypotheses to output.");
// --threads: thread count, default 1. main hands it to omp_set_num_threads and,
// when it is greater than 1, prints a notice that the OpenMP parallel output is
// non-deterministic.
106 DEFINE_int32 (threads, 1,
"Number of parallel threads.");
// --thresh: N-best comparison threshold, default 99.0. main forwards it to
// decoder.Phoneticize after the beam argument.
// NOTE(review): the flag is a double while the listed Phoneticize signature
// takes `float threshold` - confirm the narrowing is intended.
107 DEFINE_double (thresh, 99.0,
"N-best comparison threshold.");
// --write_fsts: debugging switch, default false. main passes it as the last
// argument of a wordlist evaluation call (callee name not visible in this view).
108 DEFINE_bool (write_fsts,
false,
"Write the output FSTs for debugging.");
// --reverse: default false. main passes it in the same evaluation call, ahead
// of FLAGS_skip and FLAGS_thresh; how the callee uses it is not visible here.
109 DEFINE_bool (reverse,
false,
"Reverse input word.");
111 int main (
int argc,
char* argv []) {
112 string usage =
"phonetisaurus-g2pfst - joint N-gram decoder.\n\n Usage: ";
113 set_new_handler (FailedNewHandler);
116 if (FLAGS_model.compare (
"") == 0) {
117 cerr <<
"You must supply an FST model to --model" << endl;
120 std::ifstream model_ifp (FLAGS_model);
121 if (!model_ifp.good ()) {
122 cout <<
"Failed to open --model file '" 123 << FLAGS_model <<
"'" << endl;
129 bool use_wordlist =
false;
130 if (FLAGS_wordlist.compare (
"") != 0) {
131 std::ifstream wordlist_ifp (FLAGS_wordlist);
132 if (!wordlist_ifp.good ()) {
133 cout <<
"Failed to open --wordlist file '" 134 << FLAGS_wordlist <<
"'" << endl;
141 if (FLAGS_wordlist.compare (
"") == 0 && FLAGS_word.compare (
"") == 0) {
142 cout <<
"Either --wordlist or --word must be set!" << endl;
146 omp_set_num_threads (FLAGS_threads);
150 if (use_wordlist ==
true) {
151 vector<string> corpus;
154 if (FLAGS_threads > 1) {
155 cout <<
"TODO: Current OpenMP parallel output is non-deterministic." << endl;
165 FLAGS_reverse, FLAGS_skip, FLAGS_thresh,
166 FLAGS_gsep, FLAGS_write_fsts);
170 vector<PathData> results = decoder.
Phoneticize (FLAGS_word, FLAGS_nbest,
171 FLAGS_beam, FLAGS_thresh,
void EvaluateWordlist(PhonetisaurusScript &decoder, vector< string > corpus, int FLAGS_beam, int FLAGS_nbest, bool FLAGS_reverse, string FLAGS_skip, double FLAGS_thresh, string FLAGS_gsep, bool FLAGS_write_fsts)
vector< PathData > Phoneticize(const string &word, int nbest=1, int beam=10000, float threshold=99, bool write_fsts=false)
void PrintPathData(const vector< PathData > &results, string FLAGS_word, const SymbolTable *osyms)
void ThreadedEvalaateWordlist(string FLAGS_model, vector< string > corpus, int FLAGS_beam, int FLAGS_nbest, bool FLAGS_reverse, string FLAGS_skip, double FLAGS_thresh, string FLAGS_gsep, bool FLAGS_write_fsts, int FLAGS_threads)
const SymbolTable * osyms_
A wrapper class encapsulating the FST G2P decoder.
void PhonetisaurusSetFlags(const char *usage, int *argc, char ***argv, bool remove_flags)
DEFINE_string(model,"","Input FST G2P model.")
DEFINE_int32(nbest, 1,"N-best hypotheses to output.")
int main(int argc, char *argv[])
unordered_map< int, vector< PathData > > RMAP
DEFINE_double(thresh, 99.0,"N-best comparison threshold.")
void LoadWordList(const std::string &filename, std::vector< std::string > *corpus)
DEFINE_bool(write_fsts, false,"Write the output FSTs for debugging.")