1 #include <fst/fstlib.h> 14 typedef unordered_map<int, SimpleResult>
RMAP;
19 int FLAGS_threads,
int FLAGS_beam,
20 int FLAGS_kmax,
int FLAGS_nbest,
21 bool FLAGS_reverse,
string FLAGS_gpdelim,
22 string FLAGS_gdelim,
string FLAGS_skip,
23 double FLAGS_thresh,
string FLAGS_gsep) {
24 int csize = corpus.size ();
26 #pragma omp parallel for 27 for (
int x = 0; x < FLAGS_threads; x++) {
30 int start = x * (csize / FLAGS_threads);
31 int end = (x == FLAGS_threads - 1) ? csize \
32 : start + (csize / FLAGS_threads);
33 for (
int i = start; i < end; i++) {
36 if (FLAGS_reverse ==
true)
37 reverse (graphemes.begin (), graphemes.end ());
39 graphemes.push_back (
"</s>");
41 decoder.Decode (graphemes, FLAGS_beam, FLAGS_kmax,
42 FLAGS_nbest, FLAGS_thresh, FLAGS_gpdelim,
43 FLAGS_gdelim, FLAGS_skip);
48 for (
int i = 0; i < csize; i++) {
52 cout << result.
word <<
"\t" << result.
scores [k] <<
"\t" 59 int FLAGS_kmax,
int FLAGS_nbest,
bool FLAGS_reverse,
60 string FLAGS_gpdelim,
string FLAGS_gdelim,
61 string FLAGS_skip,
double FLAGS_thresh,
65 for (
int i = 0; i < corpus.size (); i++) {
68 if (FLAGS_reverse ==
true)
69 reverse (graphemes.begin (), graphemes.end ());
71 graphemes.push_back (
"</s>");
74 decoder.Decode (graphemes, FLAGS_beam, FLAGS_kmax,
75 FLAGS_nbest, FLAGS_thresh, FLAGS_gpdelim,
76 FLAGS_gdelim, FLAGS_skip);
79 cout << result.
word <<
"\t" << result.
scores [k] <<
"\t" 85 int FLAGS_beam,
int FLAGS_kmax,
int FLAGS_nbest,
86 bool FLAGS_reverse,
string FLAGS_gpdelim,
87 string FLAGS_gdelim,
string FLAGS_skip,
88 double FLAGS_thresh,
string FLAGS_gsep) {
92 if (FLAGS_reverse ==
true)
93 reverse (graphemes.begin (), graphemes.end ());
94 graphemes.push_back (
"</s>");
98 decoder.Decode (graphemes, FLAGS_beam, FLAGS_kmax,
99 FLAGS_nbest, FLAGS_thresh, FLAGS_gpdelim,
100 FLAGS_gdelim, FLAGS_skip);
103 cout << result.
word <<
"\t" << result.
scores [k] <<
"\t" 108 DEFINE_string (wordlist,
"",
"Input word list to evaluate.");
// Command-line flag definitions. Each DEFINE_<type>(name, default, help)
// is referenced elsewhere in this file as FLAGS_<name> (e.g. main() reads
// FLAGS_threads, FLAGS_nbest, FLAGS_reverse and forwards them to the
// evaluation helpers).
// NOTE(review): the leading integers on some lines (110, 111, ...) look like
// line numbers left over from a source-listing export rather than C++
// tokens -- confirm against the upstream file before compiling.

// Multigram delimiter, default "|"; forwarded to decoder.Decode as
// FLAGS_gdelim.
110 DEFINE_string (gdelim,
"|",
"The default multigram delimiter.");
// Grapheme/phoneme delimiter, default "}"; forwarded to decoder.Decode as
// FLAGS_gpdelim.
111 DEFINE_string (gpdelim,
"}",
"The default grapheme / phoneme delimiter.");
// Grapheme separator for input words, default empty string; main() passes
// it as the last argument to the evaluation helpers.
112 DEFINE_string (gsep,
"",
"The default grapheme delimiter for testing. Typically ''.");
// Upper bound on hypotheses returned per word, default 1.
114 DEFINE_int32 (nbest, 1,
"Maximum number of hypotheses to return.");
// Thread count, default 1; main() hands it to omp_set_num_threads and
// branches on FLAGS_threads > 1 when a wordlist is used.
115 DEFINE_int32 (threads, 1,
"Number of parallel threads (OpenMP).");
// Per-state queue size limit, default 20; forwarded to decoder.Decode.
116 DEFINE_int32 (kmax, 20,
"State-local maximum queue size.");
// N-best pruning threshold, default 0.0; forwarded to decoder.Decode.
118 DEFINE_double (thresh, 0.0,
"The n-best pruning threshold. Relative to 1-best.");
// When true, the evaluation helpers reverse the grapheme sequence before
// appending "</s>" and decoding. Default false.
119 DEFINE_bool (reverse,
false,
"Reverse the input word before decoding.");
121 int main (
int argc,
char* argv []) {
122 string usage =
"phonetisaurus-g2prnn --rnnlm=test.rnnlm "\
123 "--wordlist=test.words --nbest=5\n\n Usage: ";
124 set_new_handler (FailedNewHandler);
127 if (FLAGS_rnnlm.compare (
"") == 0) {
128 cout <<
"--rnnlm model is required!" << endl;
131 std::ifstream rnnlm_ifp (FLAGS_rnnlm);
132 if (!rnnlm_ifp.good ()) {
133 cout <<
"Faile to open --rnnlm file '" 134 << FLAGS_rnnlm <<
"'" << endl;
139 bool use_wordlist =
false;
140 if (FLAGS_wordlist.compare (
"") != 0) {
141 std::ifstream wordlist_ifp (FLAGS_wordlist);
142 if (!wordlist_ifp.good ()) {
143 cout <<
"Failed to open --wordlist file '" 144 << FLAGS_wordlist <<
"'" << endl;
151 if (FLAGS_wordlist.compare (
"") == 0 && FLAGS_word.compare (
"") == 0) {
152 cout <<
"Either --wordlist or --word must be set!" << endl;
156 omp_set_num_threads (FLAGS_threads);
158 vector<string> corpus;
168 if (use_wordlist ==
true) {
169 if (FLAGS_threads > 1) {
171 FLAGS_beam, FLAGS_kmax, FLAGS_nbest,
172 FLAGS_reverse, FLAGS_gpdelim,
173 FLAGS_gdelim, FLAGS_skip,
174 FLAGS_thresh, FLAGS_gsep);
177 FLAGS_kmax, FLAGS_nbest, FLAGS_reverse,
178 FLAGS_gpdelim, FLAGS_gdelim, FLAGS_skip,
179 FLAGS_thresh, FLAGS_gsep);
183 FLAGS_nbest, FLAGS_reverse, FLAGS_gpdelim,
184 FLAGS_gdelim, FLAGS_skip, FLAGS_thresh, FLAGS_gsep);
DEFINE_int32(nbest, 1,"Maximum number of hypotheses to return.")
DEFINE_string(rnnlm,"","The input RnnLM model.")
void ThreadedEvaluateWordlist(vector< string > &corpus, RMAP &rmap, LegacyRnnLMHash &h, Decodable &s, int FLAGS_threads, int FLAGS_beam, int FLAGS_kmax, int FLAGS_nbest, bool FLAGS_reverse, string FLAGS_gpdelim, string FLAGS_gdelim, string FLAGS_skip, double FLAGS_thresh, string FLAGS_gsep)
unordered_map< int, SimpleResult > RMAP
void PhonetisaurusSetFlags(const char *usage, int *argc, char ***argv, bool remove_flags)
void EvaluateWordlist(vector< string > &corpus, LegacyRnnLMHash &h, Decodable &s, int FLAGS_beam, int FLAGS_kmax, int FLAGS_nbest, bool FLAGS_reverse, string FLAGS_gpdelim, string FLAGS_gdelim, string FLAGS_skip, double FLAGS_thresh, string FLAGS_gsep)
void EvaluateWord(string word, LegacyRnnLMHash &h, Decodable &s, int FLAGS_beam, int FLAGS_kmax, int FLAGS_nbest, bool FLAGS_reverse, string FLAGS_gpdelim, string FLAGS_gdelim, string FLAGS_skip, double FLAGS_thresh, string FLAGS_gsep)
unordered_map< int, vector< PathData > > RMAP
DEFINE_double(thresh, 0.0,"The n-best pruning threshold. Relative to 1-best.")
void LoadWordList(const std::string &filename, std::vector< std::string > *corpus)
LegacyRnnLMDecodable< Token, LegacyRnnLMHash > Decodable
int main(int argc, char *argv[])
vector< string > tokenize_utf8_string(string *utf8_string, string *delimiter)
Decodable CopyLegacyRnnLM(Hasher &h, int max_order=5)
Hasher CopyVocabHash(const string g_delim, const string gp_delim)
DEFINE_bool(reverse, false,"Reverse the input word before decoding.")
vector< string > pronunciations