// vec2str: takes a vector of strings `vec` and a string `sep` (both by value)
// and returns a string. Only the signature and the loop header are visible
// here; presumably the elements are joined with `sep` between them -- TODO
// confirm against the full body.
34 string vec2str( vector<string> vec,
string sep ){
// Visits every index of vec in order, 0 .. vec.size() - 1.
36 for(
size_t i = 0; i < vec.size(); ++i){
45 std::stringstream ostring;
// Body of the UTF-8 tokenizer: walks the bytes of *utf8_string and collects
// pieces into string_vec, with *delimiter deciding where tokens break.
// The C-style cast drops the const that c_str() returns.
62 char* str = (
char*) utf8_string->c_str ();
// One past the terminating NUL (strlen + 1), so the range [str, end)
// includes the terminator itself.
65 char* end = str + strlen (str) + 1;
66 vector<string> string_vec;
// With a non-empty delimiter, seed the result with one empty token so that
// later pieces have something to be appended to.
67 if (delimiter->compare (
"") != 0)
68 string_vec.push_back (
"");
// Decode the next code point from str_i, bounded by end. Presumably
// utf8::next also advances str_i past it (utfcpp-style API) -- TODO confirm.
72 utf8::uint32_t code = utf8::next (str_i, end);
// Byte offsets into the original string, recovered from how much of it
// remains at str_j and at str_i; len is the byte distance between the two.
// NOTE(review): this int `end` reuses the name of the char* `end` above; it
// looks like it shadows it inside the loop body -- confirm scoping.
75 int start = strlen (str) - strlen (str_j);
76 int end = strlen (str) - strlen (str_i);
77 int len = end - start;
// Empty delimiter: the current piece is pushed as a token of its own.
79 if (delimiter->compare (
"") == 0) {
80 string_vec.push_back (utf8_string->substr (start,len));
// A piece equal to the delimiter opens a fresh empty token.
82 if (delimiter->compare (utf8_string->substr (start, len)) == 0)
83 string_vec.push_back (
"");
// Append the piece to the most recent token. Presumably this is the
// alternative to the delimiter match above -- verify the branch structure.
85 string_vec [string_vec.size () - 1] += utf8_string->substr (start, len);
// Tail of a do/while: keep going while str_i is still before end.
87 }
while (str_i < end);
// Filters `tokens` against the symbol table: a token is copied into `entry`
// when syms->Find() returns something other than -1 for it.
97 for (
unsigned int i=0; i<tokens.size (); i++) {
98 if (syms->Find (tokens.at (i)) != -1) {
99 entry.push_back (tokens.at (i));
// Warning written to cerr naming the token. Presumably this is the branch
// taken when Find() returned -1; what, if anything, is pushed to `entry` in
// that case cannot be seen here -- TODO confirm.
101 cerr <<
"Symbol: '" << tokens.at (i)
102 <<
"' not found in input symbols table." << endl
103 <<
"Mapping to null..." << endl;
// Tail of the parameter list: `syms` is a pointer to a const SymbolTable,
// used below only through Find().
111 const SymbolTable* syms) {
// Translate each token to the int returned by syms->Find() and append that
// int to `entry`.
114 for (
unsigned int i=0; i<tokens.size(); i++) {
115 int label = syms->Find (tokens[i]);
// Warning written to cerr naming the token. Presumably guarded by a check
// that the lookup failed -- the guard is not visible here, TODO confirm.
117 cerr <<
"Symbol: '" << tokens[i]
118 <<
"' not found in input symbols table." << endl
119 <<
"Mapping to null..." << endl;
// The label returned by Find() is appended as-is.
121 entry.push_back (label);
131 host_get_clock_service(mach_host_self(), REALTIME_CLOCK, &cclock);
132 clock_get_time(cclock, &mts);
134 timespec ts = {mts.tv_sec, mts.tv_nsec};
140 clock_gettime(CLOCK_REALTIME, &ts);
// diff: takes two timespec values by value and returns a timespec. The
// visible assignments fill `temp` with end - start, field by field.
145 timespec
diff(timespec start, timespec end){
// Nanosecond field would go negative: borrow one second, i.e. take one off
// the seconds difference and add 1000000000 ns to the nanosecond difference.
147 if ((end.tv_nsec-start.tv_nsec)<0) {
148 temp.tv_sec = end.tv_sec-start.tv_sec-1;
149 temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
// Plain field-wise subtraction. Presumably the no-borrow (else) case --
// TODO confirm.
151 temp.tv_sec = end.tv_sec-start.tv_sec;
152 temp.tv_nsec = end.tv_nsec-start.tv_nsec;
// Invokes the DEFINE_bool macro (defined elsewhere) with name `help`, value
// `false`, and the description string below. Presumably this declares a
// boolean command-line flag defaulting to false -- confirm against the macro.
157 DEFINE_bool (help,
false,
"show usage information");
// Forwards usage, argc, argv and remove_flags unchanged to SetFlags.
162 SetFlags (usage, argc, argv, remove_flags);
// Walk the argv entries from the current `index` up to (not including) *argc.
165 for (; index < *argc; ++index) {
166 string argval = (*argv)[index];
// Tests for an argument that does not start with '-', or is exactly "-".
// The action taken is not visible here; presumably option parsing stops.
168 if (argval[0] !=
'-' || argval ==
"-")
// Strip every leading '-' from the argument.
170 while (argval[0] ==
'-')
171 argval = argval.substr(1);
// Split at the first '=': text before it becomes `arg`, text after it `val`.
177 size_t pos = argval.find(
"=");
178 if (pos != string::npos) {
179 arg = argval.substr(0, pos);
180 val = argval.substr(pos + 1);
// Offer (arg, val) to each typed flag register in turn: bool, string, int32,
// int64, double. What happens when SetFlag() returns true is not visible
// here; presumably the loop moves on to the next argument.
184 FlagRegister<bool> *bool_register =
185 FlagRegister<bool>::GetRegister();
186 if (bool_register->SetFlag(arg, val))
188 FlagRegister<string> *string_register =
189 FlagRegister<string>::GetRegister();
190 if (string_register->SetFlag(arg, val))
192 FlagRegister<int32> *int32_register =
193 FlagRegister<int32>::GetRegister();
194 if (int32_register->SetFlag(arg, val))
196 FlagRegister<int64> *int64_register =
197 FlagRegister<int64>::GetRegister();
198 if (int64_register->SetFlag(arg, val))
200 FlagRegister<double> *double_register =
201 FlagRegister<double>::GetRegister();
202 if (double_register->SetFlag(arg, val))
// Logs at FATAL severity with the original, unstripped argument. Presumably
// reached only when no register accepted the flag -- TODO confirm.
205 LOG(FATAL) <<
"SetFlags: Bad option: " << (*argv)[index];
// Usage printing: entries are (string, string) pairs held in a std::set.
211 std::set< pair<string, string> > usage_set;
// Print the caller-supplied usage text followed by a newline.
213 cout << usage <<
"\n";
// Ask every typed register to add its usage entries to usage_set.
215 FlagRegister<bool> *bool_register = FlagRegister<bool>::GetRegister();
216 bool_register->GetUsage(&usage_set);
217 FlagRegister<string> *string_register = FlagRegister<string>::GetRegister();
218 string_register->GetUsage(&usage_set);
219 FlagRegister<int32> *int32_register = FlagRegister<int32>::GetRegister();
220 int32_register->GetUsage(&usage_set);
221 FlagRegister<int64> *int64_register = FlagRegister<int64>::GetRegister();
222 int64_register->GetUsage(&usage_set);
223 FlagRegister<double> *double_register = FlagRegister<double>::GetRegister();
224 double_register->GetUsage(&usage_set);
// Iterate the collected entries up to usage_set.end().
226 for (std::set< pair<string, string> >::const_iterator it =
228 it != usage_set.end();
// first = `file`, second = `usage`. NOTE(review): this local `usage` has the
// same name as the usage text printed above and looks like it shadows it
// inside the loop -- confirm.
230 const string &file = it->first;
231 const string &usage = it->second;
// Tests whether the entry's file is fst.cc, symbol-table.cc or util.cc. The
// action is not visible here; presumably such entries are skipped.
234 if (file.compare (
"fst.cc") == 0 \
235 || file.compare (
"symbol-table.cc") == 0 || \
236 file.compare (
"util.cc") == 0)
240 cout << usage << endl;
// Hard-coded two-line description of the --help flag.
243 cout <<
" --help: type = bool, default = false" << endl;
244 cout <<
" show usage information" << endl;
// Tail of the parameter list: `corpus` is an output vector passed by
// pointer; lines are appended to it and existing contents are not cleared
// in the code visible here.
251 std::vector<std::string>* corpus) {
// Open the file named by `filename` for reading.
252 std::ifstream ifp (filename.c_str ());
// Nothing is appended if the file could not be opened.
255 if (ifp.is_open ()) {
// Loop while the stream reports good(). The statement that fills `line` is
// not visible here. NOTE(review): if it is a getline() inside the loop, a
// failed final read may still append one extra entry -- verify.
256 while (ifp.good ()) {
261 corpus->push_back (line);
// Split: breaks `s` into pieces at each occurrence of `delim` and appends
// the pieces, in order, to `elems`. `elems` is only push_back'ed in the
// visible code, so whatever it already holds is kept.
268 void Split (
const std::string& s,
char delim, std::vector<std::string>& elems) {
// Wrap the input in a stream so getline() can do the delimiter scanning.
269 std::stringstream ss (s);
// Each successful getline() yields one piece in `item`.
271 while (getline (ss, item, delim))
272 elems.push_back (item);
void LoadWordList(const std::string &filename, std::vector< std::string > *corpus)
timespec diff(timespec start, timespec end)
string vec2str(vector< string > vec, string sep)
void PhonetisaurusSetFlags(const char *usage, int *argc, char ***argv, bool remove_flags)
vector< string > tokenize_utf8_string(string *utf8_string, string *delimiter)
vector< int > tokenize2ints(string *testword, string *sep, const SymbolTable *syms)
DEFINE_bool(help, false,"show usage information")
vector< string > tokenize_entry(string *testword, string *sep, SymbolTable *syms)
void Split(const std::string &s, char delim, std::vector< std::string > &elems)