//================================
// ========== Dialing Lemmatizer (www.aot.ru)
//================================
#include "../common/utilit.h"
#include "../common/MorphologyHolder.h"
// Prints the command-line help text to stdout and terminates the process
// with exit status -1. This function never returns to its caller.
void PrintUsage()
{
    printf ("Dialing File Lemmatizer(www.aot.ru)\n");
    // The usage line must name both required arguments: the language and the list file.
    printf ("Usage: FileLem (RUSSIAN|ENGLISH|GERMAN) FILE_LIST\n");
    printf ("Example: FileLem Russian input_list.txt\n");
    printf ("FILE_LIST is a file with list of files (one file per line)\n");
    printf ("Possible file encodings: Russian(windows-1251), German(windows-1252)\n");
    exit(-1);
}
// Entry point of the file lemmatizer.
//
// Expects exactly two arguments: argv[1] is the language name (resolved by
// GetLanguageByString) and argv[2] is a text file listing one input file per
// line. For each listed file the morphology is computed via CMorphologyHolder
// and the result is saved next to it under the name produced by
// MakeFName(file, "lem").
//
// Returns 0 when the list has been processed (individual files that fail are
// reported and skipped), 1 when the dictionaries cannot be loaded or the list
// file cannot be opened. Bad or help arguments go through PrintUsage(), which
// terminates the process itself.
int main(int argc, char **argv)
{
    // =============== LOADING DICTS ================
    // PrintUsage() calls exit(), so none of the checks below fall through.
    if (argc != 3)
        PrintUsage();

    if (   !strcmp (argv[1], "-h")
        || !strcmp (argv[1], "-help")
        || !strcmp (argv[1], "/h")
        || !strcmp (argv[1], "/help")
       )
        PrintUsage();

    MorphLanguageEnum Language;
    if (!GetLanguageByString(argv[1], Language))
    {
        PrintUsage();
    }

    fprintf (stderr, "Loading dictionaries \n");
    CMorphologyHolder Holder;
    if (!Holder.LoadGraphanAndLemmatizer(Language))
    {
        fprintf (stderr, "Cannot load %s morphology of \n", argv[1]);
        return 1;
    }

    const char* FileName = argv[2];
    FILE * fp = fopen (FileName, "r");
    if (!fp)
    {
        printf (" Cannot open %s \n", FileName );
        return 1;
    }

    // NOTE(review): a list line longer than sizeof(buffer)-1 characters is
    // delivered by fgets in several pieces and would be treated as several
    // file names.
    char buffer[1024];
    while (fgets(buffer, sizeof(buffer), fp))
    {
        std::string f = buffer;
        // Trim is a project helper; presumably it strips surrounding
        // whitespace including the trailing newline -- TODO confirm.
        Trim(f);

        // Skip blank lines (e.g. a trailing empty line in the list) instead
        // of trying to process an empty file name.
        if (f.empty())
            continue;

        // Out-parameter of GetMorphology; initialised so it never holds garbage.
        int CountOfWords = 0;
        printf (" Reading %s \n", f.c_str());
        if (!Holder.GetMorphology(f, true, CountOfWords))
        {
            printf (" Cannot process %s \n", f.c_str() );
            continue;
        }

        // size() does not return int; cast explicitly so the argument
        // matches the %i conversion.
        printf (" Found %i items\n", static_cast<int>(Holder.m_PlmLines.m_Items.size()) );

        // save
        std::string LemFile = MakeFName(f, "lem");
        printf ( " Save to %s\n", LemFile.c_str() );
        Holder.m_PlmLines.SaveToFile(LemFile);
    }

    // Release the list file handle; the original never closed it.
    fclose(fp);
    return 0;
}