// ==========  This file is under  LGPL, the GNU Lesser General Public Licence
// ==========  Dialing Lemmatizer (www.aot.ru)
// ==========  Copyright by Alexey Sokirko, Andrey Putrin

#include "StdMorph.h"
#include "Lemmatizers.h"
#include "Paradigm.h"
#include "Paradigm.h"

#ifdef DETECT_MEMORY_LEAK
	#ifdef _DEBUG
	#define new DEBUG_NEW

	#undef THIS_FILE
	static char THIS_FILE[] = __FILE__;
	#endif
#endif

CFormInfo::CFormInfo()
{
	m_pParent = NULL;
	m_bFound = true;
	m_InnerAnnot.m_LemmaInfoNo = -1;
	
}

void	CFormInfo::Copy(const CFormInfo& from)
{
	m_InnerAnnot = from.m_InnerAnnot;
	m_pParent = from.m_pParent;
	m_InputWordBase = from.m_InputWordBase;
	m_bFound = from.m_bFound;
	m_bCutFlexia = from.m_bCutFlexia;
	m_bCutPrefixes = from.m_bCutPrefixes;
};


size_t  CFormInfo::GetLemmaPrefixLength() const 
{
	assert (IsValid());
	if (!IsValid()) return 0;
	if (!m_bFound) return 0;
	return m_pParent->m_Prefixes[m_InnerAnnot.m_PrefixNo].length();
};

void	CFormInfo::Create(const CLemmatizer*	pParent, const CAutomAnnotationInner& A, const string& InputWordForm, bool bFound)
{
	m_InnerAnnot = A;
	m_pParent = pParent;
	m_bFound = bFound;
	m_InputWordBase = InputWordForm;
	const CMorphForm& M = GetFlexiaModel().m_Flexia[A.m_ItemNo];
	size_t FlexLength = M.m_FlexiaStr.length();

	//  It can be so( if CLemmatizer::PredictByDataBase was used) that 
	//  the flexion  is not suffix of m_InputWordBase, but only part of  it.
	//  If so, then we cannot generate paradigm, since the current form cannot be 
	//  divided into two parts: the base and a known flexion.

	if (	m_bFound
		||	(		(	m_InputWordBase.length()>= FlexLength)
				&&	(m_InputWordBase.substr(m_InputWordBase.length()-FlexLength) == M.m_FlexiaStr)
			)
		)
	{
		m_bCutFlexia = true;
		m_InputWordBase.erase(m_InputWordBase.length() - M.m_FlexiaStr.length());
	}
	else
		m_bCutFlexia = false;

		
	const string& LemmPrefix = m_pParent->m_Prefixes[m_InnerAnnot.m_PrefixNo];
	if	(		m_bFound
			||	(		(m_InputWordBase.substr(0, LemmPrefix.length()) == LemmPrefix)
					&&	(m_InputWordBase.substr(LemmPrefix.length(), M.m_PrefixStr.length()) == M.m_PrefixStr)
				)
		)
	{
		m_InputWordBase.erase(0, LemmPrefix.length()+ M.m_PrefixStr.length());
		m_bCutPrefixes = true;
	}
	else
		m_bCutPrefixes = false;

	
};


const CLemmaInfoAndLemma& CFormInfo::GetLemmaInfo() const
{
	return m_pParent->m_LemmaInfos[m_InnerAnnot.m_LemmaInfoNo];
};

const CFlexiaModel& CFormInfo::GetFlexiaModel() const
{
	const CLemmaInfoAndLemma I = GetLemmaInfo();
	return m_pParent->m_FlexiaModels[I.m_LemmaInfo.m_FlexiaModelNo];
};

DWORD CFormInfo::GetParadigmId() const 
{
	assert (IsValid());
	if (!IsValid()) return (DWORD)-1;

	if (!m_bFound)
		return ErrorParadigmId;
	else
		return m_InnerAnnot.GetParadigmId();
};


string CFormInfo::GetCommonAncode() const 
{
	assert (IsValid());
	if (!IsValid()) return "";
	return GetLemmaInfo().m_LemmaInfo.GetCommonAncode();
};

string CFormInfo::GetLemSign() const 
{
	assert (IsValid());
	if (!IsValid()) return "";
	string Result  = m_bFound ? "+" : "-";
	string CommonAncode = GetCommonAncode();
	Result  += CommonAncode.empty() ? "??" : CommonAncode;
	return Result;
};


bool  CFormInfo::SetParadigmId(DWORD newVal) 
{
	assert (m_pParent);
	{
		CAutomAnnotationInner A;
		A.SplitParadigmId(newVal);
		if (A.m_LemmaInfoNo > m_pParent->m_LemmaInfos.size()) 
			return false;
		if (A.m_PrefixNo > m_pParent->m_Prefixes.size()) 
			return false;
		A.m_ItemNo = 0;
		A.m_nWeight = m_pParent->GetStatistic().get_HomoWeight(A.GetParadigmId(), 0);
		A.m_ModelNo = m_pParent->m_LemmaInfos[A.m_LemmaInfoNo].m_LemmaInfo.m_FlexiaModelNo;
		m_InnerAnnot = A;
	};

	m_bCutPrefixes = true;
	m_bCutFlexia = true;
	m_bFound = true;
	m_InputWordBase = GetSrcNorm();
	m_InputWordBase.erase(m_InputWordBase.length() - GetFlexiaModel().get_first_flex().length());

	return true;
}

DWORD  CFormInfo::GetCount () const 
{
	assert (IsValid());
	if (!IsValid()) return 0;
	return GetFlexiaModel().m_Flexia.size();
}


string CFormInfo::GetSrcNorm() const 
{
	assert (IsValid());
	if (!IsValid()) return "";

	//string result = m_pParent->m_Prefixes[m_InnerAnnot.m_PrefixNo];
	//result += m_pParent->m_Bases[GetLemmaInfo().m_LemmaStrNo].GetString();
	string result = m_pParent->m_Bases[GetLemmaInfo().m_LemmaStrNo].GetString();
	result += GetFlexiaModel().get_first_flex();
	return result;
}

int CFormInfo::GetHomonymWeightWithForm(WORD pos) const 
{
	assert (IsValid());
	if (!IsValid()) return 0;
	return  m_pParent->GetStatistic().get_HomoWeight(GetParadigmId(), pos);
}

int  CFormInfo::GetWordWeight() const
{
	assert (IsValid());
	if (!IsValid()) return 0;

	return m_pParent->GetStatistic().get_WordWeight(GetParadigmId());
}

void	CFormInfo::AttachLemmatizer(const CLemmatizer* parent)
{
	m_pParent = parent;
}

bool  CFormInfo::IsValid() const
{
	return		m_pParent 
			&&	(m_InnerAnnot.m_LemmaInfoNo != -1);
};

string  CFormInfo::GetSrcAncode() const
{
	assert (IsValid());
	if (!IsValid()) return "";

	const CLemmaInfoAndLemma& I = GetLemmaInfo();

	return GetFlexiaModel().m_Flexia[m_InnerAnnot.m_ItemNo].m_Gramcode;
}

string CFormInfo::GetAncode(WORD pos) const 
{
	assert (IsValid());
	if (!IsValid()) return "";

	return GetFlexiaModel().m_Flexia[pos].m_Gramcode.substr(0,2);
}


string CFormInfo::GetWordForm (WORD pos) const 
{
	assert (IsValid());
	if (!IsValid()) return "";

	const CMorphForm& F = GetFlexiaModel().m_Flexia[pos];
	
	string Result = m_InputWordBase;

	if (m_bCutFlexia)
		Result += F.m_FlexiaStr;

	if (m_bCutPrefixes)
		Result = m_pParent->m_Prefixes[m_InnerAnnot.m_PrefixNo]+F.m_PrefixStr + Result;

	return Result;
}


int  CFormInfo::GetHomonymWeight() const
{	
	assert (IsValid());
	if (!IsValid()) return 0;
	

	return m_InnerAnnot.m_nWeight;
}

BYTE	CFormInfo::GetAccentedVowel(WORD pos) const
{
	if (!m_bFound) return UnknownAccent;
	assert (IsValid());
	if (!IsValid()) return UnknownAccent;
	

	const CLemmaInfoAndLemma& I = GetLemmaInfo();

	if (I.m_LemmaInfo.m_AccentModelNo == UnknownAccentModelNo)
		return UnknownAccent;

	BYTE BackVowelNo = m_pParent->m_AccentModels[I.m_LemmaInfo.m_AccentModelNo].m_Accents[pos];
	string s = GetWordForm(pos);	
	RmlMakeLower(s, m_pParent->GetLanguage());
	return TransferReverseVowelNoToCharNo(s, BackVowelNo, m_pParent->GetLanguage());
};

BYTE	CFormInfo::GetSrcAccentedVowel() const
{
	return GetAccentedVowel(m_InnerAnnot.m_ItemNo);
};