/*
* International Union of Pure and Applied Chemistry (IUPAC)
* International Chemical Identifier (InChI)
* Version 1
* Software version 1.01
* July 21, 2006
* Developed at NIST
*/
#ifndef __STRUTIL_H__
#define __STRUTIL_H__
#ifndef INCHI_ALL_CPP
#ifdef __cplusplus
extern "C" {
#endif
#endif
/*
 * Forward declarations of the struct tags that the prototypes below use
 * only as pointer targets (see Create_INChI).  A tag that first appears
 * inside a parameter list has prototype scope, which makes the parameter
 * type incompatible with the real struct; declaring the tags at file scope
 * avoids that regardless of header inclusion order.
 */
struct tagTautomerGroupsInfo;
struct tagInchiTime;
/*
 * Connected-component helpers.
 * NOTE(review): presumably 'at' is an array of 'num_at' atoms and
 * 'component_at' receives the atoms of component 'component_number';
 * the meaning of the return values is not visible here -- confirm against
 * the implementation.
 */
int ExtractConnectedComponent( inp_ATOM *at,
                               int       num_at,
                               int       component_number,
                               inp_ATOM *component_at );

int SetConnectedComponentNumber( inp_ATOM *at,
                                 int       num_at,
                                 int       component_number );
/*
 * Allocation / release of INChI, INChI_Stereo and INChI_Aux objects.
 * Free_INChI() and Free_INChI_Aux() take the address of the caller's
 * pointer; Free_INChI_Members() and Free_INChI_Stereo() take the object
 * itself.
 * NOTE(review): presumably the double-pointer variants also reset the
 * caller's pointer -- confirm against the implementation.
 */
INChI *Alloc_INChI( inp_ATOM *at,
                    int       num_at,
                    int      *found_num_bonds,
                    int      *found_num_isotopic,
                    int       nAllocMode );
int Free_INChI( INChI **ppINChI );
int Free_INChI_Members( INChI *pINChI );
int Free_INChI_Stereo( INChI_Stereo *pINChI_Stereo );

INChI_Aux *Alloc_INChI_Aux( int num_at,
                            int num_isotopic_atoms,
                            int nAllocMode,
                            int bOrigData );
int Free_INChI_Aux( INChI_Aux **ppINChI_Aux );
/*
 * Create_INChI
 * NOTE(review): presumably builds *ppINChI / *ppINChI_Aux for the input
 * atoms; which arguments are inputs and which are outputs is not visible
 * in this header -- confirm against the implementation.
 * inp_norm_data is declared as an array of two INP_ATOM_DATA pointers.
 */
int Create_INChI( INChI                       **ppINChI,
                  INChI_Aux                   **ppINChI_Aux,
                  ORIG_ATOM_DATA               *orig_inp_data,
                  inp_ATOM                     *inp_at,
                  INP_ATOM_DATA                *inp_norm_data[2],
                  int                           num_inp_at,
                  INCHI_MODE                    nUserMode,
                  INCHI_MODE                   *pbTautFlags,
                  INCHI_MODE                   *pbTautFlagsDone,
                  struct tagInchiTime          *ulMaxTime,
                  struct tagTautomerGroupsInfo *ti_out,
                  char                         *pStrErrStruct );
/*
 * Fill INF_ATOM_DATA from a single normalized structure (FillOutInfAtom)
 * or from composite data (FillOutCompositeCanonInfAtom).
 * NOTE(review): purpose inferred from the names and parameter types only;
 * confirm details against the implementation.
 */
int FillOutInfAtom( inp_ATOM      *norm_at,
                    INF_ATOM_DATA *inf_norm_at_data,
                    int            init_num_at,
                    int            num_removed_H,
                    int            bAdd_DT_to_num_H,
                    int            nNumRemovedProtons,
                    NUM_H         *nNumRemovedProtonsIsotopic,
                    int            bIsotopic,
                    INChI         *pINChI,
                    INChI_Aux     *pINChI_Aux,
                    int            bAbcNumbers,
                    INCHI_MODE     nMode );

int FillOutCompositeCanonInfAtom( COMP_ATOM_DATA *composite_norm_data,
                                  INF_ATOM_DATA  *inf_norm_at_data,
                                  int             bIsotopic,
                                  int             bTautomeric,
                                  PINChI2        *pINChI2,
                                  PINChI_Aux2    *pINChI_Aux2,
                                  int             bAbcNumbers,
                                  INCHI_MODE      nMode );
/*
 * Bit flags describing how two InChI differ.
 * All "severe" conditions deliberately share bit 0; every other condition
 * owns one bit (bits 1..30).
 */
typedef enum tagInchiDiffBits {
    /* ---- severe differences: all mapped onto bit 0 ---- */
    IDIF_PROBLEM       = 1 << 0,  /* severe: at least one InChI does not exist */
    IDIF_NUM_AT        = 1 << 0,  /* severe: different number of atoms in the skeleton */
    IDIF_ATOMS         = 1 << 0,  /* severe: different types of skeleton atoms */
    IDIF_NUM_EL        = 1 << 0,  /* severe: formulas differ in another element */
    IDIF_CON_LEN       = 1 << 0,  /* severe: different connection table lengths */
    IDIF_CON_TBL       = 1 << 0,  /* severe: different connection tables */

    /* ---- hydrogen atoms ---- */
    IDIF_POSITION_H    = 1 << 1,  /* difference in non-taut (Mobile-H) or all H (Fixed-H) location/number */
    IDIF_MORE_FH       = 1 << 2,  /* extra fixed H */
    IDIF_LESS_FH       = 1 << 3,  /* missing fixed H */
    IDIF_MORE_H        = 1 << 4,  /* formulas differ in number of H */
    IDIF_LESS_H        = 1 << 5,  /* formulas differ in number of H */
    /* retired: IDIF_TAUT_LEN (different lengths of tautomer lists) */

    /* ---- tautomeric groups (tg) ---- */
    IDIF_NO_TAUT       = 1 << 6,  /* restored structure has no taut groups while the original InChI has some */
    IDIF_WRONG_TAUT    = 1 << 7,  /* restored has tautomerism while the original does not have it */
    IDIF_SINGLE_TG     = 1 << 8,  /* restored has 1 taut. group while the original InChI has multiple tg */
    IDIF_MULTIPLE_TG   = 1 << 9,  /* restored has multiple tg while the original InChI has only one tg */
    IDIF_NUM_TG        = 1 << 10, /* different number of tautomeric groups */
    /* retired: IDIF_LESS_TG_ENDP (restored structure has fewer taut. endpoints) */
    /* retired: IDIF_MORE_TG_ENDP (restored structure has more taut. endpoints) */
    IDIF_EXTRA_TG_ENDP = 1 << 11, /* extra tautomeric endpoint(s) in restored structure */
    IDIF_MISS_TG_ENDP  = 1 << 12, /* one or more tg endpoint is not in the restored structure */
    IDIF_DIFF_TG_ENDP  = 1 << 13, /* lists of tg endpoints are different */
    IDIF_TG            = 1 << 14, /* different tautomeric groups */

    /* ---- isotopes, charge, protons ---- */
    IDIF_NUM_ISO_AT    = 1 << 15, /* ?severe: restored struct. has different number of isotopic atoms */
    IDIF_ISO_AT        = 1 << 16, /* ?severe: restored struct. has different locations/isotopes of isotopic atoms */
    IDIF_CHARGE        = 1 << 17, /* restored structure has different charge */
    IDIF_REM_PROT      = 1 << 18, /* proton(s) removed/added from the restored structure */
    IDIF_REM_ISO_H     = 1 << 19, /* isotopic H removed */

    /* ---- stereocenters (SC) ---- */
    IDIF_SC_INV        = 1 << 20, /* restored structure has different inversion stereocenter mark */
    IDIF_SC_PARITY     = 1 << 21, /* restored structure has stereoatoms or allenes with different parity */
    IDIF_SC_EXTRA_UNDF = 1 << 22, /* restored structure has extra undefined stereocenter(s) */
    IDIF_SC_EXTRA      = 1 << 23, /* restored structure has extra stereocenter(s) */
    IDIF_SC_MISS_UNDF  = 1 << 24, /* restored structure lacks some undefined stereocenter(s) */
    IDIF_SC_MISS       = 1 << 25, /* restored structure lacks some stereocenters that are not undefined */

    /* ---- stereobonds (SB) ---- */
    IDIF_SB_PARITY     = 1 << 26, /* restored structure has stereobonds or cumulenes with different parity */
    IDIF_SB_EXTRA_UNDF = 1 << 27, /* restored structure has extra undefined stereobond(s) */
    IDIF_SB_EXTRA      = 1 << 28, /* restored structure has extra stereobond(s) */
    IDIF_SB_MISS_UNDF  = 1 << 29, /* restored structure lacks some undefined stereobonds (SB counterpart of IDIF_SC_MISS_UNDF) */
    IDIF_SB_MISS       = 1 << 30  /* restored structure lacks some stereobonds that are not undefined */
} IDIF;
/*
 * Composite masks built from the IDIF_* bits.
 *   IDIFF_SB      - every stereobond difference
 *   IDIFF_SC      - every stereocenter difference except IDIF_SC_INV
 *   IDIFF_CONSTIT - H, tautomeric-group, isotopic, charge and proton
 *                   differences (the severe bit 0 is not included)
 *   IDIFF_STEREO  - IDIF_SC_INV plus everything in IDIFF_SC and IDIFF_SB
 */
#define IDIFF_SB      ( IDIF_SB_PARITY     | \
                        IDIF_SB_EXTRA_UNDF | \
                        IDIF_SB_EXTRA      | \
                        IDIF_SB_MISS_UNDF  | \
                        IDIF_SB_MISS )

#define IDIFF_SC      ( IDIF_SC_PARITY     | \
                        IDIF_SC_EXTRA_UNDF | \
                        IDIF_SC_EXTRA      | \
                        IDIF_SC_MISS_UNDF  | \
                        IDIF_SC_MISS )

#define IDIFF_CONSTIT ( IDIF_POSITION_H    | \
                        IDIF_MORE_FH       | IDIF_LESS_FH      | \
                        IDIF_MORE_H        | IDIF_LESS_H       | \
                        IDIF_NO_TAUT       | IDIF_WRONG_TAUT   | \
                        IDIF_SINGLE_TG     | IDIF_MULTIPLE_TG  | \
                        IDIF_NUM_TG        | \
                        IDIF_EXTRA_TG_ENDP | IDIF_MISS_TG_ENDP | \
                        IDIF_DIFF_TG_ENDP  | IDIF_TG           | \
                        IDIF_NUM_ISO_AT    | IDIF_ISO_AT       | \
                        IDIF_CHARGE        | IDIF_REM_PROT     | \
                        IDIF_REM_ISO_H )

#define IDIFF_STEREO  ( IDIF_SC_INV | IDIFF_SC | IDIFF_SB )
/*************************************************************************************/
/* Capacities of the fixed-size lists inside struct tagInChICompareResults (ICR). */

/* tautomeric endpoints found in only one of the two InChI */
#define ICR_MAX_ENDP_IN1_ONLY  32
#define ICR_MAX_ENDP_IN2_ONLY  32

/* H-position and fixed-H difference lists */
#define ICR_MAX_DIFF_FIXED_H   32

/* stereocenters (SC) found in only one of the two InChI */
#define ICR_MAX_SC_IN1_ONLY    32
#define ICR_MAX_SC_IN2_ONLY    32

/* stereobonds (SB) found in only one of the two InChI */
#define ICR_MAX_SB_IN1_ONLY    32
#define ICR_MAX_SB_IN2_ONLY    32

/* undefined stereocenters / stereobonds */
#define ICR_MAX_SC_UNDF        32
#define ICR_MAX_SB_UNDF        32
/*
 * InChI comparison results: a flags word, H counts for both InChI, and
 * fixed-capacity lists of differing atoms/bonds.  Every list is followed
 * by its own num_* counter.
 *
 * Suffix convention: 1 => InChI from reversed struct.  2 => input InChI
 *
 * NOTE(review): whether a num_* counter can exceed the capacity of its
 * list (i.e. counts items found rather than items stored) is not visible
 * here -- confirm against the code that fills this struct.
 */
typedef struct tagInChICompareResults {
    INCHI_MODE flags;       /* presumably a combination of IDIF_* bits -- TODO confirm */

    /* hydrogen / tautomeric totals for InChI 1 and InChI 2 */
    int tot_num_H1;
    int tot_num_H2;
    int num_taut_H1;
    int num_taut_H2;
    int num_taut_M1;
    int num_taut_M2;

    /* tautomeric endpoints present in only one InChI */
    AT_NUMB endp_in1_only[ICR_MAX_ENDP_IN1_ONLY]; /* endpoint canonical number = index+1 */
    int     num_endp_in1_only;
    AT_NUMB endp_in2_only[ICR_MAX_ENDP_IN2_ONLY]; /* endpoint canonical number = index+1 */
    int     num_endp_in2_only;

    /* non-tautomeric H: atom and H count kept in parallel arrays */
    AT_NUMB diff_pos_H_at[ICR_MAX_DIFF_FIXED_H];
    S_CHAR  diff_pos_H_nH[ICR_MAX_DIFF_FIXED_H];
    int     num_diff_pos_H;

    /* extra fixed_H */
    AT_NUMB fixed_H_at1_more[ICR_MAX_DIFF_FIXED_H];
    S_CHAR  fixed_H_nH1_more[ICR_MAX_DIFF_FIXED_H];
    int     num_fixed_H1_more;

    /* missed fixed_H */
    AT_NUMB fixed_H_at2_more[ICR_MAX_DIFF_FIXED_H];
    S_CHAR  fixed_H_nH2_more[ICR_MAX_DIFF_FIXED_H];
    int     num_fixed_H2_more;

    /* stereocenters (sc) present in only one InChI */
    AT_NUMB sc_in1_only[ICR_MAX_SC_IN1_ONLY];
    int     num_sc_in1_only;
    AT_NUMB sc_in2_only[ICR_MAX_SC_IN2_ONLY];
    int     num_sc_in2_only;

    /* stereobonds (sb) present in only one InChI */
    AT_NUMB sb_in1_only[ICR_MAX_SB_IN1_ONLY];
    int     num_sb_in1_only;
    AT_NUMB sb_in2_only[ICR_MAX_SB_IN2_ONLY];
    int     num_sb_in2_only;

    /*
     * Undefined stereobonds / stereocenters present in only one InChI.
     * The sb_* arrays are sized with ICR_MAX_SB_UNDF and the sc_* arrays
     * with ICR_MAX_SC_UNDF (the two constants were previously swapped;
     * both are 32, so the struct layout is unchanged).
     */
    AT_NUMB sb_undef_in1_only[ICR_MAX_SB_UNDF];
    int     num_sb_undef_in1_only;
    AT_NUMB sb_undef_in2_only[ICR_MAX_SB_UNDF];
    int     num_sb_undef_in2_only;
    AT_NUMB sc_undef_in1_only[ICR_MAX_SC_UNDF];
    int     num_sc_undef_in1_only;
    AT_NUMB sc_undef_in2_only[ICR_MAX_SC_UNDF];
    int     num_sc_undef_in2_only;
} ICR;
/*
 * Comparison of an InChI obtained from a reversed structure with the input
 * InChI.
 *   i1, a1 - InChI from reversed struct (a1: the INChI_Aux passed with it)
 *   i2, a2 - input InChI               (a2: the INChI_Aux passed with it)
 * NOTE(review): presumably CompareReversedINChI2() returns IDIF_* bits and
 * fills *picr, and CompareReversedInchiMsg() maps a CompareReversedINChI()
 * result code to text -- confirm against the implementation.
 */
INCHI_MODE CompareReversedINChI2( INChI     *i1,
                                  INChI     *i2,
                                  INChI_Aux *a1,
                                  INChI_Aux *a2,
                                  ICR       *picr,
                                  int       *err );

int CompareIcr( ICR        *picr1,
                ICR        *picr2,
                INCHI_MODE *pin1,
                INCHI_MODE *pin2,
                INCHI_MODE  mask );

int CompareReversedINChI( INChI     *i1,
                          INChI     *i2,
                          INChI_Aux *a1,
                          INChI_Aux *a2 );

const char *CompareReversedInchiMsg( int code );
/*
 * Stereo / isotopic equality tests.
 * NOTE(review): the EQL_* values below are powers of two and presumably
 * are combined into the eql1 / eql2 arguments of Eql_INChI_Stereo() --
 * confirm against the implementation.
 */
#define EQL_EXISTS   0x01
#define EQL_SP3      0x02
#define EQL_SP3_INV  0x04
#define EQL_SP2      0x08

int Eql_INChI_Stereo( INChI_Stereo *s1,
                      int           eql1,
                      INChI_Stereo *s2,
                      int           eql2,
                      int           bRelRac );

int Eql_INChI_Isotopic( INChI *i1,
                        INChI *i2 );
/*
 * NOTE(review): EQL_EQU* presumably select what eql1 / eql2 refer to in
 * Eql_INChI_Aux_Equ() -- confirm against the implementation.
 */
#define EQL_EQU      0
#define EQL_EQU_TG   1
#define EQL_EQU_ISO  2

int Eql_INChI_Aux_Equ( INChI_Aux *a1,
                       int        eql1,
                       INChI_Aux *a2,
                       int        eql2 );
/*
 * NOTE(review): EQL_NUM* presumably select what eql1 / eql2 refer to in
 * Eql_INChI_Aux_Num() -- confirm against the implementation.
 */
#define EQL_NUM      0
#define EQL_NUM_INV  1
#define EQL_NUM_ISO  2

int Eql_INChI_Aux_Num( INChI_Aux *a1,
                       int        eql1,
                       INChI_Aux *a2,
                       int        eql2 );
/*
 * bHasEquString takes a linear CT array and its length.
 * NOTE(review): what it tests for is not visible here.
 */
int bHasEquString( AT_NUMB *LinearCT,
                   int      nLenCT );

/*
 * INCHI_SORT comparison functions.
 * CompINChINonTaut2 / CompINChITaut2 have the (const void *, const void *)
 * signature of a qsort()-style comparator; presumably they are used that
 * way on INCHI_SORT items -- confirm against the callers.
 */
int CompINChINonTaut2( const void *p1, const void *p2 );
int CompINChITaut2( const void *p1, const void *p2 );

int CompINChI2( const INCHI_SORT *p1,
                const INCHI_SORT *p2,
                int               bTaut,
                int               bCompareIsotopic );

int CompINChITautVsNonTaut( const INCHI_SORT *p1,
                            const INCHI_SORT *p2,
                            int               bCompareIsotopic );
/*
 * Indices of the InChI segments that are compared between layers.
 * Legend: r = repetitive, n = non-repetitive.
 * DIFS_LENGTH is the second dimension of the sDifSegs[][DIFS_LENGTH]
 * arrays declared in this header.
 */
typedef enum tagDiffINChISegments {
    DIFS_f_FORMULA  = 0,  /* r; fixed-H <-> mobile-H */
    DIFS_c_CONNECT  = 1,  /* n; connection table; mobile-H only */
    DIFS_h_H_ATOMS  = 2,  /* n; hydrogen atoms: mobile-H and Fixed-H; have different meanings */
    DIFS_q_CHARGE   = 3,  /* r; charge; fixed-H <-> mobile-H */
    DIFS_p_PROTONS  = 4,  /* n; protons; mobile-H only */
    DIFS_b_SBONDS   = 5,  /* r; stereobonds: fixed-H <-> mobile-H * isotopic <-> non-isotopic */
    DIFS_t_SATOMS   = 6,  /* r; stereoatoms: fixed-H <-> mobile-H * isotopic <-> non-isotopic */
    DIFS_m_SP3INV   = 7,  /* r; stereo-abs-inv: fixed-H <-> mobile-H * isotopic <-> non-isotopic */
    DIFS_s_STYPE    = 8,  /* r; stereo-type: fixed-H <-> mobile-H * isotopic <-> non-isotopic */
    DIFS_i_IATOMS   = 9,  /* r; isotopic atoms: fixed-H <-> mobile-H * isotopic <-> non-isotopic */
    DIFS_o_TRANSP   = 10, /* n; Fixed-H transposition */
    DIFS_idf_LENGTH = 11, /* length of the array relevant to the INChI Identifier */
    /* later elements referring to AuxInfo may be added */
    DIFS_LENGTH     = DIFS_idf_LENGTH /* length of the array */
} DIF_SEGMENTS;
/* Indices of the InChI layers; DIFL_LENGTH is the number of layers. */
typedef enum tagDiffINChILayers {
    DIFL_M      = 0, /* main layer */
    DIFL_MI     = 1, /* main isotopic */
    DIFL_F      = 2, /* fixed-H */
    DIFL_FI     = 3, /* fixed-H isotopic */
    DIFL_LENGTH = 4  /* number of layers */
} DIF_LAYERS;
/*
 * Marks describing how a segment component relates to the component in
 * the preceding namesake segment, plus the bit masks used to decide how
 * the segment is output.
 *
 * Decision rule for a mask pair:
 *   decision_F = bitmask: bits that should not be present
 *   decision_T = bitmask: at least one of the bits should be present
 *   decision   = true if( !( BITS & decision_F ) && ( BITS & decision_T ) )
 */
typedef enum tagMarkDiff {
    /* ---- individual marks ---- */
    DIFV_BOTH_EMPTY = 0,       /* both this and the component in the preceding namesake segment are empty */
    DIFV_EQL2PRECED = 1 << 0,  /* equal to the component in the preceding namesake segment */
    DIFV_NEQ2PRECED = 1 << 1,  /* different from the component in the preceding namesake segment */
    DIFV_IS_EMPTY   = 1 << 2,  /* is empty while the preceding namesake segment is not empty */
    DIFV_FI_EQ_MI   = 1 << 3,  /* FI stereo component is equal to the component in the MI namesake */
                               /* segment while M and F components are empty */

    /* ---- decision masks ---- */
    DIFV_OUTPUT_EMPTY_T = DIFV_IS_EMPTY,                                       /* bits present for empty segment output */
    DIFV_OUTPUT_EMPTY_F = ( DIFV_FI_EQ_MI | DIFV_NEQ2PRECED | DIFV_EQL2PRECED ), /* bits NOT present */
    DIFV_OUTPUT_OMIT_F  = ( DIFV_IS_EMPTY | DIFV_NEQ2PRECED ),                 /* bits NOT present for omitting */
    DIFV_OUTPUT_FILL_T  = ( DIFV_FI_EQ_MI | DIFV_NEQ2PRECED | DIFV_EQL2PRECED )
} DIF_VALUES;
/*
 * What to do with a segment on output.
 * The numeric values of FILL and EMPTY are used in str_LineEnd(), so they
 * are spelled out and must not be renumbered.
 */
typedef enum tagINChISegmAction {
    INCHI_SEGM_OMIT  = 0,
    INCHI_SEGM_FILL  = 1,   /* value used in str_LineEnd() */
    INCHI_SEGM_EMPTY = 2    /* value used in str_LineEnd() */
} INCHI_SEGM_ACTION;
/*
 * Layer/segment difference bookkeeping.  sDifSegs is a two-dimensional
 * char array whose second dimension is DIFS_LENGTH.
 * NOTE(review): presumably the first dimension is DIFL_LENGTH, the cells
 * hold DIFV_* bits, and INChI_SegmentAction() maps one cell to an
 * INCHI_SEGM_* value -- confirm against the implementation.
 */
int CompINChILayers( const INCHI_SORT *p1,
                     const INCHI_SORT *p2,
                     char              sDifSegs[][DIFS_LENGTH] );

int MarkUnusedAndEmptyLayers( char sDifSegs[][DIFS_LENGTH] );

int INChI_SegmentAction( char cDifSegs );
/*
 * FLAG_SORT_PRINT_* bit flags; each one occupies a single bit.
 * _BAS = main InChI layer, _REC = reconnected InChI layer.
 */
#define FLAG_SORT_PRINT_TRANSPOS_BAS   0x01  /* transposition in the main InChI layer */
#define FLAG_SORT_PRINT_TRANSPOS_REC   0x02  /* transposition in the reconnected InChI layer */
#define FLAG_SORT_PRINT_NO_NFIX_H_BAS  0x04  /* no fixed H non-isotopic in the main InChI layer */
#define FLAG_SORT_PRINT_NO_NFIX_H_REC  0x08  /* no fixed H non-isotopic in the reconnected InChI layer */
#define FLAG_SORT_PRINT_NO_IFIX_H_BAS  0x10  /* no fixed H isotopic in the main InChI layer */
#define FLAG_SORT_PRINT_NO_IFIX_H_REC  0x20  /* no fixed H isotopic in the reconnected InChI layer */
#define FLAG_SORT_PRINT_ReChI_PREFIX   0x40  /* Output ReChI instead of InChI */
/*
 * InChI output routines.  OutputINChI1() and OutputINChI2() have identical
 * parameter lists.
 * NOTE(review): presumably pStr/nStrLen are a work buffer and its size and
 * *pSortPrintINChIFlags holds FLAG_SORT_PRINT_* bits; how the two functions
 * differ is not visible here -- confirm against the implementation.
 */
int OutputINChI1( char        *pStr,
                  int          nStrLen,
                  INCHI_SORT  *pINChISortTautAndNonTaut[][TAUT_NUM],
                  int          iINChI,
                  ORIG_STRUCT *pOrigStruct,
                  int          bDisconnectedCoord,
                  int          bOutputType,
                  int          bINChIOutputOptions,
                  int          bXml,
                  int          bAbcNumbers,
                  int          bCtPredecessors,
                  int          bNoStructLabels,
                  int          num_components[],
                  int          num_non_taut[],
                  int          num_taut[],
                  INCHI_FILE  *output_file,
                  INCHI_FILE  *log_file,
                  int          num_input_struct,
                  const char  *szSdfLabel,
                  const char  *szSdfValue,
                  long         lSdfId,
                  int         *pSortPrintINChIFlags );

int OutputINChI2( char        *pStr,
                  int          nStrLen,
                  INCHI_SORT  *pINChISortTautAndNonTaut[][TAUT_NUM],
                  int          iINChI,
                  ORIG_STRUCT *pOrigStruct,
                  int          bDisconnectedCoord,
                  int          bOutputType,
                  int          bINChIOutputOptions,
                  int          bXml,
                  int          bAbcNumbers,
                  int          bCtPredecessors,
                  int          bNoStructLabels,
                  int          num_components[],
                  int          num_non_taut[],
                  int          num_taut[],
                  INCHI_FILE  *output_file,
                  INCHI_FILE  *log_file,
                  int          num_input_struct,
                  const char  *szSdfLabel,
                  const char  *szSdfValue,
                  long         lSdfId,
                  int         *pSortPrintINChIFlags );
/*
 * SaveEquComponentsInfoAndSortOrder
 * pINChISort and composite_norm_data are declared with TAUT_NUM elements.
 * NOTE(review): purpose inferred from the name only; confirm against the
 * implementation.
 */
int SaveEquComponentsInfoAndSortOrder( int             iINChI,
                                       INCHI_SORT     *pINChISort[TAUT_NUM],
                                       int            *num_components,
                                       ORIG_ATOM_DATA *orig_inp_data,
                                       ORIG_ATOM_DATA *prep_inp_data,
                                       COMP_ATOM_DATA  composite_norm_data[TAUT_NUM],
                                       int             bCompareComponents );
/*
 * XML and plain-text tag / error output.
 * NOTE(review): presumably pStr/nStrLen are a work buffer and its size,
 * and 'ind' is an indent.
 * Mind the argument order: OutputINChIXmlStructStartTag() takes
 * (pStr, ind, nStrLen) whereas OutputINChIXmlStructEndTag() and
 * OutputINChIXmlError() take (pStr, nStrLen, ind).
 */
int OutputINChIXmlRootStartTag( INCHI_FILE *output_file );
int OutputINChIXmlRootEndTag( INCHI_FILE *output_file );

int OutputINChIXmlError( INCHI_FILE *output_file,
                         char       *pStr,
                         int         nStrLen,
                         int         ind,
                         char       *szErrorText,
                         int         bError );

int OutputINChIPlainError( INCHI_FILE *output_file,
                           char       *pStr,
                           int         nStrLen,
                           char       *pErrorText,
                           int         bError );

int OutputINChIXmlStructStartTag( INCHI_FILE *output_file,
                                  char       *pStr,
                                  int         ind,      /* indent */
                                  int         nStrLen,
                                  int         bNoStructLabels,
                                  int         num_input_struct,
                                  const char *szSdfLabel,
                                  const char *szSdfValue );

int OutputINChIXmlStructEndTag( INCHI_FILE *output_file,
                                char       *pStr,
                                int         nStrLen,
                                int         ind );
/*
 * Input-structure error classification and reporting.
 * NOTE(review): presumably the value returned by GetInpStructErrorType()
 * is what gets passed as nErrorType to ProcessStructError() -- confirm
 * against the callers.
 */
int GetInpStructErrorType( INPUT_PARMS *ip,
                           int          err,
                           char        *pStrErrStruct,
                           int          num_inp_atoms );

int ProcessStructError( INCHI_FILE  *output_file,
                        INCHI_FILE  *log_file,
                        char        *pStrErrStruct,
                        int          nErrorType,
                        int         *bXmlStructStarted,
                        long         num_inp,
                        INPUT_PARMS *ip,
                        char        *pStr,
                        int          nStrLen );
/*
 * Atom queries and SDfile writers.
 * NOTE(review): purposes inferred from the names only; return-value
 * conventions are not visible here -- confirm against the implementation.
 */
int bNumHeterAtomHasIsotopicH( inp_ATOM *atom,
                               int       num_atoms );

int WriteToSDfile( const INP_ATOM_DATA *inp_at_data,
                   INCHI_FILE          *fcb,
                   const char          *name,
                   const char          *comment,
                   const char          *szLabel,
                   const char          *szValue );

int WriteOrigAtomDataToSDfile( const ORIG_ATOM_DATA *inp_at_data,
                               INCHI_FILE           *fcb,
                               const char           *name,
                               const char           *comment,
                               int                   bChiral,
                               int                   bAtomsDT,
                               const char           *szLabel,
                               const char           *szValue );

int bIsMetalSalt( inp_ATOM *at,
                  int       i );
/*
 * String constants used by the SDF_LBL_VAL() macro; only declared here,
 * the definitions live outside this header.
 * These arrays replaced the following macro forms, which show the
 * expected contents (verify against the actual definitions):
 *     gsMissing  "is missing"
 *     gsEmpty    ""
 *     gsSpace    " "
 *     gsEqual    "="
 */
extern const char gsEmpty[];
extern const char gsSpace[];
extern const char gsEqual[];
extern const char gsMissing[];
/*
 * SDF_LBL_VAL(L,V) expands to FOUR comma-separated string expressions for
 * use as the arguments of a "%s%s%s%s" format:
 *   1. gsSpace if label L is non-empty, otherwise gsEmpty
 *   2. L       if L is non-empty,       otherwise gsEmpty
 *   3. gsEqual if both L and V are non-empty, gsSpace if only L is,
 *      otherwise gsEmpty
 *   4. V       if value V is non-empty; else gsMissing if L is non-empty,
 *      otherwise gsEmpty
 * A null pointer is treated as an empty string.  L and V are evaluated
 * more than once, so pass expressions without side effects.
 */
#define SDF_LBL_VAL(L,V) \
    ( ((L) && (L)[0]) ? gsSpace : gsEmpty ), \
    ( ((L) && (L)[0]) ? (L) : gsEmpty ), \
    ( ((L) && (L)[0]) ? ( ((V) && (V)[0]) ? gsEqual : gsSpace ) : gsEmpty ), \
    ( ((V) && (V)[0]) ? (V) : ( ((L) && (L)[0]) ? gsMissing : gsEmpty ) )
#ifndef INCHI_ALL_CPP
#ifdef __cplusplus
}
#endif
#endif
#endif /* __STRUTIL_H__ */
/* syntax highlighted by Code2HTML, v. 0.9.1 */