/*
The University of Liverpool
Modifications to Zebra 1.1 / YAZ 1.7 to enable ranking
by attribute weight.
Copyright (c) 2001-2002 The University of Liverpool. All
rights reserved.
Licensed under the Academic Free License version 1.1.
http://opensource.org/licenses/academic.php
$Id: livcode.c,v 1.1 2003/03/26 16:41:48 adam Exp $
*/
#include <stdlib.h>
#include <stdio.h>
#ifdef WIN32
#include <process.h>
#else
#include <unistd.h>
#endif
#include <assert.h>
#include "index.h"
#include "zserver.h"
/*
** These functions/routines
** 1. reads in and builds a linked list of rank attr/rank score pairs
** 2. expand a simple query into a paired list of complex/simple nodes.
*/
typedef struct rstype
{
struct rstype *next_rsnode ;
int rank ;
int score ;
char *rankstr ;
} rsnode, *refrsnode ;
refrsnode start_rsnode = NULL ;
/*
** Function/Routine prototypes
*/
static int search_for_score( char *rankstr ) ;
static char *search_for_rankstr( int rank ) ;
static int search_for_rank( int rank ) ;
static refrsnode set_rsnode( int rank, int score ) ;
static int read_zrank_file(ZebraHandle zh) ;
static void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ;
static void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ;
static Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop ) ;
static Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp ) ;
static Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 ) ;
static Z_Operand *set_operand( Z_Operand *thisop, int newattr ) ;
static int check_operand_attrs( Z_Operand *thisop ) ;
/*
** search_for_score()
** given a rank-string traverse down the linked list ;
** return its score if found otherwise return -1.
*/
int search_for_score( char *rankstr )
{
refrsnode node = start_rsnode ;
int rank ;
if ( sscanf( rankstr,"%d",&rank ) )
{
while ( node )
{
if ( node->rank == rank ) return node->score ;
node = node->next_rsnode ;
}
}
return -1 ;
}
/*
** search_for_rankstr()
** given a rank traverse down the linked list ;
** return its string if found otherwise return NULL.
*/
char *search_for_rankstr( int rank )
{
refrsnode node = start_rsnode ;
while ( node )
{
if ( node->rank == rank ) return node->rankstr ;
node = node->next_rsnode ;
}
return "rank" ;
}
/*
** search_for_rank()
** given a rank traverse down the linked list ;
** return 1 if found otherwise return 0.
*/
int search_for_rank( int rank )
{
refrsnode node = start_rsnode ;
while ( node )
{
if ( node->rank == rank ) return 1 ;
node = node->next_rsnode ;
}
return 0 ;
}
/*
** set_rsnode()
** given a rank and a score, build the rest of the rsnode structure.
*/
refrsnode set_rsnode( int rank, int score )
{
#define BUFFMAX 128
refrsnode node = (refrsnode)malloc( sizeof(rsnode) ) ;
char buff[BUFFMAX] ;
node->next_rsnode = NULL ;
node->rank = rank ;
node->score = score ;
sprintf( buff,"%d",rank ) ;
node->rankstr = (char *)malloc( strlen(buff)+1 ) ;
strcpy( node->rankstr, buff ) ;
return node ;
}
/*
** read_zrank_file(zh)
** read in the rankfile and build the rank/score linked list ;
** return 0 : can't open the zebra config. file
** return 0 : can't find the rankfile entry in the zebra config. file
** return 0 : can't open the rankfile itself
** return the number of distinct ranks read in.
*/
int read_zrank_file(ZebraHandle zh)
{
#define LINEMAX 256
char line[ LINEMAX ] ;
char rname[ LINEMAX ] ;
char *lineptr ;
FILE *ifd ;
int rank = 0 ;
int score = 0 ;
int numranks = 0 ;
/*
** open the zebra configuration file and look for the "rankfile:"
** entry which contains the path/name of the rankfile
*/
const char *rankfile = res_get_def(zh->res, "rankfile", 0);
const char *profilePath = res_get_def(zh->res, "profilePath",
DEFAULT_PROFILE_PATH);
if (!rankfile)
{
yaz_log(LOG_LOG, "rankfile entry not found in config file" ) ;
return 0 ;
}
ifd = yaz_path_fopen(profilePath, rankfile, "r" ) ;
if ( ifd )
{
while ( (lineptr = fgets( line,LINEMAX,ifd )) )
{
if ( sscanf( lineptr,"rankfile: %s", rname ) == 1 )
rankfile = rname ;
}
/*
** open the rankfile and read the rank/score pairs
** ignore 1016
** ignore duplicate ranks
** ignore ranks without +ve scores
*/
if ( rankfile )
{
if ( !(ifd = fopen( rankfile, "r" )) )
{
logf( LOG_LOG, "unable to open rankfile %s",rankfile ) ;
return 0;
}
while ( (lineptr = fgets( line,LINEMAX,ifd )) )
{
sscanf( lineptr,"%d : %d", &rank,&score ) ;
if ( ( score > 0 ) && ( rank != 1016 ) )
{
refrsnode new_rsnode ;
if ( search_for_rank( rank ) == 0 )
{
new_rsnode = set_rsnode( rank,score ) ;
new_rsnode->next_rsnode = start_rsnode ;
start_rsnode = new_rsnode ;
numranks++ ;
}
}
}
}
else
{
yaz_log(LOG_WARN|LOG_ERRNO, "unable to open config file (%s)",
rankfile);
}
}
return numranks ;
}
/*
** set_operand()
** build an operand "node" - hav to make a complete copy of thisop and
** then insert newattr in the appropriate place
**
*/
Z_Operand *set_operand( Z_Operand *thisop, int newattr )
{
Z_Operand *operand ;
Z_AttributesPlusTerm *attributesplusterm ;
Z_AttributeList *attributelist ;
Z_AttributeElement *attributeelement ;
Z_AttributeElement *attrptr ;
Z_AttributeElement **attrptrptr ;
Z_Term *term ;
Odr_oct *general ;
int i ;
operand = (Z_Operand *)
malloc( sizeof( Z_Operand ) ) ;
attributesplusterm = (Z_AttributesPlusTerm *)
malloc( sizeof( Z_AttributesPlusTerm ) ) ;
attributelist = (Z_AttributeList *)
malloc( sizeof( Z_AttributeList ) ) ;
attributeelement = (Z_AttributeElement *)
malloc( sizeof( Z_AttributeElement ) ) ;
term = (Z_Term *)
malloc( sizeof( Z_Term ) ) ;
general = (Odr_oct *)
malloc( sizeof( Odr_oct ) ) ;
operand->which = Z_Operand_APT ;
operand->u.attributesPlusTerm = attributesplusterm ;
attributesplusterm->attributes = attributelist ;
attributesplusterm->term = term ;
attributelist->num_attributes = thisop->u.attributesPlusTerm->
attributes->num_attributes ;
attrptr = (Z_AttributeElement *) malloc( sizeof(Z_AttributeElement) *
attributelist->num_attributes ) ;
attrptrptr = (Z_AttributeElement **) malloc( sizeof(Z_AttributeElement) *
attributelist->num_attributes ) ;
attributelist->attributes = attrptrptr ;
for ( i = 0 ; i < attributelist->num_attributes ; i++ )
{
*attrptr = *thisop->u.attributesPlusTerm->attributes->attributes[i] ;
attrptr->attributeType = (int *)malloc( sizeof(int *) ) ;
*attrptr->attributeType = *thisop->u.attributesPlusTerm->attributes->
attributes[i]->attributeType;
attrptr->value.numeric = (int *)malloc( sizeof(int *) ) ;
*attrptr->value.numeric = *thisop->u.attributesPlusTerm->attributes->
attributes[i]->value.numeric;
if ( (*attrptr->attributeType == 1) &&
(*attrptr->value.numeric == 1016) )
{
*attrptr->value.numeric = newattr ;
}
*attrptrptr++ = attrptr++ ;
}
term->which = Z_Term_general ;
term->u.general = general ;
general->len = thisop->u.attributesPlusTerm->term->u.general->len ;
general->size = thisop->u.attributesPlusTerm->term->u.general->size ;
general->buf = malloc( general->size ) ;
strcpy( general->buf,
thisop->u.attributesPlusTerm->term->u.general->buf ) ;
return operand ;
}
/*
** set_2operands()
** build a complex "node" with two (simple) operand "nodes" as branches
*/
Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 )
{
Z_Complex *top ;
Z_RPNStructure *s1 ;
Z_RPNStructure *s2 ;
Z_Operator *roperator ;
top = (Z_Complex *) malloc( sizeof( Z_Complex ) ) ;
s1 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
s2 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
roperator = (Z_Operator *) malloc( sizeof( Z_Operator ) ) ;
top->roperator = roperator ;
top->roperator->which = Z_Operator_or ;
top->roperator->u.op_or = odr_nullval() ;
top->s1 = s1 ;
top->s1->which = Z_RPNStructure_simple ;
top->s1->u.simple = sim1 ;
top->s2 = s2 ;
top->s2->which = Z_RPNStructure_simple ;
top->s2->u.simple = sim2 ;
return top ;
}
/*
** set_1complex_1operand()
** build a complex "node" with a complex "node" branch and an
** operand "node" branch
*/
Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp )
{
Z_Complex *top ;
Z_RPNStructure *s1 ;
Z_RPNStructure *s2 ;
Z_Operator *roperator ;
top = (Z_Complex *) malloc( sizeof( Z_Complex ) ) ;
s1 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
s2 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
roperator = (Z_Operator *) malloc( sizeof( Z_Operator ) ) ;
top->roperator = roperator ;
top->roperator->which = Z_Operator_or ;
top->roperator->u.op_or = odr_nullval() ;
top->s1 = s1 ;
top->s1->which = Z_RPNStructure_complex ;
top->s1->u.complex = comp ;
top->s2 = s2 ;
top->s2->which = Z_RPNStructure_simple ;
top->s2->u.simple = simp ;
return top ;
}
/*
** expand_query()
** expand a simple query into a number of complex queries
*/
Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop )
{
Z_Complex *top ;
int numattrs = 0 ;
/*
** start_rsnode will be set if we have already read the rankfile
** so don't bother again but we need to know the number of attributes
** in the linked list so traverse it again to find out how many.
*/
if ( start_rsnode )
{
refrsnode node = start_rsnode ;
while ( node )
{
numattrs++ ;
node = node->next_rsnode ;
}
}
/*
** only expand the query if there are 2 or more attributes
*/
if ( numattrs >= 2 )
{
refrsnode node = start_rsnode ;
int attr1 ;
int attr2 ;
attr1 = node->rank ; node = node->next_rsnode ;
attr2 = node->rank ; node = node->next_rsnode ;
/*
** this is the special case and has to be done first because the
** last complex node in the linear list has two simple nodes whereas
** all the others have a complex and a simple.
*/
top = set_2operands( set_operand( thisop,attr1 ),
set_operand( thisop,attr2 ) ) ;
/*
** do the rest as complex/simple pairs
*/
while ( node )
{
attr1 = node->rank ; node = node->next_rsnode ;
top = set_1complex_1operand( top,set_operand( thisop,attr1 ) ) ;
}
/*
** finally add the 1016 rank attribute at the top of the tree
*/
top = set_1complex_1operand( top,set_operand( thisop,1016 ) ) ;
return top ;
}
else return NULL ;
}
/*
** check_operand_attrs()
** loop through the attributes of a particular operand
** return 1 if (type==1 && value==1016) && (type==2 && value==102)
** otherwise return 0
*/
int check_operand_attrs( Z_Operand *thisop )
{
Z_AttributeElement *attrptr ;
int cond1 = 0 ;
int cond2 = 0 ;
int numattrs ;
int i ;
numattrs = thisop->u.attributesPlusTerm->attributes->num_attributes ;
for ( i = 0 ; i < numattrs ; i++ )
{
attrptr = thisop->u.attributesPlusTerm->attributes->attributes[i] ;
if ( (*attrptr->attributeType == 1) &&
(*attrptr->value.numeric == 1016) )
cond1 = 1 ;
if ( (*attrptr->attributeType == 2) &&
(*attrptr->value.numeric == 102) )
cond2 = 1 ;
}
return (cond1 & cond2) ;
}
/*
** convert_simple2complex()
**
*/
void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct )
{
Z_Complex *complex = NULL ;
Z_Operand *operand = rpnstruct->u.simple ;
if ( check_operand_attrs( operand ) )
{
complex = expand_query(zh, operand ) ;
if ( complex )
{
/*
** Everything is complete so replace the original
** operand with the newly built complex structure
** This is it ... no going back!!
*/
rpnstruct->which = Z_RPNStructure_complex ;
rpnstruct->u.complex = complex ;
}
}
}
/*
** walk_complex_query()
** recursively traverse the tree expanding any simple queries we find
*/
void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct )
{
if ( rpnstruct->which == Z_RPNStructure_simple )
{
convert_simple2complex(zh, rpnstruct ) ;
}
else
{
walk_complex_query(zh, rpnstruct->u.complex->s1 ) ;
walk_complex_query(zh, rpnstruct->u.complex->s2 ) ;
}
}
void zebra_livcode_transform(ZebraHandle zh, Z_RPNQuery *query)
{
/*
** Got a search request,
** 1. if it is a simple query, see if it suitable for expansion
** i.e. the attributes are of the form ...
** (type==1 && value==1016) && (type==2 && value==102)
** or
** 2. if it is complex, traverse the complex query tree and expand
** any simples simples as above
*/
#if LIV_CODE
Z_RPNStructure *rpnstruct = query->RPNStructure ;
if ( rpnstruct->which == Z_RPNStructure_simple )
{
convert_simple2complex(zh, rpnstruct ) ;
}
else if ( rpnstruct->which == Z_RPNStructure_complex )
{
walk_complex_query(zh, rpnstruct ) ;
}
#endif
}
struct rank_class_info {
int dummy;
};
struct rank_term_info {
int local_occur;
int global_occur;
int global_inv;
int rank_flag;
};
struct rank_set_info {
int last_pos;
int no_entries;
int no_rank_entries;
struct rank_term_info *entries;
};
static int log2_int (unsigned g)
{
int n = 0;
while ((g = g>>1))
n++;
return n;
}
/*
* create: Creates/Initialises this rank handler. This routine is
* called exactly once. The routine returns the class_handle.
*/
static void *create (ZebraHandle zh)
{
struct rank_class_info *ci = (struct rank_class_info *)
xmalloc (sizeof(*ci));
logf (LOG_DEBUG, "livrank create");
read_zrank_file(zh) ;
return ci;
}
/*
* destroy: Destroys this rank handler. This routine is called
* when the handler is no longer needed - i.e. when the server
* dies. The class_handle was previously returned by create.
*/
static void destroy (struct zebra_register *reg, void *class_handle)
{
struct rank_class_info *ci = (struct rank_class_info *) class_handle;
logf (LOG_DEBUG, "livrank destroy");
xfree (ci);
}
/*
* begin: Prepares beginning of "real" ranking. Called once for
* each result set. The returned handle is a "set handle" and
* will be used in each of the handlers below.
*/
static void *begin (struct zebra_register *reg, void *class_handle, RSET rset)
{
struct rank_set_info *si = (struct rank_set_info *) xmalloc (sizeof(*si));
int i;
logf (LOG_DEBUG, "livrank begin");
si->no_entries = rset->no_rset_terms;
si->no_rank_entries = 0;
si->entries = (struct rank_term_info *)
xmalloc (sizeof(*si->entries)*si->no_entries);
for (i = 0; i < si->no_entries; i++)
{
const char *flags = rset->rset_terms[i]->flags;
int g = rset->rset_terms[i]->nn;
const char *cp = strstr(flags, ",u=");
si->entries[i].rank_flag = 1;
if (cp)
{
char *t = search_for_rankstr(atoi(cp+3));
if (t)
si->entries[i].rank_flag = search_for_score(t) ;
}
if ( si->entries[i].rank_flag )
(si->no_rank_entries)++;
si->entries[i].local_occur = 0;
si->entries[i].global_occur = g;
si->entries[i].global_inv = 32 - log2_int (g);
logf (LOG_DEBUG, "-------- %d ------", 32 - log2_int (g));
}
return si;
}
/*
* end: Terminates ranking process. Called after a result set
* has been ranked.
*/
static void end (struct zebra_register *reg, void *set_handle)
{
struct rank_set_info *si = (struct rank_set_info *) set_handle;
logf (LOG_DEBUG, "livrank end");
xfree (si->entries);
xfree (si);
}
/*
* add: Called for each word occurence in a result set. This routine
* should be as fast as possible. This routine should "incrementally"
* update the score.
*/
static void add (void *set_handle, int seqno, int term_index)
{
struct rank_set_info *si = (struct rank_set_info *) set_handle;
logf (LOG_DEBUG, "rank-1 add seqno=%d term_index=%d", seqno, term_index);
si->last_pos = seqno;
si->entries[term_index].local_occur++;
}
/*
* calc: Called for each document in a result. This handler should
* produce a score based on previous call(s) to the add handler. The
* score should be between 0 and 1000. If score cannot be obtained
* -1 should be returned.
*/
static int calc (void *set_handle, int sysno)
{
int i, lo, divisor, score = 0;
struct rank_set_info *si = (struct rank_set_info *) set_handle;
logf (LOG_DEBUG, "livrank calc sysno=%d", sysno);
if (!si->no_rank_entries)
return -1;
for (i = 0; i < si->no_entries; i++)
{
score += si->entries[i].local_occur * si->entries[i].rank_flag ;
}
for (i = 0; i < si->no_entries; i++)
if (si->entries[i].rank_flag && (lo = si->entries[i].local_occur))
score += (8+log2_int (lo)) * si->entries[i].global_inv;
score *= 34;
divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries));
score = score / divisor;
if (score > 1000)
score = 1000;
for (i = 0; i < si->no_entries; i++)
si->entries[i].local_occur = 0;
return score;
}
/*
* Pseudo-meta code with sequence of calls as they occur in a
* server. Handlers are prefixed by --:
*
* server init
* -- create
* foreach search
* rank result set
* -- begin
* foreach record
* foreach word
* -- add
* -- calc
* -- end
* -- destroy
* server close
*/
static struct rank_control rank_control = {
"livrank",
create,
destroy,
begin,
end,
calc,
add,
};
struct rank_control *rankliv_class = &rank_control;
syntax highlighted by Code2HTML, v. 0.9.1