/*
  
The University of Liverpool

Modifications to Zebra 1.1 / YAZ 1.7 to enable ranking
by attribute weight.

Copyright (c) 2001-2002 The University of Liverpool.  All
rights reserved.

Licensed under the Academic Free License version 1.1.
http://opensource.org/licenses/academic.php

$Id: livcode.c,v 1.1 2003/03/26 16:41:48 adam Exp $

*/

#include <stdlib.h>
#include <stdio.h>
#ifdef WIN32
#include <process.h>
#else
#include <unistd.h>
#endif
#include <assert.h>

#include "index.h"
#include "zserver.h"

/*
** These functions/routines 
** 1. reads in and builds a linked list of rank attr/rank score pairs
** 2. expand a simple query into a paired list of complex/simple nodes.
*/

typedef struct rstype
{
    struct rstype *next_rsnode ;
    int rank ;
    int score ;
    char *rankstr ;
} rsnode, *refrsnode ;

refrsnode start_rsnode = NULL ;

/*
** Function/Routine prototypes
*/
static int        search_for_score( char *rankstr ) ;
static char      *search_for_rankstr( int rank ) ;
static int        search_for_rank( int rank ) ;
static refrsnode  set_rsnode( int rank, int score ) ;
static int        read_zrank_file(ZebraHandle zh) ;

static void       convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ;
static void       walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ;
static Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop ) ;
static Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp ) ;
static Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 ) ;
static Z_Operand *set_operand( Z_Operand *thisop, int newattr ) ;
static int        check_operand_attrs( Z_Operand *thisop ) ;

/*
** search_for_score()
** given a rank-string traverse down the linked list ;
** return its score if found otherwise return -1.
*/
int search_for_score( char *rankstr )
{
    refrsnode node = start_rsnode ;
    int rank ;
  
    if ( sscanf( rankstr,"%d",&rank ) )
    {
        while ( node )
        {
            if ( node->rank == rank ) return node->score ;
            node = node->next_rsnode ;
        }
    }
    return -1 ;
}

/*
** search_for_rankstr()
** given a rank traverse down the linked list ; 
** return its string if found otherwise return NULL.
*/
char *search_for_rankstr( int rank )
{
    refrsnode node = start_rsnode ;
  
    while ( node )
    {
        if ( node->rank == rank ) return node->rankstr ;
        node = node->next_rsnode ;
    }
    return "rank" ;
}

/*
** search_for_rank()
** given a rank traverse down the linked list ;
** return 1 if found otherwise return 0.
*/
int search_for_rank( int rank )
{
    refrsnode node = start_rsnode ;

    while ( node )
    {
        if ( node->rank == rank ) return 1 ;
        node = node->next_rsnode ;
    }
    return 0 ;
}

/*
** set_rsnode()
** given a rank and a score, build the rest of the rsnode structure.
*/
refrsnode set_rsnode( int rank, int score )
{
#define BUFFMAX 128
    refrsnode node = (refrsnode)malloc( sizeof(rsnode) ) ;
    char buff[BUFFMAX] ;

    node->next_rsnode = NULL  ;
    node->rank        = rank  ;
    node->score       = score ;

    sprintf( buff,"%d",rank ) ;
    node->rankstr     = (char *)malloc( strlen(buff)+1 ) ;
    strcpy( node->rankstr, buff ) ;

    return node ;
}

/*
** read_zrank_file(zh)
** read in the rankfile and build the rank/score linked list ; 
** return 0 : can't open the zebra config. file
** return 0 : can't find the rankfile entry in the zebra config. file
** return 0 : can't open the rankfile itself
** return the number of distinct ranks read in.
*/
int read_zrank_file(ZebraHandle zh)
{
#define LINEMAX 256
    char line[ LINEMAX ] ;
    char rname[ LINEMAX ] ;
    char *lineptr ;
    FILE *ifd ;
    int rank = 0 ;
    int score = 0 ;
    int numranks = 0 ;
 
    /*
    ** open the zebra configuration file and look for the "rankfile:"
    ** entry which contains the path/name of the rankfile
    */
    
    const char *rankfile = res_get_def(zh->res, "rankfile", 0);
    const char *profilePath = res_get_def(zh->res, "profilePath",
                                          DEFAULT_PROFILE_PATH);

    if (!rankfile)
    {
        yaz_log(LOG_LOG, "rankfile entry not found in config file" ) ;
        return 0 ;
    }
    ifd = yaz_path_fopen(profilePath, rankfile, "r" ) ;
    if ( ifd )
    {
        while ( (lineptr = fgets( line,LINEMAX,ifd )) )
        {
            if ( sscanf( lineptr,"rankfile: %s", rname ) == 1 )
                rankfile  = rname ;
        }

        /*
        ** open the rankfile and read the rank/score pairs
        ** ignore 1016 
        ** ignore duplicate ranks 
        ** ignore ranks without +ve scores
        */
        if ( rankfile )
        { 
            if ( !(ifd = fopen( rankfile, "r" )) )
            {
                logf( LOG_LOG, "unable to open rankfile %s",rankfile ) ;
                return 0;
            }
     
            while ( (lineptr = fgets( line,LINEMAX,ifd )) )
            {
                sscanf( lineptr,"%d : %d", &rank,&score ) ;
                if ( ( score > 0 ) && ( rank != 1016 ) )
                {
                    refrsnode new_rsnode ;
    
                    if ( search_for_rank( rank ) == 0 )
                    {
                        new_rsnode              = set_rsnode( rank,score ) ;
                        new_rsnode->next_rsnode = start_rsnode ;
                        start_rsnode            = new_rsnode ;
                        numranks++ ;
                    }
                }
            }
        }
        else 
        {
            yaz_log(LOG_WARN|LOG_ERRNO, "unable to open config file (%s)",
                    rankfile);
        }
    }
    return numranks ;
}

/*
** set_operand() 
** build an operand "node" - hav to make a complete copy of thisop and 
** then insert newattr in the appropriate place
** 
*/
Z_Operand *set_operand( Z_Operand *thisop, int newattr )
{
    Z_Operand            *operand ;
    Z_AttributesPlusTerm *attributesplusterm ;
    Z_AttributeList      *attributelist ;
    Z_AttributeElement   *attributeelement ;
    Z_AttributeElement   *attrptr ;
    Z_AttributeElement   **attrptrptr ;
    Z_Term               *term ;
    Odr_oct              *general ;
    int i ;

    operand            = (Z_Operand *) 
                         malloc( sizeof( Z_Operand ) ) ;
    attributesplusterm = (Z_AttributesPlusTerm *)
                         malloc( sizeof( Z_AttributesPlusTerm ) ) ;
    attributelist      = (Z_AttributeList *)
                         malloc( sizeof( Z_AttributeList ) ) ;
    attributeelement   = (Z_AttributeElement *)
                         malloc( sizeof( Z_AttributeElement ) ) ;
    term               = (Z_Term *)
                         malloc( sizeof( Z_Term ) ) ;
    general            = (Odr_oct *)
                         malloc( sizeof( Odr_oct ) ) ;

    operand->which                 = Z_Operand_APT ;
    operand->u.attributesPlusTerm  = attributesplusterm ;

    attributesplusterm->attributes = attributelist ;
    attributesplusterm->term       = term ;

    attributelist->num_attributes  = thisop->u.attributesPlusTerm->
                                     attributes->num_attributes ;

    attrptr = (Z_AttributeElement *) malloc( sizeof(Z_AttributeElement) *
                                             attributelist->num_attributes ) ;
    attrptrptr = (Z_AttributeElement **) malloc( sizeof(Z_AttributeElement) *
                                           attributelist->num_attributes ) ; 

    attributelist->attributes = attrptrptr ;

    for ( i = 0 ; i < attributelist->num_attributes ; i++ )
    {
        *attrptr = *thisop->u.attributesPlusTerm->attributes->attributes[i] ;

         attrptr->attributeType  = (int *)malloc( sizeof(int *) ) ;
        *attrptr->attributeType  = *thisop->u.attributesPlusTerm->attributes->
                                    attributes[i]->attributeType;

         attrptr->value.numeric  = (int *)malloc( sizeof(int *) ) ;
        *attrptr->value.numeric  = *thisop->u.attributesPlusTerm->attributes->
                                    attributes[i]->value.numeric;

        if ( (*attrptr->attributeType == 1) && 
             (*attrptr->value.numeric == 1016) )
        {
            *attrptr->value.numeric = newattr ;
        }
        *attrptrptr++ = attrptr++ ;
    }

    term->which     = Z_Term_general ;
    term->u.general = general ;

    general->len  = thisop->u.attributesPlusTerm->term->u.general->len ;
    general->size = thisop->u.attributesPlusTerm->term->u.general->size ;
    general->buf  = malloc( general->size ) ;
    strcpy( general->buf,
            thisop->u.attributesPlusTerm->term->u.general->buf ) ;

    return operand ;
}

/*
** set_2operands()
** build a complex "node" with two (simple) operand "nodes" as branches
*/
Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 )
{
    Z_Complex *top        ;
    Z_RPNStructure *s1    ;
    Z_RPNStructure *s2    ;
    Z_Operator *roperator ;

    top       = (Z_Complex *)     malloc( sizeof( Z_Complex      ) ) ;
    s1        = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
    s2        = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
    roperator = (Z_Operator *)    malloc( sizeof( Z_Operator     ) ) ;

    top->roperator          = roperator     ;
    top->roperator->which   = Z_Operator_or ;
    top->roperator->u.op_or = odr_nullval() ;

    top->s1                 = s1                    ;
    top->s1->which          = Z_RPNStructure_simple ;
    top->s1->u.simple       = sim1                  ;

    top->s2                 = s2                    ;
    top->s2->which          = Z_RPNStructure_simple ;
    top->s2->u.simple       = sim2                  ;

    return top ;
}

/*
** set_1complex_1operand()
** build a complex "node" with a complex "node" branch and an 
** operand "node" branch
*/
Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp )
{
    Z_Complex *top        ;
    Z_RPNStructure *s1    ;
    Z_RPNStructure *s2    ;
    Z_Operator *roperator ;

    top       = (Z_Complex *)     malloc( sizeof( Z_Complex      ) ) ;
    s1        = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
    s2        = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
    roperator = (Z_Operator *)    malloc( sizeof( Z_Operator     ) ) ;

    top->roperator          = roperator     ;
    top->roperator->which   = Z_Operator_or ;
    top->roperator->u.op_or = odr_nullval() ;

    top->s1                 = s1                     ;
    top->s1->which          = Z_RPNStructure_complex ;
    top->s1->u.complex      = comp                   ;

    top->s2                 = s2                     ;
    top->s2->which          = Z_RPNStructure_simple  ;
    top->s2->u.simple       = simp                   ;

    return top ;
}

/*
** expand_query()
** expand a simple query into a number of complex queries
*/
Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop )
{
    Z_Complex *top ;
    int numattrs = 0 ;

    /*
    ** start_rsnode will be set if we have already read the rankfile 
    ** so don't bother again but we need to know the number of attributes
    ** in the linked list so traverse it again to find out how many.
    */
    if ( start_rsnode )
    {
        refrsnode node = start_rsnode ;
        while ( node )
        {
            numattrs++ ;
            node = node->next_rsnode ;
        }
    } 

    /*
    ** only expand the query if there are 2 or more attributes 
    */
    if ( numattrs >= 2 )
    {
        refrsnode node = start_rsnode ;
        int attr1 ;
        int attr2 ;

        attr1 = node->rank ; node = node->next_rsnode ;
        attr2 = node->rank ; node = node->next_rsnode ;

        /*
        ** this is the special case and has to be done first because the 
        ** last complex node in the linear list has two simple nodes whereas 
        ** all the others have a complex and a simple.
        */
        top = set_2operands( set_operand( thisop,attr1 ),
                             set_operand( thisop,attr2 ) ) ;

        /*
        ** do the rest as complex/simple pairs
        */
        while ( node )
        {
            attr1 = node->rank ; node = node->next_rsnode ;
            top = set_1complex_1operand( top,set_operand( thisop,attr1 ) ) ;
        }
        /*
        ** finally add the 1016 rank attribute at the top of the tree
        */
        top = set_1complex_1operand( top,set_operand( thisop,1016 ) ) ;

        return top ;
    }
    else return NULL ;
}

/*
** check_operand_attrs()
** loop through the attributes of a particular operand 
** return 1 if (type==1 && value==1016) && (type==2 && value==102) 
** otherwise return 0
*/
int check_operand_attrs( Z_Operand *thisop ) 
{
    Z_AttributeElement *attrptr ;
    int cond1 = 0 ;
    int cond2 = 0 ;
    int numattrs ;
    int i ;

    numattrs = thisop->u.attributesPlusTerm->attributes->num_attributes ;

    for ( i = 0 ; i < numattrs ; i++ )
    {
        attrptr = thisop->u.attributesPlusTerm->attributes->attributes[i] ;

        if ( (*attrptr->attributeType == 1) && 
             (*attrptr->value.numeric == 1016) )
            cond1 = 1 ;

        if ( (*attrptr->attributeType == 2) && 
             (*attrptr->value.numeric == 102) )
            cond2 = 1 ;
    }

    return (cond1 & cond2) ;
}

/*
** convert_simple2complex()
** 
*/
void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct )
{
    Z_Complex *complex = NULL ;                                         
    Z_Operand *operand = rpnstruct->u.simple ;                          

    if ( check_operand_attrs( operand ) )
    {
        complex = expand_query(zh, operand ) ;

        if ( complex )
        {
            /*
            ** Everything is complete so replace the original
            ** operand with the newly built complex structure
            ** This is it ... no going back!!
            */
            rpnstruct->which     = Z_RPNStructure_complex ;
            rpnstruct->u.complex = complex ;
        }
    }                                                                       
}

/*
** walk_complex_query()
** recursively traverse the tree expanding any simple queries we find
*/
void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct )
{
    if ( rpnstruct->which == Z_RPNStructure_simple )
    {
        convert_simple2complex(zh, rpnstruct ) ;
    }
    else
    {
        walk_complex_query(zh, rpnstruct->u.complex->s1 ) ;
        walk_complex_query(zh, rpnstruct->u.complex->s2 ) ;
    }
}

void zebra_livcode_transform(ZebraHandle zh, Z_RPNQuery *query)
{
    /*
    ** Got a search request,
    ** 1. if it is a simple query, see if it suitable for expansion
    **    i.e. the attributes are of the form ...
    **    (type==1 && value==1016) && (type==2 && value==102)
    ** or
    ** 2. if it is complex, traverse the complex query tree and expand
    **    any simples simples as above
    */
#if LIV_CODE
    Z_RPNStructure *rpnstruct = query->RPNStructure ;
    
    if ( rpnstruct->which == Z_RPNStructure_simple )
    {
        convert_simple2complex(zh, rpnstruct ) ;
    }
    else if ( rpnstruct->which == Z_RPNStructure_complex )
    {
        walk_complex_query(zh, rpnstruct ) ;
    }
#endif
}


struct rank_class_info {
    int dummy;
};

struct rank_term_info {
    int local_occur;
    int global_occur;
    int global_inv;
    int rank_flag;
};

struct rank_set_info {
    int last_pos;
    int no_entries;
    int no_rank_entries;
    struct rank_term_info *entries;
};

static int log2_int (unsigned g)
{
    int n = 0;
    while ((g = g>>1))
	n++;
    return n;
}

/*
 * create: Creates/Initialises this rank handler. This routine is 
 *  called exactly once. The routine returns the class_handle.
 */
static void *create (ZebraHandle zh)
{
    struct rank_class_info *ci = (struct rank_class_info *)
	xmalloc (sizeof(*ci));

    logf (LOG_DEBUG, "livrank create");

    read_zrank_file(zh) ;

    return ci;
}

/*
 * destroy: Destroys this rank handler. This routine is called
 *  when the handler is no longer needed - i.e. when the server
 *  dies. The class_handle was previously returned by create.
 */
static void destroy (struct zebra_register *reg, void *class_handle)
{
    struct rank_class_info *ci = (struct rank_class_info *) class_handle;

    logf (LOG_DEBUG, "livrank destroy");
    xfree (ci);
}


/*
 * begin: Prepares beginning of "real" ranking. Called once for
 *  each result set. The returned handle is a "set handle" and
 *  will be used in each of the handlers below.
 */
static void *begin (struct zebra_register *reg, void *class_handle, RSET rset)
{
    struct rank_set_info *si = (struct rank_set_info *) xmalloc (sizeof(*si));
    int i;

    logf (LOG_DEBUG, "livrank begin");
    si->no_entries = rset->no_rset_terms;
    si->no_rank_entries = 0;
    si->entries = (struct rank_term_info *)
	xmalloc (sizeof(*si->entries)*si->no_entries);
    for (i = 0; i < si->no_entries; i++)
    {
        const char *flags = rset->rset_terms[i]->flags;
	int g = rset->rset_terms[i]->nn;
        const char *cp = strstr(flags, ",u=");

        si->entries[i].rank_flag = 1;
        if (cp)
        {
            char *t = search_for_rankstr(atoi(cp+3));
            if (t)
                si->entries[i].rank_flag = search_for_score(t) ;
        }
        if ( si->entries[i].rank_flag )
            (si->no_rank_entries)++;

	si->entries[i].local_occur = 0;
	si->entries[i].global_occur = g;
	si->entries[i].global_inv = 32 - log2_int (g);
	logf (LOG_DEBUG, "-------- %d ------", 32 - log2_int (g));
    }
    return si;
}

/*
 * end: Terminates ranking process. Called after a result set
 *  has been ranked.
 */
static void end (struct zebra_register *reg, void *set_handle)
{
    struct rank_set_info *si = (struct rank_set_info *) set_handle;
    logf (LOG_DEBUG, "livrank end");
    xfree (si->entries);
    xfree (si);
}

/*
 * add: Called for each word occurence in a result set. This routine
 *  should be as fast as possible. This routine should "incrementally"
 *  update the score.
 */
static void add (void *set_handle, int seqno, int term_index)
{
    struct rank_set_info *si = (struct rank_set_info *) set_handle;
    logf (LOG_DEBUG, "rank-1 add seqno=%d term_index=%d", seqno, term_index);
    si->last_pos = seqno;
    si->entries[term_index].local_occur++;
}

/*
 * calc: Called for each document in a result. This handler should 
 *  produce a score based on previous call(s) to the add handler. The
 *  score should be between 0 and 1000. If score cannot be obtained
 *  -1 should be returned.
 */
static int calc (void *set_handle, int sysno)
{
    int i, lo, divisor, score = 0;
    struct rank_set_info *si = (struct rank_set_info *) set_handle;

    logf (LOG_DEBUG, "livrank calc sysno=%d", sysno);

    if (!si->no_rank_entries)
	return -1;
    for (i = 0; i < si->no_entries; i++)
    {
        score += si->entries[i].local_occur * si->entries[i].rank_flag ;
    }
    for (i = 0; i < si->no_entries; i++)
	if (si->entries[i].rank_flag && (lo = si->entries[i].local_occur))
	    score += (8+log2_int (lo)) * si->entries[i].global_inv;
    score *= 34;
    divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries));
    score = score / divisor;
    if (score > 1000)
	score = 1000;
    for (i = 0; i < si->no_entries; i++)
	si->entries[i].local_occur = 0;
    return score;
}

/*
 * Pseudo-meta code with sequence of calls as they occur in a
 * server. Handlers are prefixed by --:
 *
 *     server init
 *     -- create
 *     foreach search
 *        rank result set
 *        -- begin
 *        foreach record
 *           foreach word
 *              -- add
 *           -- calc
 *        -- end
 *     -- destroy
 *     server close
 */

static struct rank_control rank_control = {
    "livrank",
    create,
    destroy,
    begin,
    end,
    calc,
    add,
};
 
struct rank_control *rankliv_class = &rank_control;


syntax highlighted by Code2HTML, v. 0.9.1