/* $Id: zsets.c,v 1.49.2.3 2005/01/21 11:35:49 adam Exp $
   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005
   Index Data Aps

This file is part of the Zebra server.

Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with Zebra; see the file LICENSE.zebra.  If not, write to the
Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
*/


#include <stdio.h>
#include <assert.h>
#ifdef WIN32
#include <io.h>
#else
#include <unistd.h>
#endif

#include "index.h"
#include <rstemp.h>

#define SORT_IDX_ENTRYSIZE 64
#define ZSET_SORT_MAX_LEVEL 3

struct zebra_set_term_entry {
    int reg_type;
    char *db;
    int set;
    int use;
    char *term;
};
struct zebra_set {
    char *name;
    RSET rset;
    NMEM nmem;
    int hits;
    int num_bases;
    char **basenames;
    Z_RPNQuery *rpn;
    struct zset_sort_info *sort_info;
    struct zebra_set_term_entry *term_entries;
    int term_entries_max;
    struct zebra_set *next;
    int locked;

    int cache_position;  /* last position */
    RSFD cache_rfd;      /* rfd (NULL if not existing) */
    int cache_psysno;
};

struct zset_sort_entry {
    int sysno;
    int score;
    char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE];
};

struct zset_sort_info {
    int max_entries;
    int num_entries;
    struct zset_sort_entry *all_entries;
    struct zset_sort_entry **entries;
};

ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m,
			  Z_RPNQuery *rpn, int num_bases,
                          char **basenames, 
			  const char *setname)
{
    ZebraSet zebraSet;
    int i;

    zh->errCode = 0;
    zh->errString = NULL;
    zh->hits = 0;

    zebraSet = resultSetAdd (zh, setname, 1);
    if (!zebraSet)
	return 0;
    zebraSet->locked = 1;
    zebraSet->rpn = 0;
    zebraSet->nmem = m;
    
    zebraSet->num_bases = num_bases;
    zebraSet->basenames = 
        nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
    for (i = 0; i<num_bases; i++)
        zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]);


    zebraSet->rset = rpn_search (zh, zebraSet->nmem, rpn,
                                 zebraSet->num_bases,
		                 zebraSet->basenames, zebraSet->name,
				 zebraSet);
    zh->hits = zebraSet->hits;
    if (zebraSet->rset)
        zebraSet->rpn = rpn;
    zebraSet->locked = 0;
    return zebraSet;
}

void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,
		       const char *db, int set,
		       int use, const char *term)
{
    if (!s->nmem)
	s->nmem = nmem_create ();
    if (!s->term_entries)
    {
	int i;
	s->term_entries_max = 1000;
	s->term_entries =
	    nmem_malloc (s->nmem, s->term_entries_max * 
			 sizeof(*s->term_entries));
	for (i = 0; i < s->term_entries_max; i++)
	    s->term_entries[i].term = 0;
    }
    if (s->hits < s->term_entries_max)
    {
	s->term_entries[s->hits].reg_type = reg_type;
	s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
	s->term_entries[s->hits].set = set;
	s->term_entries[s->hits].use = use;
	s->term_entries[s->hits].term = nmem_strdup (s->nmem, term);
    }
    (s->hits)++;
}


int zebra_resultSetTerms (ZebraHandle zh, const char *setname, 
                          int no, int *count, 
                          int *type, char *out, size_t *len)
{
    ZebraSet s = resultSetGet (zh, setname);
    int no_max = 0;

    if (count)
        *count = 0;
    if (!s || !s->rset)
        return 0;
    no_max = s->rset->no_rset_terms;
    if (no < 0 || no >= no_max)
        return 0;
    if (count)
        *count = s->rset->rset_terms[no]->count;
    if (type)
        *type = s->rset->rset_terms[no]->type;
    
    if (out)
    {
        char *inbuf = s->rset->rset_terms[no]->name;
        size_t inleft = strlen(inbuf);
        size_t outleft = *len - 1;
	int converted = 0;

        if (zh->iconv_from_utf8 != 0)
        {
            char *outbuf = out;
            size_t ret;
            
            ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
                        &outbuf, &outleft);
            if (ret == (size_t)(-1))
                *len = 0;
            else
                *len = outbuf - out;
	    converted = 1;
        }
        if (!converted)
        {
            if (inleft > outleft)
                inleft = outleft;
            *len = inleft;
            memcpy (out, inbuf, *len);
        }
        out[*len] = 0;
    }
    return no_max;
}


ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov)
{
    ZebraSet s;
    int i;

    for (s = zh->sets; s; s = s->next)
        if (!strcmp (s->name, name))
	    break;
    if (s)
    {
	yaz_log (LOG_DEBUG, "updating result set %s", name);
	if (!ov || s->locked)
	    return NULL;
	if (s->rset)
	{
	    if (s->cache_rfd)
		rset_close(s->rset, s->cache_rfd);
	    rset_delete (s->rset);
	}
	if (s->nmem)
	    nmem_destroy (s->nmem);
    }
    else
    {
	const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");

	yaz_log (LOG_DEBUG, "adding result set %s", name);
	s = (ZebraSet) xmalloc (sizeof(*s));
	s->next = zh->sets;
	zh->sets = s;
	s->name = (char *) xmalloc (strlen(name)+1);
	strcpy (s->name, name);

	s->sort_info = (struct zset_sort_info *)
	    xmalloc (sizeof(*s->sort_info));
	s->sort_info->max_entries = atoi(sort_max_str);
	if (s->sort_info->max_entries < 2)
	    s->sort_info->max_entries = 2;

	s->sort_info->entries = (struct zset_sort_entry **)
	    xmalloc (sizeof(*s->sort_info->entries) *
		     s->sort_info->max_entries);
	s->sort_info->all_entries = (struct zset_sort_entry *)
	    xmalloc (sizeof(*s->sort_info->all_entries) *
		     s->sort_info->max_entries);
	for (i = 0; i < s->sort_info->max_entries; i++)
	    s->sort_info->entries[i] = s->sort_info->all_entries + i;
    }
    s->locked = 0;
    s->term_entries = 0;
    s->hits = 0;
    s->rset = 0;
    s->nmem = 0;
    s->rpn = 0;
    s->cache_position = 0;
    s->cache_rfd = 0;
    return s;
}

ZebraSet resultSetGet (ZebraHandle zh, const char *name)
{
    ZebraSet s;

    for (s = zh->sets; s; s = s->next)
        if (!strcmp (s->name, name))
        {
            if (!s->term_entries && !s->rset && s->rpn)
            {
                NMEM nmem = nmem_create ();
                yaz_log (LOG_LOG, "research %s", name);
                s->rset =
                    rpn_search (zh, nmem, s->rpn, s->num_bases,
				s->basenames, s->name, s);
                nmem_destroy (nmem);
            }
            return s;
        }
    return NULL;
}

void resultSetInvalidate (ZebraHandle zh)
{
    ZebraSet s = zh->sets;
    
    for (; s; s = s->next)
    {
        if (s->rset)
	{
	    if (s->cache_rfd)
		rset_close(s->rset, s->cache_rfd);
            rset_delete (s->rset);
	}
        s->rset = 0;
	s->cache_rfd = 0;
	s->cache_position = 0;
    }
}

void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses)
{
    ZebraSet * ss = &zh->sets;
    int i;
    
    if (statuses)
	for (i = 0; i<num; i++)
	    statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
    zh->errCode = 0;
    zh->errString = NULL;
    while (*ss)
    {
	int i = -1;
	ZebraSet s = *ss;
	if (num >= 0)
	{
	    for (i = 0; i<num; i++)
		if (!strcmp (s->name, names[i]))
		{
		    if (statuses)
			statuses[i] = Z_DeleteStatus_success;
		    i = -1;
		    break;
		}
	}
	if (i < 0)
	{
	    *ss = s->next;
	    
	    xfree (s->sort_info->all_entries);
	    xfree (s->sort_info->entries);
	    xfree (s->sort_info);
	    
	    if (s->nmem)
		nmem_destroy (s->nmem);
	    if (s->rset)
	    {
		if (s->cache_rfd)
		    rset_close (s->rset, s->cache_rfd);
		rset_delete (s->rset);
	    }
	    xfree (s->name);
	    xfree (s);
	}
	else
	    ss = &s->next;
    }
}

ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, 
			       int num, int *positions)
{
    ZebraSet sset;
    ZebraPosSet sr = 0;
    RSET rset;
    int i;
    struct zset_sort_info *sort_info;

    if (!(sset = resultSetGet (zh, name)))
        return NULL;
    if (!(rset = sset->rset))
    {
	if (!sset->term_entries)
	    return 0;
	sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
	for (i = 0; i<num; i++)
	{
	    sr[i].sysno = 0;
	    sr[i].score = -1;
	    sr[i].term = 0;
	    sr[i].db = 0;

	    if (positions[i] <= sset->term_entries_max)
	    {
		sr[i].term = sset->term_entries[positions[i]-1].term;
		sr[i].db = sset->term_entries[positions[i]-1].db;
	    }
	}
    }
    else
    {
	sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
	for (i = 0; i<num; i++)
	{
	    sr[i].sysno = 0;
	    sr[i].score = -1;
	    sr[i].term = 0;
	    sr[i].db = 0;
	}
	sort_info = sset->sort_info;
	if (sort_info)
	{
	    int position;
	    
	    for (i = 0; i<num; i++)
	    {
		position = positions[i];
		if (position > 0 && position <= sort_info->num_entries)
		{
		    yaz_log (LOG_DEBUG, "got pos=%d (sorted)", position);
		    sr[i].sysno = sort_info->entries[position-1]->sysno;
		    sr[i].score = sort_info->entries[position-1]->score;
		}
	    }
	}
	/* did we really get all entries using sort ? */
	for (i = 0; i<num; i++)
	{
	    if (!sr[i].sysno)
		break;
	}
	if (i < num) /* nope, get the rest, unsorted - sorry */
	{
	    int position = 0;
	    int num_i = 0;
	    int psysno = 0;
	    int term_index;
	    RSFD rfd;
	    struct it_key key;
	    
	    if (sort_info)
		position = sort_info->num_entries;
	    while (num_i < num && positions[num_i] < position)
		num_i++;
	    
	    if (sset->cache_rfd && 
		num_i < num && positions[num_i] > sset->cache_position)
	    {
		position = sset->cache_position;
		rfd = sset->cache_rfd;
		psysno = sset->cache_psysno;
	    } 
	    else
	    {
		if (sset->cache_rfd)
		    rset_close(rset, sset->cache_rfd);
		rfd = rset_open (rset, RSETF_READ);
	    }
	    while (num_i < num && rset_read (rset, rfd, &key, &term_index))
	    {
		if (key.sysno != psysno)
		{
		    psysno = key.sysno;
		    if (sort_info)
		    {
			/* determine we alreay have this in our set */
			for (i = sort_info->num_entries; --i >= 0; )
			    if (psysno == sort_info->entries[i]->sysno)
				break;
			if (i >= 0)
			    continue;
		    }
		    position++;
		    assert (num_i < num);
		    if (position == positions[num_i])
		    {
			sr[num_i].sysno = psysno;
			yaz_log (LOG_DEBUG, "got pos=%d (unsorted)", position);
			sr[num_i].score = -1;
			num_i++;
		    }
		}
	    }
	    sset->cache_position = position;
	    sset->cache_psysno = psysno;
	    sset->cache_rfd = rfd;
	}
    }
    return sr;
}

void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num)
{
    xfree (records);
}

struct sortKeyInfo {
    int relation;
    int attrUse;
    int numerical;
};

void resultSetInsertSort (ZebraHandle zh, ZebraSet sset,
			  struct sortKeyInfo *criteria, int num_criteria,
			  int sysno)
{
    struct zset_sort_entry this_entry;
    struct zset_sort_entry *new_entry = NULL;
    struct zset_sort_info *sort_info = sset->sort_info;
    int i, j;

    sortIdx_sysno (zh->reg->sortIdx, sysno);
    for (i = 0; i<num_criteria; i++)
    {
	sortIdx_type (zh->reg->sortIdx, criteria[i].attrUse);
	sortIdx_read (zh->reg->sortIdx, this_entry.buf[i]);
    }
    i = sort_info->num_entries;
    while (--i >= 0)
    {
	int rel = 0;
	for (j = 0; j<num_criteria; j++)
	{
            if (criteria[j].numerical)
            {
                double diff = atof(this_entry.buf[j]) -
                              atof(sort_info->entries[i]->buf[j]);
                rel = 0;
                if (diff > 0.0)
                    rel = 1;
                else if (diff < 0.0)
                    rel = -1;
            }
            else
            {
	        rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j],
			  SORT_IDX_ENTRYSIZE);
            }
	    if (rel)
		break;
	}	
	if (!rel)
	    break;
	if (criteria[j].relation == 'A')
        {
	    if (rel > 0)
	        break;
	}
        else if (criteria[j].relation == 'D')
	{
	    if (rel < 0)
		break;
	}
    }
    ++i;
    j = sort_info->max_entries;
    if (i == j)
	return;

    if (sort_info->num_entries == j)
	--j;
    else
	j = (sort_info->num_entries)++;
    new_entry = sort_info->entries[j];
    while (j != i)
    {
	sort_info->entries[j] = sort_info->entries[j-1];
	--j;
    }
    sort_info->entries[i] = new_entry;
    assert (new_entry);
    for (i = 0; i<num_criteria; i++)
	memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE);
    new_entry->sysno = sysno;
    new_entry->score = -1;
}

void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
			 int sysno, int score, int relation)
{
    struct zset_sort_entry *new_entry = NULL;
    int i, j;

    i = sort_info->num_entries;
    while (--i >= 0)
    {
	int rel = 0;

	rel = score - sort_info->entries[i]->score;

	if (relation == 'D')
	{
	    if (rel >= 0)
		break;
	}
	else if (relation == 'A')
	{
	    if (rel <= 0)
		break;
	}
    }
    ++i;
    j = sort_info->max_entries;
    if (i == j)
	return;

    if (sort_info->num_entries == j)
	--j;
    else
	j = (sort_info->num_entries)++;
    
    new_entry = sort_info->entries[j];
    while (j != i)
    {
	sort_info->entries[j] = sort_info->entries[j-1];
	--j;
    }
    sort_info->entries[i] = new_entry;
    assert (new_entry);
    new_entry->sysno = sysno;
    new_entry->score = score;
}

void resultSetSort (ZebraHandle zh, NMEM nmem,
		    int num_input_setnames, const char **input_setnames,
		    const char *output_setname,
		    Z_SortKeySpecList *sort_sequence, int *sort_status)
{
    ZebraSet sset;
    RSET rset;

    if (num_input_setnames == 0)
    {
	zh->errCode = 208;
	return ;
    }
    if (num_input_setnames > 1)
    {
	zh->errCode = 230;
	return;
    }
    yaz_log (LOG_DEBUG, "result set sort input=%s output=%s",
	  *input_setnames, output_setname);
    sset = resultSetGet (zh, input_setnames[0]);
    if (!sset)
    {
	zh->errCode = 30;
	zh->errString = nmem_strdup (nmem, input_setnames[0]);
	return;
    }
    if (!(rset = sset->rset))
    {
	zh->errCode = 30;
	zh->errString = nmem_strdup (nmem, input_setnames[0]);
        return;
    }
    if (strcmp (output_setname, input_setnames[0]))
    {
	rset = rset_dup (rset);
	sset = resultSetAdd (zh, output_setname, 1);
        sset->rset = rset;
    }
    resultSetSortSingle (zh, nmem, sset, rset, sort_sequence, sort_status);
}

void resultSetSortSingle (ZebraHandle zh, NMEM nmem,
			  ZebraSet sset, RSET rset,
			  Z_SortKeySpecList *sort_sequence, int *sort_status)
{
    int kno = 0;
    int i, psysno = 0;
    struct it_key key;
    struct sortKeyInfo sort_criteria[3];
    int num_criteria;
    int term_index;
    RSFD rfd;

    sset->sort_info->num_entries = 0;

    sset->hits = 0;
    num_criteria = sort_sequence->num_specs;
    if (num_criteria > 3)
	num_criteria = 3;
    for (i = 0; i < num_criteria; i++)
    {
	Z_SortKeySpec *sks = sort_sequence->specs[i];
	Z_SortKey *sk;

	if (*sks->sortRelation == Z_SortKeySpec_ascending)
	    sort_criteria[i].relation = 'A';
	else if (*sks->sortRelation == Z_SortKeySpec_descending)
	    sort_criteria[i].relation = 'D';
	else
	{
	    zh->errCode = 214;
	    return;
	}
	if (sks->sortElement->which == Z_SortElement_databaseSpecific)
	{
	    zh->errCode = 210;
	    return;
	}
	else if (sks->sortElement->which != Z_SortElement_generic)
	{
	    zh->errCode = 237;
	    return;
	}	
	sk = sks->sortElement->u.generic;
	switch (sk->which)
	{
	case Z_SortKey_sortField:
	    yaz_log (LOG_DEBUG, "Sort: key %d is of type sortField", i+1);
	    zh->errCode = 207;
	    return;
	case Z_SortKey_elementSpec:
	    yaz_log (LOG_DEBUG, "Sort: key %d is of type elementSpec", i+1);
	    zh->errCode = 207;
	    return;
	case Z_SortKey_sortAttributes:
	    yaz_log (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1);
	    sort_criteria[i].attrUse =
		zebra_maps_sort (zh->reg->zebra_maps,
				 sk->u.sortAttributes,
                                 &sort_criteria[i].numerical);
	    yaz_log (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse);
	    if (sort_criteria[i].attrUse == -1)
	    {
		zh->errCode = 116;
		return;
	    }
	    if (sortIdx_type (zh->reg->sortIdx, sort_criteria[i].attrUse))
	    {
		zh->errCode = 207;
		return;
	    }
	    break;
	}
    }
    rfd = rset_open (rset, RSETF_READ);
    while (rset_read (rset, rfd, &key, &term_index))
    {
	kno++;
        if (key.sysno != psysno)
        {
	    (sset->hits)++;
            psysno = key.sysno;
	    resultSetInsertSort (zh, sset,
				 sort_criteria, num_criteria, psysno);
        }
    }
    rset_close (rset, rfd);

    yaz_log (LOG_LOG, "%d keys, %d sysnos, sort", kno, sset->hits);
    for (i = 0; i < rset->no_rset_terms; i++)
	yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d",
                 rset->rset_terms[i]->name,
                 rset->rset_terms[i]->nn,
                 rset->rset_terms[i]->flags,
                 rset->rset_terms[i]->count);

    *sort_status = Z_SortResponse_success;
}

RSET resultSetRef (ZebraHandle zh, const char *resultSetId)
{
    ZebraSet s;

    if ((s = resultSetGet (zh, resultSetId)))
	return s->rset;
    return NULL;
}

void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset)
{
    int kno = 0;
    struct it_key key;
    RSFD rfd;
    int term_index, i;
    ZebraRankClass rank_class;
    struct rank_control *rc;
    struct zset_sort_info *sort_info;
    const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");

    sort_info = zebraSet->sort_info;
    sort_info->num_entries = 0;
    zebraSet->hits = 0;
    rfd = rset_open (rset, RSETF_READ);

    rank_class = zebraRankLookup (zh, rank_handler_name);
    if (!rank_class)
    {
        yaz_log (LOG_WARN, "No such rank handler: %s", rank_handler_name);
        return;
    }
    rc = rank_class->control;

    if (rset_read (rset, rfd, &key, &term_index))
    {
	int psysno = key.sysno;
	int score;
	void *handle =
	    (*rc->begin) (zh->reg, rank_class->class_handle, rset);
	(zebraSet->hits)++;
	do
	{
	    kno++;
	    if (key.sysno != psysno)
	    {
		score = (*rc->calc) (handle, psysno);

		resultSetInsertRank (zh, sort_info, psysno, score, 'A');
		(zebraSet->hits)++;
		psysno = key.sysno;
	    }
	    (*rc->add) (handle, key.seqno, term_index);
	}
	while (rset_read (rset, rfd, &key, &term_index));
	score = (*rc->calc) (handle, psysno);
	resultSetInsertRank (zh, sort_info, psysno, score, 'A');
	(*rc->end) (zh->reg, handle);
    }
    rset_close (rset, rfd);

    yaz_log (LOG_LOG, "%d keys, %d sysnos, rank", kno, zebraSet->hits);
    for (i = 0; i < rset->no_rset_terms; i++)
	yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d",
                 rset->rset_terms[i]->name,
                 rset->rset_terms[i]->nn,
                 rset->rset_terms[i]->flags,
                 rset->rset_terms[i]->count);
    
}

ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name)
{
    ZebraRankClass p = zh->reg->rank_classes;
    while (p && strcmp (p->control->name, name))
	p = p->next;
    if (p && !p->init_flag)
    {
	if (p->control->create)
	    p->class_handle = (*p->control->create)(zh);
	p->init_flag = 1;
    }
    return p;
}

void zebraRankInstall (struct zebra_register *reg, struct rank_control *ctrl)
{
    ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
    p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
    memcpy (p->control, ctrl, sizeof(*p->control));
    p->control->name = xstrdup (ctrl->name);
    p->init_flag = 0;
    p->next = reg->rank_classes;
    reg->rank_classes = p;
}

void zebraRankDestroy (struct zebra_register *reg)
{
    ZebraRankClass p = reg->rank_classes;
    while (p)
    {
	ZebraRankClass p_next = p->next;
	if (p->init_flag && p->control->destroy)
	    (*p->control->destroy)(reg, p->class_handle);
	xfree (p->control->name);
	xfree (p->control);
	xfree (p);
	p = p_next;
    }
    reg->rank_classes = NULL;
}


syntax highlighted by Code2HTML, v. 0.9.1