/* $Id: rsbetween.c,v 1.15.2.4 2004/12/17 13:43:09 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
Index Data Aps
This file is part of the Zebra server.
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with Zebra; see the file LICENSE.zebra. If not, write to the
Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
*/
/* rsbetween is (mostly) used for xml searches. It returns the hits of the
* "middle" rset, that are in between the "left" and "right" rsets. For
* example "Shakespeare" in between "<title>" and </title>. The thing is
* complicated by the inclusion of attributes (from their own rset). If attrs
* specified, they must match the "left" rset (start tag). "Hamlet" between
* "<title lang=eng>" and "</title>". (This assumes that the attributes are
* indexed to the same seqno as the tags).
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <rsbetween.h>
#include <zebrautl.h>
#define RSBETWEEN_DEBUG 0
#if RSBETWEEN_DEBUG
#define log_level LOG_DEBUG
#endif
static void *r_create_between(RSET ct, const struct rset_control *sel, void *parms);
static RSFD r_open_between (RSET ct, int flag);
static void r_close_between (RSFD rfd);
static void r_delete_between (RSET ct);
static void r_rewind_between (RSFD rfd);
static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
int (*cmpfunc)(const void *p1, const void *p2),
const void *untilbuf);
static int r_read (RSFD rfd, void *buf, int *term_index);
static int r_write_between (RSFD rfd, const void *buf);
static const struct rset_control control_between =
{
"between",
r_create_between,
r_open_between,
r_close_between,
r_delete_between,
r_rewind_between,
r_forward_between, /* rset_default_forward, */
rset_default_pos,
r_read,
r_write_between,
};
const struct rset_control *rset_kind_between = &control_between;
#define WHICH_L 0
#define WHICH_M 1
#define WHICH_R 2
#define WHICH_A 3
struct rset_between_info {
int key_size;
RSET rset_l;
RSET rset_m;
RSET rset_r;
RSET rset_attr;
int term_index_s;
int (*cmp)(const void *p1, const void *p2);
char *(*printer)(const void *p1, char *buf);
struct rset_between_rfd *rfd_list;
};
struct rset_between_rfd {
RSFD rfd_l;
RSFD rfd_m;
RSFD rfd_r;
RSFD rfd_attr;
int more_l;
int more_m;
int more_r;
int more_attr;
int term_index_l;
int term_index_m;
int term_index_r;
void *buf_l;
void *buf_m;
void *buf_r;
void *buf_attr;
int level;
struct rset_between_rfd *next;
struct rset_between_info *info;
/* 1.4 backport */
void *recbuf; /* a key that tells which record we are in */
void *startbuf; /* the start tag */
int startbufok; /* we have seen the first start tag */
void *attrbuf; /* the attr tag. If these two match, we have attr match */
int attrbufok; /* we have seen the first attr tag, can compare */
int depth; /* number of start-tags without end-tags */
int attrdepth; /* on what depth the attr matched */
int hits; /* number of hits returned so far */
/* Stuff for the multi-and read, also backported */
/* uses more_l/m/r/a etc from above */
int tailcount;
int tailbits[4];
int and_eof;
};
#if RSBETWEEN_DEBUG
static void log2 (struct rset_between_rfd *p, char *msg, int cmp_l, int cmp_r)
{
char buf_l[32];
char buf_m[32];
char buf_r[32];
logf(LOG_DEBUG,"btw: %s l=%s(%d/%d) m=%s(%d) r=%s(%d/%d), lev=%d",
msg,
(*p->info->printer)(p->buf_l, buf_l), p->more_l, cmp_l,
(*p->info->printer)(p->buf_m, buf_m), p->more_m,
(*p->info->printer)(p->buf_r, buf_r), p->more_r, cmp_r,
p->level);
}
#endif
static void *r_create_between (RSET ct, const struct rset_control *sel,
void *parms)
{
rset_between_parms *between_parms = (rset_between_parms *) parms;
struct rset_between_info *info;
info = (struct rset_between_info *) xmalloc (sizeof(*info));
info->key_size = between_parms->key_size;
info->rset_l = between_parms->rset_l;
info->rset_m = between_parms->rset_m;
info->rset_r = between_parms->rset_r;
info->rset_attr = between_parms->rset_attr;
if (rset_is_volatile(info->rset_l) ||
rset_is_volatile(info->rset_m) ||
rset_is_volatile(info->rset_r))
ct->flags |= RSET_FLAG_VOLATILE;
info->cmp = between_parms->cmp;
info->printer = between_parms->printer;
info->rfd_list = NULL;
info->term_index_s = info->rset_l->no_rset_terms;
if (info->rset_m)
{
ct->no_rset_terms =
info->rset_l->no_rset_terms +
info->rset_m->no_rset_terms +
info->rset_r->no_rset_terms;
ct->rset_terms = (RSET_TERM *)
xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
memcpy (ct->rset_terms, info->rset_l->rset_terms,
info->rset_l->no_rset_terms * sizeof(*ct->rset_terms));
memcpy (ct->rset_terms + info->rset_l->no_rset_terms,
info->rset_m->rset_terms,
info->rset_m->no_rset_terms * sizeof(*ct->rset_terms));
memcpy (ct->rset_terms + info->rset_l->no_rset_terms +
info->rset_m->no_rset_terms,
info->rset_r->rset_terms,
info->rset_r->no_rset_terms * sizeof(*ct->rset_terms));
}
else
{
ct->no_rset_terms =
info->rset_l->no_rset_terms +
info->rset_r->no_rset_terms;
ct->rset_terms = (RSET_TERM *)
xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
memcpy (ct->rset_terms, info->rset_l->rset_terms,
info->rset_l->no_rset_terms * sizeof(*ct->rset_terms));
memcpy (ct->rset_terms + info->rset_l->no_rset_terms,
info->rset_r->rset_terms,
info->rset_r->no_rset_terms * sizeof(*ct->rset_terms));
}
return info;
}
static RSFD r_open_between (RSET ct, int flag)
{
struct rset_between_info *info = (struct rset_between_info *) ct->buf;
struct rset_between_rfd *rfd;
if (flag & RSETF_WRITE)
{
logf (LOG_FATAL, "between set type is read-only");
return NULL;
}
rfd = (struct rset_between_rfd *) xmalloc (sizeof(*rfd));
rfd->next = info->rfd_list;
info->rfd_list = rfd;
rfd->info = info;
rfd->buf_l = xmalloc (info->key_size);
rfd->buf_m = xmalloc (info->key_size);
rfd->buf_r = xmalloc (info->key_size);
rfd->buf_attr = xmalloc (info->key_size);
rfd->rfd_l = rset_open (info->rset_l, RSETF_READ);
rfd->rfd_m = rset_open (info->rset_m, RSETF_READ);
rfd->rfd_r = rset_open (info->rset_r, RSETF_READ);
rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l,
&rfd->term_index_l);
rfd->more_m = rset_read (info->rset_m, rfd->rfd_m, rfd->buf_m,
&rfd->term_index_m);
rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r,
&rfd->term_index_r);
if (info->rset_attr)
{
int dummy;
rfd->rfd_attr = rset_open (info->rset_attr, RSETF_READ);
rfd->more_attr = rset_read (info->rset_attr, rfd->rfd_attr,
rfd->buf_attr, &dummy);
}
rfd->level=0;
rfd->recbuf=xmalloc (info->key_size);
rfd->startbuf=xmalloc (info->key_size);
rfd->attrbuf=xmalloc (info->key_size);
rfd->startbufok=0;
rfd->attrbufok=0;
rfd->depth=0;
rfd->attrdepth=0;
rfd->hits=-1; /* signals uninitialized */
rfd->tailcount=0;
rfd->tailbits[WHICH_L]=0;
rfd->tailbits[WHICH_M]=0;
rfd->tailbits[WHICH_R]=0;
rfd->tailbits[WHICH_A]=0;
rfd->and_eof=0;
return rfd;
}
static void r_close_between (RSFD rfd)
{
struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
struct rset_between_rfd **rfdp;
for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next)
if (*rfdp == rfd)
{
xfree ((*rfdp)->buf_l);
xfree ((*rfdp)->buf_m);
xfree ((*rfdp)->buf_r);
xfree ((*rfdp)->buf_attr);
xfree ((*rfdp)->recbuf);
xfree ((*rfdp)->startbuf);
xfree ((*rfdp)->attrbuf);
rset_close (info->rset_l, (*rfdp)->rfd_l);
rset_close (info->rset_m, (*rfdp)->rfd_m);
rset_close (info->rset_r, (*rfdp)->rfd_r);
if (info->rset_attr)
rset_close (info->rset_attr, (*rfdp)->rfd_attr);
*rfdp = (*rfdp)->next;
xfree (rfd);
return;
}
logf (LOG_FATAL, "r_close_between but no rfd match!");
assert (0);
}
static void r_delete_between (RSET ct)
{
struct rset_between_info *info = (struct rset_between_info *) ct->buf;
assert (info->rfd_list == NULL);
xfree (ct->rset_terms);
rset_delete (info->rset_l);
rset_delete (info->rset_m);
rset_delete (info->rset_r);
if (info->rset_attr)
rset_delete (info->rset_attr);
xfree (info);
}
static void r_rewind_between (RSFD rfd)
{
struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
#if RSBETWEEN_DEBUG
logf (LOG_DEBUG, "rsbetween_rewind");
#endif
rset_rewind (info->rset_l, p->rfd_l);
rset_rewind (info->rset_m, p->rfd_m);
rset_rewind (info->rset_r, p->rfd_r);
p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, &p->term_index_l);
p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m, &p->term_index_m);
p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, &p->term_index_r);
if (info->rset_attr)
{
int dummy;
rset_rewind (info->rset_attr, p->rfd_attr);
p->more_attr = rset_read (info->rset_attr, p->rfd_attr, p->buf_attr,
&dummy);
}
p->level=0;
}
static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
int (*cmpfunc)(const void *p1, const void *p2),
const void *untilbuf)
{
struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
int rc;
#if RSBETWEEN_DEBUG
log2( p, "fwd: before forward", 0,0);
#endif
/* It is enough to forward the m pointer here, the read will */
/* naturally forward the l, m, and attr pointers */
if (p->more_m)
p->more_m=rset_forward(info->rset_m,p->rfd_m, p->buf_m,
&p->term_index_m, info->cmp,untilbuf);
#if RSBETWEEN_DEBUG
log2( p, "fwd: after forward M", 0,0);
#endif
/* rc = r_read_between(rfd, buf, term_index);*/
rc = r_read(rfd, buf, term_index);
#if RSBETWEEN_DEBUG
log2( p, "fwd: after forward", 0,0);
#endif
return rc;
}
static int r_write_between (RSFD rfd, const void *buf)
{
logf (LOG_FATAL, "between set type is read-only");
return -1;
}
/* Heikki's back-port of the cleaner 1.4 algorithm */
static void checkattr(struct rset_between_rfd *p)
{
struct rset_between_info *info =p->info;
int cmp;
if (p->attrdepth)
return; /* already found one */
if (!info->rset_attr)
{
p->attrdepth=-1; /* matches always */
return;
}
if ( p->startbufok && p->attrbufok )
{ /* have buffers to compare */
cmp=(*info->cmp)(p->startbuf,p->attrbuf);
if (0==cmp) /* and the keys match */
{
p->attrdepth=p->depth;
#if RSBETWEEN_DEBUG
yaz_log(log_level, "found attribute match at depth %d",p->attrdepth);
#endif
}
}
}
/* Implements a multi-and between the l,r, and m rfds. Returns all */
/* hits from those, provided that all three point to the same record */
/* See rsmultiandor.c in zebra 1.4 for details */
static int read_anded(struct rset_between_rfd *p,void *buf,
int *term, int *which) {
RSFD rfds[4];
RSET rsets[4];
void *bufs[4];
int *terms[4];
int dummyterm;
struct rset_between_info *info =p->info;
int i, mintail;
int n;
int cmp;
/* make the individual args into arrays, to match rsmultiandor */
rfds[WHICH_L]=p->rfd_l;
rfds[WHICH_M]=p->rfd_m;
rfds[WHICH_R]=p->rfd_r;
rfds[WHICH_A]=p->rfd_attr;
rsets[WHICH_L]=info->rset_l;
rsets[WHICH_M]=info->rset_m;
rsets[WHICH_R]=info->rset_r;
rsets[WHICH_A]=info->rset_attr;
bufs[WHICH_L]=p->buf_l;
bufs[WHICH_M]=p->buf_m;
bufs[WHICH_R]=p->buf_r;
bufs[WHICH_A]=p->buf_attr;
terms[WHICH_L]=&(p->term_index_l);
terms[WHICH_M]=&(p->term_index_m);
terms[WHICH_R]=&(p->term_index_r);
terms[WHICH_A]=&dummyterm;
dummyterm=0;
if (info->rset_attr)
n=4;
else
n=3;
while (1) {
if (p->tailcount)
{ /* we are tailing */
mintail=0;
while ((mintail<n) && !p->tailbits[mintail])
mintail++; /* first tail */
for (i=mintail+1;i<n;i++)
{
if (p->tailbits[i])
{
cmp=(*info->cmp)(bufs[i],bufs[mintail]);
if (cmp<0)
mintail=i;
}
}
/* return the lowest tail */
memcpy(buf, bufs[mintail], info->key_size);
*which=mintail;
*term=*terms[mintail];
/* and read that one further */
if (!rset_read(rsets[mintail],rfds[mintail],
bufs[mintail],terms[mintail]))
{
p->and_eof=1;
p->tailbits[mintail]=0;
(p->tailcount)--;
return 1;
}
/* still a tail? */
cmp=(info->cmp)(bufs[mintail],buf);
if (cmp>=2)
{
p->tailbits[mintail]=0;
(p->tailcount)--;
}
return 1;
}
else
{ /* not tailing */
if (p->and_eof)
return 0; /* game over */
i=1; /* assume items[0] is highest up */
while (i<n)
{
cmp=(*info->cmp)(bufs[0],bufs[i]);
if (cmp<=-2)
{ /* [0] was behid, forward it and start anew */
if (!rset_forward(rsets[0],rfds[0], bufs[0],
terms[0], info->cmp, bufs[i]))
{
p->and_eof=1;
return 0;
}
i=0; /* all over again */
}
else if (cmp>=2)
{/* [0] was ahead, forward i */
if (!rset_forward(rsets[i],rfds[i], bufs[i],
terms[i], info->cmp, bufs[0]))
{
p->and_eof=1;
return 0;
}
} else
i++;
} /* while i */
/* now all are within the same records, set tails and let upper
* if return hits */
for (i=0; i<n;i++)
p->tailbits[i]=1;
p->tailcount=n;
}
}
} /* read_anded */
static int r_read (RSFD rfd, void *buf, int *term)
{
struct rset_between_rfd *p=(struct rset_between_rfd *)rfd;
struct rset_between_info *info =p->info;
int cmp;
int thisterm=0;
int which;
*term=0; /* just in case, should not be necessary */
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: == read: term=%p",term);
#endif
while ( read_anded(p,buf,&thisterm,&which) )
{
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: read loop term=%p d=%d ad=%d",
term, p->depth, p->attrdepth);
#endif
if (p->hits<0)
{/* first time? */
memcpy(p->recbuf,buf,info->key_size);
p->hits=0;
cmp=2;
}
else {
cmp=(*info->cmp)(buf,p->recbuf);
#if RSBETWEEN_DEBUG
yaz_log(log_level, "btw: cmp=%d",cmp);
#endif
}
if (cmp>=2)
{
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: new record");
#endif
p->depth=0;
p->attrdepth=0;
memcpy(p->recbuf,buf,info->key_size);
}
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: which: %d", which);
#endif
if (which==WHICH_L)
{
p->depth++;
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: read start tag. d=%d",p->depth);
#endif
memcpy(p->startbuf,buf,info->key_size);
p->startbufok=1;
checkattr(rfd); /* in case we already saw the attr here */
}
else if (which==WHICH_R)
{
if (p->depth == p->attrdepth)
p->attrdepth=0; /* ending the tag with attr match */
p->depth--;
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: read end tag. d=%d ad=%d",
p->depth, p->attrdepth);
#endif
}
else if (which==WHICH_A)
{
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: read attr");
#endif
memcpy(p->attrbuf,buf,info->key_size);
p->attrbufok=1;
checkattr(rfd); /* in case the start tag came first */
}
else
{ /* mut be a real hit */
if (p->depth && p->attrdepth)
{
p->hits++;
#if RSBETWEEN_DEBUG
yaz_log(log_level,"btw: got a hit h=%d d=%d ad=%d t=%d+%d",
p->hits,p->depth,p->attrdepth,
info->rset_m->no_rset_terms,thisterm);
#endif
*term= info->rset_m->no_rset_terms + thisterm;
return 1; /* everything else is in place already */
} else
{
#if RSBETWEEN_DEBUG
yaz_log(log_level, "btw: Ignoring hit. h=%d d=%d ad=%d",
p->hits,p->depth,p->attrdepth);
#endif
}
}
} /* while read */
return 0;
} /* r_read */
syntax highlighted by Code2HTML, v. 0.9.1