/* $Id: rectext.c,v 1.18 2004/06/16 20:32:41 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
This file is part of the Zebra server.
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with Zebra; see the file LICENSE.zebra. If not, write to the
Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
*/
#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#include <zebrautl.h>
#include "rectext.h"
struct text_info {
char *sep;
};
static void *text_init (RecType recType)
{
struct text_info *tinfo = (struct text_info *) xmalloc(sizeof(*tinfo));
tinfo->sep = 0;
return tinfo;
}
static void text_destroy (void *clientData)
{
struct text_info *tinfo = clientData;
xfree (tinfo->sep);
xfree (tinfo);
}
struct buf_info {
struct recExtractCtrl *p;
char *buf;
int offset;
int max;
};
struct buf_info *buf_open (struct recExtractCtrl *p)
{
struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi));
fi->p = p;
fi->buf = (char *) xmalloc (4096);
fi->offset = 1;
fi->max = 1;
return fi;
}
int buf_read (struct text_info *tinfo, struct buf_info *fi, char *dst)
{
if (fi->offset >= fi->max)
{
if (fi->max <= 0)
return 0;
fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096);
fi->offset = 0;
if (fi->max <= 0)
return 0;
}
*dst = fi->buf[(fi->offset)++];
if (tinfo->sep && *dst == *tinfo->sep)
{
off_t off = (*fi->p->tellf)(fi->p->fh);
(*fi->p->endf)(fi->p->fh, off - (fi->max - fi->offset));
return 0;
}
return 1;
}
void buf_close (struct buf_info *fi)
{
xfree (fi->buf);
xfree (fi);
}
static int text_extract (void *clientData, struct recExtractCtrl *p)
{
struct text_info *tinfo = clientData;
char w[512];
RecWord recWord;
int r;
struct buf_info *fi = buf_open (p);
#if 0
yaz_log(LOG_LOG, "text_extract off=%ld",
(long) (*fi->p->tellf)(fi->p->fh));
#endif
xfree(tinfo->sep);
tinfo->sep = 0;
if (p->subType) {
if (!strncmp(p->subType, "sep=", 4))
tinfo->sep = xstrdup(p->subType+4);
}
(*p->init)(p, &recWord);
recWord.reg_type = 'w';
do
{
int i = 0;
r = buf_read (tinfo, fi, w);
while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r')
{
i++;
r = buf_read (tinfo, fi, w + i);
}
if (i)
{
recWord.string = w;
recWord.length = i;
(*p->tokenAdd)(&recWord);
}
} while (r > 0);
buf_close (fi);
return RECCTRL_EXTRACT_OK;
}
static int text_retrieve (void *clientData, struct recRetrieveCtrl *p)
{
int r, text_ptr = 0;
static char *text_buf = NULL;
static int text_size = 0;
int make_header = 1;
int make_body = 1;
const char *elementSetName = NULL;
int no_lines = 0;
if (p->comp && p->comp->which == Z_RecordComp_simple &&
p->comp->u.simple->which == Z_ElementSetNames_generic)
elementSetName = p->comp->u.simple->u.generic;
if (elementSetName)
{
/* don't make header for the R(aw) element set name */
if (!strcmp(elementSetName, "R"))
{
make_header = 0;
make_body = 1;
}
/* only make header for the H(eader) element set name */
else if (!strcmp(elementSetName, "H"))
{
make_header = 1;
make_body = 0;
}
}
while (1)
{
if (text_ptr + 4096 >= text_size)
{
char *nb;
text_size = 2*text_size + 8192;
nb = (char *) xmalloc (text_size);
if (text_buf)
{
memcpy (nb, text_buf, text_ptr);
xfree (text_buf);
}
text_buf = nb;
}
if (make_header && text_ptr == 0)
{
if (p->score >= 0)
{
sprintf (text_buf, "Rank: %d\n", p->score);
text_ptr = strlen(text_buf);
}
sprintf (text_buf + text_ptr, "Local Number: %d\n", p->localno);
text_ptr = strlen(text_buf);
if (p->fname)
{
sprintf (text_buf + text_ptr, "Filename: %s\n", p->fname);
text_ptr = strlen(text_buf);
}
strcpy(text_buf+text_ptr++, "\n");
}
if (!make_body)
break;
r = (*p->readf)(p->fh, text_buf + text_ptr, 4096);
if (r <= 0)
break;
text_ptr += r;
}
text_buf[text_ptr] = '\0';
if (elementSetName)
{
if (!strcmp (elementSetName, "B"))
no_lines = 4;
if (!strcmp (elementSetName, "M"))
no_lines = 20;
}
if (no_lines)
{
char *p = text_buf;
int i = 0;
while (++i <= no_lines && (p = strchr (p, '\n')))
p++;
if (p)
{
p[1] = '\0';
text_ptr = p-text_buf;
}
}
p->output_format = VAL_SUTRS;
p->rec_buf = text_buf;
p->rec_len = text_ptr;
return 0;
}
static struct recType text_type = {
"text",
text_init,
text_destroy,
text_extract,
text_retrieve
};
RecType recTypeText = &text_type;
syntax highlighted by Code2HTML, v. 0.9.1