/* $Id: mfile.c,v 1.52 2003/04/05 12:32:34 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps This file is part of the Zebra server. Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. Zebra is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Zebra; see the file LICENSE.zebra. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* * TODO: The size estimates in init may not be accurate due to * only partially written final blocks. */ #include #include #ifdef WIN32 #include #else #include #endif #include #include #include #include #include #include #include #include #include static int scan_areadef(MFile_area ma, const char *ad, const char *base) { /* * If no definition is given, use current directory, unlimited. 
*/ char dirname[FILENAME_MAX+1]; mf_dir **dp = &ma->dirs, *dir = *dp; if (!ad) ad = ".:-1b"; for (;;) { const char *ad0 = ad; int i = 0, fact = 1, multi; mfile_off_t size = 0; while (*ad == ' ' || *ad == '\t') ad++; if (!*ad) break; if (!yaz_is_abspath(ad) && base) { strcpy (dirname, base); i = strlen(dirname); dirname[i++] = '/'; } while (*ad) { if (*ad == ':' && strchr ("+-0123456789", ad[1])) break; if (i < FILENAME_MAX) dirname[i++] = *ad; ad++; } dirname[i] = '\0'; if (*ad++ != ':') { logf (LOG_WARN, "Missing colon after path: %s", ad0); return -1; } if (i == 0) { logf (LOG_WARN, "Empty path: %s", ad0); return -1; } while (*ad == ' ' || *ad == '\t') ad++; if (*ad == '-') { fact = -1; ad++; } else if (*ad == '+') ad++; size = 0; if (*ad < '0' || *ad > '9') { logf (LOG_FATAL, "Missing size after path: %s", ad0); return -1; } size = 0; while (*ad >= '0' && *ad <= '9') size = size*10 + (*ad++ - '0'); switch (*ad) { case 'B': case 'b': multi = 1; break; case 'K': case 'k': multi = 1024; break; case 'M': case 'm': multi = 1048576; break; case 'G': case 'g': multi = 1073741824; break; case '\0': logf (LOG_FATAL, "Missing unit: %s", ad0); return -1; default: logf (LOG_FATAL, "Illegal unit: %c in %s", *ad, ad0); return -1; } ad++; *dp = dir = (mf_dir *) xmalloc(sizeof(mf_dir)); dir->next = 0; strcpy(dir->name, dirname); dir->max_bytes = dir->avail_bytes = fact * size * multi; dp = &dir->next; } return 0; } static int file_position(MFile mf, int pos, int offset) { int off = 0, c = mf->cur_file, ps; if ((c > 0 && pos <= mf->files[c-1].top) || (c < mf->no_files -1 && pos > mf->files[c].top)) { c = 0; while (c + 1 < mf->no_files && mf->files[c].top < pos) { off += mf->files[c].blocks; c++; } assert(c < mf->no_files); } else off = c ? (mf->files[c-1].top + 1) : 0; if (mf->files[c].fd < 0) { if ((mf->files[c].fd = open(mf->files[c].path, mf->wr ? 
(O_BINARY|O_RDWR|O_CREAT) : (O_BINARY|O_RDONLY), 0666)) < 0) { if (!mf->wr && errno == ENOENT && off == 0) return -2; logf (LOG_WARN|LOG_ERRNO, "Failed to open %s", mf->files[c].path); return -1; } } ps = pos - off; if (mfile_seek(mf->files[c].fd, ps * (mfile_off_t) mf->blocksize + offset, SEEK_SET) < 0) { logf (LOG_WARN|LOG_ERRNO, "Failed to seek in %s", mf->files[c].path); logf(LOG_WARN, "pos=%d off=%d blocksize=%d offset=%d", pos, off, mf->blocksize, offset); return -1; } mf->cur_file = c; return ps; } static int cmp_part_file(const void *p1, const void *p2) { return ((part_file *)p1)->number - ((part_file *)p2)->number; } /* * Create a new area, cotaining metafiles in directories. * Find the part-files in each directory, and inventory the existing metafiles. */ MFile_area mf_init(const char *name, const char *spec, const char *base) { MFile_area ma = (MFile_area) xmalloc(sizeof(*ma)); mf_dir *dirp; meta_file *meta_f; part_file *part_f = 0; DIR *dd; struct dirent *dent; int fd, number; char metaname[FILENAME_MAX+1], tmpnam[FILENAME_MAX+1]; logf (LOG_DEBUG, "mf_init(%s)", name); strcpy(ma->name, name); ma->mfiles = 0; ma->dirs = 0; if (scan_areadef(ma, spec, base) < 0) { logf (LOG_WARN, "Failed to access description of '%s'", name); return 0; } /* look at each directory */ for (dirp = ma->dirs; dirp; dirp = dirp->next) { if (!(dd = opendir(dirp->name))) { logf (LOG_WARN|LOG_ERRNO, "Failed to open directory %s", dirp->name); return 0; } /* look at each file */ while ((dent = readdir(dd))) { int len = strlen(dent->d_name); const char *cp = strrchr (dent->d_name, '-'); if (strchr (".-", *dent->d_name)) continue; if (len < 5 || !cp || strcmp (dent->d_name + len - 3, ".mf")) continue; number = atoi(cp+1); memcpy (metaname, dent->d_name, cp - dent->d_name); metaname[ cp - dent->d_name] = '\0'; for (meta_f = ma->mfiles; meta_f; meta_f = meta_f->next) { /* known metafile */ if (!strcmp(meta_f->name, metaname)) { part_f = &meta_f->files[meta_f->no_files++]; break; } } /* 
new metafile */ if (!meta_f) { meta_f = (meta_file *) xmalloc(sizeof(*meta_f)); zebra_mutex_init (&meta_f->mutex); meta_f->ma = ma; meta_f->next = ma->mfiles; meta_f->open = 0; meta_f->cur_file = -1; meta_f->unlink_flag = 0; ma->mfiles = meta_f; strcpy(meta_f->name, metaname); part_f = &meta_f->files[0]; meta_f->no_files = 1; } part_f->number = number; part_f->dir = dirp; part_f->fd = -1; sprintf(tmpnam, "%s/%s", dirp->name, dent->d_name); part_f->path = xstrdup(tmpnam); /* get size */ if ((fd = open(part_f->path, O_BINARY|O_RDONLY)) < 0) { logf (LOG_FATAL|LOG_ERRNO, "Failed to access %s", dent->d_name); return 0; } if ((part_f->bytes = mfile_seek(fd, 0, SEEK_END)) < 0) { logf (LOG_FATAL|LOG_ERRNO, "Failed to seek in %s", dent->d_name); return 0; } #ifndef WIN32 fsync(fd); #endif close(fd); if (dirp->max_bytes >= 0) dirp->avail_bytes -= part_f->bytes; } closedir(dd); } for (meta_f = ma->mfiles; meta_f; meta_f = meta_f->next) { logf (LOG_DEBUG, "mf_init: %s consists of %d part(s)", meta_f->name, meta_f->no_files); qsort(meta_f->files, meta_f->no_files, sizeof(part_file), cmp_part_file); } return ma; } void mf_destroy(MFile_area ma) { mf_dir *dp; meta_file *meta_f; if (!ma) return; dp = ma->dirs; while (dp) { mf_dir *d = dp; dp = dp->next; xfree (d); } meta_f = ma->mfiles; while (meta_f) { int i; meta_file *m = meta_f; for (i = 0; ino_files; i++) { xfree (m->files[i].path); } zebra_mutex_destroy (&meta_f->mutex); meta_f = meta_f->next; xfree (m); } xfree (ma); } void mf_reset(MFile_area ma) { meta_file *meta_f; if (!ma) return; meta_f = ma->mfiles; while (meta_f) { int i; meta_file *m = meta_f; assert (!m->open); for (i = 0; ino_files; i++) { unlink (m->files[i].path); xfree (m->files[i].path); } meta_f = meta_f->next; xfree (m); } ma->mfiles = 0; } /* * Open a metafile. * If !ma, Use MF_DEFAULT_AREA. 
*/
/*
 * mf_open looks the metafile up by name in the area. An existing record is
 * reused: each part-file's byte count is rounded up to a whole number of
 * blocks. Otherwise a new record with one empty part-file "<dir>/<name>-0.mf"
 * is created in the first directory that is unlimited or has room for
 * MF_MIN_BLOCKS_CREAT blocks. No file is opened or created here.
 *
 * ma:         area to search / extend (must be non-null, asserted).
 * name:       metafile name.
 * block_size: block size in bytes (must be positive, asserted).
 * wflag:      stored in the record's wr field; logged as RW when non-zero.
 *
 * Returns the metafile handle, or 0 (after logging) when the name or path
 * is too long or no directory has enough space. Calls abort() if the
 * metafile is already open.
 */
MFile mf_open(MFile_area ma, const char *name, int block_size, int wflag)
{
    meta_file *mnew;
    int i;
    char tmp[FILENAME_MAX+1];
    mf_dir *dp;

    logf(LOG_DEBUG, "mf_open(%s bs=%d, %s)", name, block_size,
         wflag ? "RW" : "RDONLY");
    assert (ma);
    assert (block_size > 0);    /* used as a divisor below */
    for (mnew = ma->mfiles; mnew; mnew = mnew->next)
        if (!strcmp(name, mnew->name))
        {
            if (mnew->open)
                abort();
            else
                break;
        }
    if (!mnew)
    {
        if (strlen(name) >= sizeof(mnew->name))
        {
            logf (LOG_FATAL, "Name too long for new mfile: %s", name);
            return 0;
        }
        mnew = (meta_file *) xmalloc(sizeof(*mnew));
        strcpy(mnew->name, name);
        /* allocate one, empty file */
        zebra_mutex_init (&mnew->mutex);
        mnew->no_files = 1;
        mnew->files[0].bytes = 0;
        mnew->files[0].blocks = 0;
        mnew->files[0].top = -1;
        mnew->files[0].number = 0;
        mnew->files[0].fd = -1;
        mnew->unlink_flag = 0;
        mnew->min_bytes_creat = MF_MIN_BLOCKS_CREAT * block_size;
        /* first directory that is unlimited or has enough room */
        for (dp = ma->dirs; dp && dp->max_bytes >= 0 && dp->avail_bytes <
            mnew->min_bytes_creat; dp = dp->next);
        if (!dp)
        {
            logf (LOG_FATAL, "Insufficient space for new mfile.");
            /* the record was never linked into the area: release it */
            zebra_mutex_destroy (&mnew->mutex);
            xfree (mnew);
            return 0;
        }
        /* "<dir>/<name>-0.mf": '/' + "-0.mf" add 6 characters */
        if (strlen(dp->name) + strlen(mnew->name) + 6 >= sizeof(tmp))
        {
            logf (LOG_FATAL, "Path too long for new mfile: %s/%s",
                  dp->name, mnew->name);
            zebra_mutex_destroy (&mnew->mutex);
            xfree (mnew);
            return 0;
        }
        mnew->files[0].dir = dp;
        sprintf(tmp, "%s/%s-%d.mf", dp->name, mnew->name, 0);
        mnew->files[0].path = xstrdup(tmp);
        mnew->ma = ma;
        mnew->next = ma->mfiles;
        ma->mfiles = mnew;
    }
    else
    {
        /* round each part up to a whole number of blocks; the block
           counts themselves are derived in the loop further down */
        for (i = 0; i < mnew->no_files; i++)
        {
            if (mnew->files[i].bytes % block_size)
                mnew->files[i].bytes += block_size - mnew->files[i].bytes %
                    block_size;
        }
        assert(!mnew->open);
    }
    mnew->blocksize = block_size;
    mnew->min_bytes_creat = MF_MIN_BLOCKS_CREAT * block_size;
    mnew->wr=wflag;
    mnew->cur_file = 0;
    mnew->open = 1;

    /* top is the number of the last block held by a part-file;
       the final part-file is open-ended and marked with -1 */
    for (i = 0; i < mnew->no_files; i++)
    {
        mnew->files[i].blocks = (int)(mnew->files[i].bytes / mnew->blocksize);
        if (i == mnew->no_files - 1)
            mnew->files[i].top = -1;
        else
            mnew->files[i].top =
                i ? (mnew->files[i-1].top + mnew->files[i].blocks)
                : (mnew->files[i].blocks - 1);
    }
    return mnew;
}

/*
 * Close a metafile.
*/ int mf_close(MFile mf) { int i; logf (LOG_DEBUG, "mf_close(%s)", mf->name); assert(mf->open); for (i = 0; i < mf->no_files; i++) { if (mf->files[i].fd >= 0) { #ifndef WIN32 fsync(mf->files[i].fd); #endif close(mf->files[i].fd); mf->files[i].fd = -1; } if (mf->unlink_flag) unlink(mf->files[i].path); } mf->open = 0; return 0; } /* * Read one block from a metafile. Interface mirrors bfile. */ int mf_read(MFile mf, int no, int offset, int nbytes, void *buf) { int rd, toread; zebra_mutex_lock (&mf->mutex); if ((rd = file_position(mf, no, offset)) < 0) { if (rd == -2) { zebra_mutex_unlock (&mf->mutex); return 0; } else { yaz_log (LOG_FATAL, "mf_read %s internal error", mf->name); exit(1); } } toread = nbytes ? nbytes : mf->blocksize; if ((rd = read(mf->files[mf->cur_file].fd, buf, toread)) < 0) { logf (LOG_FATAL|LOG_ERRNO, "mf_read: Read failed (%s)", mf->files[mf->cur_file].path); exit(1); } zebra_mutex_unlock (&mf->mutex); if (rd < toread) return 0; else return 1; } /* * Write. */ int mf_write(MFile mf, int no, int offset, int nbytes, const void *buf) { int ps, nblocks, towrite; mf_dir *dp; char tmp[FILENAME_MAX+1]; unsigned char dummych = '\xff'; zebra_mutex_lock (&mf->mutex); if ((ps = file_position(mf, no, offset)) < 0) { yaz_log (LOG_FATAL, "mf_write %s internal error (1)", mf->name); exit(1); } /* file needs to grow */ while (ps >= mf->files[mf->cur_file].blocks) { mfile_off_t needed = (ps - mf->files[mf->cur_file].blocks + 1) * mf->blocksize; /* file overflow - allocate new file */ if (mf->files[mf->cur_file].dir->max_bytes >= 0 && needed > mf->files[mf->cur_file].dir->avail_bytes) { /* cap off file? */ if ((nblocks = (int) (mf->files[mf->cur_file].dir->avail_bytes / mf->blocksize)) > 0) { logf (LOG_DEBUG, "Capping off file %s at pos %d", mf->files[mf->cur_file].path, nblocks); if ((ps = file_position(mf, (mf->cur_file ? 
mf->files[mf->cur_file-1].top : 0) + mf->files[mf->cur_file].blocks + nblocks - 1, 0)) < 0) { yaz_log (LOG_FATAL, "mf_write %s internal error (2)", mf->name); exit(1); } logf (LOG_DEBUG, "ps = %d", ps); if (write(mf->files[mf->cur_file].fd, &dummych, 1) < 1) { logf (LOG_ERRNO|LOG_FATAL, "mf_write %s internal error (3)", mf->name); exit(1); } mf->files[mf->cur_file].blocks += nblocks; mf->files[mf->cur_file].bytes += nblocks * mf->blocksize; mf->files[mf->cur_file].dir->avail_bytes -= nblocks * mf->blocksize; } /* get other bit */ logf (LOG_DEBUG, "Creating new file."); for (dp = mf->ma->dirs; dp && dp->max_bytes >= 0 && dp->avail_bytes < needed; dp = dp->next); if (!dp) { logf (LOG_FATAL, "Cannot allocate more space for %s", mf->name); exit(1); } mf->files[mf->cur_file].top = (mf->cur_file ? mf->files[mf->cur_file-1].top : -1) + mf->files[mf->cur_file].blocks; mf->files[++(mf->cur_file)].top = -1; mf->files[mf->cur_file].dir = dp; mf->files[mf->cur_file].number = mf->files[mf->cur_file-1].number + 1; mf->files[mf->cur_file].blocks = 0; mf->files[mf->cur_file].bytes = 0; mf->files[mf->cur_file].fd = -1; sprintf(tmp, "%s/%s-%d.mf", dp->name, mf->name, mf->files[mf->cur_file].number); mf->files[mf->cur_file].path = xstrdup(tmp); mf->no_files++; /* open new file and position at beginning */ if ((ps = file_position(mf, no, offset)) < 0) { yaz_log (LOG_FATAL, "mf_write %s internal error (4)", mf->name); exit(1); } } else { nblocks = ps - mf->files[mf->cur_file].blocks + 1; mf->files[mf->cur_file].blocks += nblocks; mf->files[mf->cur_file].bytes += nblocks * mf->blocksize; if (mf->files[mf->cur_file].dir->max_bytes >= 0) mf->files[mf->cur_file].dir->avail_bytes -= nblocks * mf->blocksize; } } towrite = nbytes ? 
nbytes : mf->blocksize; if (write(mf->files[mf->cur_file].fd, buf, towrite) < towrite) { logf (LOG_FATAL|LOG_ERRNO, "Write failed for file %s part %d", mf->name, mf->cur_file); exit(1); } zebra_mutex_unlock (&mf->mutex); return 0; } /* * Destroy a metafile, unlinking component files. File must be open. */ int mf_unlink(MFile mf) { if (mf->open) mf->unlink_flag = 1; else { int i; for (i = 0; ino_files; i++) unlink(mf->files[i].path); } return 0; } /* * Unlink the file by name, rather than MFile-handle. File should be closed. */ int mf_unlink_name(MFile_area ma, const char *name) { abort(); return 0; }