/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2001
 *	Sleepycat Software.  All rights reserved.
 */
#include "db_config.h"

#ifndef lint
static const char revid[] = "$Id: log_put.c,v 11.58 2001/11/17 17:01:57 bostic Exp $";
#endif /* not lint */

/*
 * NOTE(review): the system header names were missing from the reviewed
 * copy of this file (bare "#include" directives).  They are restored here
 * to match the number of directives and the library calls made below
 * (time, memcpy, strlen, strcpy, snprintf) -- confirm against upstream.
 */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#if TIME_WITH_SYS_TIME
#include <sys/time.h>
#include <time.h>
#else
#if HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <time.h>
#endif
#endif

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#endif

#include "db_int.h"
#include "db_page.h"
#include "log.h"
#include "hash.h"
#include "clib_ext.h"
#include "rep.h"
#include "txn.h"

static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
static int __log_flush_int __P((DB_LOG *, const DB_LSN *, int));
static int __log_newfh __P((DB_LOG *));
static int __log_open_files __P((DB_ENV *));
static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
static int __log_write __P((DB_LOG *, void *, u_int32_t));

/*
 * __log_put --
 *	Write a log record.
 *
 *	Validates the flags, then calls __log_put_int with the log region
 *	locked.  Returns 0 on success or an error value.
 *
 * PUBLIC: int __log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
 */
int
__log_put(dbenv, lsn, dbt, flags)
	DB_ENV *dbenv;
	DB_LSN *lsn;
	const DBT *dbt;
	u_int32_t flags;
{
	DB_LOG *dblp;
	int ret;
	u_int32_t op;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv,
	    dbenv->lg_handle, "DB_ENV->log_put", DB_INIT_LOG);

	/* Validate arguments. */
	op = DB_OPFLAGS_MASK & flags;
	if (op != 0 && op != DB_CHECKPOINT &&
	    op != DB_CURLSN && op != DB_COMMIT)
		return (__db_ferr(dbenv, "DB_ENV->log_put", 0));
	if ((flags & ~DB_OPFLAGS_MASK & ~DB_FLUSH) != 0)
		return (__db_ferr(dbenv, "DB_ENV->log_put", 0));

	dblp = dbenv->lg_handle;
	R_LOCK(dbenv, &dblp->reginfo);
	ret = __log_put_int(dbenv, lsn, dbt, flags);
	R_UNLOCK(dbenv, &dblp->reginfo);
	return (ret);
}

/*
 * __log_put_int --
 *	Write a log record; internal version.
 *
 *	The function unlocks and relocks the log region internally, so it
 *	is entered and left with the region lock held (see __log_put).
 *	On success the LSN of the record is stored through lsn.
 *
 * PUBLIC: int __log_put_int __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
 */
int
__log_put_int(dbenv, lsn, dbt, flags)
	DB_ENV *dbenv;
	DB_LSN *lsn;
	const DBT *dbt;
	u_int32_t flags;
{
	DBT t;
	struct __db_commit *commit;
	DB_LOG *dblp;
	DB_LSN old_lsn, save_lsn;
	LOG *lp;
	u_int32_t lastoff, op;
	int do_flush, ret, send_newfile;

	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;
	op = flags & DB_OPFLAGS_MASK;
	do_flush = LF_ISSET(DB_FLUSH);
	send_newfile = 0;

	/*
	 * If the application just wants to know where we are, fill in
	 * the information.  Currently used by the transaction manager
	 * to avoid writing TXN_begin records.
	 */
	if (op == DB_CURLSN) {
		lsn->file = lp->lsn.file;
		lsn->offset = lp->lsn.offset;
		return (0);
	}

	/*
	 * Save a copy of lp->lsn before we might decide to switch log
	 * files and change it.  If we do switch log files, and we're
	 * doing replication, we'll need to tell our clients about the
	 * switch, and they need to receive a NEWFILE message
	 * with this "would-be" LSN in order to know they're not
	 * missing any log records.
	 */
	old_lsn = lp->lsn;

	/*
	 * If this information won't fit in the file, or if we're a
	 * replication client environment and have been told to do so,
	 * swap files.
	 */
	if (F_ISSET(lp, LOG_NEWFILE) ||
	    lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
		if (sizeof(HDR) +
		    sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
			__db_err(dbenv,
		    "DB_ENV->log_put: record larger than maximum file size");
			return (EINVAL);
		}

		/*
		 * Flush the log so this file is out and can be
		 * closed.  We cannot release the region lock
		 * here because we need to protect the end of
		 * the file while we switch.  In particular
		 * a thread with a smaller record than ours
		 * could detect that there is space in the
		 * log.  Even blocking that event by declaring
		 * the file full would require all threads to
		 * wait here so that the lsn.file can be
		 * moved ahead after the flush completes.
		 * This probably can be changed if we had
		 * an lsn for the previous file and one
		 * for the current, but it does not seem like
		 * this would get much more throughput, if any.
		 */
		if ((ret = __log_flush_int(dblp, NULL, 0)) != 0)
			return (ret);
		DB_ASSERT(lp->b_off == 0);

		/*
		 * Save the last known offset from the previous file, we'll
		 * need it to initialize the persistent header information.
		 */
		lastoff = lp->lsn.offset;

		/* Point the current LSN to the new file. */
		++lp->lsn.file;
		lp->lsn.offset = 0;

		/* Reset the file write offset. */
		lp->w_off = 0;
	} else
		lastoff = 0;

	/* Initialize the LSN information returned to the user. */
	save_lsn = lp->lsn;

	/*
	 * Insert persistent information as the first record in every file.
	 * Note that the previous length is wrong for the very first record
	 * of the log, but that's okay, we check for it during retrieval.
	 */
	if (lp->lsn.offset == 0) {
		DB_ASSERT(lp->b_off == 0);

		/*
		 * If we are a master replication site, flag that we
		 * need to send a newfile message to the clients.  We do
		 * this here, rather than in the log-file-switching code
		 * above, so that it's the first thing we send in a brand-new
		 * log (when there's no switch, but lp->lsn == [1][0]).  We
		 * postpone doing the actual send until we can safely release
		 * the log region lock and are doing so anyway.
		 */
		if (F_ISSET(dbenv, DB_ENV_REP_MASTER))
			send_newfile = 1;

		t.data = &lp->persist;
		t.size = sizeof(LOGP);
		if ((ret = __log_putr(dblp, &save_lsn,
		    &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
			return (ret);

		/*
		 * Record files open in this log.
		 *
		 * If we are recovering, then we are in the
		 * process of outputting the files; don't do
		 * it again.
		 *
		 * If LOG_NEWFILE is set, we are a replication client;
		 * the openfile records are forthcoming from the master
		 * and shouldn't be generated here.
		 */
		if (!F_ISSET(dblp, DBLOG_RECOVER) &&
		    !F_ISSET(lp, LOG_NEWFILE) &&
		    (ret = __log_open_files(dbenv)) != 0)
			return (ret);

		/* Update the LSN information returned to the user. */
		save_lsn = lp->lsn;
	}

	/* We're done with the NEWFILE flag; clear it. */
	F_CLR(lp, LOG_NEWFILE);

	/* Write the application's log record. */
	if ((ret = __log_putr(dblp,
	    &save_lsn, dbt, lp->lsn.offset - lp->len)) != 0)
		return (ret);

	*lsn = save_lsn;

	/*
	 * On a checkpoint, we:
	 *	1. Put out the checkpoint record (above).
	 *	2. Save the LSN of the checkpoint in the shared region.
	 *	3. Append the set of file name information into the log.
	 *
	 * We save the LSN of the checkpoint region here because we have
	 * not yet released the log region lock.  However, if we're
	 * a replication master, we want to send the checkpoint record
	 * before we send the open_files records--but sending means
	 * releasing the lock.  So we put off calling __log_open_files
	 * (#3 in the list above) until after calling __rep_send_message.
	 */
	if (op == DB_CHECKPOINT)
		lp->chkpt_lsn = *lsn;

	/*
	 * In a replicated environment, we also write the log record over
	 * the network.  If the send function fails and this is a critical
	 * operation (COMMIT or CHECKPOINT), flush the local log.
	 *
	 * Note that we release the log region lock; anything which
	 * must happen atomically must thus happen before this point.
	 */
#ifdef DIAGNOSTIC
	R_UNLOCK(dbenv, &dblp->reginfo);
	if (F_ISSET(dbenv, DB_ENV_REP_MASTER)) {
#else
	if (F_ISSET(dbenv, DB_ENV_REP_MASTER)) {
		R_UNLOCK(dbenv, &dblp->reginfo);
#endif
		/*
		 * If we flagged it as necessary earlier, send a NEWFILE
		 * message before the log record.
		 */
		if ((send_newfile && __rep_send_message(dbenv,
		    DB_EID_BROADCAST, REP_NEWFILE, &old_lsn, NULL, 0) != 0) ||
		    (__rep_send_message(dbenv,
		    DB_EID_BROADCAST, REP_LOG, lsn, dbt, do_flush) != 0))
			if (op == DB_COMMIT || op == DB_CHECKPOINT)
				do_flush = DB_FLUSH;
#ifdef DIAGNOSTIC
	}
	R_LOCK(dbenv, &dblp->reginfo);
#else
		R_LOCK(dbenv, &dblp->reginfo);
	}
#endif

	/* This is #3 in the list of things we do on a checkpoint, above. */
	if (op == DB_CHECKPOINT && (ret = __log_open_files(dbenv)) != 0)
		return (ret);

	/*
	 * If a flush is in progress, drop the region lock and
	 * block waiting for the next flush.
	 */
	if (do_flush && lp->in_flush != 0) {
		if ((commit = SH_TAILQ_FIRST(
		    &lp->free_commits, __db_commit)) == NULL) {
			/*
			 * No recycled commit structure is available, so
			 * allocate one; if that fails, fall back to
			 * flushing directly.
			 */
			if ((ret = __db_shalloc(dblp->reginfo.addr,
			    sizeof(struct __db_commit),
			    MUTEX_ALIGN, &commit)) != 0)
				goto flush;
			if ((ret = __db_shmutex_init(dbenv, &commit->mutex,
			    R_OFFSET(&dblp->reginfo, &commit->mutex),
			    MUTEX_SELF_BLOCK, &dblp->reginfo,
			    (REGMAINT *)
			    R_ADDR(&dblp->reginfo, lp->maint_off))) != 0) {
				__db_shalloc_free(dblp->reginfo.addr, commit);
				return (ret);
			}
			MUTEX_LOCK(dbenv, &commit->mutex, dbenv->lockfhp);
		} else
			SH_TAILQ_REMOVE(
			    &lp->free_commits, commit, links, __db_commit);

		lp->ncommit++;

		/*
		 * We may have released the log region lock before getting here,
		 * so records may arrive at this point out of order.  Be
		 * sure that we only move t_lsn forward.
		 */
		if (log_compare(&lp->t_lsn, lsn) < 0)
			lp->t_lsn = *lsn;
		commit->lsn = *lsn;
		SH_TAILQ_INSERT_HEAD(
		    &lp->commits, commit, links, __db_commit);

		/* Wait here for a log flush. */
		R_UNLOCK(dbenv, &dblp->reginfo);
		MUTEX_LOCK(dbenv, &commit->mutex, dbenv->lockfhp);
		R_LOCK(dbenv, &dblp->reginfo);

		lp->ncommit--;

		/*
		 * Grab the flag before freeing the struct to see if
		 * we need to flush the log to commit.  If so,
		 * use the maximal lsn for any committing thread.
		 */
		do_flush = F_ISSET(commit, DB_COMMIT_FLUSH);
		F_CLR(commit, DB_COMMIT_FLUSH);
		SH_TAILQ_INSERT_HEAD(
		    &lp->free_commits, commit, links, __db_commit);
		if (do_flush) {
			lp->in_flush--;
			save_lsn = lp->t_lsn;
		} else
			return (0);
	}

	/*
	 * On a checkpoint or when flush is requested, we:
	 *	Flush the current buffer contents to disk.
	 *	Sync the log to disk.
	 */
flush:	if (do_flush) {
		/* Note if this flush contains a single commit. */
		if (op == DB_COMMIT && lp->ncommit == 0) {
			lp->stat.st_flushcommit++;
			lp->stat.st_mincommitperflush = 1;
			if (lp->stat.st_maxcommitperflush == 0)
				lp->stat.st_maxcommitperflush = 1;
		}
		if ((ret = __log_flush_int(dblp, &save_lsn, 1)) != 0) {
			if (op != DB_COMMIT)
				return (ret);

			/*
			 * If the flush failed we must make sure
			 * that a commit record does not get out
			 * after we abort the transaction.
			 * We do this by overwriting it in
			 * the buffer.
			 * (Note that other commits in this
			 * buffer will wait until a successful
			 * write happens, we do not wake them.)
			 * We point at the right part of the log
			 * and write an abort record over the
			 * commit.  We then must flush the log
			 * again, since that part of the buffer
			 * may have actually made it out.
			 */

			/* See if we are still in the buffer. */
			if (lsn->file != lp->lsn.file ||
			    lsn->offset < lp->w_off)
				return (0);

			__txn_force_abort(
			    dblp->bufp + lsn->offset - lp->w_off);

			/* That part of the buffer may have made it to disk. */
			(void)__log_flush_int(dblp, lsn, 0);
			return (ret);
		}
	}

	/*
	 * On a checkpoint, we:
	 *	Save the time the checkpoint was written.
	 *	Reset the bytes written since the last checkpoint.
	 */
	if (op == DB_CHECKPOINT) {
		(void)time(&lp->chkpt);
		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
	}
	return (0);
}

/*
 * __log_putr --
 *	Actually put a record into the log.
 *
 *	Writes a HDR followed by the record data, then advances lp->len and
 *	lp->lsn.offset.  On failure the buffer and write offsets are put
 *	back to their values on entry.
 */
static int
__log_putr(dblp, lsn, dbt, prev)
	DB_LOG *dblp;
	DB_LSN *lsn;
	const DBT *dbt;
	u_int32_t prev;
{
	HDR hdr;
	LOG *lp;
	int ret, t_ret;
	size_t b_off, nr;
	u_int32_t w_off;

	lp = dblp->reginfo.primary;

	/* Save our position in case we fail. */
	b_off = lp->b_off;
	w_off = lp->w_off;

	/*
	 * Initialize the header.  If we just switched files, lsn.offset will
	 * be 0, and what we really want is the offset of the previous record
	 * in the previous file.  Fortunately, prev holds the value we want.
	 */
	hdr.prev = prev;
	hdr.len = sizeof(HDR) + dbt->size;
	hdr.cksum = __ham_func4(NULL, dbt->data, dbt->size);

	if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0)
		goto err;

	if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
		goto err;

	lp->len = sizeof(HDR) + dbt->size;
	lp->lsn.offset += sizeof(HDR) + dbt->size;
	return (0);

err:
	/*
	 * If we wrote more than one buffer before failing get the
	 * first one back.
	 * The extra buffers will fail the checksums and be ignored.
	 */
	if (w_off + lp->buffer_size < lp->w_off) {
		if ((t_ret = __os_seek(dblp->dbenv, &dblp->lfh, 0, 0,
		    w_off, 0, DB_OS_SEEK_SET)) != 0 ||
		    (t_ret = __os_read(dblp->dbenv,
		    &dblp->lfh, dblp->bufp, b_off, &nr)) != 0)
			return (__db_panic(dblp->dbenv, t_ret));
		if (nr != b_off) {
			__db_err(dblp->dbenv,
			    "Short read while restoring log");
			return (__db_panic(dblp->dbenv, EIO));
		}
	}

	/* Reset to where we started. */
	lp->w_off = w_off;
	lp->b_off = b_off;

	return (ret);
}

/*
 * __log_flush --
 *	Write all records less than or equal to the specified LSN.
 *
 *	Calls __log_flush_int with the log region locked.
 *
 * PUBLIC: int __log_flush __P((DB_ENV *, const DB_LSN *));
 */
int
__log_flush(dbenv, lsn)
	DB_ENV *dbenv;
	const DB_LSN *lsn;
{
	DB_LOG *dblp;
	int ret;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv,
	    dbenv->lg_handle, "DB_ENV->log_flush", DB_INIT_LOG);

	dblp = dbenv->lg_handle;
	R_LOCK(dbenv, &dblp->reginfo);
	ret = __log_flush_int(dblp, lsn, 1);
	R_UNLOCK(dbenv, &dblp->reginfo);
	return (ret);
}

/*
 * __log_flush_int --
 *	Write all records less than or equal to the specified LSN; internal
 *	version.
 *
 *	If release is non-zero the region lock is dropped around the fsync
 *	call and re-acquired afterwards; otherwise it is left alone.
 */
static int
__log_flush_int(dblp, lsn, release)
	DB_LOG *dblp;
	const DB_LSN *lsn;
	int release;
{
	DB_ENV *dbenv;
	DB_LSN f_lsn, t_lsn;
	LOG *lp;
	struct __db_commit *commit;
	size_t b_off;
	u_int32_t ncommit, w_off;
	int current, first, ret;

	ret = 0;
	lp = dblp->reginfo.primary;
	dbenv = dblp->dbenv;

	/*
	 * If no LSN specified, flush the entire log by setting the flush LSN
	 * to the last LSN written in the log.  Otherwise, check that the LSN
	 * isn't a non-existent record for the log.
	 */
	if (lsn == NULL) {
		t_lsn.file = lp->lsn.file;
		t_lsn.offset = lp->lsn.offset - lp->len;
		lsn = &t_lsn;
	} else
		if (lsn->file > lp->lsn.file ||
		    (lsn->file == lp->lsn.file &&
		    lsn->offset > lp->lsn.offset - lp->len)) {
			__db_err(dblp->dbenv,
		    "DB_ENV->log_flush: LSN past current end-of-log");
			return (EINVAL);
		}

	/*
	 * Protect flushing with its own Mutex so we can release
	 * the region lock except during file switches.
	 */
	MUTEX_LOCK(dbenv, &lp->flush, dbenv->lockfhp);

	/*
	 * If the LSN is less than or equal to the last-sync'd LSN, we're done.
	 * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte
	 * after the byte we absolutely know was written to disk, so the test
	 * is <, not <=.
	 */
	if (lsn->file < lp->s_lsn.file ||
	    (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
		goto done;

	/*
	 * We may need to write the current buffer.  We have to write the
	 * current buffer if the flush LSN is greater than or equal to the
	 * buffer's starting LSN.
	 */
	current = 0;
	if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) {
		if ((ret = __log_write(dblp, dblp->bufp, lp->b_off)) != 0)
			goto err;

		lp->b_off = 0;
		current = 1;
	}

	/*
	 * It's possible that this thread may never have written to this log
	 * file.  Acquire a file descriptor if we don't already have one.
	 * One last check -- if we're not writing anything from the current
	 * buffer, don't bother.  We have nothing to write and nothing to
	 * sync.
	 */
	if (dblp->lfname != lp->lsn.file) {
		if (!current)
			goto done;
		if ((ret = __log_newfh(dblp)) != 0)
			goto err;
	}

	/*
	 * We are going to flush, release the region.
	 * First get the current state of the buffer since
	 * another write may come in, but we may not flush it.
	 */
	b_off = lp->b_off;
	w_off = lp->w_off;
	f_lsn = lp->f_lsn;
	lp->in_flush++;
	if (release)
		R_UNLOCK(dbenv, &dblp->reginfo);

	/* Sync all writes to disk. */
	if ((ret = __os_fsync(dblp->dbenv, &dblp->lfh)) != 0) {
		MUTEX_UNLOCK(dbenv, &lp->flush);
		if (release)
			R_LOCK(dbenv, &dblp->reginfo);
		ret = __db_panic(dblp->dbenv, ret);
		return (ret);
	}

	/*
	 * Set the last-synced LSN.
	 * This value must be set to the LSN past the last complete
	 * record that has been flushed.  This is at least the first
	 * lsn, f_lsn.  If the buffer is empty, b_off == 0, then
	 * we can move up to write point since the first lsn is not
	 * set for the new buffer.
	 */
	lp->s_lsn = f_lsn;
	if (b_off == 0)
		lp->s_lsn.offset = w_off;

	MUTEX_UNLOCK(dbenv, &lp->flush);
	if (release)
		R_LOCK(dbenv, &dblp->reginfo);

	lp->in_flush--;
	++lp->stat.st_scount;

	/*
	 * Walk the list of waiting committers.  Those whose LSN is now
	 * behind s_lsn are simply released; the first one that is not is
	 * released with DB_COMMIT_FLUSH set so that it performs the next
	 * flush.
	 */
	if (lp->ncommit != 0) {
		first = 1;
		ncommit = 0;
		for (commit = SH_TAILQ_FIRST(&lp->commits, __db_commit);
		    commit != NULL;
		    commit = SH_TAILQ_NEXT(commit, links, __db_commit))
			if (log_compare(&lp->s_lsn, &commit->lsn) > 0) {
				MUTEX_UNLOCK(dblp->dbenv, &commit->mutex);
				SH_TAILQ_REMOVE(
				    &lp->commits, commit, links, __db_commit);
				ncommit++;
			} else if (first == 1) {
				F_SET(commit, DB_COMMIT_FLUSH);
				MUTEX_UNLOCK(dblp->dbenv, &commit->mutex);
				SH_TAILQ_REMOVE(
				    &lp->commits, commit, links, __db_commit);
				/*
				 * This thread will wake and flush.
				 * If another thread commits and flushes
				 * first we will waste a trip through the
				 * mutex.
				 */
				lp->in_flush++;
				first = 0;
			}
		if (ncommit != 0) {
			if (lp->stat.st_maxcommitperflush < ncommit)
				lp->stat.st_maxcommitperflush = ncommit;
			if (lp->stat.st_mincommitperflush > ncommit ||
			    lp->stat.st_mincommitperflush == 0)
				lp->stat.st_mincommitperflush = ncommit;
			lp->stat.st_flushcommit++;
		}
	}

	return (ret);

done:
err:	MUTEX_UNLOCK(dbenv, &lp->flush);
	return (ret);
}

/*
 * __log_fill --
 *	Write information into the log.
 *
 *	Copies len bytes from addr into the log buffer, calling __log_write
 *	each time the buffer fills, and writing whole multiples of the
 *	buffer size straight from addr when the buffer is empty.
 */
static int
__log_fill(dblp, lsn, addr, len)
	DB_LOG *dblp;
	DB_LSN *lsn;
	void *addr;
	u_int32_t len;
{
	LOG *lp;
	u_int32_t bsize, nrec;
	size_t nw, remain;
	int ret;

	lp = dblp->reginfo.primary;
	bsize = lp->buffer_size;

	while (len > 0) {			/* Copy out the data. */
		/*
		 * If we're beginning a new buffer, note the user LSN to which
		 * the first byte of the buffer belongs.  We have to know this
		 * when flushing the buffer so that we know if the in-memory
		 * buffer needs to be flushed.
		 */
		if (lp->b_off == 0)
			lp->f_lsn = *lsn;

		/*
		 * If we're on a buffer boundary and the data is big enough,
		 * copy as many records as we can directly from the data.
		 */
		if (lp->b_off == 0 && len >= bsize) {
			nrec = len / bsize;
			if ((ret = __log_write(dblp, addr, nrec * bsize)) != 0)
				return (ret);
			addr = (u_int8_t *)addr + nrec * bsize;
			len -= nrec * bsize;
			++lp->stat.st_wcount_fill;
			continue;
		}

		/* Figure out how many bytes we can copy this time. */
		remain = bsize - lp->b_off;
		nw = remain > len ? len : remain;
		memcpy(dblp->bufp + lp->b_off, addr, nw);
		addr = (u_int8_t *)addr + nw;
		len -= nw;
		lp->b_off += nw;

		/* If we fill the buffer, flush it. */
		if (lp->b_off == bsize) {
			if ((ret =
			    __log_write(dblp, dblp->bufp, bsize)) != 0)
				return (ret);
			lp->b_off = 0;
			++lp->stat.st_wcount_fill;
		}
	}
	return (0);
}

/*
 * __log_write --
 *	Write the log buffer to disk.
 *
 *	Writes len bytes from addr at file offset lp->w_off, then advances
 *	w_off and the write statistics.  A short write returns EIO.
 */
static int
__log_write(dblp, addr, len)
	DB_LOG *dblp;
	void *addr;
	u_int32_t len;
{
	LOG *lp;
	size_t nw;
	int ret;

	/*
	 * If we haven't opened the log file yet or the current one
	 * has changed, acquire a new log file.
	 */
	lp = dblp->reginfo.primary;
	if (!F_ISSET(&dblp->lfh, DB_FH_VALID) || dblp->lfname != lp->lsn.file)
		if ((ret = __log_newfh(dblp)) != 0)
			return (ret);

	/*
	 * Seek to the offset in the file (someone may have written it
	 * since we last did).
	 */
	if ((ret = __os_seek(dblp->dbenv,
	    &dblp->lfh, 0, 0, lp->w_off, 0, DB_OS_SEEK_SET)) != 0 ||
	    (ret = __os_write(dblp->dbenv, &dblp->lfh, addr, len, &nw)) != 0)
		return (ret);
	if (nw != len) {
		__db_err(dblp->dbenv, "Short write while writing log");
		return (EIO);
	}

	/* Reset the buffer offset and update the seek offset. */
	lp->w_off += len;

	/* Update written statistics. */
	if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
		lp->stat.st_w_bytes -= MEGABYTE;
		++lp->stat.st_w_mbytes;
	}
	if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
		lp->stat.st_wc_bytes -= MEGABYTE;
		++lp->stat.st_wc_mbytes;
	}
	++lp->stat.st_wcount;

	return (0);
}

/*
 * __log_file --
 *	Map a DB_LSN to a file name.
 *
 *	Copies the name into namep, which holds len bytes.  If the name and
 *	its terminating nul do not fit, EINVAL is returned and, when len is
 *	non-zero, namep is set to the empty string.  The temporary name
 *	obtained from __log_name is released on every path.
 *
 * PUBLIC: int __log_file __P((DB_ENV *, const DB_LSN *, char *, size_t));
 */
int
__log_file(dbenv, lsn, namep, len)
	DB_ENV *dbenv;
	const DB_LSN *lsn;
	char *namep;
	size_t len;
{
	DB_LOG *dblp;
	int ret;
	char *name;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv,
	    dbenv->lg_handle, "DB_ENV->log_file", DB_INIT_LOG);

	dblp = dbenv->lg_handle;
	R_LOCK(dbenv, &dblp->reginfo);
	ret = __log_name(dblp, lsn->file, &name, NULL, 0);
	R_UNLOCK(dbenv, &dblp->reginfo);
	if (ret != 0)
		return (ret);

	/* Check to make sure there's enough room and copy the name. */
	if (len < strlen(name) + 1) {
		/* Don't store through namep if the caller gave us no room. */
		if (len > 0)
			*namep = '\0';
		__db_err(dbenv, "DB_ENV->log_file: name buffer is too short");
		ret = EINVAL;
	} else
		(void)strcpy(namep, name);

	/* Single exit so the name is freed on the error path as well. */
	__os_freestr(dbenv, name);

	return (ret);
}

/*
 * __log_newfh --
 *	Acquire a file handle for the current log file.
 *
 *	Closes dblp->lfh if it is valid, records lp->lsn.file in
 *	dblp->lfname and opens that file through __log_name.
 */
static int
__log_newfh(dblp)
	DB_LOG *dblp;
{
	LOG *lp;
	int ret;
	char *name;

	/* Close any previous file descriptor. */
	if (F_ISSET(&dblp->lfh, DB_FH_VALID))
		(void)__os_closehandle(&dblp->lfh);

	/* Get the path of the new file and open it. */
	lp = dblp->reginfo.primary;
	dblp->lfname = lp->lsn.file;

	/*
	 * Adding DB_OSO_LOG to the flags may add additional platform-specific
	 * optimizations.  On WinNT, the logfile is preallocated, which may
	 * have a time penalty at startup, but have better overall throughput.
	 * We are not certain that this works reliably, so enable at your own
	 * risk.
	 *
	 * XXX:
	 * Initialize the log file size.  This is a hack to push the log's
	 * maximum size down into the Windows __os_open routine, because it
	 * wants to pre-allocate it.
	 */
	dblp->lfh.log_size = dblp->dbenv->lg_max;

	/*
	 * __log_name can return an error before it has stored a path, so
	 * start from NULL and never format or free an unset pointer.
	 */
	name = NULL;
	if ((ret = __log_name(dblp, dblp->lfname, &name, &dblp->lfh,
	    DB_OSO_CREATE |/* DB_OSO_LOG |*/ DB_OSO_SEQ)) != 0)
		__db_err(dblp->dbenv, "DB_ENV->log_put: %s: %s",
		    name == NULL ? "log file" : name, db_strerror(ret));

	if (name != NULL)
		__os_freestr(dblp->dbenv, name);
	return (ret);
}

/*
 * __log_name --
 *	Return the log name for a particular file, and optionally open it.
 *
 *	The name is returned through namep.  If fhp is NULL the file is not
 *	opened.
 *
 * PUBLIC: int __log_name __P((DB_LOG *,
 * PUBLIC:     u_int32_t, char **, DB_FH *, u_int32_t));
 */
int
__log_name(dblp, filenumber, namep, fhp, flags)
	DB_LOG *dblp;
	u_int32_t filenumber, flags;
	char **namep;
	DB_FH *fhp;
{
	LOG *lp;
	int ret;
	char *oname;
	char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20];

	lp = dblp->reginfo.primary;

	/* Nothing allocated yet; the error path frees oname only if set. */
	oname = NULL;

	/*
	 * !!!
	 * The semantics of this routine are bizarre.
	 *
	 * The reason for all of this is that we need a place where we can
	 * intercept requests for log files, and, if appropriate, check for
	 * both the old-style and new-style log file names.  The trick is
	 * that all callers of this routine that are opening the log file
	 * read-only want to use an old-style file name if they can't find
	 * a match using a new-style name.  The only down-side is that some
	 * callers may check for the old-style when they really don't need
	 * to, but that shouldn't mess up anything, and we only check for
	 * the old-style name when we've already failed to find a new-style
	 * one.
	 *
	 * Create a new-style file name, and if we're not going to open the
	 * file, return regardless.
	 */
	(void)snprintf(new, sizeof(new), LFNAME, filenumber);
	if ((ret = __db_appname(dblp->dbenv,
	    DB_APP_LOG, NULL, new, 0, NULL, namep)) != 0 || fhp == NULL)
		return (ret);

	/* Open the new-style file -- if we succeed, we're done. */
	if ((ret = __os_open(dblp->dbenv,
	    *namep, flags, lp->persist.mode, fhp)) == 0)
		return (0);

	/*
	 * The open failed... if the DB_RDONLY flag isn't set, we're done,
	 * the caller isn't interested in old-style files.
	 */
	if (!LF_ISSET(DB_OSO_RDONLY)) {
		__db_err(dblp->dbenv,
		    "%s: log file open failed: %s", *namep, db_strerror(ret));
		return (__db_panic(dblp->dbenv, ret));
	}

	/* Create an old-style file name. */
	(void)snprintf(old, sizeof(old), LFNAME_V1, filenumber);
	if ((ret = __db_appname(dblp->dbenv,
	    DB_APP_LOG, NULL, old, 0, NULL, &oname)) != 0)
		goto err;

	/*
	 * Open the old-style file -- if we succeed, we're done.  Free the
	 * space allocated for the new-style name and return the old-style
	 * name to the caller.
	 */
	if ((ret = __os_open(dblp->dbenv,
	    oname, flags, lp->persist.mode, fhp)) == 0) {
		__os_freestr(dblp->dbenv, *namep);
		*namep = oname;
		return (0);
	}

	/*
	 * Couldn't find either style of name -- return the new-style name
	 * for the caller's error message.  If it's an old-style name that's
	 * actually missing we're going to confuse the user with the error
	 * message, but that implies that not only were we looking for an
	 * old-style name, but we expected it to exist and we weren't just
	 * looking for any log file.  That's not a likely error.
	 */
err:	if (oname != NULL)
		__os_freestr(dblp->dbenv, oname);
	return (ret);
}

/*
 * __log_open_files --
 *	Write a __log_register_log record for every in-use entry on the
 *	log region's file-name queue.  Returns the first error from
 *	__log_register_log, or 0.
 */
static int
__log_open_files(dbenv)
	DB_ENV *dbenv;
{
	DB_LOG *dblp;
	DB_LSN r_unused;
	DBT fid_dbt, t;
	FNAME *fnp;
	LOG *lp;
	int ret;

	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;

	for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname);
	    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
		if (fnp->ref == 0)	/* Entry not in use. */
			continue;

		/* t is only passed below when the entry has a name. */
		if (fnp->name_off != INVALID_ROFF) {
			memset(&t, 0, sizeof(t));
			t.data = R_ADDR(&dblp->reginfo, fnp->name_off);
			t.size = strlen(t.data) + 1;
		}
		memset(&fid_dbt, 0, sizeof(fid_dbt));
		fid_dbt.data = fnp->ufid;
		fid_dbt.size = DB_FILE_ID_LEN;

		/*
		 * Output LOG_CHECKPOINT records which will be
		 * processed during the OPENFILES pass of recovery.
		 * At the end of recovery we want to output the
		 * files that were open so that a future recovery
		 * run will have the correct files open during
		 * a backward pass.  For this we output LOG_RCLOSE
		 * records so that the files will be closed on
		 * the forward pass.
		 */
		if ((ret = __log_register_log(dbenv,
		    NULL, &r_unused, 0,
		    F_ISSET(dblp, DBLOG_RECOVER) ? LOG_RCLOSE : LOG_CHECKPOINT,
		    fnp->name_off == INVALID_ROFF ? NULL : &t,
		    &fid_dbt, fnp->id, fnp->s_type, fnp->meta_pgno)) != 0)
			return (ret);
	}
	return (0);
}