/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1996, 2010 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ #include "db_config.h" #include "db_int.h" #include "dbinc/db_page.h" #include "dbinc/btree.h" #include "dbinc/hash.h" #ifndef HAVE_QUEUE #include "dbinc/qam.h" /* For __db_no_queue_am(). */ #endif #include "dbinc/lock.h" #include "dbinc/mp.h" #include "dbinc/partition.h" #include "dbinc/txn.h" static int __db_associate_arg __P((DB *, DB *, int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); static int __dbc_del_arg __P((DBC *, u_int32_t)); static int __dbc_pget_arg __P((DBC *, DBT *, u_int32_t)); static int __dbc_put_arg __P((DBC *, DBT *, DBT *, u_int32_t)); static int __db_curinval __P((const ENV *)); static int __db_cursor_arg __P((DB *, u_int32_t)); static int __db_del_arg __P((DB *, DBT *, u_int32_t)); static int __db_get_arg __P((const DB *, DBT *, DBT *, u_int32_t)); static int __db_join_arg __P((DB *, DBC **, u_int32_t)); static int __db_open_arg __P((DB *, DB_TXN *, const char *, const char *, DBTYPE, u_int32_t)); static int __db_pget_arg __P((DB *, DBT *, u_int32_t)); static int __db_put_arg __P((DB *, DBT *, DBT *, u_int32_t)); static int __dbt_ferr __P((const DB *, const char *, const DBT *, int)); static int __db_compact_func __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); static int __db_associate_foreign_arg __P((DB *, DB *, int (*)(DB *, const DBT *, DBT *, const DBT *, int *), u_int32_t)); /* * These functions implement the Berkeley DB API. They are organized in a * layered fashion. The interface functions (XXX_pp) perform all generic * error checks (for example, PANIC'd region, replication state change * in progress, inconsistent transaction usage), call function-specific * check routines (_arg) to check for proper flag usage, etc., do pre-amble * processing (incrementing handle counts, handling local transactions), * call the function and then do post-amble processing (local transactions, * decrement handle counts). * * The basic structure is: * Check for simple/generic errors (PANIC'd region) * Check if replication is changing state (increment handle count). * Call function-specific argument checking routine * Create internal transaction if necessary * Call underlying worker function * Commit/abort internal transaction if necessary * Decrement handle count */ /* * __db_associate_pp -- * DB->associate pre/post processing. * * PUBLIC: int __db_associate_pp __P((DB *, DB_TXN *, DB *, * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); */ int __db_associate_pp(dbp, txn, sdbp, callback, flags) DB *dbp, *sdbp; DB_TXN *txn; int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); u_int32_t flags; { DBC *sdbc; DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret, txn_local; env = dbp->env; txn_local = 0; STRIP_AUTO_COMMIT(flags); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } /* * Secondary cursors may have the primary's lock file ID, so we need * to make sure that no older cursors are lying around when we make * the transition. */ if (TAILQ_FIRST(&sdbp->active_queue) != NULL || TAILQ_FIRST(&sdbp->join_queue) != NULL) { __db_errx(env, "Databases may not become secondary indices while cursors are open"); ret = EINVAL; goto err; } if ((ret = __db_associate_arg(dbp, sdbp, callback, flags)) != 0) goto err; /* * Create a local transaction as necessary, check for consistent * transaction usage, and, if we have no transaction but do have * locking on, acquire a locker id for the handle lock acquisition. */ if (IS_DB_AUTO_COMMIT(dbp, txn)) { if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) goto err; txn_local = 1; } /* Check for consistent transaction usage. */ if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) goto err; while ((sdbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL) if ((ret = __dbc_destroy(sdbc)) != 0) goto err; ret = __db_associate(dbp, ip, txn, sdbp, callback, flags); err: if (txn_local && (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) ret = t_ret; /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); return (ret); } /* * __db_associate_arg -- * Check DB->associate arguments. */ static int __db_associate_arg(dbp, sdbp, callback, flags) DB *dbp, *sdbp; int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); u_int32_t flags; { ENV *env; int ret; env = dbp->env; if (F_ISSET(sdbp, DB_AM_SECONDARY)) { __db_errx(env, "Secondary index handles may not be re-associated"); return (EINVAL); } if (F_ISSET(dbp, DB_AM_SECONDARY)) { __db_errx(env, "Secondary indices may not be used as primary databases"); return (EINVAL); } if (F_ISSET(dbp, DB_AM_DUP)) { __db_errx(env, "Primary databases may not be configured with duplicates"); return (EINVAL); } if (F_ISSET(dbp, DB_AM_RENUMBER)) { __db_errx(env, "Renumbering recno databases may not be used as primary databases"); return (EINVAL); } /* * It's OK for the primary and secondary to not share an environment IFF * the environments are local to the DB handle. (Specifically, cursor * adjustment will work correctly in this case.) The environment being * local implies the environment is not configured for either locking or * transactions, as neither of those could work correctly. */ if (dbp->env != sdbp->env && (!F_ISSET(dbp->env, ENV_DBLOCAL) || !F_ISSET(sdbp->env, ENV_DBLOCAL))) { __db_errx(env, "The primary and secondary must be opened in the same environment"); return (EINVAL); } if ((DB_IS_THREADED(dbp) && !DB_IS_THREADED(sdbp)) || (!DB_IS_THREADED(dbp) && DB_IS_THREADED(sdbp))) { __db_errx(env, "The DB_THREAD setting must be the same for primary and secondary"); return (EINVAL); } if (callback == NULL && (!F_ISSET(dbp, DB_AM_RDONLY) || !F_ISSET(sdbp, DB_AM_RDONLY))) { __db_errx(env, "Callback function may be NULL only when database handles are read-only"); return (EINVAL); } if ((ret = __db_fchk(env, "DB->associate", flags, DB_CREATE | DB_IMMUTABLE_KEY)) != 0) return (ret); return (0); } /* * __db_close_pp -- * DB->close pre/post processing. * * PUBLIC: int __db_close_pp __P((DB *, u_int32_t)); */ int __db_close_pp(dbp, flags) DB *dbp; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; env = dbp->env; ret = 0; /* * Close a DB handle -- as a handle destructor, we can't fail. * * !!! * The actual argument checking is simple, do it inline, outside of * the replication block. */ if (flags != 0 && flags != DB_NOSYNC) ret = __db_ferr(env, "DB->close", 0); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) { handle_check = 0; if (ret == 0) ret = t_ret; } if ((t_ret = __db_close(dbp, NULL, flags)) != 0 && ret == 0) ret = t_ret; /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); return (ret); } /* * __db_cursor_pp -- * DB->cursor pre/post processing. * * PUBLIC: int __db_cursor_pp __P((DB *, DB_TXN *, DBC **, u_int32_t)); */ int __db_cursor_pp(dbp, txn, dbcp, flags) DB *dbp; DB_TXN *txn; DBC **dbcp; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; REGENV *renv; int rep_blocked, ret; env = dbp->env; DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor"); ENV_ENTER(env, ip); /* Check for replication block. */ rep_blocked = 0; if (IS_ENV_REPLICATED(env)) { if (!IS_REAL_TXN(txn)) { if ((ret = __op_rep_enter(env, 0)) != 0) goto err; rep_blocked = 1; } renv = env->reginfo->primary; if (dbp->timestamp != renv->rep_timestamp) { __db_errx(env, "%s %s", "replication recovery unrolled committed transactions;", "open DB and DBcursor handles must be closed"); ret = DB_REP_HANDLE_DEAD; goto err; } } if ((ret = __db_cursor_arg(dbp, flags)) != 0) goto err; /* * Check for consistent transaction usage. For now, assume this * cursor might be used for read operations only (in which case * it may not require a txn). We'll check more stringently in * c_del and c_put. (Note this means the read-op txn tests have * to be a subset of the write-op ones.) */ if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0) goto err; ret = __db_cursor(dbp, ip, txn, dbcp, flags); /* * Register externally created cursors into the valid transaction. * If a family transaction was passed in, the transaction handle in * the cursor may not match. */ txn = (*dbcp)->txn; if (txn != NULL && ret == 0) TAILQ_INSERT_HEAD(&(txn->my_cursors), *dbcp, txn_cursors); err: /* Release replication block on error. */ if (ret != 0 && rep_blocked) (void)__op_rep_exit(env); ENV_LEAVE(env, ip); return (ret); } /* * __db_cursor -- * DB->cursor. * * PUBLIC: int __db_cursor __P((DB *, * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBC **, u_int32_t)); */ int __db_cursor(dbp, ip, txn, dbcp, flags) DB *dbp; DB_THREAD_INFO *ip; DB_TXN *txn; DBC **dbcp; u_int32_t flags; { DBC *dbc; ENV *env; db_lockmode_t mode; int ret; env = dbp->env; if (MULTIVERSION(dbp) && txn == NULL && (LF_ISSET(DB_TXN_SNAPSHOT) || F_ISSET(env->dbenv, DB_ENV_TXN_SNAPSHOT))) { if ((ret = __txn_begin(env, ip, NULL, &txn, DB_TXN_SNAPSHOT)) != 0) return (ret); F_SET(txn, TXN_PRIVATE); } PERFMON5(env, db, cursor, dbp->fname, dbp->dname, txn == NULL ? 0 : txn->txnid, flags, &dbp->fileid[0]); if ((ret = __db_cursor_int(dbp, ip, txn, dbp->type, PGNO_INVALID, LF_ISSET(DB_CURSOR_BULK | DB_CURSOR_TRANSIENT | DB_RECOVER), NULL, &dbc)) != 0) return (ret); /* * If this is CDB, do all the locking in the interface, which is * right here. */ if (CDB_LOCKING(env)) { mode = (LF_ISSET(DB_WRITELOCK)) ? DB_LOCK_WRITE : ((LF_ISSET(DB_WRITECURSOR) || txn != NULL) ? DB_LOCK_IWRITE : DB_LOCK_READ); if ((ret = __lock_get(env, dbc->locker, 0, &dbc->lock_dbt, mode, &dbc->mylock)) != 0) goto err; if (LF_ISSET(DB_WRITECURSOR)) F_SET(dbc, DBC_WRITECURSOR); if (LF_ISSET(DB_WRITELOCK)) F_SET(dbc, DBC_WRITER); } if (LF_ISSET(DB_READ_UNCOMMITTED) || (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED))) F_SET(dbc, DBC_READ_UNCOMMITTED); if (LF_ISSET(DB_READ_COMMITTED) || (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED))) F_SET(dbc, DBC_READ_COMMITTED); *dbcp = dbc; return (0); err: (void)__dbc_close(dbc); return (ret); } /* * __db_cursor_arg -- * Check DB->cursor arguments. */ static int __db_cursor_arg(dbp, flags) DB *dbp; u_int32_t flags; { ENV *env; env = dbp->env; /* * DB_READ_COMMITTED and DB_READ_UNCOMMITTED require locking. */ if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) { if (!LOCKING_ON(env)) return (__db_fnl(env, "DB->cursor")); } LF_CLR(DB_CURSOR_BULK | DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT); /* Check for invalid function flags. */ if (LF_ISSET(DB_WRITECURSOR)) { if (DB_IS_READONLY(dbp)) return (__db_rdonly(env, "DB->cursor")); if (!CDB_LOCKING(env)) return (__db_ferr(env, "DB->cursor", 0)); LF_CLR(DB_WRITECURSOR); } else if (LF_ISSET(DB_WRITELOCK)) { if (DB_IS_READONLY(dbp)) return (__db_rdonly(env, "DB->cursor")); LF_CLR(DB_WRITELOCK); } if (flags != 0) return (__db_ferr(env, "DB->cursor", 0)); return (0); } /* * __db_del_pp -- * DB->del pre/post processing. * * PUBLIC: int __db_del_pp __P((DB *, DB_TXN *, DBT *, u_int32_t)); */ int __db_del_pp(dbp, txn, key, flags) DB *dbp; DB_TXN *txn; DBT *key; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret, txn_local; env = dbp->env; txn_local = 0; STRIP_AUTO_COMMIT(flags); DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del"); #ifdef CONFIG_TEST if (IS_REP_MASTER(env)) DB_TEST_WAIT(env, env->test_check); #endif ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } if ((ret = __db_del_arg(dbp, key, flags)) != 0) goto err; /* Create local transaction as necessary. */ if (IS_DB_AUTO_COMMIT(dbp, txn)) { if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) goto err; txn_local = 1; } /* Check for consistent transaction usage. */ if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) goto err; ret = __db_del(dbp, ip, txn, key, flags); err: if (txn_local && (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) ret = t_ret; /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); __dbt_userfree(env, key, NULL, NULL); return (ret); } /* * __db_del_arg -- * Check DB->delete arguments. */ static int __db_del_arg(dbp, key, flags) DB *dbp; DBT *key; u_int32_t flags; { ENV *env; int ret; env = dbp->env; /* Check for changes to a read-only tree. */ if (DB_IS_READONLY(dbp)) return (__db_rdonly(env, "DB->del")); /* Check for invalid function flags. */ switch (flags) { case DB_CONSUME: if (dbp->type != DB_QUEUE) return (__db_ferr(env, "DB->del", 0)); goto copy; case DB_MULTIPLE: case DB_MULTIPLE_KEY: if (!F_ISSET(key, DB_DBT_BULK)) { __db_errx(env, "DB->del with DB_MULTIPLE(_KEY) requires multiple key records"); return (EINVAL); } /* FALL THROUGH */ case 0: copy: if ((ret = __dbt_usercopy(env, key)) != 0) return (ret); break; default: return (__db_ferr(env, "DB->del", 0)); } return (0); } /* * __db_exists -- * DB->exists implementation. * * PUBLIC: int __db_exists __P((DB *, DB_TXN *, DBT *, u_int32_t)); */ int __db_exists(dbp, txn, key, flags) DB *dbp; DB_TXN *txn; DBT *key; u_int32_t flags; { DBT data; int ret; /* * Most flag checking is done in the DB->get call, we only check for * specific incompatibilities here. This saves making __get_arg * aware of the exist method's API constraints. */ STRIP_AUTO_COMMIT(flags); if ((ret = __db_fchk(dbp->env, "DB->exists", flags, DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) != 0) return (ret); /* * Configure a data DBT that returns no bytes so there's no copy * of the data. */ memset(&data, 0, sizeof(data)); data.dlen = 0; data.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; return (dbp->get(dbp, txn, key, &data, flags)); } /* * db_fd_pp -- * DB->fd pre/post processing. * * PUBLIC: int __db_fd_pp __P((DB *, int *)); */ int __db_fd_pp(dbp, fdp) DB *dbp; int *fdp; { DB_FH *fhp; DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; env = dbp->env; DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd"); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) goto err; /* * !!! * There's no argument checking to be done. * * !!! * The actual method call is simple, do it inline. * * XXX * Truly spectacular layering violation. */ if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) == 0) { if (fhp == NULL) { *fdp = -1; __db_errx(env, "Database does not have a valid file handle"); ret = ENOENT; } else *fdp = fhp->fd; } /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; err: ENV_LEAVE(env, ip); return (ret); } /* * __db_get_pp -- * DB->get pre/post processing. * * PUBLIC: int __db_get_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); */ int __db_get_pp(dbp, txn, key, data, flags) DB *dbp; DB_TXN *txn; DBT *key, *data; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; u_int32_t mode; int handle_check, ignore_lease, ret, t_ret, txn_local; env = dbp->env; mode = 0; txn_local = 0; STRIP_AUTO_COMMIT(flags); DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get"); ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; LF_CLR(DB_IGNORE_LEASE); if ((ret = __db_get_arg(dbp, key, data, flags)) != 0) return (ret); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } if (LF_ISSET(DB_READ_UNCOMMITTED)) mode = DB_READ_UNCOMMITTED; else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME || (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) { mode = DB_WRITELOCK; if (IS_DB_AUTO_COMMIT(dbp, txn)) { if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) goto err; txn_local = 1; } } /* Check for consistent transaction usage. */ if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, mode == DB_WRITELOCK || LF_ISSET(DB_RMW) ? 0 : 1)) != 0) goto err; ret = __db_get(dbp, ip, txn, key, data, flags); /* * Check for master leases. */ if (ret == 0 && IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) ret = __rep_lease_check(env, 1); err: if (txn_local && (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) ret = t_ret; /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); __dbt_userfree(env, key, NULL, data); return (ret); } /* * __db_get -- * DB->get. * * PUBLIC: int __db_get __P((DB *, * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t)); */ int __db_get(dbp, ip, txn, key, data, flags) DB *dbp; DB_THREAD_INFO *ip; DB_TXN *txn; DBT *key, *data; u_int32_t flags; { DBC *dbc; u_int32_t mode; int ret, t_ret; /* * The DB_CURSOR_TRANSIENT flag indicates that we're just doing a single * operation with this cursor, and that in case of error we don't need * to restore it to its old position. Thus, we can perform the get * without duplicating the cursor, saving some cycles in this common * case. */ mode = DB_CURSOR_TRANSIENT; if (LF_ISSET(DB_READ_UNCOMMITTED)) { mode |= DB_READ_UNCOMMITTED; LF_CLR(DB_READ_UNCOMMITTED); } else if (LF_ISSET(DB_READ_COMMITTED)) { mode |= DB_READ_COMMITTED; LF_CLR(DB_READ_COMMITTED); } else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME || (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) mode |= DB_WRITELOCK; if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0) return (ret); DEBUG_LREAD(dbc, txn, "DB->get", key, NULL, flags); /* * The semantics of bulk gets are different for DB->get vs DBC->get. * Mark the cursor so the low-level bulk get routines know which * behavior we want. */ F_SET(dbc, DBC_FROM_DB_GET); /* * SET_RET_MEM indicates that if key and/or data have no DBT * flags set and DB manages the returned-data memory, that memory * will belong to this handle, not to the underlying cursor. */ SET_RET_MEM(dbc, dbp); if (LF_ISSET(~(DB_RMW | DB_MULTIPLE)) == 0) LF_SET(DB_SET); #ifdef HAVE_PARTITION if (F_ISSET(dbc, DBC_PARTITIONED)) ret = __partc_get(dbc, key, data, flags); else #endif ret = __dbc_get(dbc, key, data, flags); if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) ret = t_ret; return (ret); } /* * __db_get_arg -- * DB->get argument checking, used by both DB->get and DB->pget. */ static int __db_get_arg(dbp, key, data, flags) const DB *dbp; DBT *key, *data; u_int32_t flags; { ENV *env; int dirty, multi, ret; env = dbp->env; /* * Check for read-modify-write validity. DB_RMW doesn't make sense * with CDB cursors since if you're going to write the cursor, you * had to create it with DB_WRITECURSOR. Regardless, we check for * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it. * If this changes, confirm that DB does not itself set the DB_RMW * flag in a path where CDB may have been configured. */ dirty = 0; if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) { if (!LOCKING_ON(env)) return (__db_fnl(env, "DB->get")); if ((ret = __db_fcchk(env, "DB->get", flags, DB_READ_UNCOMMITTED, DB_READ_COMMITTED)) != 0) return (ret); if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) dirty = 1; LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); } multi = 0; if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { if (LF_ISSET(DB_MULTIPLE_KEY)) goto multi_err; multi = LF_ISSET(DB_MULTIPLE) ? 1 : 0; LF_CLR(DB_MULTIPLE); } /* Check for invalid function flags. */ switch (flags) { case DB_GET_BOTH: if ((ret = __dbt_usercopy(env, data)) != 0) return (ret); /* FALLTHROUGH */ case 0: if ((ret = __dbt_usercopy(env, key)) != 0) { __dbt_userfree(env, key, NULL, data); return (ret); } break; case DB_SET_RECNO: if (!F_ISSET(dbp, DB_AM_RECNUM)) goto err; if ((ret = __dbt_usercopy(env, key)) != 0) return (ret); break; case DB_CONSUME: case DB_CONSUME_WAIT: if (dirty) { __db_errx(env, "%s is not supported with DB_CONSUME or DB_CONSUME_WAIT", LF_ISSET(DB_READ_UNCOMMITTED) ? "DB_READ_UNCOMMITTED" : "DB_READ_COMMITTED"); return (EINVAL); } if (multi) multi_err: return (__db_ferr(env, "DB->get", 1)); if (dbp->type == DB_QUEUE) break; /* FALLTHROUGH */ default: err: return (__db_ferr(env, "DB->get", 0)); } /* * Check for invalid key/data flags. */ if ((ret = __dbt_ferr(dbp, "key", key, DB_RETURNS_A_KEY(dbp, flags))) != 0) return (ret); if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0) return (ret); if (multi) { if (!F_ISSET(data, DB_DBT_USERMEM)) { __db_errx(env, "DB_MULTIPLE requires DB_DBT_USERMEM be set"); return (EINVAL); } if (F_ISSET(key, DB_DBT_PARTIAL) || F_ISSET(data, DB_DBT_PARTIAL)) { __db_errx(env, "DB_MULTIPLE does not support DB_DBT_PARTIAL"); return (EINVAL); } if (data->ulen < 1024 || data->ulen < dbp->pgsize || data->ulen % 1024 != 0) { __db_errx(env, "%s%s", "DB_MULTIPLE buffers must be ", "aligned, at least page size and multiples of 1KB"); return (EINVAL); } } return (0); } /* * __db_join_pp -- * DB->join pre/post processing. * * PUBLIC: int __db_join_pp __P((DB *, DBC **, DBC **, u_int32_t)); */ int __db_join_pp(primary, curslist, dbcp, flags) DB *primary; DBC **curslist, **dbcp; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; env = primary->env; ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter( primary, 1, 0, IS_REAL_TXN(curslist[0]->txn))) != 0) { handle_check = 0; goto err; } if ((ret = __db_join_arg(primary, curslist, flags)) == 0) ret = __db_join(primary, curslist, dbcp, flags); /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; err: ENV_LEAVE(env, ip); return (ret); } /* * __db_join_arg -- * Check DB->join arguments. */ static int __db_join_arg(primary, curslist, flags) DB *primary; DBC **curslist; u_int32_t flags; { DB_TXN *txn; ENV *env; int i; env = primary->env; switch (flags) { case 0: case DB_JOIN_NOSORT: break; default: return (__db_ferr(env, "DB->join", 0)); } if (curslist == NULL || curslist[0] == NULL) { __db_errx(env, "At least one secondary cursor must be specified to DB->join"); return (EINVAL); } txn = curslist[0]->txn; for (i = 1; curslist[i] != NULL; i++) if (curslist[i]->txn != txn) { __db_errx(env, "All secondary cursors must share the same transaction"); return (EINVAL); } return (0); } /* * __db_key_range_pp -- * DB->key_range pre/post processing. * * PUBLIC: int __db_key_range_pp * PUBLIC: __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t)); */ int __db_key_range_pp(dbp, txn, key, kr, flags) DB *dbp; DB_TXN *txn; DBT *key; DB_KEY_RANGE *kr; u_int32_t flags; { DBC *dbc; DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; env = dbp->env; DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->key_range"); /* * !!! * The actual argument checking is simple, do it inline, outside of * the replication block. */ if (flags != 0) return (__db_ferr(env, "DB->key_range", 0)); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } /* Check for consistent transaction usage. */ if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0) goto err; /* * !!! * The actual method call is simple, do it inline. */ switch (dbp->type) { case DB_BTREE: if ((ret = __dbt_usercopy(env, key)) != 0) goto err; /* Acquire a cursor. */ if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) break; DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0); #ifdef HAVE_PARTITION if (DB_IS_PARTITIONED(dbp)) ret = __part_key_range(dbc, key, kr, flags); else #endif ret = __bam_key_range(dbc, key, kr, flags); if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) ret = t_ret; __dbt_userfree(env, key, NULL, NULL); break; case DB_HASH: case DB_QUEUE: case DB_RECNO: ret = __dbh_am_chk(dbp, DB_OK_BTREE); break; case DB_UNKNOWN: default: ret = __db_unknown_type(env, "DB->key_range", dbp->type); break; } err: /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); return (ret); } /* * __db_open_pp -- * DB->open pre/post processing. * * PUBLIC: int __db_open_pp __P((DB *, DB_TXN *, * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int)); */ int __db_open_pp(dbp, txn, fname, dname, type, flags, mode) DB *dbp; DB_TXN *txn; const char *fname, *dname; DBTYPE type; u_int32_t flags; int mode; { DB_THREAD_INFO *ip; ENV *env; int handle_check, nosync, remove_me, ret, t_ret, txn_local; env = dbp->env; nosync = 1; handle_check = remove_me = txn_local = 0; ENV_ENTER(env, ip); /* * Save the flags. We do this here because we don't pass all of the * flags down into the actual DB->open method call, we strip * DB_AUTO_COMMIT at this layer. */ dbp->open_flags = flags; /* Save the current DB handle flags for refresh. */ dbp->orig_flags = dbp->flags; /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } /* * A replication client can't create a database, but it's convenient to * allow a repmgr application to specify DB_CREATE anyway. Thus for * such an application the meaning of DB_CREATE becomes "create it if * I'm a master, and otherwise ignore the flag". A repmgr application * running as master can't be sure that it won't spontaneously become a * client, so there's a race condition. */ if (IS_REP_CLIENT(env) && !F_ISSET(dbp, DB_AM_NOT_DURABLE)) LF_CLR(DB_CREATE); /* * Create local transaction as necessary, check for consistent * transaction usage. */ if (IS_ENV_AUTO_COMMIT(env, txn, flags)) { if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0) goto err; txn_local = 1; } else if (txn != NULL && !TXN_ON(env) && (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_FAMILY))) { ret = __db_not_txn_env(env); goto err; } LF_CLR(DB_AUTO_COMMIT); /* * We check arguments after possibly creating a local transaction, * which is unusual -- the reason is some flags are illegal if any * kind of transaction is in effect. */ if ((ret = __db_open_arg(dbp, txn, fname, dname, type, flags)) == 0) if ((ret = __db_open(dbp, ip, txn, fname, dname, type, flags, mode, PGNO_BASE_MD)) != 0) goto txnerr; /* * You can open the database that describes the subdatabases in the * rest of the file read-only. The content of each key's data is * unspecified and applications should never be adding new records * or updating existing records. However, during recovery, we need * to open these databases R/W so we can redo/undo changes in them. * Likewise, we need to open master databases read/write during * rename and remove so we can be sure they're fully sync'ed, so * we provide an override flag for the purpose. */ if (dname == NULL && !IS_RECOVERING(env) && !LF_ISSET(DB_RDONLY) && !LF_ISSET(DB_RDWRMASTER) && F_ISSET(dbp, DB_AM_SUBDB)) { __db_errx(env, "files containing multiple databases may only be opened read-only"); ret = EINVAL; goto txnerr; } /* * Success: file creations have to be synchronous, otherwise we don't * care. */ if (F_ISSET(dbp, DB_AM_CREATED | DB_AM_CREATED_MSTR)) nosync = 0; /* Success: don't discard the file on close. */ F_CLR(dbp, DB_AM_DISCARD | DB_AM_CREATED | DB_AM_CREATED_MSTR); /* * If not transactional, remove the databases/subdatabases if it is * persistent. If we're transactional, the child transaction abort * cleans up. */ txnerr: if (ret != 0 && !IS_REAL_TXN(txn)) { remove_me = (F_ISSET(dbp, DB_AM_CREATED) && (fname != NULL || dname != NULL)) ? 1 : 0; if (F_ISSET(dbp, DB_AM_CREATED_MSTR) || (dname == NULL && remove_me)) /* Remove file. */ (void)__db_remove_int(dbp, ip, txn, fname, NULL, DB_FORCE); else if (remove_me) /* Remove subdatabase. */ (void)__db_remove_int(dbp, ip, txn, fname, dname, DB_FORCE); } if (txn_local && (t_ret = __db_txn_auto_resolve(env, txn, nosync, ret)) && ret == 0) ret = t_ret; err: /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); return (ret); } /* * __db_open_arg -- * Check DB->open arguments. */ static int __db_open_arg(dbp, txn, fname, dname, type, flags) DB *dbp; DB_TXN *txn; const char *fname, *dname; DBTYPE type; u_int32_t flags; { ENV *env; u_int32_t ok_flags; int ret; env = dbp->env; /* Validate arguments. */ #undef OKFLAGS #define OKFLAGS \ (DB_AUTO_COMMIT | DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING | \ DB_MULTIVERSION | DB_NOMMAP | DB_NO_AUTO_COMMIT | DB_RDONLY | \ DB_RDWRMASTER | DB_READ_UNCOMMITTED | DB_THREAD | DB_TRUNCATE) if ((ret = __db_fchk(env, "DB->open", flags, OKFLAGS)) != 0) return (ret); if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE)) return (__db_ferr(env, "DB->open", 1)); if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE)) return (__db_ferr(env, "DB->open", 1)); #ifdef HAVE_VXWORKS if (LF_ISSET(DB_TRUNCATE)) { __db_errx(env, "DB_TRUNCATE not supported on VxWorks"); return (DB_OPNOTSUP); } #endif switch (type) { case DB_UNKNOWN: if (LF_ISSET(DB_CREATE|DB_TRUNCATE)) { __db_errx(env, "DB_UNKNOWN type specified with DB_CREATE or DB_TRUNCATE"); return (EINVAL); } ok_flags = 0; break; case DB_BTREE: ok_flags = DB_OK_BTREE; break; case DB_HASH: #ifndef HAVE_HASH return (__db_no_hash_am(env)); #endif ok_flags = DB_OK_HASH; break; case DB_QUEUE: #ifndef HAVE_QUEUE return (__db_no_queue_am(env)); #endif ok_flags = DB_OK_QUEUE; break; case DB_RECNO: ok_flags = DB_OK_RECNO; break; default: __db_errx(env, "unknown type: %lu", (u_long)type); return (EINVAL); } if (ok_flags) DB_ILLEGAL_METHOD(dbp, ok_flags); /* The environment may have been created, but never opened. */ if (!F_ISSET(env, ENV_DBLOCAL | ENV_OPEN_CALLED)) { __db_errx(env, "database environment not yet opened"); return (EINVAL); } /* * Historically, you could pass in an environment that didn't have a * mpool, and DB would create a private one behind the scenes. This * no longer works. */ if (!F_ISSET(env, ENV_DBLOCAL) && !MPOOL_ON(env)) { __db_errx(env, "environment did not include a memory pool"); return (EINVAL); } /* * You can't specify threads during DB->open if subsystems in the * environment weren't configured with them. */ if (LF_ISSET(DB_THREAD) && !F_ISSET(env, ENV_DBLOCAL | ENV_THREAD)) { __db_errx(env, "environment not created using DB_THREAD"); return (EINVAL); } /* DB_MULTIVERSION requires a database configured for transactions. */ if (LF_ISSET(DB_MULTIVERSION) && !IS_REAL_TXN(txn)) { __db_errx(env, "DB_MULTIVERSION illegal without a transaction specified"); return (EINVAL); } if (LF_ISSET(DB_MULTIVERSION) && type == DB_QUEUE) { __db_errx(env, "DB_MULTIVERSION illegal with queue databases"); return (EINVAL); } /* DB_TRUNCATE is neither transaction recoverable nor lockable. */ if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) { __db_errx(env, "DB_TRUNCATE illegal with %s specified", LOCKING_ON(env) ? "locking" : "transactions"); return (EINVAL); } /* Subdatabase checks. */ if (dname != NULL) { /* QAM can only be done on in-memory subdatabases. */ if (type == DB_QUEUE && fname != NULL) { __db_errx( env, "Queue databases must be one-per-file"); return (EINVAL); } /* * Named in-memory databases can't support certain flags, * so check here. */ if (fname == NULL) F_CLR(dbp, DB_AM_CHKSUM | DB_AM_ENCRYPT); } return (0); } /* * __db_pget_pp -- * DB->pget pre/post processing. * * PUBLIC: int __db_pget_pp * PUBLIC: __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t)); */ int __db_pget_pp(dbp, txn, skey, pkey, data, flags) DB *dbp; DB_TXN *txn; DBT *skey, *pkey, *data; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; int handle_check, ignore_lease, ret, t_ret; env = dbp->env; DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->pget"); ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; LF_CLR(DB_IGNORE_LEASE); if ((ret = __db_pget_arg(dbp, pkey, flags)) != 0 || (ret = __db_get_arg(dbp, skey, data, flags)) != 0) { __dbt_userfree(env, skey, pkey, data); return (ret); } ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } ret = __db_pget(dbp, ip, txn, skey, pkey, data, flags); /* * Check for master leases. */ if (ret == 0 && IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) ret = __rep_lease_check(env, 1); err: /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); __dbt_userfree(env, skey, pkey, data); return (ret); } /* * __db_pget -- * DB->pget. * * PUBLIC: int __db_pget __P((DB *, * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t)); */ int __db_pget(dbp, ip, txn, skey, pkey, data, flags) DB *dbp; DB_THREAD_INFO *ip; DB_TXN *txn; DBT *skey, *pkey, *data; u_int32_t flags; { DBC *dbc; u_int32_t mode; int ret, t_ret; mode = DB_CURSOR_TRANSIENT; if (LF_ISSET(DB_READ_UNCOMMITTED)) { mode |= DB_READ_UNCOMMITTED; LF_CLR(DB_READ_UNCOMMITTED); } else if (LF_ISSET(DB_READ_COMMITTED)) { mode |= DB_READ_COMMITTED; LF_CLR(DB_READ_COMMITTED); } if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0) return (ret); SET_RET_MEM(dbc, dbp); DEBUG_LREAD(dbc, txn, "__db_pget", skey, NULL, flags); /* * !!! * The actual method call is simple, do it inline. * * The underlying cursor pget will fill in a default DBT for null * pkeys, and use the cursor's returned-key memory internally to * store any intermediate primary keys. However, we've just set * the returned-key memory to the DB handle's key memory, which * is unsafe to use if the DB handle is threaded. If the pkey * argument is NULL, use the DBC-owned returned-key memory * instead; it'll go away when we close the cursor before we * return, but in this case that's just fine, as we're not * returning the primary key. */ if (pkey == NULL) dbc->rkey = &dbc->my_rkey; /* * The cursor is just a perfectly ordinary secondary database cursor. * Call its c_pget() method to do the dirty work. */ if (flags == 0 || flags == DB_RMW) flags |= DB_SET; ret = __dbc_pget(dbc, skey, pkey, data, flags); if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) ret = t_ret; return (ret); } /* * __db_pget_arg -- * Check DB->pget arguments. */ static int __db_pget_arg(dbp, pkey, flags) DB *dbp; DBT *pkey; u_int32_t flags; { ENV *env; int ret; env = dbp->env; if (!F_ISSET(dbp, DB_AM_SECONDARY)) { __db_errx(env, "DB->pget may only be used on secondary indices"); return (EINVAL); } if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { __db_errx(env, "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices"); return (EINVAL); } /* DB_CONSUME makes no sense on a secondary index. */ LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); switch (flags) { case DB_CONSUME: case DB_CONSUME_WAIT: return (__db_ferr(env, "DB->pget", 0)); default: /* __db_get_arg will catch the rest. */ break; } /* * We allow the pkey field to be NULL, so that we can make the * two-DBT get calls into wrappers for the three-DBT ones. */ if (pkey != NULL && (ret = __dbt_ferr(dbp, "primary key", pkey, 1)) != 0) return (ret); if (flags == DB_GET_BOTH) { /* The pkey field can't be NULL if we're doing a DB_GET_BOTH. */ if (pkey == NULL) { __db_errx(env, "DB_GET_BOTH on a secondary index requires a primary key"); return (EINVAL); } if ((ret = __dbt_usercopy(env, pkey)) != 0) return (ret); } return (0); } /* * __db_put_pp -- * DB->put pre/post processing. * * PUBLIC: int __db_put_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); */ int __db_put_pp(dbp, txn, key, data, flags) DB *dbp; DB_TXN *txn; DBT *key, *data; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, txn_local, t_ret; env = dbp->env; txn_local = 0; STRIP_AUTO_COMMIT(flags); DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put"); if ((ret = __db_put_arg(dbp, key, data, flags)) != 0) return (ret); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } /* Create local transaction as necessary. */ if (IS_DB_AUTO_COMMIT(dbp, txn)) { if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) goto err; txn_local = 1; } /* Check for consistent transaction usage. */ if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) goto err; ret = __db_put(dbp, ip, txn, key, data, flags); err: if (txn_local && (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) ret = t_ret; /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); __dbt_userfree(env, key, NULL, data); return (ret); } /* * __db_put_arg -- * Check DB->put arguments. */ static int __db_put_arg(dbp, key, data, flags) DB *dbp; DBT *key, *data; u_int32_t flags; { ENV *env; int ret, returnkey; env = dbp->env; returnkey = 0; /* Check for changes to a read-only tree. */ if (DB_IS_READONLY(dbp)) return (__db_rdonly(env, "DB->put")); /* Check for puts on a secondary. */ if (F_ISSET(dbp, DB_AM_SECONDARY)) { __db_errx(env, "DB->put forbidden on secondary indices"); return (EINVAL); } if (LF_ISSET(DB_MULTIPLE_KEY | DB_MULTIPLE)) { if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY)) goto err; switch (LF_ISSET(DB_OPFLAGS_MASK)) { case 0: case DB_OVERWRITE_DUP: break; default: __db_errx(env, "DB->put: DB_MULTIPLE(_KEY) can only be combined with DB_OVERWRITE_DUP"); return (EINVAL); } if (!F_ISSET(key, DB_DBT_BULK)) { __db_errx(env, "DB->put with DB_MULTIPLE(_KEY) requires a bulk key buffer"); return (EINVAL); } } if (LF_ISSET(DB_MULTIPLE)) { if (!F_ISSET(data, DB_DBT_BULK)) { __db_errx(env, "DB->put with DB_MULTIPLE requires a bulk data buffer"); return (EINVAL); } } /* Check for invalid function flags. */ switch (LF_ISSET(DB_OPFLAGS_MASK)) { case 0: case DB_NOOVERWRITE: case DB_OVERWRITE_DUP: break; case DB_APPEND: if (dbp->type != DB_RECNO && dbp->type != DB_QUEUE) goto err; returnkey = 1; break; case DB_NODUPDATA: if (F_ISSET(dbp, DB_AM_DUPSORT)) break; /* FALLTHROUGH */ default: err: return (__db_ferr(env, "DB->put", 0)); } /* * Check for invalid key/data flags. The key may reasonably be NULL * if DB_APPEND is set and the application doesn't care about the * returned key. */ if (((returnkey && key != NULL) || !returnkey) && (ret = __dbt_ferr(dbp, "key", key, returnkey)) != 0) return (ret); if (!LF_ISSET(DB_MULTIPLE_KEY) && (ret = __dbt_ferr(dbp, "data", data, 0)) != 0) return (ret); /* * The key parameter should not be NULL or have the "partial" flag set * in a put call unless the user doesn't care about a key value we'd * return. The user tells us they don't care about the returned key by * setting the key parameter to NULL or configuring the key DBT to not * return any information. (Returned keys from a put are always record * numbers, and returning part of a record number doesn't make sense: * only accept a partial return if the length returned is 0.) */ if ((returnkey && key != NULL && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) || (!returnkey && F_ISSET(key, DB_DBT_PARTIAL))) return (__db_ferr(env, "key DBT", 0)); /* Check for partial puts in the presence of duplicates. */ if (data != NULL && F_ISSET(data, DB_DBT_PARTIAL) && (F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) { __db_errx(env, "a partial put in the presence of duplicates requires a cursor operation"); return (EINVAL); } if ((flags != DB_APPEND && (ret = __dbt_usercopy(env, key)) != 0) || (!LF_ISSET(DB_MULTIPLE_KEY) && (ret = __dbt_usercopy(env, data)) != 0)) return (ret); return (0); } /* * __db_compact_func * Callback routine to report if the txn has open cursors. */ static int __db_compact_func(dbc, my_dbc, countp, pgno, indx, args) DBC *dbc, *my_dbc; u_int32_t *countp; db_pgno_t pgno; u_int32_t indx; void *args; { DB_TXN *txn; COMPQUIET(my_dbc, NULL); COMPQUIET(countp, NULL); COMPQUIET(pgno, 0); COMPQUIET(indx, 0); txn = (DB_TXN *)args; if (txn == dbc->txn) return (EEXIST); return (0); } /* * __db_compact_pp -- * DB->compact pre/post processing. * * PUBLIC: int __db_compact_pp __P((DB *, DB_TXN *, * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); */ int __db_compact_pp(dbp, txn, start, stop, c_data, flags, end) DB *dbp; DB_TXN *txn; DBT *start, *stop; DB_COMPACT *c_data; u_int32_t flags; DBT *end; { DB_COMPACT *dp, l_data; DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; u_int32_t count; env = dbp->env; DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->compact"); /* * !!! * The actual argument checking is simple, do it inline, outside of * the replication block. */ if ((ret = __db_fchk( env, "DB->compact", flags, DB_FREELIST_ONLY | DB_FREE_SPACE)) != 0) return (ret); /* Check for changes to a read-only database. */ if (DB_IS_READONLY(dbp)) return (__db_rdonly(env, "DB->compact")); if (start != NULL && (ret = __dbt_usercopy(env, start)) != 0) return (ret); if (stop != NULL && (ret = __dbt_usercopy(env, stop)) != 0) return (ret); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { handle_check = 0; goto err; } if (txn != NULL) { if ((ret = __db_walk_cursors(dbp, NULL, __db_compact_func, &count, 0, 0, txn)) != 0) { if (ret == EEXIST) { __db_errx(env, "DB->compact may not be called with active cursors in the transaction." ); ret = EINVAL; } goto err; } } if (c_data == NULL) { dp = &l_data; memset(dp, 0, sizeof(*dp)); } else dp = c_data; #ifdef HAVE_PARTITION if (DB_IS_PARTITIONED(dbp)) ret = __part_compact(dbp, ip, txn, start, stop, dp, flags, end); else #endif switch (dbp->type) { case DB_HASH: case DB_BTREE: case DB_RECNO: ret = __db_compact_int(dbp, ip, txn, start, stop, dp, flags, end); break; default: ret = __dbh_am_chk(dbp, DB_OK_BTREE); break; } /* Release replication block. */ err: if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); __dbt_userfree(env, start, stop, NULL); return (ret); } /* * __db_associate_foreign_pp -- * DB->associate_foreign pre/post processing. * * PUBLIC: int __db_associate_foreign_pp __P((DB *, DB *, * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *), * PUBLIC: u_int32_t)); */ int __db_associate_foreign_pp(fdbp, dbp, callback, flags) DB *dbp, *fdbp; int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *)); u_int32_t flags; { /* Most of this is based on the implementation of associate */ DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; env = dbp->env; PANIC_CHECK(env); STRIP_AUTO_COMMIT(flags); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { handle_check = 0; goto err; } if ((ret = __db_associate_foreign_arg(fdbp, dbp, callback, flags)) != 0) goto err; ret = __db_associate_foreign(fdbp, dbp, callback, flags); err: /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); return (ret); } /* * __db_associate_foreign_arg -- * DB->associate_foreign argument checking. */ static int __db_associate_foreign_arg(fdbp, dbp, callback, flags) DB *dbp, *fdbp; int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *)); u_int32_t flags; { ENV *env; env = fdbp->env; if (F_ISSET(fdbp, DB_AM_SECONDARY)) { __db_errx(env, "Secondary indices may not be used as foreign databases"); return (EINVAL); } if (F_ISSET(fdbp, DB_AM_DUP)) { __db_errx(env, "Foreign databases may not be configured with duplicates"); return (EINVAL); } if (F_ISSET(fdbp, DB_AM_RENUMBER)) { __db_errx(env, "Renumbering recno databases may not be used as foreign databases"); return (EINVAL); } if (!F_ISSET(dbp, DB_AM_SECONDARY)) { __db_errx(env, "The associating database must be a secondary index."); return (EINVAL); } if (LF_ISSET(DB_FOREIGN_NULLIFY) && callback == NULL) { __db_errx(env, "When specifying a delete action of nullify, a callback%s", " function needs to be configured"); return (EINVAL); } else if (!LF_ISSET(DB_FOREIGN_NULLIFY) && callback != NULL) { __db_errx(env, "When not specifying a delete action of nullify, a%s", " callback function cannot be configured"); return (EINVAL); } return (0); } /* * __db_sync_pp -- * DB->sync pre/post processing. * * PUBLIC: int __db_sync_pp __P((DB *, u_int32_t)); */ int __db_sync_pp(dbp, flags) DB *dbp; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; env = dbp->env; DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync"); /* * !!! * The actual argument checking is simple, do it inline, outside of * the replication block. */ if (flags != 0) return (__db_ferr(env, "DB->sync", 0)); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { handle_check = 0; goto err; } ret = __db_sync(dbp); /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; err: ENV_LEAVE(env, ip); return (ret); } /* * __dbc_close_pp -- * DBC->close pre/post processing. * * PUBLIC: int __dbc_close_pp __P((DBC *)); */ int __dbc_close_pp(dbc) DBC *dbc; { DB *dbp; DB_THREAD_INFO *ip; ENV *env; DB_TXN *txn; int handle_check, ret, t_ret; dbp = dbc->dbp; env = dbp->env; txn = dbc->txn; /* * If the cursor is already closed we have a serious problem, and we * assume that the cursor isn't on the active queue. Don't do any of * the remaining cursor close processing. */ if (!F_ISSET(dbc, DBC_ACTIVE)) { __db_errx(env, "Closing already-closed cursor"); return (EINVAL); } ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = !IS_REAL_TXN(dbc->txn) && IS_ENV_REPLICATED(env); /* Unregister the cursor from its transaction, regardless of ret. */ if (txn != NULL) { TAILQ_REMOVE(&(txn->my_cursors), dbc, txn_cursors); dbc->txn_cursors.tqe_next = NULL; dbc->txn_cursors.tqe_prev = NULL; } else { DB_ASSERT(env, dbc->txn_cursors.tqe_next == NULL && dbc->txn_cursors.tqe_prev == NULL); } ret = __dbc_close(dbc); /* Release replication block. */ if (handle_check && (t_ret = __op_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); return (ret); } /* * __dbc_cmp_pp -- * DBC->cmp pre/post processing. * * PUBLIC: int __dbc_cmp_pp __P((DBC *, DBC *, int*, u_int32_t)); */ int __dbc_cmp_pp(dbc, other_cursor, result, flags) DBC *dbc, *other_cursor; int *result; u_int32_t flags; { DB *dbp, *odbp; DB_THREAD_INFO *ip; ENV *env; int ret; dbp = dbc->dbp; odbp = other_cursor->dbp; env = dbp->env; if (flags != 0) return (__db_ferr(env, "DBcursor->cmp", 0)); if (other_cursor == NULL) { __db_errx(env, "DBcursor->cmp dbc pointer must not be null"); return (EINVAL); } if (dbp != odbp) { __db_errx(env, "DBcursor->cmp both cursors must refer to the same database."); return (EINVAL); } ENV_ENTER(env, ip); ret = __dbc_cmp(dbc, other_cursor, result); ENV_LEAVE(env, ip); return (ret); } /* * __dbc_count_pp -- * DBC->count pre/post processing. * * PUBLIC: int __dbc_count_pp __P((DBC *, db_recno_t *, u_int32_t)); */ int __dbc_count_pp(dbc, recnop, flags) DBC *dbc; db_recno_t *recnop; u_int32_t flags; { DB *dbp; DB_THREAD_INFO *ip; ENV *env; int ret; dbp = dbc->dbp; env = dbp->env; /* * !!! * The actual argument checking is simple, do it inline, outside of * the replication block. * * The cursor must be initialized, return EINVAL for an invalid cursor. */ if (flags != 0) return (__db_ferr(env, "DBcursor->count", 0)); if (!IS_INITIALIZED(dbc)) return (__db_curinval(env)); ENV_ENTER(env, ip); ret = __dbc_count(dbc, recnop); ENV_LEAVE(env, ip); return (ret); } /* * __dbc_del_pp -- * DBC->del pre/post processing. * * PUBLIC: int __dbc_del_pp __P((DBC *, u_int32_t)); */ int __dbc_del_pp(dbc, flags) DBC *dbc; u_int32_t flags; { DB *dbp; DB_THREAD_INFO *ip; ENV *env; int ret; dbp = dbc->dbp; env = dbp->env; if ((ret = __dbc_del_arg(dbc, flags)) != 0) return (ret); ENV_ENTER(env, ip); /* Check for consistent transaction usage. */ if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) goto err; DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->del", NULL, NULL, flags); ret = __dbc_del(dbc, flags); err: ENV_LEAVE(env, ip); return (ret); } /* * __dbc_del_arg -- * Check DBC->del arguments. */ static int __dbc_del_arg(dbc, flags) DBC *dbc; u_int32_t flags; { DB *dbp; ENV *env; dbp = dbc->dbp; env = dbp->env; /* Check for changes to a read-only tree. */ if (DB_IS_READONLY(dbp)) return (__db_rdonly(env, "DBcursor->del")); /* Check for invalid function flags. */ switch (flags) { case 0: break; case DB_CONSUME: if (dbp->type != DB_QUEUE) return (__db_ferr(env, "DBC->del", 0)); break; case DB_UPDATE_SECONDARY: DB_ASSERT(env, F_ISSET(dbp, DB_AM_SECONDARY)); break; default: return (__db_ferr(env, "DBcursor->del", 0)); } /* * The cursor must be initialized, return EINVAL for an invalid cursor, * otherwise 0. */ if (!IS_INITIALIZED(dbc)) return (__db_curinval(env)); return (0); } /* * __dbc_dup_pp -- * DBC->dup pre/post processing. * * PUBLIC: int __dbc_dup_pp __P((DBC *, DBC **, u_int32_t)); */ int __dbc_dup_pp(dbc, dbcp, flags) DBC *dbc, **dbcp; u_int32_t flags; { DB *dbp; DB_THREAD_INFO *ip; ENV *env; int rep_blocked, ret; dbp = dbc->dbp; env = dbp->env; /* * !!! * The actual argument checking is simple, do it inline, outside of * the replication block. */ if (flags != 0 && flags != DB_POSITION) return (__db_ferr(env, "DBcursor->dup", 0)); ENV_ENTER(env, ip); rep_blocked = 0; if (dbc->txn == NULL && IS_ENV_REPLICATED(env)) { if ((ret = __op_rep_enter(env, 1)) != 0) goto err; rep_blocked = 1; } ret = __dbc_dup(dbc, dbcp, flags); /* Register externally created cursors into the valid transaction. */ DB_ASSERT(env, (*dbcp)->txn == dbc->txn); if ((*dbcp)->txn != NULL && ret == 0) TAILQ_INSERT_HEAD(&((*dbcp)->txn->my_cursors), *dbcp, txn_cursors); err: if (ret != 0 && rep_blocked) (void)__op_rep_exit(env); ENV_LEAVE(env, ip); return (ret); } /* * __dbc_get_pp -- * DBC->get pre/post processing. * * PUBLIC: int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); */ int __dbc_get_pp(dbc, key, data, flags) DBC *dbc; DBT *key, *data; u_int32_t flags; { DB *dbp; DB_THREAD_INFO *ip; ENV *env; int ignore_lease, ret; dbp = dbc->dbp; env = dbp->env; ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; LF_CLR(DB_IGNORE_LEASE); if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0) return (ret); ENV_ENTER(env, ip); DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get", flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); ret = __dbc_get(dbc, key, data, flags); /* * Check for master leases. */ if (ret == 0 && IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) ret = __rep_lease_check(env, 1); ENV_LEAVE(env, ip); __dbt_userfree(env, key, NULL, data); return (ret); } /* * __dbc_get_arg -- * Common DBC->get argument checking, used by both DBC->get and DBC->pget. * PUBLIC: int __dbc_get_arg __P((DBC *, DBT *, DBT *, u_int32_t)); */ int __dbc_get_arg(dbc, key, data, flags) DBC *dbc; DBT *key, *data; u_int32_t flags; { DB *dbp; ENV *env; int dirty, multi, ret; dbp = dbc->dbp; env = dbp->env; /* * Typically in checking routines that modify the flags, we have * to save them and restore them, because the checking routine * calls the work routine. However, this is a pure-checking * routine which returns to a function that calls the work routine, * so it's OK that we do not save and restore the flags, even though * we modify them. * * Check for read-modify-write validity. DB_RMW doesn't make sense * with CDB cursors since if you're going to write the cursor, you * had to create it with DB_WRITECURSOR. Regardless, we check for * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it. * If this changes, confirm that DB does not itself set the DB_RMW * flag in a path where CDB may have been configured. */ dirty = 0; if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) { if (!LOCKING_ON(env)) return (__db_fnl(env, "DBcursor->get")); if (LF_ISSET(DB_READ_UNCOMMITTED)) dirty = 1; LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); } multi = 0; if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { multi = 1; if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY)) goto multi_err; LF_CLR(DB_MULTIPLE | DB_MULTIPLE_KEY); } /* Check for invalid function flags. */ switch (flags) { case DB_CONSUME: case DB_CONSUME_WAIT: if (dirty) { __db_errx(env, "DB_READ_UNCOMMITTED is not supported with DB_CONSUME or DB_CONSUME_WAIT"); return (EINVAL); } if (dbp->type != DB_QUEUE) goto err; break; case DB_CURRENT: case DB_FIRST: case DB_NEXT: case DB_NEXT_DUP: case DB_NEXT_NODUP: break; case DB_LAST: case DB_PREV: case DB_PREV_DUP: case DB_PREV_NODUP: if (multi) multi_err: return (__db_ferr(env, "DBcursor->get", 1)); break; case DB_GET_BOTHC: if (dbp->type == DB_QUEUE) goto err; /* FALLTHROUGH */ case DB_GET_BOTH: case DB_GET_BOTH_RANGE: if ((ret = __dbt_usercopy(env, data)) != 0) goto err; /* FALLTHROUGH */ case DB_SET: case DB_SET_RANGE: if ((ret = __dbt_usercopy(env, key)) != 0) goto err; break; case DB_GET_RECNO: /* * The one situation in which this might be legal with a * non-RECNUM dbp is if dbp is a secondary and its primary is * DB_AM_RECNUM. */ if (!F_ISSET(dbp, DB_AM_RECNUM) && (!F_ISSET(dbp, DB_AM_SECONDARY) || !F_ISSET(dbp->s_primary, DB_AM_RECNUM))) goto err; break; case DB_SET_RECNO: if (!F_ISSET(dbp, DB_AM_RECNUM)) goto err; if ((ret = __dbt_usercopy(env, key)) != 0) goto err; break; default: err: __dbt_userfree(env, key, NULL, data); return (__db_ferr(env, "DBcursor->get", 0)); } /* Check for invalid key/data flags. */ if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0) return (ret); if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) return (ret); if (multi) { if (!F_ISSET(data, DB_DBT_USERMEM)) { __db_errx(env, "DB_MULTIPLE/DB_MULTIPLE_KEY require DB_DBT_USERMEM be set"); return (EINVAL); } if (F_ISSET(key, DB_DBT_PARTIAL) || F_ISSET(data, DB_DBT_PARTIAL)) { __db_errx(env, "DB_MULTIPLE/DB_MULTIPLE_KEY do not support DB_DBT_PARTIAL"); return (EINVAL); } if (data->ulen < 1024 || data->ulen < dbp->pgsize || data->ulen % 1024 != 0) { __db_errx(env, "%s%s", "DB_MULTIPLE/DB_MULTIPLE_KEY buffers must be ", "aligned, at least page size and multiples of 1KB"); return (EINVAL); } } /* * The cursor must be initialized for DB_CURRENT, DB_GET_RECNO, * DB_PREV_DUP and DB_NEXT_DUP. Return EINVAL for an invalid * cursor, otherwise 0. */ if (!IS_INITIALIZED(dbc) && (flags == DB_CURRENT || flags == DB_GET_RECNO || flags == DB_NEXT_DUP || flags == DB_PREV_DUP)) return (__db_curinval(env)); /* Check for consistent transaction usage. */ if (LF_ISSET(DB_RMW) && (ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) return (ret); return (0); } /* * __db_secondary_close_pp -- * DB->close for secondaries * * PUBLIC: int __db_secondary_close_pp __P((DB *, u_int32_t)); */ int __db_secondary_close_pp(dbp, flags) DB *dbp; u_int32_t flags; { DB_THREAD_INFO *ip; ENV *env; int handle_check, ret, t_ret; env = dbp->env; ret = 0; /* * As a DB handle destructor, we can't fail. * * !!! * The actual argument checking is simple, do it inline, outside of * the replication block. */ if (flags != 0 && flags != DB_NOSYNC) ret = __db_ferr(env, "DB->close", 0); ENV_ENTER(env, ip); /* Check for replication block. */ handle_check = IS_ENV_REPLICATED(env); if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) { handle_check = 0; if (ret == 0) ret = t_ret; } if ((t_ret = __db_secondary_close(dbp, flags)) != 0 && ret == 0) ret = t_ret; /* Release replication block. */ if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) ret = t_ret; ENV_LEAVE(env, ip); return (ret); } /* * __dbc_pget_pp -- * DBC->pget pre/post processing. * * PUBLIC: int __dbc_pget_pp __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); */ int __dbc_pget_pp(dbc, skey, pkey, data, flags) DBC *dbc; DBT *skey, *pkey, *data; u_int32_t flags; { DB *dbp; DB_THREAD_INFO *ip; ENV *env; int ignore_lease, ret; dbp = dbc->dbp; env = dbp->env; ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; LF_CLR(DB_IGNORE_LEASE); if ((ret = __dbc_pget_arg(dbc, pkey, flags)) != 0 || (ret = __dbc_get_arg(dbc, skey, data, flags)) != 0) return (ret); ENV_ENTER(env, ip); ret = __dbc_pget(dbc, skey, pkey, data, flags); /* * Check for master leases. */ if (ret == 0 && IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) ret = __rep_lease_check(env, 1); ENV_LEAVE(env, ip); __dbt_userfree(env, skey, pkey, data); return (ret); } /* * __dbc_pget_arg -- * Check DBC->pget arguments. */ static int __dbc_pget_arg(dbc, pkey, flags) DBC *dbc; DBT *pkey; u_int32_t flags; { DB *dbp; ENV *env; int ret; dbp = dbc->dbp; env = dbp->env; if (!F_ISSET(dbp, DB_AM_SECONDARY)) { __db_errx(env, "DBcursor->pget may only be used on secondary indices"); return (EINVAL); } if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { __db_errx(env, "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices"); return (EINVAL); } switch (LF_ISSET(DB_OPFLAGS_MASK)) { case DB_CONSUME: case DB_CONSUME_WAIT: /* These flags make no sense on a secondary index. */ return (__db_ferr(env, "DBcursor->pget", 0)); case DB_GET_BOTH: case DB_GET_BOTH_RANGE: /* BOTH is "get both the primary and the secondary". */ if (pkey == NULL) { __db_errx(env, "%s requires both a secondary and a primary key", LF_ISSET(DB_GET_BOTH) ? "DB_GET_BOTH" : "DB_GET_BOTH_RANGE"); return (EINVAL); } if ((ret = __dbt_usercopy(env, pkey)) != 0) return (ret); break; default: /* __dbc_get_arg will catch the rest. */ break; } /* * We allow the pkey field to be NULL, so that we can make the * two-DBT get calls into wrappers for the three-DBT ones. */ if (pkey != NULL && (ret = __dbt_ferr(dbp, "primary key", pkey, 0)) != 0) return (ret); /* But the pkey field can't be NULL if we're doing a DB_GET_BOTH. */ if (pkey == NULL && (flags & DB_OPFLAGS_MASK) == DB_GET_BOTH) { __db_errx(env, "DB_GET_BOTH on a secondary index requires a primary key"); return (EINVAL); } return (0); } /* * __dbc_put_pp -- * DBC->put pre/post processing. * * PUBLIC: int __dbc_put_pp __P((DBC *, DBT *, DBT *, u_int32_t)); */ int __dbc_put_pp(dbc, key, data, flags) DBC *dbc; DBT *key, *data; u_int32_t flags; { DB *dbp; DB_THREAD_INFO *ip; ENV *env; int ret; dbp = dbc->dbp; env = dbp->env; if ((ret = __dbc_put_arg(dbc, key, data, flags)) != 0) return (ret); ENV_ENTER(env, ip); /* Check for consistent transaction usage. */ if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) goto err; DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->put", flags == DB_KEYFIRST || flags == DB_KEYLAST || flags == DB_NODUPDATA || flags == DB_UPDATE_SECONDARY ? key : NULL, data, flags); ret = __dbc_put(dbc, key, data, flags); err: ENV_LEAVE(env, ip); __dbt_userfree(env, key, NULL, data); return (ret); } /* * __dbc_put_arg -- * Check DBC->put arguments. */ static int __dbc_put_arg(dbc, key, data, flags) DBC *dbc; DBT *key, *data; u_int32_t flags; { DB *dbp; ENV *env; int key_flags, ret; dbp = dbc->dbp; env = dbp->env; key_flags = 0; /* Check for changes to a read-only tree. */ if (DB_IS_READONLY(dbp)) return (__db_rdonly(env, "DBcursor->put")); /* Check for puts on a secondary. */ if (F_ISSET(dbp, DB_AM_SECONDARY)) { if (flags == DB_UPDATE_SECONDARY) flags = 0; else { __db_errx(env, "DBcursor->put forbidden on secondary indices"); return (EINVAL); } } if ((ret = __dbt_usercopy(env, data)) != 0) return (ret); /* Check for invalid function flags. */ switch (flags) { case DB_AFTER: case DB_BEFORE: switch (dbp->type) { case DB_BTREE: case DB_HASH: /* Only with unsorted duplicates. */ if (!F_ISSET(dbp, DB_AM_DUP)) goto err; if (dbp->dup_compare != NULL) goto err; break; case DB_QUEUE: /* Not permitted. */ goto err; case DB_RECNO: /* Only with mutable record numbers. */ if (!F_ISSET(dbp, DB_AM_RENUMBER)) goto err; key_flags = key == NULL ? 0 : 1; break; case DB_UNKNOWN: default: goto err; } break; case DB_CURRENT: /* * If there is a comparison function, doing a DB_CURRENT * must not change the part of the data item that is used * for the comparison. */ break; case DB_NODUPDATA: if (!F_ISSET(dbp, DB_AM_DUPSORT)) goto err; /* FALLTHROUGH */ case DB_KEYFIRST: case DB_KEYLAST: case DB_OVERWRITE_DUP: key_flags = 1; if ((ret = __dbt_usercopy(env, key)) != 0) return (ret); break; default: err: return (__db_ferr(env, "DBcursor->put", 0)); } /* * Check for invalid key/data flags. The key may reasonably be NULL * if DB_AFTER or DB_BEFORE is set and the application doesn't care * about the returned key, or if the DB_CURRENT flag is set. */ if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0) return (ret); if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) return (ret); /* * The key parameter should not be NULL or have the "partial" flag set * in a put call unless the user doesn't care about a key value we'd * return. The user tells us they don't care about the returned key by * setting the key parameter to NULL or configuring the key DBT to not * return any information. (Returned keys from a put are always record * numbers, and returning part of a record number doesn't make sense: * only accept a partial return if the length returned is 0.) */ if (key_flags && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) return (__db_ferr(env, "key DBT", 0)); /* * The cursor must be initialized for anything other than DB_KEYFIRST, * DB_KEYLAST or zero: return EINVAL for an invalid cursor, otherwise 0. */ if (!IS_INITIALIZED(dbc) && flags != 0 && flags != DB_KEYFIRST && flags != DB_KEYLAST && flags != DB_NODUPDATA && flags != DB_OVERWRITE_DUP) return (__db_curinval(env)); return (0); } /* * __dbt_ferr -- * Check a DBT for flag errors. */ static int __dbt_ferr(dbp, name, dbt, check_thread) const DB *dbp; const char *name; const DBT *dbt; int check_thread; { ENV *env; int ret; env = dbp->env; /* * Check for invalid DBT flags. We allow any of the flags to be * specified to any DB or DBcursor call so that applications can * set DB_DBT_MALLOC when retrieving a data item from a secondary * database and then specify that same DBT as a key to a primary * database, without having to clear flags. */ if ((ret = __db_fchk(env, name, dbt->flags, DB_DBT_APPMALLOC | DB_DBT_BULK | DB_DBT_DUPOK | DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERCOPY | DB_DBT_USERMEM | DB_DBT_PARTIAL)) != 0) return (ret); switch (F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERCOPY | DB_DBT_USERMEM)) { case 0: case DB_DBT_MALLOC: case DB_DBT_REALLOC: case DB_DBT_USERCOPY: case DB_DBT_USERMEM: break; default: return (__db_ferr(env, name, 1)); } if (F_ISSET(dbt, DB_DBT_BULK) && F_ISSET(dbt, DB_DBT_PARTIAL)) { __db_errx(env, "Bulk and partial operations cannot be combined on %s DBT", name); return (EINVAL); } if (check_thread && DB_IS_THREADED(dbp) && !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERCOPY | DB_DBT_USERMEM)) { __db_errx(env, "DB_THREAD mandates memory allocation flag on %s DBT", name); return (EINVAL); } return (0); } /* * __db_curinval * Report that a cursor is in an invalid state. */ static int __db_curinval(env) const ENV *env; { __db_errx(env, "Cursor position must be set before performing this operation"); return (EINVAL); } /* * __db_txn_auto_init -- * Handle DB_AUTO_COMMIT initialization. * * PUBLIC: int __db_txn_auto_init __P((ENV *, DB_THREAD_INFO *, DB_TXN **)); */ int __db_txn_auto_init(env, ip, txnidp) ENV *env; DB_THREAD_INFO *ip; DB_TXN **txnidp; { /* * Method calls where applications explicitly specify DB_AUTO_COMMIT * require additional validation: the DB_AUTO_COMMIT flag cannot be * specified if a transaction cookie is also specified, nor can the * flag be specified in a non-transactional environment. */ if (*txnidp != NULL && !F_ISSET(*txnidp, TXN_FAMILY)) { __db_errx(env, "DB_AUTO_COMMIT may not be specified along with a transaction handle"); return (EINVAL); } if (!TXN_ON(env)) { __db_errx(env, "DB_AUTO_COMMIT may not be specified in non-transactional environment"); return (EINVAL); } /* * Our caller checked to see if replication is making a state change. * Don't call the user-level API (which would repeat that check). */ return (__txn_begin(env, ip, *txnidp, txnidp, 0)); } /* * __db_txn_auto_resolve -- * Resolve local transactions. * * PUBLIC: int __db_txn_auto_resolve __P((ENV *, DB_TXN *, int, int)); */ int __db_txn_auto_resolve(env, txn, nosync, ret) ENV *env; DB_TXN *txn; int nosync, ret; { int t_ret; if (ret == 0) return (__txn_commit(txn, nosync ? DB_TXN_NOSYNC : 0)); if ((t_ret = __txn_abort(txn)) != 0) return (__env_panic(env, t_ret)); return (ret); }