Author: b.liu
Commit: e958203
Date:   2025-04-17 19:18:16 +0800

--- a/src/dbinc_auto/int_def.in
+++ b/src/dbinc_auto/int_def.in
@@ -1373,6 +1373,7 @@
 #define __memp_pgread __memp_pgread@DB_VERSION_UNIQUE_NAME@
 #define __memp_pg __memp_pg@DB_VERSION_UNIQUE_NAME@
 #define __memp_bhfree __memp_bhfree@DB_VERSION_UNIQUE_NAME@
+#define __memp_bh_clear_dirty __memp_bh_clear_dirty@DB_VERSION_UNIQUE_NAME@
 #define __memp_fget_pp __memp_fget_pp@DB_VERSION_UNIQUE_NAME@
 #define __memp_fget __memp_fget@DB_VERSION_UNIQUE_NAME@
 #define __memp_fcreate_pp __memp_fcreate_pp@DB_VERSION_UNIQUE_NAME@
@@ -1397,6 +1398,7 @@
 #define __memp_fclose __memp_fclose@DB_VERSION_UNIQUE_NAME@
 #define __memp_mf_discard __memp_mf_discard@DB_VERSION_UNIQUE_NAME@
 #define __memp_inmemlist __memp_inmemlist@DB_VERSION_UNIQUE_NAME@
+#define __memp_mf_mark_dead __memp_mf_mark_dead@DB_VERSION_UNIQUE_NAME@
 #define __memp_fput_pp __memp_fput_pp@DB_VERSION_UNIQUE_NAME@
 #define __memp_fput __memp_fput@DB_VERSION_UNIQUE_NAME@
 #define __memp_unpin_buffers __memp_unpin_buffers@DB_VERSION_UNIQUE_NAME@
@@ -1455,6 +1457,7 @@
 #define __mp_xxx_fh __mp_xxx_fh@DB_VERSION_UNIQUE_NAME@
 #define __memp_sync_int __memp_sync_int@DB_VERSION_UNIQUE_NAME@
 #define __memp_mf_sync __memp_mf_sync@DB_VERSION_UNIQUE_NAME@
+#define __memp_purge_dead_files __memp_purge_dead_files@DB_VERSION_UNIQUE_NAME@
 #define __memp_trickle_pp __memp_trickle_pp@DB_VERSION_UNIQUE_NAME@
 #define __mutex_alloc __mutex_alloc@DB_VERSION_UNIQUE_NAME@
 #define __mutex_alloc_int __mutex_alloc_int@DB_VERSION_UNIQUE_NAME@
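Note: these int_def.in entries drive Berkeley DB's unique-name build mode, in
which every internal symbol is remapped by the preprocessor so several library
versions can coexist in one address space. A minimal sketch of the expansion,
assuming the suffix substituted for @DB_VERSION_UNIQUE_NAME@ is _6018 (the
actual suffix depends on the configured version):

    /* Hypothetical post-substitution result for the new symbol: */
    #define __memp_bh_clear_dirty __memp_bh_clear_dirty_6018

    /* Every call site then links against the suffixed name: */
    __memp_bh_clear_dirty(env, hp, bhp);  /* really __memp_bh_clear_dirty_6018() */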
--- a/src/dbinc_auto/mp_ext.h
+++ b/src/dbinc_auto/mp_ext.h
@@ -16,6 +16,7 @@ int __memp_bhwrite __P((DB_MPOOL *, DB_M
 int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
 int __memp_pg __P((DB_MPOOLFILE *, db_pgno_t, void *, int));
 int __memp_bhfree __P((DB_MPOOL *, REGINFO *, MPOOLFILE *, DB_MPOOL_HASH *, BH *, u_int32_t));
+void __memp_bh_clear_dirty __P((ENV *, DB_MPOOL_HASH *, BH *));
 int __memp_fget_pp __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *));
 int __memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, DB_THREAD_INFO *, DB_TXN *, u_int32_t, void *));
 int __memp_fcreate_pp __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t));
@@ -40,6 +41,7 @@ int __memp_fclose_pp __P((DB_MPOOLFILE *
 int __memp_fclose __P((DB_MPOOLFILE *, u_int32_t));
 int __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *, int));
 int __memp_inmemlist __P((ENV *, char ***, int *));
+void __memp_mf_mark_dead __P((DB_MPOOL *, MPOOLFILE *, int *));
 int __memp_fput_pp __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t));
 int __memp_fput __P((DB_MPOOLFILE *, DB_THREAD_INFO *, void *, DB_CACHE_PRIORITY));
 int __memp_unpin_buffers __P((ENV *, DB_THREAD_INFO *));
@@ -98,6 +100,7 @@ int __memp_fsync __P((DB_MPOOLFILE *));
 int __mp_xxx_fh __P((DB_MPOOLFILE *, DB_FH **));
 int __memp_sync_int __P((ENV *, DB_MPOOLFILE *, u_int32_t, u_int32_t, u_int32_t *, int *));
 int __memp_mf_sync __P((DB_MPOOL *, MPOOLFILE *, int));
+int __memp_purge_dead_files __P((ENV *));
 int __memp_trickle_pp __P((DB_ENV *, int, int *));

 #if defined(__cplusplus)
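Note: the __P wrapper in these generated prototypes is the classic BSD
portability macro: on ANSI compilers it passes the parameter list through, on
pre-ANSI compilers it drops it. A minimal sketch of the convention (the real
definition lives in Berkeley DB's configuration headers):

    #ifdef __STDC__
    #define __P(protos) protos  /* full ANSI prototype */
    #else
    #define __P(protos) ()      /* K&R-style empty parameter list */
    #endif

So `int __memp_purge_dead_files __P((ENV *));` declares
`int __memp_purge_dead_files(ENV *);` on any modern compiler.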
--- a/src/mp/mp_bh.c
+++ b/src/mp/mp_bh.c
@@ -474,11 +474,8 @@ file_dead:
 	if (F_ISSET(bhp, BH_DIRTY | BH_TRASH)) {
 		MUTEX_LOCK(env, hp->mtx_hash);
 		DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc));
-		if (ret == 0 && F_ISSET(bhp, BH_DIRTY)) {
-			F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
-			DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0);
-			atomic_dec(env, &hp->hash_page_dirty);
-		}
+		if (ret == 0)
+			__memp_bh_clear_dirty(env, hp, bhp);

 		/* put the page back if necessary. */
 		if ((ret != 0 || BH_REFCOUNT(bhp) > 1) &&
@@ -688,3 +685,29 @@ no_hp:	if (mfp != NULL)

 	return (ret);
 }
+
+/*
+ * __memp_bh_clear_dirty --
+ *	Clear the dirty flag of a buffer.  Calls on the same buffer must be
+ *	serialized to get the accounting correct.  This can be achieved by
+ *	acquiring an exclusive lock on the buffer, a shared lock on the
+ *	buffer plus an exclusive lock on the hash bucket, or some other
+ *	mechanism that guarantees single-threaded access to the entire
+ *	region (e.g., during __memp_region_bhfree()).
+ *
+ * PUBLIC: void __memp_bh_clear_dirty __P((ENV *, DB_MPOOL_HASH *, BH *));
+ */
+void
+__memp_bh_clear_dirty(env, hp, bhp)
+	ENV *env;
+	DB_MPOOL_HASH *hp;
+	BH *bhp;
+{
+	COMPQUIET(env, env);
+	if (F_ISSET(bhp, BH_DIRTY)) {
+		F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
+		DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0);
+		(void)atomic_dec(env, &hp->hash_page_dirty);
+	}
+}
+
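Note: the accounting contract matters because hash_page_dirty must be
decremented exactly once per dirty buffer. A minimal caller sketch using the
hash-bucket-lock option described above (this mirrors the mp_fget.c call site
below):

    MUTEX_LOCK(env, hp->mtx_hash);       /* exclusive bucket lock serializes callers */
    __memp_bh_clear_dirty(env, hp, bhp); /* flag test + counter update now race-free */
    MUTEX_UNLOCK(env, hp->mtx_hash);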
--- a/src/mp/mp_fget.c
+++ b/src/mp/mp_fget.c
@@ -439,12 +439,7 @@ thawed:	need_free = (atomic_dec(env, &
 	if (flags == DB_MPOOL_FREE) {
 freebuf:	MUTEX_LOCK(env, hp->mtx_hash);
 		h_locked = 1;
-		if (F_ISSET(bhp, BH_DIRTY)) {
-			F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
-			DB_ASSERT(env,
-			    atomic_read(&hp->hash_page_dirty) > 0);
-			atomic_dec(env, &hp->hash_page_dirty);
-		}
+		__memp_bh_clear_dirty(env, hp, bhp);

 		/*
 		 * If the buffer we found is already freed, we're done.
--- a/src/mp/mp_fopen.c
+++ b/src/mp/mp_fopen.c
@@ -14,6 +14,7 @@
 #include "dbinc/db_page.h"
 #include "dbinc/hash.h"

+static int __memp_count_dead_mutex __P((DB_MPOOL *, u_int32_t *));
 static int __memp_mpf_alloc __P((DB_MPOOL *,
 	DB_MPOOLFILE *, const char *, u_int32_t, u_int32_t, MPOOLFILE **));
 static int __memp_mpf_find __P((ENV *,
@@ -711,7 +712,11 @@ __memp_mpf_find(env, dbmfp, hp, path, fl
 	 */
 	if (LF_ISSET(DB_TRUNCATE)) {
 		MUTEX_LOCK(env, mfp->mutex);
-		mfp->deadfile = 1;
+		/*
+		 * We cannot purge dead files here, because the caller
+		 * is holding the mutex of the hash bucket of mfp.
+		 */
+		__memp_mf_mark_dead(dbmp, mfp, NULL);
 		MUTEX_UNLOCK(env, mfp->mutex);
 		continue;
 	}
@@ -909,10 +914,11 @@ __memp_fclose(dbmfp, flags)
 	MPOOLFILE *mfp;
 	char *rpath;
 	u_int32_t ref;
-	int deleted, ret, t_ret;
+	int deleted, purge_dead, ret, t_ret;

 	env = dbmfp->env;
 	dbmp = env->mp_handle;
+	purge_dead = 0;
 	ret = 0;

 	/*
@@ -1006,7 +1012,7 @@ __memp_fclose(dbmfp, flags)
 	if (--mfp->mpf_cnt == 0 || LF_ISSET(DB_MPOOL_DISCARD)) {
 		if (LF_ISSET(DB_MPOOL_DISCARD) ||
 		    F_ISSET(mfp, MP_TEMP) || mfp->unlink_on_close) {
-			mfp->deadfile = 1;
+			__memp_mf_mark_dead(dbmp, mfp, &purge_dead);
 		}
 		if (mfp->unlink_on_close) {
 			if ((t_ret = __db_appname(dbmp->env, DB_APP_DATA,
@@ -1039,6 +1045,8 @@ __memp_fclose(dbmfp, flags)
 	}
 	if (!deleted && !LF_ISSET(DB_MPOOL_NOLOCK))
 		MUTEX_UNLOCK(env, mfp->mutex);
+	if (purge_dead)
+		(void)__memp_purge_dead_files(env);

 done:	/* Discard the DB_MPOOLFILE structure. */
 	if (dbmfp->pgcookie != NULL) {
@@ -1093,7 +1101,7 @@ __memp_mf_discard(dbmp, mfp, hp_locked)
 	 * mutex so we don't deadlock.  Make sure nobody ever looks at this
 	 * structure again.
 	 */
-	mfp->deadfile = 1;
+	__memp_mf_mark_dead(dbmp, mfp, NULL);

 	/* Discard the mutex we're holding and return it to the pool. */
 	MUTEX_UNLOCK(env, mfp->mutex);
@@ -1218,3 +1226,104 @@ nomem:	MUTEX_UNLOCK(env, hp->mtx_hash);
 	*namesp = NULL;
 	return (ret);
 }
+
+/*
+ * __memp_mf_mark_dead --
+ *	Mark an MPOOLFILE as dead because its contents are no longer necessary.
+ *	This happens when removing or truncating a file, or when closing an
+ *	unnamed in-memory database.  Return, in the purgep parameter, whether
+ *	the caller should call __memp_purge_dead_files() after the lock on mfp
+ *	is released.  The caller must hold an exclusive lock on the mfp handle.
+ *
+ * PUBLIC: void __memp_mf_mark_dead __P((DB_MPOOL *, MPOOLFILE *, int *));
+ */
+void
+__memp_mf_mark_dead(dbmp, mfp, purgep)
+	DB_MPOOL *dbmp;
+	MPOOLFILE *mfp;
+	int *purgep;
+{
+	ENV *env;
+#ifdef HAVE_MUTEX_SUPPORT
+	REGINFO *infop;
+	DB_MUTEXREGION *mtxregion;
+	u_int32_t mutex_max, mutex_inuse, dead_mutex;
+#endif
+
+	if (purgep != NULL)
+		*purgep = 0;
+
+	env = dbmp->env;
+
+#ifdef HAVE_MUTEX_SUPPORT
+	MUTEX_REQUIRED(env, mfp->mutex);
+
+	if (MUTEX_ON(env) && mfp->deadfile == 0) {
+		infop = &env->mutex_handle->reginfo;
+		mtxregion = infop->primary;
+
+		mutex_inuse = mtxregion->stat.st_mutex_inuse;
+		if ((mutex_max = env->dbenv->mutex_max) == 0)
+			mutex_max = infop->rp->max / mtxregion->mutex_size;
+
+		/*
+		 * Purging dead pages requires a full scan of the entire
+		 * buffer cache, so it is a slow operation.  We only want to
+		 * do it when it is necessary and provides enough benefit.
+		 * Below is a simple heuristic that determines when to purge
+		 * all dead pages.
+		 */
+		if (purgep != NULL && mutex_inuse > mutex_max - 200) {
+			/*
+			 * If the mutex region is almost full and there are
+			 * many mutexes held by dead files, purge dead files.
+			 */
+			(void)__memp_count_dead_mutex(dbmp, &dead_mutex);
+			dead_mutex += mfp->block_cnt + 1;
+
+			if (dead_mutex > mutex_inuse / 20)
+				*purgep = 1;
+		}
+	}
+#endif
+
+	mfp->deadfile = 1;
+}
+
+/*
+ * __memp_count_dead_mutex --
+ *	Estimate the number of mutexes held by dead files.
+ */
+static int
+__memp_count_dead_mutex(dbmp, dead_mutex)
+	DB_MPOOL *dbmp;
+	u_int32_t *dead_mutex;
+{
+	ENV *env;
+	DB_MPOOL_HASH *hp;
+	MPOOL *mp;
+	MPOOLFILE *mfp;
+	u_int32_t mutex_per_file;
+	int busy, i;
+
+	env = dbmp->env;
+	*dead_mutex = 0;
+	mutex_per_file = 1;
+#ifndef HAVE_ATOMICFILEREAD
+	mutex_per_file = 2;
+#endif
+	mp = dbmp->reginfo[0].primary;
+	hp = R_ADDR(dbmp->reginfo, mp->ftab);
+	for (i = 0; i < MPOOL_FILE_BUCKETS; i++, hp++) {
+		busy = MUTEX_TRYLOCK(env, hp->mtx_hash);
+		if (busy)
+			continue;
+		SH_TAILQ_FOREACH(mfp, &hp->hash_bucket, q, __mpoolfile) {
+			if (mfp->deadfile)
+				*dead_mutex += mfp->block_cnt + mutex_per_file;
+		}
+		MUTEX_UNLOCK(env, hp->mtx_hash);
+	}
+
+	return (0);
+}
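Note: the two thresholds in __memp_mf_mark_dead() are easier to see with
numbers. A hypothetical standalone restatement of the predicate (the 200-slot
headroom and the 1/20 ratio come from the patch; the sample figures below are
invented):

    #include <stdint.h>

    static int
    should_purge(uint32_t mutex_inuse, uint32_t mutex_max, uint32_t dead_mutex)
    {
        /* Region nearly full AND dead files hold more than 5% of mutexes. */
        return (mutex_inuse > mutex_max - 200 && dead_mutex > mutex_inuse / 20);
    }

Example: with mutex_max = 10000, mutex_inuse = 9900 and dead_mutex = 600, we
have 9900 > 9800 and 600 > 495, so *purgep is set and the caller purges.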
--- a/src/mp/mp_method.c
+++ b/src/mp/mp_method.c
@@ -640,7 +640,7 @@ __memp_nameop(env, fileid, newname, full
 	MPOOLFILE *mfp;
 	roff_t newname_off;
 	u_int32_t bucket;
-	int locked, ret;
+	int locked, purge_dead, ret;
 	size_t nlen;
 	void *p;

@@ -657,6 +657,7 @@ __memp_nameop(env, fileid, newname, full
 	nhp = NULL;
 	p = NULL;
 	locked = ret = 0;
+	purge_dead = 0;

 	if (!MPOOL_ON(env))
 		goto fsop;
@@ -749,7 +750,7 @@ __memp_nameop(env, fileid, newname, full
 		 */
 		if (mfp->no_backing_file)
 			mfp->mpf_cnt--;
-		mfp->deadfile = 1;
+		__memp_mf_mark_dead(dbmp, mfp, &purge_dead);
 		MUTEX_UNLOCK(env, mfp->mutex);
 	} else {
 		/*
@@ -808,6 +809,12 @@ err:	if (p != NULL) {
 		if (nhp != NULL && nhp != hp)
 			MUTEX_UNLOCK(env, nhp->mtx_hash);
 	}
+	/*
+	 * __memp_purge_dead_files() must be called when the hash bucket is
+	 * unlocked.
+	 */
+	if (purge_dead)
+		(void)__memp_purge_dead_files(env);
 	return (ret);
 }
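Note: every call site follows the same discipline: mark the file dead while
holding only the MPOOLFILE mutex, and purge only after all handle and
hash-bucket locks are dropped, since the purge itself trylocks hash buckets. A
condensed sketch of the pattern shared by __memp_fclose() and __memp_nameop():

    MUTEX_LOCK(env, mfp->mutex);
    __memp_mf_mark_dead(dbmp, mfp, &purge_dead); /* may set purge_dead */
    MUTEX_UNLOCK(env, mfp->mutex);
    /* ... release any remaining hash-bucket locks ... */
    if (purge_dead)
        (void)__memp_purge_dead_files(env);      /* best-effort cleanup */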

--- a/src/mp/mp_sync.c
+++ b/src/mp/mp_sync.c
@@ -26,6 +26,7 @@ static int __memp_close_flush_files __P(
 static int __memp_sync_files __P((ENV *));
 static int __memp_sync_file __P((ENV *,
 	MPOOLFILE *, void *, u_int32_t *, u_int32_t));
+static inline void __update_err_ret(int, int *);

 /*
  * __memp_walk_files --
@@ -965,3 +966,123 @@ __bhcmp(p1, p2)
 		return (1);
 	return (0);
 }
+
+/*
+ * __memp_purge_dead_files --
+ *	Remove all dead files and their buffers from the mpool.  The caller
+ *	cannot hold any lock on the dead MPOOLFILE handles, their buffers
+ *	or their hash buckets.
+ *
+ * PUBLIC: int __memp_purge_dead_files __P((ENV *));
+ */
+int
+__memp_purge_dead_files(env)
+	ENV *env;
+{
+	BH *bhp;
+	DB_MPOOL *dbmp;
+	DB_MPOOL_HASH *hp, *hp_end;
+	REGINFO *infop;
+	MPOOL *c_mp, *mp;
+	MPOOLFILE *mfp;
+	u_int32_t i_cache;
+	int ret, t_ret, h_lock;
+
+	if (!MPOOL_ON(env))
+		return (0);
+
+	dbmp = env->mp_handle;
+	mp = dbmp->reginfo[0].primary;
+	ret = t_ret = h_lock = 0;
+
+	/*
+	 * Walk each cache's list of buffers and free all buffers whose
+	 * MPOOLFILE is marked as dead.
+	 */
+	for (i_cache = 0; i_cache < mp->nreg; i_cache++) {
+		infop = &dbmp->reginfo[i_cache];
+		c_mp = infop->primary;
+
+		hp = R_ADDR(infop, c_mp->htab);
+		hp_end = &hp[c_mp->htab_buckets];
+		for (; hp < hp_end; hp++) {
+			/* Skip empty buckets. */
+			if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
+				continue;
+
+			/*
+			 * Search for a dead buffer.  Other places that call
+			 * __memp_bhfree() acquire the buffer lock before the
+			 * hash bucket lock.  Even though we acquire the two
+			 * locks in reverse order, we cannot deadlock here
+			 * because we don't block waiting for the locks.
+			 */
+			t_ret = MUTEX_TRYLOCK(env, hp->mtx_hash);
+			if (t_ret != 0) {
+				__update_err_ret(t_ret, &ret);
+				continue;
+			}
+			h_lock = 1;
+			SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
+				/* Skip buffers that are being used. */
+				if (BH_REFCOUNT(bhp) > 0)
+					continue;
+
+				mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
+				if (!mfp->deadfile)
+					continue;
+
+				/* Found a dead buffer.  Prepare to free it. */
+				t_ret = MUTEX_TRYLOCK(env, bhp->mtx_buf);
+				if (t_ret != 0) {
+					__update_err_ret(t_ret, &ret);
+					continue;
+				}
+
+				DB_ASSERT(env, (!F_ISSET(bhp, BH_EXCLUSIVE) &&
+				    BH_REFCOUNT(bhp) == 0));
+				F_SET(bhp, BH_EXCLUSIVE);
+				(void)atomic_inc(env, &bhp->ref);
+
+				__memp_bh_clear_dirty(env, hp, bhp);
+
+				/*
+				 * Free the buffer.  The buffer and hash bucket
+				 * are unlocked by __memp_bhfree.
+				 */
+				if ((t_ret = __memp_bhfree(dbmp, infop, mfp,
+				    hp, bhp, BH_FREE_FREEMEM)) == 0)
+					/*
+					 * Decrement hp so the next iteration
+					 * will search the same bucket again.
+					 */
+					hp--;
+				else
+					__update_err_ret(t_ret, &ret);
+
+				/*
+				 * The hash bucket is unlocked; we need to
+				 * start over again.
+				 */
+				h_lock = 0;
+				break;
+			}
+
+			if (h_lock) {
+				MUTEX_UNLOCK(env, hp->mtx_hash);
+				h_lock = 0;
+			}
+		}
+	}
+
+	return (ret);
+}
+
+static inline void
+__update_err_ret(t_ret, retp)
+	int t_ret;
+	int *retp;
+{
+	if (t_ret != 0 && t_ret != DB_LOCK_NOTGRANTED && *retp == 0)
+		*retp = t_ret;
+}
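Note: the reversed lock order in __memp_purge_dead_files() is safe only because
both acquisitions are non-blocking. A generic sketch of the same pattern in
portable C (pthreads stand in for DB mutexes; all names here are hypothetical):

    #include <errno.h>
    #include <pthread.h>

    /*
     * Normal code paths lock buf then bucket.  This scanner goes bucket
     * then buf, but with trylock it backs off instead of blocking, so a
     * cycle in the wait-for graph can never form.
     */
    static int
    try_reclaim(pthread_mutex_t *bucket, pthread_mutex_t *buf)
    {
        if (pthread_mutex_trylock(bucket) != 0)
            return (EBUSY);     /* bucket busy: skip it, retry later */
        if (pthread_mutex_trylock(buf) != 0) {
            (void)pthread_mutex_unlock(bucket);
            return (EBUSY);     /* buffer busy: back off entirely */
        }
        /* ... reclaim the buffer here ... */
        (void)pthread_mutex_unlock(buf);
        (void)pthread_mutex_unlock(bucket);
        return (0);
    }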
--- a/src/mp/mp_trickle.c
+++ b/src/mp/mp_trickle.c
@@ -67,6 +67,10 @@ __memp_trickle(env, pct, nwrotep)
 		return (EINVAL);
 	}

+	/* First we purge all dead files and their buffers. */
+	if ((ret = __memp_purge_dead_files(env)) != 0)
+		return (ret);
+
 	/*
 	 * Loop through the caches counting total/dirty buffers.
 	 *
--- a/src/mutex/mut_region.c
+++ b/src/mutex/mut_region.c
@@ -17,7 +17,7 @@
 static db_size_t __mutex_align_size __P((ENV *));
 static int __mutex_region_init __P((ENV *, DB_MUTEXMGR *));
 static size_t __mutex_region_size __P((ENV *));
-static size_t __mutex_region_max __P((ENV *));
+static size_t __mutex_region_max __P((ENV *, u_int32_t));

 /*
  * __mutex_open --
@@ -34,7 +34,7 @@ __mutex_open(env, create_ok)
 	DB_MUTEXMGR *mtxmgr;
 	DB_MUTEXREGION *mtxregion;
 	size_t size;
-	u_int32_t cpu_count;
+	u_int32_t cpu_count, mutex_needed;
 	int ret;
 #ifndef HAVE_ATOMIC_SUPPORT
 	u_int i;
@@ -61,19 +61,20 @@ __mutex_open(env, create_ok)
 	}

 	/*
-	 * If the user didn't set an absolute value on the number of mutexes
-	 * we'll need, figure it out.  We're conservative in our allocation,
-	 * we need mutexes for DB handles, group-commit queues and other things
-	 * applications allocate at run-time.  The application may have kicked
-	 * up our count to allocate its own mutexes, add that in.
+	 * Figure out the number of mutexes we'll need.  We're conservative in
+	 * our allocation; we need mutexes for DB handles, group-commit queues,
+	 * and other things applications allocate at run-time.  The application
+	 * may have kicked up our count to allocate its own mutexes; add that
+	 * in.
 	 */
+	mutex_needed =
+	    __lock_region_mutex_count(env) +
+	    __log_region_mutex_count(env) +
+	    __memp_region_mutex_count(env) +
+	    __txn_region_mutex_count(env);
 	if (dbenv->mutex_cnt == 0 &&
 	    F_ISSET(env, ENV_PRIVATE | ENV_THREAD) != ENV_PRIVATE)
-		dbenv->mutex_cnt =
-		    __lock_region_mutex_count(env) +
-		    __log_region_mutex_count(env) +
-		    __memp_region_mutex_count(env) +
-		    __txn_region_mutex_count(env);
+		dbenv->mutex_cnt = mutex_needed;

 	if (dbenv->mutex_max != 0 && dbenv->mutex_cnt > dbenv->mutex_max)
 		dbenv->mutex_cnt = dbenv->mutex_max;
@@ -90,8 +91,8 @@ __mutex_open(env, create_ok)
 	size = __mutex_region_size(env);
 	if (create_ok)
 		F_SET(&mtxmgr->reginfo, REGION_CREATE_OK);
-	if ((ret = __env_region_attach(env,
-	    &mtxmgr->reginfo, size, size + __mutex_region_max(env))) != 0)
+	if ((ret = __env_region_attach(env, &mtxmgr->reginfo,
+	    size, size + __mutex_region_max(env, mutex_needed))) != 0)
 		goto err;

 	/* If we created the region, initialize it. */
@@ -352,9 +353,13 @@ __mutex_region_size(env)

 	s = sizeof(DB_MUTEXMGR) + 1024;

-	/* We discard one mutex for the OOB slot. */
+	/*
+	 * We discard one mutex for the OOB slot.  Make sure mutex_cnt doesn't
+	 * overflow.
+	 */
 	s += __env_alloc_size(
-	    (dbenv->mutex_cnt + 1) *__mutex_align_size(env));
+	    (dbenv->mutex_cnt + (dbenv->mutex_cnt == UINT32_MAX ? 0 : 1)) *
+	    __mutex_align_size(env));

 	return (s);
 }
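Note: the ternary guard exists because mutex_cnt is a u_int32_t, and
UINT32_MAX + 1 silently wraps to 0 under C's modular unsigned arithmetic, which
would collapse the computed region size. A minimal standalone demonstration
(hypothetical, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t cnt = UINT32_MAX;

        /* Unsigned overflow wraps: prints 0. */
        printf("%u\n", cnt + 1);

        /* The patched guard saturates instead: prints 4294967295. */
        printf("%u\n", cnt + (cnt == UINT32_MAX ? 0 : 1));
        return (0);
    }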
@@ -364,28 +369,42 @@ __mutex_region_size(env)
  *	Return the amount of space needed to reach the maximum size.
  */
 static size_t
-__mutex_region_max(env)
+__mutex_region_max(env, mutex_needed)
 	ENV *env;
+	u_int32_t mutex_needed;
 {
 	DB_ENV *dbenv;
-	u_int32_t max;
+	u_int32_t max, mutex_cnt;

 	dbenv = env->dbenv;
+	mutex_cnt = dbenv->mutex_cnt;

-	if ((max = dbenv->mutex_max) == 0) {
+	/*
+	 * We want to limit the region size to accommodate at most UINT32_MAX
+	 * mutexes.  If mutex_cnt is UINT32_MAX, no more space is allowed.
+	 */
+	if ((max = dbenv->mutex_max) == 0 && mutex_cnt != UINT32_MAX)
 		if (F_ISSET(env, ENV_PRIVATE | ENV_THREAD) == ENV_PRIVATE)
-			max = dbenv->mutex_inc + 1;
-		else
+			if (dbenv->mutex_inc + 1 < UINT32_MAX - mutex_cnt)
+				max = dbenv->mutex_inc + 1 + mutex_cnt;
+			else
+				max = UINT32_MAX;
+		else {
 			max = __lock_region_mutex_max(env) +
 			    __txn_region_mutex_max(env) +
 			    __log_region_mutex_max(env) +
 			    dbenv->mutex_inc + 100;
-	} else if (max <= dbenv->mutex_cnt)
+			if (max < UINT32_MAX - mutex_needed)
+				max += mutex_needed;
+			else
+				max = UINT32_MAX;
+		}
+
+	if (max <= mutex_cnt)
 		return (0);
 	else
-		max -= dbenv->mutex_cnt;
-
-	return ( __env_alloc_size(max * __mutex_align_size(env)));
+		return (__env_alloc_size(
+		    (max - mutex_cnt) * __mutex_align_size(env)));
 }

 #ifdef HAVE_MUTEX_SYSTEM_RESOURCES
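Note: both branches in __mutex_region_max() clamp the same way: add only if the
sum stays below UINT32_MAX, otherwise saturate. A minimal helper expressing
that rule (a hypothetical restatement, not code from the patch):

    #include <stdint.h>

    /* Saturating 32-bit add: never wraps, pins at UINT32_MAX instead. */
    static uint32_t
    sat_add_u32(uint32_t a, uint32_t b)
    {
        return (b < UINT32_MAX - a ? a + b : UINT32_MAX);
    }

With it, the two guarded additions read as
`max = sat_add_u32(dbenv->mutex_inc + 1, mutex_cnt)` and
`max = sat_add_u32(max, mutex_needed)`.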