Branch data Line data Source code
1 : : // SPDX-License-Identifier: GPL-2.0+
2 : : /*
3 : : * linux/fs/jbd2/transaction.c
4 : : *
5 : : * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
6 : : *
7 : : * Copyright 1998 Red Hat corp --- All Rights Reserved
8 : : *
9 : : * Generic filesystem transaction handling code; part of the ext2fs
10 : : * journaling system.
11 : : *
12 : : * This file manages transactions (compound commits managed by the
13 : : * journaling code) and handles (individual atomic operations by the
14 : : * filesystem).
15 : : */
16 : :
17 : : #include <linux/time.h>
18 : : #include <linux/fs.h>
19 : : #include <linux/jbd2.h>
20 : : #include <linux/errno.h>
21 : : #include <linux/slab.h>
22 : : #include <linux/timer.h>
23 : : #include <linux/mm.h>
24 : : #include <linux/highmem.h>
25 : : #include <linux/hrtimer.h>
26 : : #include <linux/backing-dev.h>
27 : : #include <linux/bug.h>
28 : : #include <linux/module.h>
29 : : #include <linux/sched/mm.h>
30 : :
31 : : #include <trace/events/jbd2.h>
32 : :
33 : : static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
34 : : static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
35 : :
36 : : static struct kmem_cache *transaction_cache;
37 : 3 : int __init jbd2_journal_init_transaction_cache(void)
38 : : {
39 : 3 : J_ASSERT(!transaction_cache);
40 : 3 : transaction_cache = kmem_cache_create("jbd2_transaction_s",
41 : : sizeof(transaction_t),
42 : : 0,
43 : : SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
44 : : NULL);
45 : 3 : if (!transaction_cache) {
46 : 0 : pr_emerg("JBD2: failed to create transaction cache\n");
47 : 0 : return -ENOMEM;
48 : : }
49 : : return 0;
50 : : }
51 : :
52 : 0 : void jbd2_journal_destroy_transaction_cache(void)
53 : : {
54 : 0 : kmem_cache_destroy(transaction_cache);
55 : 0 : transaction_cache = NULL;
56 : 0 : }
57 : :
58 : 3 : void jbd2_journal_free_transaction(transaction_t *transaction)
59 : : {
60 : 3 : if (unlikely(ZERO_OR_NULL_PTR(transaction)))
61 : 3 : return;
62 : 3 : kmem_cache_free(transaction_cache, transaction);
63 : : }
64 : :
65 : : /*
66 : : * jbd2_get_transaction: obtain a new transaction_t object.
67 : : *
68 : : * Simply initialise a new transaction. Initialize it in
69 : : * RUNNING state and add it to the current journal (which should not
70 : : * have an existing running transaction: we only make a new transaction
71 : : * once we have started to commit the old one).
72 : : *
73 : : * Preconditions:
74 : : * The journal MUST be locked. We don't perform atomic mallocs on the
75 : : * new transaction and we can't block without protecting against other
76 : : * processes trying to touch the journal while it is in transition.
77 : : *
78 : : */
79 : :
80 : 3 : static void jbd2_get_transaction(journal_t *journal,
81 : : transaction_t *transaction)
82 : : {
83 : 3 : transaction->t_journal = journal;
84 : 3 : transaction->t_state = T_RUNNING;
85 : 3 : transaction->t_start_time = ktime_get();
86 : 3 : transaction->t_tid = journal->j_transaction_sequence++;
87 : 3 : transaction->t_expires = jiffies + journal->j_commit_interval;
88 : 3 : spin_lock_init(&transaction->t_handle_lock);
89 : : atomic_set(&transaction->t_updates, 0);
90 : : atomic_set(&transaction->t_outstanding_credits,
91 : : atomic_read(&journal->j_reserved_credits));
92 : : atomic_set(&transaction->t_handle_count, 0);
93 : 3 : INIT_LIST_HEAD(&transaction->t_inode_list);
94 : 3 : INIT_LIST_HEAD(&transaction->t_private_list);
95 : :
96 : : /* Set up the commit timer for the new transaction. */
97 : 3 : journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
98 : 3 : add_timer(&journal->j_commit_timer);
99 : :
100 : 3 : J_ASSERT(journal->j_running_transaction == NULL);
101 : 3 : journal->j_running_transaction = transaction;
102 : 3 : transaction->t_max_wait = 0;
103 : 3 : transaction->t_start = jiffies;
104 : 3 : transaction->t_requested = 0;
105 : 3 : }
106 : :
107 : : /*
108 : : * Handle management.
109 : : *
110 : : * A handle_t is an object which represents a single atomic update to a
111 : : * filesystem, and which tracks all of the modifications which form part
112 : : * of that one update.
113 : : */
114 : :
115 : : /*
116 : : * Update transaction's maximum wait time, if debugging is enabled.
117 : : *
118 : : * In order for t_max_wait to be reliable, it must be protected by a
119 : : * lock. But doing so will mean that start_this_handle() can not be
120 : : * run in parallel on SMP systems, which limits our scalability. So
121 : : * unless debugging is enabled, we no longer update t_max_wait, which
122 : : * means that maximum wait time reported by the jbd2_run_stats
123 : : * tracepoint will always be zero.
124 : : */
125 : : static inline void update_t_max_wait(transaction_t *transaction,
126 : : unsigned long ts)
127 : : {
128 : : #ifdef CONFIG_JBD2_DEBUG
129 : : if (jbd2_journal_enable_debug &&
130 : : time_after(transaction->t_start, ts)) {
131 : : ts = jbd2_time_diff(ts, transaction->t_start);
132 : : spin_lock(&transaction->t_handle_lock);
133 : : if (ts > transaction->t_max_wait)
134 : : transaction->t_max_wait = ts;
135 : : spin_unlock(&transaction->t_handle_lock);
136 : : }
137 : : #endif
138 : : }
139 : :
140 : : /*
141 : : * Wait until running transaction passes to T_FLUSH state and new transaction
142 : : * can thus be started. Also starts the commit if needed. The function expects
143 : : * running transaction to exist and releases j_state_lock.
144 : : */
145 : 3 : static void wait_transaction_locked(journal_t *journal)
146 : : __releases(journal->j_state_lock)
147 : : {
148 : 3 : DEFINE_WAIT(wait);
149 : : int need_to_start;
150 : 3 : tid_t tid = journal->j_running_transaction->t_tid;
151 : :
152 : 3 : prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
153 : : TASK_UNINTERRUPTIBLE);
154 : 3 : need_to_start = !tid_geq(journal->j_commit_request, tid);
155 : : read_unlock(&journal->j_state_lock);
156 : 3 : if (need_to_start)
157 : 0 : jbd2_log_start_commit(journal, tid);
158 : : jbd2_might_wait_for_commit(journal);
159 : 3 : schedule();
160 : 3 : finish_wait(&journal->j_wait_transaction_locked, &wait);
161 : 3 : }
162 : :
163 : : /*
164 : : * Wait until running transaction transitions from T_SWITCH to T_FLUSH
165 : : * state and new transaction can thus be started. The function releases
166 : : * j_state_lock.
167 : : */
168 : 0 : static void wait_transaction_switching(journal_t *journal)
169 : : __releases(journal->j_state_lock)
170 : : {
171 : 0 : DEFINE_WAIT(wait);
172 : :
173 : 0 : if (WARN_ON(!journal->j_running_transaction ||
174 : : journal->j_running_transaction->t_state != T_SWITCH))
175 : 0 : return;
176 : 0 : prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
177 : : TASK_UNINTERRUPTIBLE);
178 : : read_unlock(&journal->j_state_lock);
179 : : /*
180 : : * We don't call jbd2_might_wait_for_commit() here as there's no
181 : : * waiting for outstanding handles happening anymore in T_SWITCH state
182 : : * and handling of reserved handles actually relies on that for
183 : : * correctness.
184 : : */
185 : 0 : schedule();
186 : 0 : finish_wait(&journal->j_wait_transaction_locked, &wait);
187 : : }
188 : :
189 : 0 : static void sub_reserved_credits(journal_t *journal, int blocks)
190 : : {
191 : 0 : atomic_sub(blocks, &journal->j_reserved_credits);
192 : 0 : wake_up(&journal->j_wait_reserved);
193 : 0 : }
194 : :
195 : : /*
196 : : * Wait until we can add credits for handle to the running transaction. Called
197 : : * with j_state_lock held for reading. Returns 0 if handle joined the running
198 : : * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and
199 : : * caller must retry.
200 : : */
201 : 3 : static int add_transaction_credits(journal_t *journal, int blocks,
202 : : int rsv_blocks)
203 : : {
204 : 3 : transaction_t *t = journal->j_running_transaction;
205 : : int needed;
206 : 3 : int total = blocks + rsv_blocks;
207 : :
208 : : /*
209 : : * If the current transaction is locked down for commit, wait
210 : : * for the lock to be released.
211 : : */
212 : 3 : if (t->t_state != T_RUNNING) {
213 : 3 : WARN_ON_ONCE(t->t_state >= T_FLUSH);
214 : 3 : wait_transaction_locked(journal);
215 : 3 : return 1;
216 : : }
217 : :
218 : : /*
219 : : * If there is not enough space left in the log to write all
220 : : * potential buffers requested by this operation, we need to
221 : : * stall pending a log checkpoint to free some more log space.
222 : : */
223 : 3 : needed = atomic_add_return(total, &t->t_outstanding_credits);
224 : 3 : if (needed > journal->j_max_transaction_buffers) {
225 : : /*
226 : : * If the current transaction is already too large,
227 : : * then start to commit it: we can then go back and
228 : : * attach this handle to a new transaction.
229 : : */
230 : : atomic_sub(total, &t->t_outstanding_credits);
231 : :
232 : : /*
233 : : * Is the number of reserved credits in the current transaction too
234 : : * big to fit this handle? Wait until reserved credits are freed.
235 : : */
236 : 0 : if (atomic_read(&journal->j_reserved_credits) + total >
237 : 0 : journal->j_max_transaction_buffers) {
238 : : read_unlock(&journal->j_state_lock);
239 : : jbd2_might_wait_for_commit(journal);
240 : 0 : wait_event(journal->j_wait_reserved,
241 : : atomic_read(&journal->j_reserved_credits) + total <=
242 : : journal->j_max_transaction_buffers);
243 : : return 1;
244 : : }
245 : :
246 : 0 : wait_transaction_locked(journal);
247 : 0 : return 1;
248 : : }
249 : :
250 : : /*
251 : : * The commit code assumes that it can get enough log space
252 : : * without forcing a checkpoint. This is *critical* for
253 : : * correctness: a checkpoint of a buffer which is also
254 : : * associated with a committing transaction creates a deadlock,
255 : : * so commit simply cannot force through checkpoints.
256 : : *
257 : : * We must therefore ensure the necessary space in the journal
258 : : * *before* starting to dirty potentially checkpointed buffers
259 : : * in the new transaction.
260 : : */
261 : 3 : if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
262 : : atomic_sub(total, &t->t_outstanding_credits);
263 : : read_unlock(&journal->j_state_lock);
264 : : jbd2_might_wait_for_commit(journal);
265 : 0 : write_lock(&journal->j_state_lock);
266 : 0 : if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
267 : 0 : __jbd2_log_wait_for_space(journal);
268 : : write_unlock(&journal->j_state_lock);
269 : 0 : return 1;
270 : : }
271 : :
272 : : /* No reservation? We are done... */
273 : 3 : if (!rsv_blocks)
274 : : return 0;
275 : :
276 : 0 : needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
277 : : /* We allow at most half of a transaction to be reserved */
278 : 0 : if (needed > journal->j_max_transaction_buffers / 2) {
279 : 0 : sub_reserved_credits(journal, rsv_blocks);
280 : : atomic_sub(total, &t->t_outstanding_credits);
281 : : read_unlock(&journal->j_state_lock);
282 : : jbd2_might_wait_for_commit(journal);
283 : 0 : wait_event(journal->j_wait_reserved,
284 : : atomic_read(&journal->j_reserved_credits) + rsv_blocks
285 : : <= journal->j_max_transaction_buffers / 2);
286 : : return 1;
287 : : }
288 : : return 0;
289 : : }
290 : :
291 : : /*
292 : : * start_this_handle: Given a handle, deal with any locking or stalling
293 : : * needed to make sure that there is enough journal space for the handle
294 : : * to begin. Attach the handle to a transaction and set up the
295 : : * transaction's buffer credits.
296 : : */
297 : :
298 : 3 : static int start_this_handle(journal_t *journal, handle_t *handle,
299 : : gfp_t gfp_mask)
300 : : {
301 : : transaction_t *transaction, *new_transaction = NULL;
302 : 3 : int blocks = handle->h_buffer_credits;
303 : : int rsv_blocks = 0;
304 : 3 : unsigned long ts = jiffies;
305 : :
306 : 3 : if (handle->h_rsv_handle)
307 : 0 : rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
308 : :
309 : : /*
310 : : * Limit the number of reserved credits to 1/2 of maximum transaction
311 : : * size and limit the number of total credits to not exceed maximum
312 : : * transaction size per operation.
313 : : */
314 : 3 : if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
315 : 3 : (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
316 : 1 : printk(KERN_ERR "JBD2: %s wants too many credits "
317 : : "credits:%d rsv_credits:%d max:%d\n",
318 : 1 : current->comm, blocks, rsv_blocks,
319 : : journal->j_max_transaction_buffers);
320 : 0 : WARN_ON(1);
321 : 0 : return -ENOSPC;
322 : : }
323 : :
324 : : alloc_transaction:
325 : 3 : if (!journal->j_running_transaction) {
326 : : /*
327 : : * If __GFP_FS is not present, then we may be being called from
328 : : * inside the fs writeback layer, so we MUST NOT fail.
329 : : */
330 : 3 : if ((gfp_mask & __GFP_FS) == 0)
331 : 3 : gfp_mask |= __GFP_NOFAIL;
332 : 3 : new_transaction = kmem_cache_zalloc(transaction_cache,
333 : : gfp_mask);
334 : 3 : if (!new_transaction)
335 : : return -ENOMEM;
336 : : }
337 : :
338 : : jbd_debug(3, "New handle %p going live.\n", handle);
339 : :
340 : : /*
341 : : * We need to hold j_state_lock until t_updates has been incremented,
342 : : * for proper journal barrier handling
343 : : */
344 : : repeat:
345 : 3 : read_lock(&journal->j_state_lock);
346 : 3 : BUG_ON(journal->j_flags & JBD2_UNMOUNT);
347 : 3 : if (is_journal_aborted(journal) ||
348 : 3 : (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
349 : : read_unlock(&journal->j_state_lock);
350 : : jbd2_journal_free_transaction(new_transaction);
351 : : return -EROFS;
352 : : }
353 : :
354 : : /*
355 : : * Wait on the journal's transaction barrier if necessary. Specifically
356 : : * we allow reserved handles to proceed because otherwise commit could
357 : : * deadlock on page writeback not being able to complete.
358 : : */
359 : 3 : if (!handle->h_reserved && journal->j_barrier_count) {
360 : : read_unlock(&journal->j_state_lock);
361 : 0 : wait_event(journal->j_wait_transaction_locked,
362 : : journal->j_barrier_count == 0);
363 : : goto repeat;
364 : : }
365 : :
366 : 3 : if (!journal->j_running_transaction) {
367 : : read_unlock(&journal->j_state_lock);
368 : 3 : if (!new_transaction)
369 : : goto alloc_transaction;
370 : 3 : write_lock(&journal->j_state_lock);
371 : 3 : if (!journal->j_running_transaction &&
372 : 3 : (handle->h_reserved || !journal->j_barrier_count)) {
373 : 3 : jbd2_get_transaction(journal, new_transaction);
374 : : new_transaction = NULL;
375 : : }
376 : : write_unlock(&journal->j_state_lock);
377 : : goto repeat;
378 : : }
379 : :
380 : : transaction = journal->j_running_transaction;
381 : :
382 : 3 : if (!handle->h_reserved) {
383 : : /* We may have dropped j_state_lock - restart in that case */
384 : 3 : if (add_transaction_credits(journal, blocks, rsv_blocks))
385 : : goto repeat;
386 : : } else {
387 : : /*
388 : : * We have handle reserved so we are allowed to join T_LOCKED
389 : : * transaction and we don't have to check for transaction size
390 : : * and journal space. But we still have to wait while running
391 : : * transaction is being switched to a committing one as it
392 : : * won't wait for any handles anymore.
393 : : */
394 : 0 : if (transaction->t_state == T_SWITCH) {
395 : 0 : wait_transaction_switching(journal);
396 : 0 : goto repeat;
397 : : }
398 : 0 : sub_reserved_credits(journal, blocks);
399 : 0 : handle->h_reserved = 0;
400 : : }
401 : :
402 : : /* OK, account for the buffers that this operation expects to
403 : : * use and add the handle to the running transaction.
404 : : */
405 : : update_t_max_wait(transaction, ts);
406 : 3 : handle->h_transaction = transaction;
407 : 3 : handle->h_requested_credits = blocks;
408 : 3 : handle->h_start_jiffies = jiffies;
409 : 3 : atomic_inc(&transaction->t_updates);
410 : 3 : atomic_inc(&transaction->t_handle_count);
411 : : jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
412 : : handle, blocks,
413 : : atomic_read(&transaction->t_outstanding_credits),
414 : : jbd2_log_space_left(journal));
415 : : read_unlock(&journal->j_state_lock);
416 : 3 : current->journal_info = handle;
417 : :
418 : : rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
419 : : jbd2_journal_free_transaction(new_transaction);
420 : : /*
421 : : * Ensure that no allocations done while the transaction is open are
422 : : * going to recurse back to the fs layer.
423 : : */
424 : 3 : handle->saved_alloc_context = memalloc_nofs_save();
425 : 3 : return 0;
426 : : }
427 : :
428 : : /* Allocate a new handle. This should probably be in a slab... */
429 : 3 : static handle_t *new_handle(int nblocks)
430 : : {
431 : : handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
432 : 3 : if (!handle)
433 : : return NULL;
434 : 3 : handle->h_buffer_credits = nblocks;
435 : 3 : handle->h_ref = 1;
436 : :
437 : 3 : return handle;
438 : : }
439 : :
440 : 3 : handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
441 : : gfp_t gfp_mask, unsigned int type,
442 : : unsigned int line_no)
443 : : {
444 : : handle_t *handle = journal_current_handle();
445 : : int err;
446 : :
447 : 3 : if (!journal)
448 : : return ERR_PTR(-EROFS);
449 : :
450 : 3 : if (handle) {
451 : 3 : J_ASSERT(handle->h_transaction->t_journal == journal);
452 : 3 : handle->h_ref++;
453 : 3 : return handle;
454 : : }
455 : :
456 : 3 : handle = new_handle(nblocks);
457 : 3 : if (!handle)
458 : : return ERR_PTR(-ENOMEM);
459 : 3 : if (rsv_blocks) {
460 : : handle_t *rsv_handle;
461 : :
462 : 0 : rsv_handle = new_handle(rsv_blocks);
463 : 0 : if (!rsv_handle) {
464 : : jbd2_free_handle(handle);
465 : 0 : return ERR_PTR(-ENOMEM);
466 : : }
467 : 0 : rsv_handle->h_reserved = 1;
468 : 0 : rsv_handle->h_journal = journal;
469 : 0 : handle->h_rsv_handle = rsv_handle;
470 : : }
471 : :
472 : 3 : err = start_this_handle(journal, handle, gfp_mask);
473 : 3 : if (err < 0) {
474 : 0 : if (handle->h_rsv_handle)
475 : : jbd2_free_handle(handle->h_rsv_handle);
476 : : jbd2_free_handle(handle);
477 : 0 : return ERR_PTR(err);
478 : : }
479 : 3 : handle->h_type = type;
480 : 3 : handle->h_line_no = line_no;
481 : 3 : trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
482 : 3 : handle->h_transaction->t_tid, type,
483 : : line_no, nblocks);
484 : :
485 : 3 : return handle;
486 : : }
487 : : EXPORT_SYMBOL(jbd2__journal_start);
488 : :
489 : :
490 : : /**
491 : : * handle_t *jbd2_journal_start() - Obtain a new handle.
492 : : * @journal: Journal to start transaction on.
493 : : * @nblocks: number of block buffer we might modify
494 : : *
495 : : * We make sure that the transaction can guarantee at least nblocks of
496 : : * modified buffers in the log. We block until the log can guarantee
497 : : * that much space. Additionally, if rsv_blocks > 0, we also create another
498 : : * handle with rsv_blocks reserved blocks in the journal. This handle is
499 : : * is stored in h_rsv_handle. It is not attached to any particular transaction
500 : : * and thus doesn't block transaction commit. If the caller uses this reserved
501 : : * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
502 : : * on the parent handle will dispose the reserved one. Reserved handle has to
503 : : * be converted to a normal handle using jbd2_journal_start_reserved() before
504 : : * it can be used.
505 : : *
506 : : * Return a pointer to a newly allocated handle, or an ERR_PTR() value
507 : : * on failure.
508 : : */
509 : 0 : handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
510 : : {
511 : 0 : return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
512 : : }
513 : : EXPORT_SYMBOL(jbd2_journal_start);
514 : :
515 : 0 : void jbd2_journal_free_reserved(handle_t *handle)
516 : : {
517 : 0 : journal_t *journal = handle->h_journal;
518 : :
519 : 0 : WARN_ON(!handle->h_reserved);
520 : 0 : sub_reserved_credits(journal, handle->h_buffer_credits);
521 : : jbd2_free_handle(handle);
522 : 0 : }
523 : : EXPORT_SYMBOL(jbd2_journal_free_reserved);
524 : :
525 : : /**
526 : : * int jbd2_journal_start_reserved() - start reserved handle
527 : : * @handle: handle to start
528 : : * @type: for handle statistics
529 : : * @line_no: for handle statistics
530 : : *
531 : : * Start handle that has been previously reserved with jbd2_journal_reserve().
532 : : * This attaches @handle to the running transaction (or creates one if there's
533 : : * not transaction running). Unlike jbd2_journal_start() this function cannot
534 : : * block on journal commit, checkpointing, or similar stuff. It can block on
535 : : * memory allocation or frozen journal though.
536 : : *
537 : : * Return 0 on success, non-zero on error - handle is freed in that case.
538 : : */
539 : 0 : int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
540 : : unsigned int line_no)
541 : : {
542 : 0 : journal_t *journal = handle->h_journal;
543 : : int ret = -EIO;
544 : :
545 : 0 : if (WARN_ON(!handle->h_reserved)) {
546 : : /* Someone passed in normal handle? Just stop it. */
547 : 0 : jbd2_journal_stop(handle);
548 : 0 : return ret;
549 : : }
550 : : /*
551 : : * Usefulness of mixing of reserved and unreserved handles is
552 : : * questionable. So far nobody seems to need it so just error out.
553 : : */
554 : 0 : if (WARN_ON(current->journal_info)) {
555 : 0 : jbd2_journal_free_reserved(handle);
556 : 0 : return ret;
557 : : }
558 : :
559 : 0 : handle->h_journal = NULL;
560 : : /*
561 : : * GFP_NOFS is here because callers are likely from writeback or
562 : : * similarly constrained call sites
563 : : */
564 : 0 : ret = start_this_handle(journal, handle, GFP_NOFS);
565 : 0 : if (ret < 0) {
566 : 0 : handle->h_journal = journal;
567 : 0 : jbd2_journal_free_reserved(handle);
568 : 0 : return ret;
569 : : }
570 : 0 : handle->h_type = type;
571 : 0 : handle->h_line_no = line_no;
572 : 0 : trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
573 : 0 : handle->h_transaction->t_tid, type,
574 : : line_no, handle->h_buffer_credits);
575 : 0 : return 0;
576 : : }
577 : : EXPORT_SYMBOL(jbd2_journal_start_reserved);
578 : :
579 : : /**
580 : : * int jbd2_journal_extend() - extend buffer credits.
581 : : * @handle: handle to 'extend'
582 : : * @nblocks: nr blocks to try to extend by.
583 : : *
584 : : * Some transactions, such as large extends and truncates, can be done
585 : : * atomically all at once or in several stages. The operation requests
586 : : * a credit for a number of buffer modifications in advance, but can
587 : : * extend its credit if it needs more.
588 : : *
589 : : * jbd2_journal_extend tries to give the running handle more buffer credits.
590 : : * It does not guarantee that allocation - this is a best-effort only.
591 : : * The calling process MUST be able to deal cleanly with a failure to
592 : : * extend here.
593 : : *
594 : : * Return 0 on success, non-zero on failure.
595 : : *
596 : : * return code < 0 implies an error
597 : : * return code > 0 implies normal transaction-full status.
598 : : */
599 : 1 : int jbd2_journal_extend(handle_t *handle, int nblocks)
600 : : {
601 : 1 : transaction_t *transaction = handle->h_transaction;
602 : : journal_t *journal;
603 : : int result;
604 : : int wanted;
605 : :
606 : 1 : if (is_handle_aborted(handle))
607 : : return -EROFS;
608 : 1 : journal = transaction->t_journal;
609 : :
610 : : result = 1;
611 : :
612 : 1 : read_lock(&journal->j_state_lock);
613 : :
614 : : /* Don't extend a locked-down transaction! */
615 : 1 : if (transaction->t_state != T_RUNNING) {
616 : : jbd_debug(3, "denied handle %p %d blocks: "
617 : : "transaction not running\n", handle, nblocks);
618 : : goto error_out;
619 : : }
620 : :
621 : : spin_lock(&transaction->t_handle_lock);
622 : 1 : wanted = atomic_add_return(nblocks,
623 : : &transaction->t_outstanding_credits);
624 : :
625 : 1 : if (wanted > journal->j_max_transaction_buffers) {
626 : : jbd_debug(3, "denied handle %p %d blocks: "
627 : : "transaction too large\n", handle, nblocks);
628 : : atomic_sub(nblocks, &transaction->t_outstanding_credits);
629 : : goto unlock;
630 : : }
631 : :
632 : 1 : if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
633 : : jbd2_log_space_left(journal)) {
634 : : jbd_debug(3, "denied handle %p %d blocks: "
635 : : "insufficient log space\n", handle, nblocks);
636 : : atomic_sub(nblocks, &transaction->t_outstanding_credits);
637 : : goto unlock;
638 : : }
639 : :
640 : 1 : trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
641 : 1 : transaction->t_tid,
642 : 1 : handle->h_type, handle->h_line_no,
643 : : handle->h_buffer_credits,
644 : : nblocks);
645 : :
646 : 1 : handle->h_buffer_credits += nblocks;
647 : 1 : handle->h_requested_credits += nblocks;
648 : : result = 0;
649 : :
650 : : jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
651 : : unlock:
652 : : spin_unlock(&transaction->t_handle_lock);
653 : : error_out:
654 : : read_unlock(&journal->j_state_lock);
655 : 1 : return result;
656 : : }
657 : :
658 : :
659 : : /**
660 : : * int jbd2_journal_restart() - restart a handle .
661 : : * @handle: handle to restart
662 : : * @nblocks: nr credits requested
663 : : * @gfp_mask: memory allocation flags (for start_this_handle)
664 : : *
665 : : * Restart a handle for a multi-transaction filesystem
666 : : * operation.
667 : : *
668 : : * If the jbd2_journal_extend() call above fails to grant new buffer credits
669 : : * to a running handle, a call to jbd2_journal_restart will commit the
670 : : * handle's transaction so far and reattach the handle to a new
671 : : * transaction capable of guaranteeing the requested number of
672 : : * credits. We preserve reserved handle if there's any attached to the
673 : : * passed in handle.
674 : : */
675 : 0 : int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
676 : : {
677 : 0 : transaction_t *transaction = handle->h_transaction;
678 : : journal_t *journal;
679 : : tid_t tid;
680 : : int need_to_start, ret;
681 : :
682 : : /* If we've had an abort of any type, don't even think about
683 : : * actually doing the restart! */
684 : 0 : if (is_handle_aborted(handle))
685 : : return 0;
686 : 0 : journal = transaction->t_journal;
687 : :
688 : : /*
689 : : * First unlink the handle from its current transaction, and start the
690 : : * commit on that.
691 : : */
692 : 0 : J_ASSERT(atomic_read(&transaction->t_updates) > 0);
693 : 0 : J_ASSERT(journal_current_handle() == handle);
694 : :
695 : 0 : read_lock(&journal->j_state_lock);
696 : : spin_lock(&transaction->t_handle_lock);
697 : 0 : atomic_sub(handle->h_buffer_credits,
698 : : &transaction->t_outstanding_credits);
699 : 0 : if (handle->h_rsv_handle) {
700 : 0 : sub_reserved_credits(journal,
701 : : handle->h_rsv_handle->h_buffer_credits);
702 : : }
703 : 0 : if (atomic_dec_and_test(&transaction->t_updates))
704 : 0 : wake_up(&journal->j_wait_updates);
705 : 0 : tid = transaction->t_tid;
706 : : spin_unlock(&transaction->t_handle_lock);
707 : 0 : handle->h_transaction = NULL;
708 : 0 : current->journal_info = NULL;
709 : :
710 : : jbd_debug(2, "restarting handle %p\n", handle);
711 : 0 : need_to_start = !tid_geq(journal->j_commit_request, tid);
712 : : read_unlock(&journal->j_state_lock);
713 : 0 : if (need_to_start)
714 : 0 : jbd2_log_start_commit(journal, tid);
715 : :
716 : : rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
717 : 0 : handle->h_buffer_credits = nblocks;
718 : : /*
719 : : * Restore the original nofs context because the journal restart
720 : : * is basically the same thing as journal stop and start.
721 : : * start_this_handle will start a new nofs context.
722 : : */
723 : 0 : memalloc_nofs_restore(handle->saved_alloc_context);
724 : 0 : ret = start_this_handle(journal, handle, gfp_mask);
725 : 0 : return ret;
726 : : }
727 : : EXPORT_SYMBOL(jbd2__journal_restart);
728 : :
729 : :
730 : 0 : int jbd2_journal_restart(handle_t *handle, int nblocks)
731 : : {
732 : 0 : return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
733 : : }
734 : : EXPORT_SYMBOL(jbd2_journal_restart);
735 : :
736 : : /**
737 : : * void jbd2_journal_lock_updates () - establish a transaction barrier.
738 : : * @journal: Journal to establish a barrier on.
739 : : *
740 : : * This locks out any further updates from being started, and blocks
741 : : * until all existing updates have completed, returning only once the
742 : : * journal is in a quiescent state with no updates running.
743 : : *
744 : : * The journal lock should not be held on entry.
745 : : */
746 : 1 : void jbd2_journal_lock_updates(journal_t *journal)
747 : : {
748 : 1 : DEFINE_WAIT(wait);
749 : :
750 : : jbd2_might_wait_for_commit(journal);
751 : :
752 : 1 : write_lock(&journal->j_state_lock);
753 : 1 : ++journal->j_barrier_count;
754 : :
755 : : /* Wait until there are no reserved handles */
756 : 1 : if (atomic_read(&journal->j_reserved_credits)) {
757 : : write_unlock(&journal->j_state_lock);
758 : 0 : wait_event(journal->j_wait_reserved,
759 : : atomic_read(&journal->j_reserved_credits) == 0);
760 : 0 : write_lock(&journal->j_state_lock);
761 : : }
762 : :
763 : : /* Wait until there are no running updates */
764 : : while (1) {
765 : 1 : transaction_t *transaction = journal->j_running_transaction;
766 : :
767 : 1 : if (!transaction)
768 : : break;
769 : :
770 : : spin_lock(&transaction->t_handle_lock);
771 : 0 : prepare_to_wait(&journal->j_wait_updates, &wait,
772 : : TASK_UNINTERRUPTIBLE);
773 : 0 : if (!atomic_read(&transaction->t_updates)) {
774 : : spin_unlock(&transaction->t_handle_lock);
775 : 0 : finish_wait(&journal->j_wait_updates, &wait);
776 : 0 : break;
777 : : }
778 : : spin_unlock(&transaction->t_handle_lock);
779 : : write_unlock(&journal->j_state_lock);
780 : 0 : schedule();
781 : 0 : finish_wait(&journal->j_wait_updates, &wait);
782 : 0 : write_lock(&journal->j_state_lock);
783 : 0 : }
784 : : write_unlock(&journal->j_state_lock);
785 : :
786 : : /*
787 : : * We have now established a barrier against other normal updates, but
788 : : * we also need to barrier against other jbd2_journal_lock_updates() calls
789 : : * to make sure that we serialise special journal-locked operations
790 : : * too.
791 : : */
792 : 1 : mutex_lock(&journal->j_barrier);
793 : 1 : }
794 : :
795 : : /**
796 : : * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
797 : : * @journal: Journal to release the barrier on.
798 : : *
799 : : * Release a transaction barrier obtained with jbd2_journal_lock_updates().
800 : : *
801 : : * Should be called without the journal lock held.
802 : : */
803 : 1 : void jbd2_journal_unlock_updates (journal_t *journal)
804 : : {
805 : 1 : J_ASSERT(journal->j_barrier_count != 0);
806 : :
807 : 1 : mutex_unlock(&journal->j_barrier);
808 : 1 : write_lock(&journal->j_state_lock);
809 : 1 : --journal->j_barrier_count;
810 : : write_unlock(&journal->j_state_lock);
811 : 1 : wake_up(&journal->j_wait_transaction_locked);
812 : 1 : }
813 : :
814 : : static void warn_dirty_buffer(struct buffer_head *bh)
815 : : {
816 : 0 : printk(KERN_WARNING
817 : : "JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). "
818 : : "There's a risk of filesystem corruption in case of system "
819 : : "crash.\n",
820 : : bh->b_bdev, (unsigned long long)bh->b_blocknr);
821 : : }
822 : :
823 : : /* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */
824 : 3 : static void jbd2_freeze_jh_data(struct journal_head *jh)
825 : : {
826 : : struct page *page;
827 : : int offset;
828 : : char *source;
829 : : struct buffer_head *bh = jh2bh(jh);
830 : :
831 : 3 : J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
832 : 3 : page = bh->b_page;
833 : 3 : offset = offset_in_page(bh->b_data);
834 : 3 : source = kmap_atomic(page);
835 : : /* Fire data frozen trigger just before we copy the data */
836 : 3 : jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
837 : 3 : memcpy(jh->b_frozen_data, source + offset, bh->b_size);
838 : : kunmap_atomic(source);
839 : :
840 : : /*
841 : : * Now that the frozen data is saved off, we need to store any matching
842 : : * triggers.
843 : : */
844 : 3 : jh->b_frozen_triggers = jh->b_triggers;
845 : 3 : }
846 : :
847 : : /*
848 : : * Slow path for getting journal write access to the buffer behind @jh on
849 : : * behalf of @handle's transaction. A buffer already attached to that
850 : : * transaction only has a pending revoke cancelled. A buffer owned by no
851 : : * transaction is filed on this transaction's BJ_Reserved list. A buffer still
852 : : * owned by the committing transaction gets a copy-out (b_frozen_data) when it
853 : : * is on BJ_Metadata or @force_copy is set, so we do not overwrite the copy
854 : : * going to disk, and is then queued via b_next_transaction.
855 : : * Returns 0 on success or -EROFS if the handle has been aborted.
856 : : */
857 : : static int
858 : 3 : do_get_write_access(handle_t *handle, struct journal_head *jh,
859 : : int force_copy)
860 : : {
861 : : struct buffer_head *bh;
862 : 3 : transaction_t *transaction = handle->h_transaction;
863 : : journal_t *journal;
864 : : int error;
865 : : char *frozen_buffer = NULL;
866 : : unsigned long start_lock, time_lock;
867 : :
868 : 3 : journal = transaction->t_journal;
869 : :
870 : : jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
871 : :
872 : : JBUFFER_TRACE(jh, "entry");
873 : : repeat:
874 : : bh = jh2bh(jh);
875 : :
876 : : /* @@@ Need to check for errors here at some point. */
877 : :
878 : 3 : start_lock = jiffies;
879 : 3 : lock_buffer(bh);
880 : : jbd_lock_bh_state(bh);
881 : :
882 : : /* If it took more than HZ/10 jiffies to get the locks, trace it */
883 : 3 : time_lock = jbd2_time_diff(start_lock, jiffies);
884 : 3 : if (time_lock > HZ/10)
885 : 3 : trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
886 : 3 : jiffies_to_msecs(time_lock));
887 : :
888 : : /* We now hold the buffer lock so it is safe to query the buffer
889 : : * state. Is the buffer dirty?
890 : : *
891 : : * If so, there are two possibilities. The buffer may be
892 : : * non-journaled, and undergoing a quite legitimate writeback.
893 : : * Otherwise, it is journaled, and we don't expect dirty buffers
894 : : * in that state (the buffers should be marked JBD_Dirty
895 : : * instead.) So either the IO is being done under our own
896 : : * control and this is a bug, or it's a third party IO such as
897 : : * dump(8) (which may leave the buffer scheduled for read ---
898 : : * ie. locked but not dirty) or tune2fs (which may actually have
899 : : * the buffer dirtied, ugh.) */
900 : :
901 : 3 : if (buffer_dirty(bh)) {
902 : : /*
903 : : * First question: is this buffer already part of the current
904 : : * transaction or the existing committing transaction?
905 : : */
906 : 3 : if (jh->b_transaction) {
907 : 0 : J_ASSERT_JH(jh,
908 : : jh->b_transaction == transaction ||
909 : : jh->b_transaction ==
910 : : journal->j_committing_transaction);
911 : 0 : if (jh->b_next_transaction)
912 : 0 : J_ASSERT_JH(jh, jh->b_next_transaction ==
913 : : transaction);
914 : : warn_dirty_buffer(bh);
915 : : }
916 : : /*
917 : : * In any case we need to clean the dirty flag and we must
918 : : * do it under the buffer lock to be sure we don't race
919 : : * with running write-out. The buffer is marked jbddirty instead.
920 : : */
921 : : JBUFFER_TRACE(jh, "Journalling dirty buffer");
922 : : clear_buffer_dirty(bh);
923 : : set_buffer_jbddirty(bh);
924 : : }
925 : :
926 : 3 : unlock_buffer(bh); /* the bh_state lock is still held from here on */
927 : :
928 : : error = -EROFS;
929 : 3 : if (is_handle_aborted(handle)) {
930 : : jbd_unlock_bh_state(bh);
931 : : goto out;
932 : : }
933 : : error = 0;
934 : :
935 : : /*
936 : : * The buffer is already part of this transaction if b_transaction or
937 : : * b_next_transaction points to it
938 : : */
939 : 3 : if (jh->b_transaction == transaction ||
940 : 3 : jh->b_next_transaction == transaction)
941 : : goto done;
942 : :
943 : : /*
944 : : * this is the first time this transaction is touching this buffer,
945 : : * reset the modified flag
946 : : */
947 : 3 : jh->b_modified = 0;
948 : :
949 : : /*
950 : : * If the buffer is not journaled right now, we need to make sure it
951 : : * doesn't get written to disk before the caller actually commits the
952 : : * new data
953 : : */
954 : 3 : if (!jh->b_transaction) {
955 : : JBUFFER_TRACE(jh, "no transaction");
956 : 3 : J_ASSERT_JH(jh, !jh->b_next_transaction);
957 : : JBUFFER_TRACE(jh, "file as BJ_Reserved");
958 : : /*
959 : : * Make sure all stores to jh (b_modified, b_frozen_data) are
960 : : * visible before attaching it to the running transaction.
961 : : * Paired with barrier in jbd2_write_access_granted()
962 : : */
963 : 3 : smp_wmb();
964 : : spin_lock(&journal->j_list_lock);
965 : 3 : __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
966 : : spin_unlock(&journal->j_list_lock);
967 : : goto done;
968 : : }
969 : : /*
970 : : * If there is already a copy-out version of this buffer, then we don't
971 : : * need to make another one
972 : : */
973 : 3 : if (jh->b_frozen_data) {
974 : : JBUFFER_TRACE(jh, "has frozen data");
975 : 0 : J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
976 : : goto attach_next;
977 : : }
978 : :
979 : : JBUFFER_TRACE(jh, "owned by older transaction");
980 : 3 : J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
981 : 3 : J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
982 : :
983 : : /*
984 : : * There is one case we have to be very careful about. If the
985 : : * committing transaction is currently writing this buffer out to disk
986 : : * and has NOT made a copy-out, then we cannot modify the buffer
987 : : * contents at all right now. The essence of copy-out is that it is
988 : : * the extra copy, not the primary copy, which gets journaled. If the
989 : : * primary copy is already going to disk then we cannot do copy-out
990 : : * here. Drop the bh_state lock, wait for BH_Shadow to clear and retry.
991 : : */
992 : 3 : if (buffer_shadow(bh)) {
993 : : JBUFFER_TRACE(jh, "on shadow: sleep");
994 : : jbd_unlock_bh_state(bh);
995 : 3 : wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
996 : 3 : goto repeat;
997 : : }
998 : :
999 : : /*
1000 : : * Only do the copy if the currently-owning transaction still needs it.
1001 : : * If buffer isn't on BJ_Metadata list, the committing transaction is
1002 : : * past that stage (here we use the fact that BH_Shadow is set under
1003 : : * bh_state lock together with refiling to BJ_Shadow list and at this
1004 : : * point we know the buffer doesn't have BH_Shadow set).
1005 : : *
1006 : : * Subtle point, though: if this is a get_undo_access, then we will be
1007 : : * relying on the frozen_data to contain the new value of the
1008 : : * committed_data record after the transaction, so we HAVE to force the
1009 : : * frozen_data copy in that case.
1010 : : */
1011 : 3 : if (jh->b_jlist == BJ_Metadata || force_copy) {
1012 : : JBUFFER_TRACE(jh, "generate frozen data");
1013 : 3 : if (!frozen_buffer) {
1014 : : JBUFFER_TRACE(jh, "allocate memory for buffer");
1015 : : jbd_unlock_bh_state(bh);
1016 : 3 : frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
1017 : : GFP_NOFS | __GFP_NOFAIL);
1018 : 3 : goto repeat; /* bh_state was dropped for the allocation: recheck */
1019 : : }
1020 : 3 : jh->b_frozen_data = frozen_buffer;
1021 : : frozen_buffer = NULL;
1022 : 3 : jbd2_freeze_jh_data(jh);
1023 : : }
1024 : : attach_next:
1025 : : /*
1026 : : * Make sure all stores to jh (b_modified, b_frozen_data) are visible
1027 : : * before attaching it to the running transaction. Paired with barrier
1028 : : * in jbd2_write_access_granted()
1029 : : */
1030 : 3 : smp_wmb();
1031 : 3 : jh->b_next_transaction = transaction;
1032 : :
1033 : : done:
1034 : : jbd_unlock_bh_state(bh);
1035 : :
1036 : : /*
1037 : : * If we are about to journal a buffer, then any revoke pending on it is
1038 : : * no longer valid
1039 : : */
1040 : 3 : jbd2_journal_cancel_revoke(handle, jh);
1041 : :
1042 : : out:
1043 : 3 : if (unlikely(frozen_buffer)) /* allocated above but not used; usually NULL */
1044 : 3 : jbd2_free(frozen_buffer, bh->b_size);
1045 : :
1046 : : JBUFFER_TRACE(jh, "exit");
1047 : 3 : return error;
1048 : : }
1049 : :
1050 : : /* Fast check whether buffer is already attached to the required transaction */
1051 : 3 : static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
1052 : : bool undo)
1053 : : {
1054 : : struct journal_head *jh;
1055 : : bool ret = false;
1056 : :
1057 : : /* Dirty buffers require special handling... */
1058 : 3 : if (buffer_dirty(bh))
1059 : : return false;
1060 : :
1061 : : /*
1062 : : * RCU protects us from dereferencing freed pages. So the checks we do
1063 : : * are guaranteed not to oops. However the jh slab object can get freed
1064 : : * & reallocated while we work with it. So we have to be careful. When
1065 : : * we see jh attached to the running transaction, we know it must stay
1066 : : * so until the transaction is committed. Thus jh won't be freed and
1067 : : * will be attached to the same bh while we run. However it can
1068 : : * happen jh gets freed, reallocated, and attached to the transaction
1069 : : * just after we get pointer to it from bh. So we have to be careful
1070 : : * and recheck jh still belongs to our bh before we return success.
1071 : : */
1072 : : rcu_read_lock();
1073 : 3 : if (!buffer_jbd(bh))
1074 : : goto out;
1075 : : /* This should be bh2jh() but that doesn't work with inline functions */
1076 : : jh = READ_ONCE(bh->b_private);
1077 : 3 : if (!jh)
1078 : : goto out;
1079 : : /* For undo access buffer must have data copied */
1080 : 3 : if (undo && !jh->b_committed_data)
1081 : : goto out;
1082 : 3 : if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
1083 : 3 : READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
1084 : : goto out;
1085 : : /*
1086 : : * There are two reasons for the barrier here:
1087 : : * 1) Make sure to fetch b_bh after we did previous checks so that we
1088 : : * detect when jh went through free, realloc, attach to transaction
1089 : : * while we were checking. Paired with implicit barrier in that path.
1090 : : * 2) So that access to bh done after jbd2_write_access_granted()
1091 : : * doesn't get reordered and see inconsistent state of concurrent
1092 : : * do_get_write_access().
1093 : : */
1094 : 3 : smp_mb();
1095 : 3 : if (unlikely(jh->b_bh != bh))
1096 : : goto out;
1097 : : ret = true;
1098 : : out:
1099 : : rcu_read_unlock();
1100 : 3 : return ret;
1101 : : }
1102 : :
1103 : : /**
1104 : : * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
1105 : : * @handle: transaction to add buffer modifications to
1106 : : * @bh: bh to be used for metadata writes
1107 : : *
1108 : : * Returns: error code or 0 on success.
1109 : : *
1110 : : * In full data journalling mode the buffer may be of type BJ_AsyncData,
1111 : : * because we're ``write()ing`` a buffer which is also part of a shared mapping.
1112 : : */
1113 : :
1114 : 3 : int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
1115 : : {
1116 : : struct journal_head *jh;
1117 : : int rc;
1118 : :
1119 : 3 : if (is_handle_aborted(handle))
1120 : : return -EROFS;
1121 : :
1122 : 3 : if (jbd2_write_access_granted(handle, bh, false))
1123 : : return 0;
1124 : :
1125 : 3 : jh = jbd2_journal_add_journal_head(bh);
1126 : : /* We do not want to get caught playing with fields which the
1127 : : * log thread also manipulates. Make sure that the buffer
1128 : : * completes any outstanding IO before proceeding. */
1129 : 3 : rc = do_get_write_access(handle, jh, 0);
1130 : 3 : jbd2_journal_put_journal_head(jh);
1131 : 3 : return rc;
1132 : : }
1133 : :
1134 : :
1135 : : /*
1136 : : * When the user wants to journal a newly created buffer_head
1137 : : * (ie. getblk() returned a new buffer and we are going to populate it
1138 : : * manually rather than reading off disk), then we need to keep the
1139 : : * buffer_head locked until it has been completely filled with new
1140 : : * data. In this case, we should be able to make the assertion that
1141 : : * the bh is not already part of an existing transaction.
1142 : : *
1143 : : * The buffer should already be locked by the caller by this point.
1144 : : * There is no lock ranking violation: it was a newly created,
1145 : : * unlocked buffer beforehand. */
1146 : :
1147 : : /**
1148 : : * int jbd2_journal_get_create_access () - notify intent to use newly created bh
1149 : : * @handle: transaction to new buffer to
1150 : : * @bh: new buffer.
1151 : : *
1152 : : * Call this if you create a new bh.
1153 : : */
1154 : 3 : int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
1155 : : {
1156 : 3 : transaction_t *transaction = handle->h_transaction;
1157 : : journal_t *journal;
1158 : 3 : struct journal_head *jh = jbd2_journal_add_journal_head(bh);
1159 : : int err;
1160 : :
1161 : : jbd_debug(5, "journal_head %p\n", jh);
1162 : : err = -EROFS;
1163 : 3 : if (is_handle_aborted(handle))
1164 : : goto out;
1165 : 3 : journal = transaction->t_journal;
1166 : : err = 0;
1167 : :
1168 : : JBUFFER_TRACE(jh, "entry");
1169 : : /*
1170 : : * The buffer may already belong to this transaction due to pre-zeroing
1171 : : * in the filesystem's new_block code. It may also be on the previous,
1172 : : * committing transaction's lists, but it HAS to be in Forget state in
1173 : : * that case: the transaction must have deleted the buffer for it to be
1174 : : * reused here.
1175 : : */
1176 : : jbd_lock_bh_state(bh);
1177 : 3 : J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
1178 : : jh->b_transaction == NULL ||
1179 : : (jh->b_transaction == journal->j_committing_transaction &&
1180 : : jh->b_jlist == BJ_Forget)));
1181 : :
1182 : 3 : J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1183 : 3 : J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
1184 : :
1185 : 3 : if (jh->b_transaction == NULL) {
1186 : : /*
1187 : : * Previous jbd2_journal_forget() could have left the buffer
1188 : : * with jbddirty bit set because it was being committed. When
1189 : : * the commit finished, we've filed the buffer for
1190 : : * checkpointing and marked it dirty. Now we are reallocating
1191 : : * the buffer so the transaction freeing it must have
1192 : : * committed and so it's safe to clear the dirty bit.
1193 : : */
1194 : : clear_buffer_dirty(jh2bh(jh));
1195 : : /* first access by this transaction */
1196 : 3 : jh->b_modified = 0;
1197 : :
1198 : : JBUFFER_TRACE(jh, "file as BJ_Reserved");
1199 : : spin_lock(&journal->j_list_lock);
1200 : 3 : __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
1201 : : spin_unlock(&journal->j_list_lock);
1202 : 0 : } else if (jh->b_transaction == journal->j_committing_transaction) {
1203 : : /* first access by this transaction */
1204 : 0 : jh->b_modified = 0;
1205 : :
1206 : : JBUFFER_TRACE(jh, "set next transaction");
1207 : : spin_lock(&journal->j_list_lock);
1208 : 0 : jh->b_next_transaction = transaction;
1209 : : spin_unlock(&journal->j_list_lock);
1210 : : }
1211 : : jbd_unlock_bh_state(bh);
1212 : :
1213 : : /*
1214 : : * akpm: I added this. ext3_alloc_branch can pick up new indirect
1215 : : * blocks which contain freed but then revoked metadata. We need
1216 : : * to cancel the revoke in case we end up freeing it yet again
1217 : : * and the reallocating as data - this would cause a second revoke,
1218 : : * which hits an assertion error.
1219 : : */
1220 : : JBUFFER_TRACE(jh, "cancelling revoke");
1221 : 3 : jbd2_journal_cancel_revoke(handle, jh);
1222 : : out:
1223 : 3 : jbd2_journal_put_journal_head(jh);
1224 : 3 : return err;
1225 : : }
1226 : :
1227 : : /**
1228 : : * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
1229 : : * non-rewindable consequences
1230 : : * @handle: transaction
1231 : : * @bh: buffer to undo
1232 : : *
1233 : : * Sometimes there is a need to distinguish between metadata which has
1234 : : * been committed to disk and that which has not. The ext3fs code uses
1235 : : * this for freeing and allocating space, we have to make sure that we
1236 : : * do not reuse freed space until the deallocation has been committed,
1237 : : * since if we overwrote that space we would make the delete
1238 : : * un-rewindable in case of a crash.
1239 : : *
1240 : : * To deal with that, jbd2_journal_get_undo_access requests write access to a
1241 : : * buffer for parts of non-rewindable operations such as delete
1242 : : * operations on the bitmaps. The journaling code must keep a copy of
1243 : : * the buffer's contents prior to the undo_access call until such time
1244 : : * as we know that the buffer has definitely been committed to disk.
1245 : : *
1246 : : * We never need to know which transaction the committed data is part
1247 : : * of, buffers touched here are guaranteed to be dirtied later and so
1248 : : * will be committed to a new transaction in due course, at which point
1249 : : * we can discard the old committed data pointer.
1250 : : *
1251 : : * Returns error number or 0 on success.
1252 : : */
1253 : 0 : int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
1254 : : {
1255 : : int err;
1256 : : struct journal_head *jh;
1257 : : char *committed_data = NULL;
1258 : :
1259 : 0 : if (is_handle_aborted(handle))
1260 : : return -EROFS;
1261 : :
1262 : 0 : if (jbd2_write_access_granted(handle, bh, true))
1263 : : return 0;
1264 : :
1265 : 0 : jh = jbd2_journal_add_journal_head(bh);
1266 : : JBUFFER_TRACE(jh, "entry");
1267 : :
1268 : : /*
1269 : : * Do this first --- it can drop the journal lock, so we want to
1270 : : * make sure that obtaining the committed_data is done
1271 : : * atomically wrt. completion of any outstanding commits.
1272 : : */
1273 : 0 : err = do_get_write_access(handle, jh, 1);
1274 : 0 : if (err)
1275 : : goto out;
1276 : :
1277 : : repeat:
1278 : 0 : if (!jh->b_committed_data)
1279 : 0 : committed_data = jbd2_alloc(jh2bh(jh)->b_size,
1280 : : GFP_NOFS|__GFP_NOFAIL);
1281 : :
1282 : : jbd_lock_bh_state(bh);
1283 : 0 : if (!jh->b_committed_data) {
1284 : : /* Copy out the current buffer contents into the
1285 : : * preserved, committed copy. */
1286 : : JBUFFER_TRACE(jh, "generate b_committed data");
1287 : 0 : if (!committed_data) {
1288 : : jbd_unlock_bh_state(bh);
1289 : : goto repeat;
1290 : : }
1291 : :
1292 : 0 : jh->b_committed_data = committed_data;
1293 : : committed_data = NULL;
1294 : 0 : memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
1295 : : }
1296 : : jbd_unlock_bh_state(bh);
1297 : : out:
1298 : 0 : jbd2_journal_put_journal_head(jh);
1299 : 0 : if (unlikely(committed_data))
1300 : 0 : jbd2_free(committed_data, bh->b_size);
1301 : 0 : return err;
1302 : : }
1303 : :
1304 : : /**
1305 : : * void jbd2_journal_set_triggers() - Add triggers for commit writeout
1306 : : * @bh: buffer to trigger on
1307 : : * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
1308 : : *
1309 : : * Set any triggers on this journal_head. This is always safe, because
1310 : : * triggers for a committing buffer will be saved off, and triggers for
1311 : : * a running transaction will match the buffer in that transaction.
1312 : : *
1313 : : * Call with NULL to clear the triggers.
1314 : : */
1315 : 0 : void jbd2_journal_set_triggers(struct buffer_head *bh,
1316 : : struct jbd2_buffer_trigger_type *type)
1317 : : {
1318 : 0 : struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
1319 : :
1320 : 0 : if (WARN_ON(!jh))
1321 : 0 : return;
1322 : 0 : jh->b_triggers = type;
1323 : 0 : jbd2_journal_put_journal_head(jh);
1324 : : }
1325 : :
1326 : 3 : void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
1327 : : struct jbd2_buffer_trigger_type *triggers)
1328 : : {
1329 : : struct buffer_head *bh = jh2bh(jh);
1330 : :
1331 : 3 : if (!triggers || !triggers->t_frozen)
1332 : 3 : return;
1333 : :
1334 : 0 : triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
1335 : : }
1336 : :
1337 : 0 : void jbd2_buffer_abort_trigger(struct journal_head *jh,
1338 : : struct jbd2_buffer_trigger_type *triggers)
1339 : : {
1340 : 0 : if (!triggers || !triggers->t_abort)
1341 : 0 : return;
1342 : :
1343 : 0 : triggers->t_abort(triggers, jh2bh(jh));
1344 : : }
1345 : :
1346 : : /**
1347 : : * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
1348 : : * @handle: transaction to add buffer to.
1349 : : * @bh: buffer to mark
1350 : : *
1351 : : * mark dirty metadata which needs to be journaled as part of the current
1352 : : * transaction.
1353 : : *
1354 : : * The buffer must have previously had jbd2_journal_get_write_access() called
1355 : : * so that it has a valid journal_head attached to the buffer head.
1356 : : *
1357 : : * The buffer is placed on the transaction's metadata list and is marked as
1358 : : * belonging to the transaction.
1359 : : *
1360 : : * Returns 0 on success, -EROFS if the handle has been aborted, -EUCLEAN if
1361 : : * buffer_jbd(@bh) is false, -ENOSPC if the handle has no buffer credits left
1362 : : * and -EINVAL if the journal_head is found in an unexpected state.
1363 : : *
1364 : : * Special care needs to be taken if the buffer already belongs to the current
1365 : : * committing transaction (in which case we should have frozen data present for
1366 : : * that commit). In that case, we don't relink the buffer: that only gets done
1367 : : * when the old transaction finally completes its commit.
1368 : : */
1369 : 3 : int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1370 : : {
1371 : 3 : transaction_t *transaction = handle->h_transaction;
1372 : : journal_t *journal;
1373 : : struct journal_head *jh;
1374 : : int ret = 0;
1375 : :
1376 : 3 : if (is_handle_aborted(handle))
1377 : : return -EROFS;
1378 : 3 : if (!buffer_jbd(bh))
1379 : : return -EUCLEAN;
1380 : :
1381 : : /*
1382 : : * We don't grab jh reference here since the buffer must be part
1383 : : * of the running transaction.
1384 : : */
1385 : : jh = bh2jh(bh);
1386 : : jbd_debug(5, "journal_head %p\n", jh);
1387 : : JBUFFER_TRACE(jh, "entry");
1388 : :
1389 : : /*
1390 : : * This and the following assertions are unreliable since we may see jh
1391 : : * in inconsistent state unless we grab bh_state lock. But this is
1392 : : * crucial to catch bugs so let's do a reliable check (repeat the test
1393 : : * under the lock) until the lockless handling is fully proven.
1394 : : */
1395 : 3 : if (jh->b_transaction != transaction &&
1396 : 3 : jh->b_next_transaction != transaction) {
1397 : : jbd_lock_bh_state(bh);
1398 : 1 : J_ASSERT_JH(jh, jh->b_transaction == transaction ||
1399 : : jh->b_next_transaction == transaction);
1400 : : jbd_unlock_bh_state(bh);
1401 : : }
1402 : 3 : if (jh->b_modified == 1) {
1403 : : /* If it's in our transaction it must be in BJ_Metadata list. */
1404 : 3 : if (jh->b_transaction == transaction &&
1405 : 3 : jh->b_jlist != BJ_Metadata) {
1406 : : jbd_lock_bh_state(bh);
1407 : 1 : if (jh->b_transaction == transaction &&
1408 : 1 : jh->b_jlist != BJ_Metadata)
1409 : 0 : pr_err("JBD2: assertion failure: h_type=%u "
1410 : : "h_line_no=%u block_no=%llu jlist=%u\n",
1411 : : handle->h_type, handle->h_line_no,
1412 : : (unsigned long long) bh->b_blocknr,
1413 : : jh->b_jlist);
1414 : 1 : J_ASSERT_JH(jh, jh->b_transaction != transaction ||
1415 : : jh->b_jlist == BJ_Metadata);
1416 : : jbd_unlock_bh_state(bh);
1417 : : }
1418 : : goto out;
1419 : : }
1420 : :
1421 : 3 : journal = transaction->t_journal;
1422 : : jbd_lock_bh_state(bh);
1423 : :
1424 : 3 : if (jh->b_modified == 0) {
1425 : : /*
1426 : : * This buffer's got modified and becoming part
1427 : : * of the transaction. This needs to be done
1428 : : * once a transaction -bzzz
1429 : : */
1430 : 3 : if (handle->h_buffer_credits <= 0) {
1431 : : ret = -ENOSPC;
1432 : : goto out_unlock_bh;
1433 : : }
1434 : 3 : jh->b_modified = 1;
1435 : 3 : handle->h_buffer_credits--;
1436 : : }
1437 : :
1438 : : /*
1439 : : * fastpath, to avoid expensive locking. If this buffer is already
1440 : : * on the running transaction's metadata list there is nothing to do.
1441 : : * Nobody can take it off again because there is a handle open.
1442 : : * I _think_ we're OK here with SMP barriers - a mistaken decision will
1443 : : * result in this test being false, so we go in and take the locks.
1444 : : */
1445 : 3 : if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
1446 : : JBUFFER_TRACE(jh, "fastpath");
1447 : 0 : if (unlikely(jh->b_transaction !=
1448 : : journal->j_running_transaction)) {
1449 : 0 : printk(KERN_ERR "JBD2: %s: "
1450 : : "jh->b_transaction (%llu, %p, %u) != "
1451 : : "journal->j_running_transaction (%p, %u)\n",
1452 : 0 : journal->j_devname,
1453 : : (unsigned long long) bh->b_blocknr,
1454 : : jh->b_transaction,
1455 : : jh->b_transaction ? jh->b_transaction->t_tid : 0,
1456 : : journal->j_running_transaction,
1457 : : journal->j_running_transaction ?
1458 : : journal->j_running_transaction->t_tid : 0);
1459 : : ret = -EINVAL;
1460 : : }
1461 : : goto out_unlock_bh;
1462 : : }
1463 : :
1464 : : set_buffer_jbddirty(bh);
1465 : :
1466 : : /*
1467 : : * Metadata already on the current transaction list doesn't
1468 : : * need to be filed. Metadata on another transaction's list must
1469 : : * be committing, and will be refiled once the commit completes:
1470 : : * leave it alone for now.
1471 : : */
1472 : 3 : if (jh->b_transaction != transaction) {
1473 : : JBUFFER_TRACE(jh, "already on other transaction");
1474 : 3 : if (unlikely(((jh->b_transaction !=
1475 : : journal->j_committing_transaction)) ||
1476 : : (jh->b_next_transaction != transaction))) {
1477 : 0 : printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
1478 : : "bad jh for block %llu: "
1479 : : "transaction (%p, %u), "
1480 : : "jh->b_transaction (%p, %u), "
1481 : : "jh->b_next_transaction (%p, %u), jlist %u\n",
1482 : 0 : journal->j_devname,
1483 : : (unsigned long long) bh->b_blocknr,
1484 : : transaction, transaction->t_tid,
1485 : : jh->b_transaction,
1486 : : jh->b_transaction ?
1487 : : jh->b_transaction->t_tid : 0,
1488 : : jh->b_next_transaction,
1489 : 0 : jh->b_next_transaction ?
1490 : : jh->b_next_transaction->t_tid : 0,
1491 : : jh->b_jlist);
1492 : 0 : WARN_ON(1);
1493 : : ret = -EINVAL;
1494 : : }
1495 : : /* And this case is illegal: we can't reuse another
1496 : : * transaction's data buffer, ever. */
1497 : : goto out_unlock_bh;
1498 : : }
1499 : :
1500 : : /* That test should have eliminated the following case: */
1501 : 3 : J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
1502 : :
1503 : : JBUFFER_TRACE(jh, "file as BJ_Metadata");
1504 : : spin_lock(&journal->j_list_lock);
1505 : 3 : __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
1506 : : spin_unlock(&journal->j_list_lock);
1507 : : out_unlock_bh:
1508 : : jbd_unlock_bh_state(bh);
1509 : : out:
1510 : : JBUFFER_TRACE(jh, "exit");
1511 : 3 : return ret;
1512 : : }
1513 : :
1514 : : /**
1515 : : * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
1516 : : * @handle: transaction handle
1517 : : * @bh: bh to 'forget'
1518 : : *
1519 : : * We can only do the bforget if there are no commits pending against the
1520 : : * buffer. If the buffer is dirty in the current running transaction we
1521 : : * can safely unlink it.
1522 : : *
1523 : : * bh may not be a journalled buffer at all - it may be a non-JBD
1524 : : * buffer which came off the hashtable. Check for this.
1525 : : *
1526 : : * Decrements bh->b_count by one (via __brelse() or __bforget()), except when
1527 : : * -EROFS is returned: an aborted handle is rejected before @bh is touched.
1528 : : *
1529 : : * Returns 0 on success, -EROFS if the handle has been aborted, or -EIO if
1530 : : * the journal_head of @bh unexpectedly has b_committed_data set.
1531 : : */
1532 : 3 : int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1533 : : {
1534 : 3 : transaction_t *transaction = handle->h_transaction;
1535 : : journal_t *journal;
1536 : : struct journal_head *jh;
1537 : : int drop_reserve = 0;
1538 : : int err = 0;
1539 : : int was_modified = 0;
1540 : :
1541 : 3 : if (is_handle_aborted(handle))
1542 : : return -EROFS;
1543 : 3 : journal = transaction->t_journal;
1544 : :
1545 : : BUFFER_TRACE(bh, "entry");
1546 : :
1547 : : jbd_lock_bh_state(bh);
1548 : :
1549 : 3 : if (!buffer_jbd(bh))
1550 : : goto not_jbd;
1551 : : jh = bh2jh(bh);
1552 : :
1553 : : /* Critical error: attempting to delete a bitmap buffer, maybe?
1554 : : * Don't do any jbd operations, and return an error. */
1555 : 3 : if (!J_EXPECT_JH(jh, !jh->b_committed_data,
1556 : : "inconsistent data on disk")) {
1557 : : err = -EIO;
1558 : : goto not_jbd;
1559 : : }
1560 : :
1561 : : /* keep track of whether or not this transaction modified us */
1562 : 3 : was_modified = jh->b_modified;
1563 : :
1564 : : /*
1565 : : * The buffer's going from the transaction, we must drop
1566 : : * all references -bzzz
1567 : : */
1568 : 3 : jh->b_modified = 0;
1569 : :
1570 : 3 : if (jh->b_transaction == transaction) {
1571 : 3 : J_ASSERT_JH(jh, !jh->b_frozen_data);
1572 : :
1573 : : /* If we are forgetting a buffer which is already part
1574 : : * of this transaction, then we can just drop it from
1575 : : * the transaction immediately. */
1576 : : clear_buffer_dirty(bh);
1577 : : clear_buffer_jbddirty(bh);
1578 : :
1579 : : JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1580 : :
1581 : : /*
1582 : : * we only want to give a buffer credit back if this transaction
1583 : : * modified the buffer
1584 : : */
1585 : 3 : if (was_modified)
1586 : : drop_reserve = 1;
1587 : :
1588 : : /*
1589 : : * We are no longer going to journal this buffer.
1590 : : * However, the commit of this transaction is still
1591 : : * important to the buffer: the delete that we are now
1592 : : * processing might obsolete an old log entry, so by
1593 : : * committing, we can satisfy the buffer's checkpoint.
1594 : : *
1595 : : * So, if we have a checkpoint on the buffer, we should
1596 : : * now refile the buffer on our BJ_Forget list so that
1597 : : * we know to remove the checkpoint after we commit.
1598 : : */
1599 : :
1600 : : spin_lock(&journal->j_list_lock);
1601 : 3 : if (jh->b_cp_transaction) {
1602 : 3 : __jbd2_journal_temp_unlink_buffer(jh);
1603 : 3 : __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1604 : : } else {
1605 : : __jbd2_journal_unfile_buffer(jh);
1606 : 3 : if (!buffer_jbd(bh)) {
1607 : : spin_unlock(&journal->j_list_lock);
1608 : : goto not_jbd;
1609 : : }
1610 : : }
1611 : : spin_unlock(&journal->j_list_lock);
1612 : 3 : } else if (jh->b_transaction) {
1613 : 0 : J_ASSERT_JH(jh, (jh->b_transaction ==
1614 : : journal->j_committing_transaction));
1615 : : /* However, if the buffer is still owned by a prior
1616 : : * (committing) transaction, we can't drop it yet... */
1617 : : JBUFFER_TRACE(jh, "belongs to older transaction");
1618 : : /* ... but we CAN drop it from the new transaction through
1619 : : * marking the buffer as freed and setting b_next_transaction to
1620 : : * the new transaction, so that not only the commit code
1621 : : * knows it should clear dirty bits when it is done with the
1622 : : * buffer, but also the buffer can be checkpointed only
1623 : : * after the new transaction commits. */
1624 : :
1625 : : set_buffer_freed(bh);
1626 : :
1627 : 0 : if (!jh->b_next_transaction) {
1628 : : spin_lock(&journal->j_list_lock);
1629 : 0 : jh->b_next_transaction = transaction;
1630 : : spin_unlock(&journal->j_list_lock);
1631 : : } else {
1632 : 0 : J_ASSERT(jh->b_next_transaction == transaction);
1633 : :
1634 : : /*
1635 : : * only give a buffer credit back if this transaction modified
1636 : : * the buffer
1637 : : */
1638 : 0 : if (was_modified)
1639 : : drop_reserve = 1;
1640 : : }
1641 : : } else {
1642 : : /*
1643 : : * Finally, if the buffer does not belong to any
1644 : : * transaction, we can just drop it now if it has no
1645 : : * checkpoint.
1646 : : */
1647 : : spin_lock(&journal->j_list_lock);
1648 : 3 : if (!jh->b_cp_transaction) {
1649 : : JBUFFER_TRACE(jh, "belongs to none transaction");
1650 : : spin_unlock(&journal->j_list_lock);
1651 : : goto not_jbd;
1652 : : }
1653 : :
1654 : : /*
1655 : : * Otherwise, if the buffer has been written to disk,
1656 : : * it is safe to remove the checkpoint and drop it.
1657 : : */
1658 : 3 : if (!buffer_dirty(bh)) {
1659 : 0 : __jbd2_journal_remove_checkpoint(jh);
1660 : : spin_unlock(&journal->j_list_lock);
1661 : : goto not_jbd;
1662 : : }
1663 : :
1664 : : /*
1665 : : * The buffer is still not written to disk, we should
1666 : : * attach this buffer to current transaction so that the
1667 : : * buffer can be checkpointed only after the current
1668 : : * transaction commits.
1669 : : */
1670 : : clear_buffer_dirty(bh);
1671 : 3 : __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1672 : : spin_unlock(&journal->j_list_lock);
1673 : : }
1674 : :
1675 : : jbd_unlock_bh_state(bh);
1676 : 3 : __brelse(bh);
1677 : : drop:
1678 : 3 : if (drop_reserve) {
1679 : : /* no need to reserve log space for this block -bzzz */
1680 : 3 : handle->h_buffer_credits++;
1681 : : }
1682 : 3 : return err;
1683 : :
1684 : : not_jbd:
1685 : : jbd_unlock_bh_state(bh);
1686 : 3 : __bforget(bh);
1687 : 3 : goto drop;
1688 : : }
1688 : :
1689 : : /**
1690 : : * int jbd2_journal_stop() - complete a transaction
1691 : : * @handle: transaction to complete.
1692 : : *
1693 : : * All done for a particular handle.
1694 : : *
1695 : : * There is not much action needed here. We just return any remaining
1696 : : * buffer credits to the transaction and remove the handle. The only
1697 : : * complication is that we need to start a commit operation if the
1698 : : * filesystem is marked for synchronous update.
1699 : : *
1700 : : * jbd2_journal_stop itself will not usually return an error, but it may
1701 : : * do so in unusual circumstances. In particular, expect it to
1702 : : * return -EIO if a jbd2_journal_abort has been executed since the
1703 : : * transaction began.
1704 : : */
1705 : 3 : int jbd2_journal_stop(handle_t *handle)
1706 : : {
1707 : 3 : transaction_t *transaction = handle->h_transaction;
1708 : : journal_t *journal;
1709 : : int err = 0, wait_for_commit = 0; /* err: value returned; wait_for_commit: set below when we must block until the commit finishes */
1710 : : tid_t tid; /* copy of t_tid taken before t_updates is dropped */
1711 : : pid_t pid;
1712 : :
1713 : 3 : if (!transaction) {
1714 : : /*
1715 : : * Handle is already detached from the transaction so
1716 : : * there is nothing to do other than decrease a refcount,
1717 : : * or free the handle if refcount drops to zero
1718 : : */
1719 : 0 : if (--handle->h_ref > 0) {
1720 : : jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1721 : : handle->h_ref);
1722 : : return err;
1723 : : } else {
1724 : 0 : if (handle->h_rsv_handle)
1725 : : jbd2_free_handle(handle->h_rsv_handle);
1726 : : goto free_and_exit;
1727 : : }
1728 : : }
1729 : 3 : journal = transaction->t_journal;
1730 : :
1731 : 3 : J_ASSERT(journal_current_handle() == handle);
1732 : :
1733 : 3 : if (is_handle_aborted(handle))
1734 : : err = -EIO; /* aborted handle: report -EIO, but the teardown below still runs */
1735 : : else
1736 : 3 : J_ASSERT(atomic_read(&transaction->t_updates) > 0);
1737 : :
1738 : 3 : if (--handle->h_ref > 0) { /* other references to the handle remain: only the refcount changes */
1739 : : jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1740 : : handle->h_ref);
1741 : : return err;
1742 : : }
1743 : :
1744 : : jbd_debug(4, "Handle %p going down\n", handle);
1745 : 3 : trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
1746 : 3 : transaction->t_tid,
1747 : 3 : handle->h_type, handle->h_line_no,
1748 : 3 : jiffies - handle->h_start_jiffies,
1749 : 3 : handle->h_sync, handle->h_requested_credits,
1750 : 3 : (handle->h_requested_credits -
1751 : 3 : handle->h_buffer_credits));
1752 : :
1753 : : /*
1754 : : * Implement synchronous transaction batching. If the handle
1755 : : * was synchronous, don't force a commit immediately. Let's
1756 : : * yield and let another thread piggyback onto this
1757 : : * transaction. Keep doing that while new threads continue to
1758 : : * arrive. It doesn't cost much - we're about to run a commit
1759 : : * and sleep on IO anyway. Speeds up many-threaded, many-dir
1760 : : * operations by 30x or more...
1761 : : *
1762 : : * We try and optimize the sleep time against what the
1763 : : * underlying disk can do, instead of having a static sleep
1764 : : * time. This is useful for the case where our storage is so
1765 : : * fast that it is more optimal to go ahead and force a flush
1766 : : * and wait for the transaction to be committed than it is to
1767 : : * wait for an arbitrary amount of time for new writers to
1768 : : * join the transaction. We achieve this by measuring how
1769 : : * long it takes to commit a transaction, and compare it with
1770 : : * how long this transaction has been running, and if run time
1771 : : * < commit time then we sleep for the delta and commit. This
1772 : : * greatly helps super fast disks that would see slowdowns as
1773 : : * more threads started doing fsyncs.
1774 : : *
1775 : : * But don't do this if this process was the most recent one
1776 : : * to perform a synchronous write. We do this to detect the
1777 : : * case where a single process is doing a stream of sync
1778 : : * writes. No point in waiting for joiners in that case.
1779 : : *
1780 : : * Setting max_batch_time to 0 disables this completely.
1781 : : */
1782 : 3 : pid = current->pid;
1783 : 3 : if (handle->h_sync && journal->j_last_sync_writer != pid &&
1784 : 0 : journal->j_max_batch_time) {
1785 : : u64 commit_time, trans_time; /* trans_time is in nanoseconds (ktime_to_ns); commit_time is compared against it */
1786 : :
1787 : 0 : journal->j_last_sync_writer = pid; /* a following sync write from this same pid fails the test above and skips the sleep */
1788 : :
1789 : 0 : read_lock(&journal->j_state_lock);
1790 : 0 : commit_time = journal->j_average_commit_time;
1791 : : read_unlock(&journal->j_state_lock);
1792 : :
1793 : 0 : trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1794 : : transaction->t_start_time));
1795 : :
1796 : 0 : commit_time = max_t(u64, commit_time,
1797 : : 1000*journal->j_min_batch_time); /* lower clamp; the 1000x factor is presumably usec -> nsec - TODO confirm units of j_min_batch_time */
1798 : 0 : commit_time = min_t(u64, commit_time,
1799 : : 1000*journal->j_max_batch_time); /* upper clamp, applied last so it wins if min > max */
1800 : :
1801 : 0 : if (trans_time < commit_time) {
1802 : 0 : ktime_t expires = ktime_add_ns(ktime_get(),
1803 : : commit_time); /* NOTE(review): deadline is now + commit_time, not now + (commit_time - trans_time) as "sleep for the delta" above suggests */
1804 : 0 : set_current_state(TASK_UNINTERRUPTIBLE);
1805 : 0 : schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1806 : : }
1807 : : }
1808 : :
1809 : 3 : if (handle->h_sync)
1810 : 0 : transaction->t_synchronous_commit = 1;
1811 : 3 : current->journal_info = NULL;
1812 : 3 : atomic_sub(handle->h_buffer_credits,
1813 : : &transaction->t_outstanding_credits); /* hand the handle's remaining credits back to the transaction */
1814 : :
1815 : : /*
1816 : : * If the handle is marked SYNC, we need to set another commit
1817 : : * going! We also want to force a commit if the current
1818 : : * transaction is occupying too much of the log, or if the
1819 : : * transaction is too old now.
1820 : : */
1821 : 3 : if (handle->h_sync ||
1822 : 3 : (atomic_read(&transaction->t_outstanding_credits) >
1823 : 3 : journal->j_max_transaction_buffers) ||
1824 : 3 : time_after_eq(jiffies, transaction->t_expires)) {
1825 : : /* Do this even for aborted journals: an abort still
1826 : : * completes the commit thread, it just doesn't write
1827 : : * anything to disk. */
1828 : :
1829 : : jbd_debug(2, "transaction too old, requesting commit for "
1830 : : "handle %p\n", handle);
1831 : : /* This is non-blocking */
1832 : 3 : jbd2_log_start_commit(journal, transaction->t_tid);
1833 : :
1834 : : /*
1835 : : * Special case: JBD2_SYNC synchronous updates require us
1836 : : * to wait for the commit to complete.
1837 : : */
1838 : 3 : if (handle->h_sync && !(current->flags & PF_MEMALLOC)) /* never wait when the task has PF_MEMALLOC set */
1839 : : wait_for_commit = 1;
1840 : : }
1841 : :
1842 : : /*
1843 : : * Once we drop t_updates, if it goes to zero the transaction
1844 : : * could start committing on us and eventually disappear. So
1845 : : * once we do this, we must not dereference transaction
1846 : : * pointer again.
1847 : : */
1848 : 3 : tid = transaction->t_tid; /* saved now because the wait below must not touch *transaction */
1849 : 3 : if (atomic_dec_and_test(&transaction->t_updates)) {
1850 : 3 : wake_up(&journal->j_wait_updates); /* we were the last updater */
1851 : 3 : if (journal->j_barrier_count)
1852 : 0 : wake_up(&journal->j_wait_transaction_locked);
1853 : : }
1854 : :
1855 : : rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
1856 : :
1857 : 3 : if (wait_for_commit)
1858 : 0 : err = jbd2_log_wait_commit(journal, tid); /* NOTE(review): replaces err, including a -EIO set earlier for an aborted handle */
1859 : :
1860 : 3 : if (handle->h_rsv_handle)
1861 : 0 : jbd2_journal_free_reserved(handle->h_rsv_handle);
1862 : : free_and_exit:
1863 : : /*
1864 : : * Scope of the GFP_NOFS context is over here and so we can restore the
1865 : : * original alloc context.
1866 : : */
1867 : 3 : memalloc_nofs_restore(handle->saved_alloc_context);
1868 : : jbd2_free_handle(handle); /* handle must not be used after this point */
1869 : 3 : return err;
1870 : : }
1871 : :
1872 : : /*
1873 : : *
1874 : : * List management code snippets: various functions for manipulating the
1875 : : * transaction buffer lists.
1876 : : *
1877 : : */
1878 : :
1879 : : /*
1880 : : * Append a buffer to a transaction list, given the transaction's list head
1881 : : * pointer.
1882 : : *
1883 : : * j_list_lock is held.
1884 : : *
1885 : : * jbd_lock_bh_state(jh2bh(jh)) is held.
1886 : : */
1887 : :
1888 : : static inline void
1889 : : __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1890 : : {
1891 : 3 : if (!*list) { /* empty list: jh becomes the only element */
1892 : 3 : jh->b_tnext = jh->b_tprev = jh; /* circular list of one: jh links to itself both ways */
1893 : 3 : *list = jh;
1894 : : } else {
1895 : : /* Insert at the tail of the list to preserve order */
1896 : 3 : struct journal_head *first = *list, *last = first->b_tprev; /* the list is circular, so the head's b_tprev is the tail */
1897 : 3 : jh->b_tprev = last;
1898 : 3 : jh->b_tnext = first;
1899 : 3 : last->b_tnext = first->b_tprev = jh; /* splice jh between old tail and head; *list itself is not changed */
1900 : : }
1901 : : }
1902 : :
1903 : : /*
1904 : : * Remove a buffer from a transaction list, given the transaction's list
1905 : : * head pointer.
1906 : : *
1907 : : * Called with j_list_lock held, and the journal may not be locked.
1908 : : *
1909 : : * jbd_lock_bh_state(jh2bh(jh)) is held.
1910 : : */
1911 : :
1912 : : static inline void
1913 : : __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1914 : : {
1915 : 3 : if (*list == jh) { /* jh is the current head: the head pointer must move */
1916 : 3 : *list = jh->b_tnext;
1917 : 3 : if (*list == jh) /* jh pointed at itself, i.e. it was the only element */
1918 : 3 : *list = NULL;
1919 : : }
1920 : 3 : jh->b_tprev->b_tnext = jh->b_tnext; /* bypass jh in the forward direction */
1921 : 3 : jh->b_tnext->b_tprev = jh->b_tprev; /* ... and backward; jh's own b_tnext/b_tprev are left untouched */
1922 : : }
1923 : :
1924 : : /*
1925 : : * Remove a buffer from the appropriate transaction list.
1926 : : *
1927 : : * Note that this function can *change* the value of
1928 : : * jh->b_transaction->t_buffers, t_forget, t_shadow_list or
1929 : : * t_reserved_list. If the caller is holding onto a copy of one of these
1930 : : * pointers, it could go bad. Generally the caller needs to re-read the
1931 : : * pointer from the transaction_t.
1932 : : *
1933 : : * Called under j_list_lock.
1934 : : */
1935 : 3 : static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1936 : : {
1937 : : struct journal_head **list = NULL; /* head pointer of the list jh is currently on; chosen by the switch below */
1938 : : transaction_t *transaction;
1939 : : struct buffer_head *bh = jh2bh(jh);
1940 : :
1941 : 3 : J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
1942 : 3 : transaction = jh->b_transaction;
1943 : 3 : if (transaction)
1944 : 3 : assert_spin_locked(&transaction->t_journal->j_list_lock);
1945 : :
1946 : 3 : J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1947 : 3 : if (jh->b_jlist != BJ_None)
1948 : 3 : J_ASSERT_JH(jh, transaction != NULL); /* being on a list requires an owning transaction */
1949 : :
1950 : 3 : switch (jh->b_jlist) { /* NOTE(review): no default case; list stays NULL for any b_jlist value not listed here */
1951 : : case BJ_None:
1952 : 3 : return; /* not on any list: nothing to unlink, dirty bits are not touched */
1953 : : case BJ_Metadata:
1954 : 3 : transaction->t_nr_buffers--; /* only the metadata list keeps a buffer count */
1955 : 3 : J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
1956 : 3 : list = &transaction->t_buffers;
1957 : 3 : break;
1958 : : case BJ_Forget:
1959 : 3 : list = &transaction->t_forget;
1960 : 3 : break;
1961 : : case BJ_Shadow:
1962 : 3 : list = &transaction->t_shadow_list;
1963 : 3 : break;
1964 : : case BJ_Reserved:
1965 : 3 : list = &transaction->t_reserved_list;
1966 : 3 : break;
1967 : : }
1968 : :
1969 : : __blist_del_buffer(list, jh);
1970 : 3 : jh->b_jlist = BJ_None; /* b_transaction is deliberately left set; only the list membership is cleared */
1971 : 3 : if (transaction && is_journal_aborted(transaction->t_journal))
1972 : : clear_buffer_jbddirty(bh); /* aborted journal: drop jbddirty without marking the buffer dirty */
1973 : 3 : else if (test_clear_buffer_jbddirty(bh))
1974 : 3 : mark_buffer_dirty(bh); /* Expose it to the VM */
1975 : : }
1976 : :
1977 : : /*
1978 : : * Remove buffer from all transactions.
1979 : : *
1980 : : * Called with bh_state lock and j_list_lock
1981 : : *
1982 : : * jh and bh may be already freed when this function returns.
1983 : : */
1984 : : static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
1985 : : {
1986 : 3 : __jbd2_journal_temp_unlink_buffer(jh); /* take jh off whatever transaction list it is on */
1987 : 3 : jh->b_transaction = NULL; /* the buffer no longer belongs to a transaction */
1988 : 3 : jbd2_journal_put_journal_head(jh); /* drop a reference on jh; do not touch jh after this (see header: jh and bh may be freed) */
1989 : : }
1990 : :
1991 : 0 : void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1992 : : { /* Locking wrapper: takes the bh_state lock and journal->j_list_lock, then calls __jbd2_journal_unfile_buffer(jh). */
1993 : : struct buffer_head *bh = jh2bh(jh); /* looked up first: jh may be gone once it has been unfiled */
1994 : :
1995 : : /* Get reference so that buffer cannot be freed before we unlock it */
1996 : : get_bh(bh);
1997 : : jbd_lock_bh_state(bh);
1998 : : spin_lock(&journal->j_list_lock);
1999 : : __jbd2_journal_unfile_buffer(jh);
2000 : : spin_unlock(&journal->j_list_lock);
2001 : : jbd_unlock_bh_state(bh); /* locks are released in the reverse order they were taken */
2002 : 0 : __brelse(bh); /* drops the reference taken by get_bh() above */
2003 : 0 : }
2004 : :
2005 : : /*
2006 : : * Called from jbd2_journal_try_to_free_buffers().
2007 : : *
2008 : : * Called under jbd_lock_bh_state(bh)
2009 : : */
2010 : : static void
2011 : 0 : __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
2012 : : {
2013 : : struct journal_head *jh;
2014 : :
2015 : : jh = bh2jh(bh);
2016 : :
2017 : 0 : if (buffer_locked(bh) || buffer_dirty(bh)) /* locked or dirty buffers are left as they are */
2018 : : goto out;
2019 : :
2020 : 0 : if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) /* still attached to a transaction: leave it alone */
2021 : : goto out;
2022 : :
2023 : : spin_lock(&journal->j_list_lock);
2024 : 0 : if (jh->b_cp_transaction != NULL) { /* the only remaining link is a checkpoint list entry */
2025 : : /* written-back checkpointed metadata buffer */
2026 : : JBUFFER_TRACE(jh, "remove from checkpoint list");
2027 : 0 : __jbd2_journal_remove_checkpoint(jh);
2028 : : }
2029 : : spin_unlock(&journal->j_list_lock);
2030 : : out:
2031 : 0 : return; /* no status is returned; the caller re-checks buffer_jbd(bh) itself */
2032 : : }
2033 : :
2034 : : /**
2035 : : * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
2036 : : * @journal: journal for operation
2037 : : * @page: to try and free
2038 : : * @gfp_mask: we use the mask to detect how hard should we try to release
2039 : : * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
2040 : : * code to release the buffers.
2041 : : *
2042 : : *
2043 : : * For all the buffers on this page,
2044 : : * if they are fully written out ordered data, move them onto BUF_CLEAN
2045 : : * so try_to_free_buffers() can reap them.
2046 : : *
2047 : : * This function returns non-zero if we wish try_to_free_buffers()
2048 : : * to be called. We do this if the page is releasable by try_to_free_buffers().
2049 : : * We also do it if the page has locked or dirty buffers and the caller wants
2050 : : * us to perform sync or async writeout.
2051 : : *
2052 : : * This complicates JBD locking somewhat. We aren't protected by the
2053 : : * BKL here. We wish to remove the buffer from its committing or
2054 : : * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
2055 : : *
2056 : : * This may *change* the value of transaction_t->t_datalist, so anyone
2057 : : * who looks at t_datalist needs to lock against this function.
2058 : : *
2059 : : * Even worse, someone may be doing a jbd2_journal_dirty_data on this
2060 : : * buffer. So we need to lock against that. jbd2_journal_dirty_data()
2061 : : * will come out of the lock with the buffer dirty, which makes it
2062 : : * ineligible for release here.
2063 : : *
2064 : : * Who else is affected by this? hmm... Really the only contender
2065 : : * is do_get_write_access() - it could be looking at the buffer while
2066 : : * journal_try_to_free_buffer() is changing its state. But that
2067 : : * cannot happen because we never reallocate freed data as metadata
2068 : : * while the data is part of a transaction. Yes?
2069 : : *
2070 : : * Return 0 on failure, 1 on success
2071 : : */
2072 : 3 : int jbd2_journal_try_to_free_buffers(journal_t *journal,
2073 : : struct page *page, gfp_t gfp_mask)
2074 : : { /* NOTE(review): gfp_mask is not referenced anywhere in this body, despite the @gfp_mask description above */
2075 : : struct buffer_head *head;
2076 : : struct buffer_head *bh;
2077 : : int ret = 0; /* stays 0 if any buffer is found busy */
2078 : :
2079 : 3 : J_ASSERT(PageLocked(page));
2080 : :
2081 : 3 : head = page_buffers(page);
2082 : : bh = head;
2083 : : do {
2084 : : struct journal_head *jh;
2085 : :
2086 : : /*
2087 : : * We take our own ref against the journal_head here to avoid
2088 : : * having to add tons of locking around each instance of
2089 : : * jbd2_journal_put_journal_head().
2090 : : */
2091 : 3 : jh = jbd2_journal_grab_journal_head(bh);
2092 : 3 : if (!jh)
2093 : 3 : continue; /* no journal_head on this buffer; continue still evaluates the loop condition, so bh advances */
2094 : :
2095 : : jbd_lock_bh_state(bh);
2096 : 0 : __journal_try_to_free_buffer(journal, bh);
2097 : 0 : jbd2_journal_put_journal_head(jh); /* drop the reference grabbed above */
2098 : : jbd_unlock_bh_state(bh);
2099 : 0 : if (buffer_jbd(bh)) /* buffer is still marked as a jbd buffer after the attempt: give up with ret == 0 */
2100 : : goto busy;
2101 : 3 : } while ((bh = bh->b_this_page) != head); /* walk the page's circular buffer list once */
2102 : :
2103 : 3 : ret = try_to_free_buffers(page); /* only reached when no buffer on the page was busy */
2104 : :
2105 : : busy:
2106 : 3 : return ret;
2107 : : }
2108 : :
2109 : : /*
2110 : : * This buffer is no longer needed. If it is on an older transaction's
2111 : : * checkpoint list we need to record it on this transaction's forget list
2112 : : * to pin this buffer (and hence its checkpointing transaction) down until
2113 : : * this transaction commits. If the buffer isn't on a checkpoint list, we
2114 : : * release it.
2115 : : * Returns non-zero if JBD no longer has an interest in the buffer.
2116 : : *
2117 : : * Called under j_list_lock.
2118 : : *
2119 : : * Called under jbd_lock_bh_state(bh).
2120 : : */
2121 : 0 : static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
2122 : : {
2123 : : int may_free = 1; /* returned as-is unless the buffer has to stay pinned on BJ_Forget */
2124 : : struct buffer_head *bh = jh2bh(jh);
2125 : :
2126 : 0 : if (jh->b_cp_transaction) {
2127 : : JBUFFER_TRACE(jh, "on running+cp transaction");
2128 : 0 : __jbd2_journal_temp_unlink_buffer(jh);
2129 : : /*
2130 : : * We don't want to write the buffer anymore, clear the
2131 : : * bit so that we don't confuse checks in
2132 : : * __jbd2_journal_file_buffer
2133 : : */
2134 : : clear_buffer_dirty(bh);
2135 : 0 : __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); /* keep the buffer on the given transaction's forget list */
2136 : : may_free = 0; /* JBD still has an interest in the buffer */
2137 : : } else {
2138 : : JBUFFER_TRACE(jh, "on running transaction");
2139 : : __jbd2_journal_unfile_buffer(jh); /* no checkpoint link: detach from the transaction entirely */
2140 : : }
2141 : 0 : return may_free;
2142 : : }
2143 : :
2144 : : /*
2145 : : * jbd2_journal_invalidatepage
2146 : : *
2147 : : * This code is tricky. It has a number of cases to deal with.
2148 : : *
2149 : : * There are two invariants which this code relies on:
2150 : : *
2151 : : * i_size must be updated on disk before we start calling invalidatepage on the
2152 : : * data.
2153 : : *
2154 : : * This is done in ext3 by defining an ext3_setattr method which
2155 : : * updates i_size before truncate gets going. By maintaining this
2156 : : * invariant, we can be sure that it is safe to throw away any buffers
2157 : : * attached to the current transaction: once the transaction commits,
2158 : : * we know that the data will not be needed.
2159 : : *
2160 : : * Note however that we can *not* throw away data belonging to the
2161 : : * previous, committing transaction!
2162 : : *
2163 : : * Any disk blocks which *are* part of the previous, committing
2164 : : * transaction (and which therefore cannot be discarded immediately) are
2165 : : * not going to be reused in the new running transaction
2166 : : *
2167 : : * The bitmap committed_data images guarantee this: any block which is
2168 : : * allocated in one transaction and removed in the next will be marked
2169 : : * as in-use in the committed_data bitmap, so cannot be reused until
2170 : : * the next transaction to delete the block commits. This means that
2171 : : * leaving committing buffers dirty is quite safe: the disk blocks
2172 : : * cannot be reallocated to a different file and so buffer aliasing is
2173 : : * not possible.
2174 : : *
2175 : : *
2176 : : * The above applies mainly to ordered data mode. In writeback mode we
2177 : : * don't make guarantees about the order in which data hits disk --- in
2178 : : * particular we don't guarantee that new dirty data is flushed before
2179 : : * transaction commit --- so it is always safe just to discard data
2180 : : * immediately in that mode. --sct
2181 : : */
2182 : :
2183 : : /*
2184 : : * The journal_unmap_buffer helper function returns zero if the buffer
2185 : : * concerned remains pinned as an anonymous buffer belonging to an older
2186 : : * transaction.
2187 : : *
2188 : : * We're outside-transaction here. Either or both of j_running_transaction
2189 : : * and j_committing_transaction may be NULL.
2190 : : */
2191 : 0 : static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
2192 : : int partial_page)
2193 : : { /* Returns -EBUSY (committing buffer on a partial page), 0 (buffer left with the committing transaction), or may_free. */
2194 : : transaction_t *transaction;
2195 : : struct journal_head *jh;
2196 : : int may_free = 1; /* stays 1 unless __dispose_buffer() reports the buffer must remain pinned */
2197 : :
2198 : : BUFFER_TRACE(bh, "entry");
2199 : :
2200 : : /*
2201 : : * It is safe to proceed here without the j_list_lock because the
2202 : : * buffers cannot be stolen by try_to_free_buffers as long as we are
2203 : : * holding the page lock. --sct
2204 : : */
2205 : :
2206 : 0 : if (!buffer_jbd(bh))
2207 : : goto zap_buffer_unlocked; /* not a jbd buffer: just clear its state bits, no locks needed */
2208 : :
2209 : : /* OK, we have data buffer in journaled mode */
2210 : 0 : write_lock(&journal->j_state_lock); /* lock order: j_state_lock, then bh_state, then j_list_lock; every exit path unlocks in reverse */
2211 : : jbd_lock_bh_state(bh);
2212 : : spin_lock(&journal->j_list_lock);
2213 : :
2214 : 0 : jh = jbd2_journal_grab_journal_head(bh);
2215 : 0 : if (!jh)
2216 : : goto zap_buffer_no_jh; /* journal_head vanished: unlock and zap, skipping the jh accesses at zap_buffer */
2217 : :
2218 : : /*
2219 : : * We cannot remove the buffer from checkpoint lists until the
2220 : : * transaction adding inode to orphan list (let's call it T)
2221 : : * is committed. Otherwise if the transaction changing the
2222 : : * buffer would be cleaned from the journal before T is
2223 : : * committed, a crash will cause that the correct contents of
2224 : : * the buffer will be lost. On the other hand we have to
2225 : : * clear the buffer dirty bit at latest at the moment when the
2226 : : * transaction marking the buffer as freed in the filesystem
2227 : : * structures is committed because from that moment on the
2228 : : * block can be reallocated and used by a different page.
2229 : : * Since the block hasn't been freed yet but the inode has
2230 : : * already been added to orphan list, it is safe for us to add
2231 : : * the buffer to BJ_Forget list of the newest transaction.
2232 : : *
2233 : : * Also we have to clear buffer_mapped flag of a truncated buffer
2234 : : * because the buffer_head may be attached to the page straddling
2235 : : * i_size (can happen only when blocksize < pagesize) and thus the
2236 : : * buffer_head can be reused when the file is extended again. So we end
2237 : : * up keeping around invalidated buffers attached to transactions'
2238 : : * BJ_Forget list just to stop checkpointing code from cleaning up
2239 : : * the transaction this buffer was modified in.
2240 : : */
2241 : 0 : transaction = jh->b_transaction;
2242 : 0 : if (transaction == NULL) {
2243 : : /* First case: not on any transaction. If it
2244 : : * has no checkpoint link, then we can zap it:
2245 : : * it's a writeback-mode buffer so we don't care
2246 : : * if it hits disk safely. */
2247 : 0 : if (!jh->b_cp_transaction) {
2248 : : JBUFFER_TRACE(jh, "not on any transaction: zap");
2249 : : goto zap_buffer;
2250 : : }
2251 : :
2252 : 0 : if (!buffer_dirty(bh)) {
2253 : : /* bdflush has written it. We can drop it now */
2254 : 0 : __jbd2_journal_remove_checkpoint(jh);
2255 : 0 : goto zap_buffer;
2256 : : }
2257 : :
2258 : : /* OK, it must be in the journal but still not
2259 : : * written fully to disk: it's metadata or
2260 : : * journaled data... */
2261 : :
2262 : 0 : if (journal->j_running_transaction) {
2263 : : /* ... and once the current transaction has
2264 : : * committed, the buffer won't be needed any
2265 : : * longer. */
2266 : : JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
2267 : 0 : may_free = __dispose_buffer(jh,
2268 : : journal->j_running_transaction);
2269 : 0 : goto zap_buffer;
2270 : : } else {
2271 : : /* There is no currently-running transaction. So the
2272 : : * orphan record which we wrote for this file must have
2273 : : * passed into commit. We must attach this buffer to
2274 : : * the committing transaction, if it exists. */
2275 : 0 : if (journal->j_committing_transaction) {
2276 : : JBUFFER_TRACE(jh, "give to committing trans");
2277 : 0 : may_free = __dispose_buffer(jh,
2278 : : journal->j_committing_transaction);
2279 : 0 : goto zap_buffer;
2280 : : } else {
2281 : : /* The orphan record's transaction has
2282 : : * committed. We can cleanse this buffer */
2283 : : clear_buffer_jbddirty(bh);
2284 : 0 : __jbd2_journal_remove_checkpoint(jh);
2285 : 0 : goto zap_buffer;
2286 : : }
2287 : : }
2288 : 0 : } else if (transaction == journal->j_committing_transaction) {
2289 : : JBUFFER_TRACE(jh, "on committing transaction");
2290 : : /*
2291 : : * The buffer is committing, we simply cannot touch
2292 : : * it. If the page is straddling i_size we have to wait
2293 : : * for commit and try again.
2294 : : */
2295 : 0 : if (partial_page) {
2296 : 0 : jbd2_journal_put_journal_head(jh);
2297 : : spin_unlock(&journal->j_list_lock);
2298 : : jbd_unlock_bh_state(bh);
2299 : : write_unlock(&journal->j_state_lock);
2300 : 0 : return -EBUSY; /* buffer state is left untouched on this path */
2301 : : }
2302 : : /*
2303 : : * OK, buffer won't be reachable after truncate. We just clear
2304 : : * b_modified to not confuse transaction credit accounting, and
2305 : : * set b_next_transaction to the running transaction (if there
2306 : : * is one) and mark buffer as freed so that commit code knows
2307 : : * it should clear dirty bits when it is done with the buffer.
2308 : : */
2309 : : set_buffer_freed(bh);
2310 : 0 : if (journal->j_running_transaction && buffer_jbddirty(bh)) /* b_next_transaction is only set when the buffer is also jbddirty */
2311 : 0 : jh->b_next_transaction = journal->j_running_transaction;
2312 : 0 : jh->b_modified = 0;
2313 : 0 : jbd2_journal_put_journal_head(jh);
2314 : : spin_unlock(&journal->j_list_lock);
2315 : : jbd_unlock_bh_state(bh);
2316 : : write_unlock(&journal->j_state_lock);
2317 : 0 : return 0; /* the buffer bits cleared at zap_buffer_unlocked are NOT cleared on this path */
2318 : : } else {
2319 : : /* Good, the buffer belongs to the running transaction.
2320 : : * We are writing our own transaction's data, not any
2321 : : * previous one's, so it is safe to throw it away
2322 : : * (remember that we expect the filesystem to have set
2323 : : * i_size already for this truncate so recovery will not
2324 : : * expose the disk blocks we are discarding here.) */
2325 : 0 : J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
2326 : : JBUFFER_TRACE(jh, "on running transaction");
2327 : 0 : may_free = __dispose_buffer(jh, transaction);
2328 : : }
2329 : :
2330 : : zap_buffer:
2331 : : /*
2332 : : * This is tricky. Although the buffer is truncated, it may be reused
2333 : : * if blocksize < pagesize and it is attached to the page straddling
2334 : : * EOF. Since the buffer might have been added to BJ_Forget list of the
2335 : : * running transaction, journal_get_write_access() won't clear
2336 : : * b_modified and credit accounting gets confused. So clear b_modified
2337 : : * here.
2338 : : */
2339 : 0 : jh->b_modified = 0;
2340 : 0 : jbd2_journal_put_journal_head(jh); /* drop the reference grabbed after taking the locks */
2341 : : zap_buffer_no_jh:
2342 : : spin_unlock(&journal->j_list_lock);
2343 : : jbd_unlock_bh_state(bh);
2344 : : write_unlock(&journal->j_state_lock);
2345 : : zap_buffer_unlocked:
2346 : : clear_buffer_dirty(bh);
2347 : 0 : J_ASSERT_BH(bh, !buffer_jbddirty(bh));
2348 : : clear_buffer_mapped(bh);
2349 : : clear_buffer_req(bh);
2350 : : clear_buffer_new(bh);
2351 : : clear_buffer_delay(bh);
2352 : : clear_buffer_unwritten(bh);
2353 : 0 : bh->b_bdev = NULL; /* the buffer_head no longer refers to a block device */
2354 : 0 : return may_free;
2355 : : }
2356 : :
2357 : : /**
2358 : : * int jbd2_journal_invalidatepage()
2359 : : * @journal: journal to use for flush...
2360 : : * @page: page to flush
2361 : : * @offset: start of the range to invalidate
2362 : : * @length: length of the range to invalidate
2363 : : *
2364 : : * Reap page buffers containing data in the specified range in page.
2365 : : * Can return -EBUSY if buffers are part of the committing transaction and
2366 : : * the page is straddling i_size. Caller then has to wait for current commit
2367 : : * and try again.
2368 : : */
2369 : 0 : int jbd2_journal_invalidatepage(journal_t *journal,
2370 : : struct page *page,
2371 : : unsigned int offset,
2372 : : unsigned int length)
2373 : : {
2374 : : struct buffer_head *head, *bh, *next;
2375 : 0 : unsigned int stop = offset + length; /* end (exclusive) of the range, as a byte offset within the page */
2376 : : unsigned int curr_off = 0; /* byte offset of bh within the page, advanced by b_size per buffer */
2377 : 0 : int partial_page = (offset || length < PAGE_SIZE); /* true unless the range covers the page from offset 0 for a full PAGE_SIZE */
2378 : : int may_free = 1; /* ANDed with each journal_unmap_buffer() result */
2379 : : int ret = 0;
2380 : :
2381 : 0 : if (!PageLocked(page))
2382 : 0 : BUG();
2383 : 0 : if (!page_has_buffers(page))
2384 : : return 0;
2385 : :
2386 : 0 : BUG_ON(stop > PAGE_SIZE || stop < length); /* stop < length catches unsigned wrap-around of offset + length */
2387 : :
2388 : : /* We will potentially be playing with lists other than just the
2389 : : * data lists (especially for journaled data mode), so be
2390 : : * cautious in our locking. */
2391 : :
2392 : 0 : head = bh = page_buffers(page);
2393 : : do {
2394 : 0 : unsigned int next_off = curr_off + bh->b_size;
2395 : 0 : next = bh->b_this_page; /* fetched before bh is unmapped */
2396 : :
2397 : 0 : if (next_off > stop) /* this buffer extends past the end of the range: stop here without the freeing step below */
2398 : : return 0;
2399 : :
2400 : 0 : if (offset <= curr_off) {
2401 : : /* This block is wholly outside the truncation point */
2402 : 0 : lock_buffer(bh);
2403 : 0 : ret = journal_unmap_buffer(journal, bh, partial_page);
2404 : 0 : unlock_buffer(bh);
2405 : 0 : if (ret < 0) /* -EBUSY from journal_unmap_buffer(): propagate; earlier buffers stay unmapped */
2406 : 0 : return ret;
2407 : 0 : may_free &= ret;
2408 : : }
2409 : : curr_off = next_off;
2410 : : bh = next;
2411 : :
2412 : 0 : } while (bh != head);
2413 : :
2414 : 0 : if (!partial_page) { /* buffers are only freed when the whole page was invalidated */
2415 : 0 : if (may_free && try_to_free_buffers(page))
2416 : 0 : J_ASSERT(!page_has_buffers(page));
2417 : : }
2418 : : return 0;
2419 : : }
2420 : :
2421 : : /*
2422 : : * File a buffer on the given transaction list.
2423 : : */
2424 : 3 : void __jbd2_journal_file_buffer(struct journal_head *jh,
2425 : : transaction_t *transaction, int jlist)
2426 : : { /* Asserts below require the caller to hold the bh_state lock and the journal's j_list_lock. */
2427 : : struct journal_head **list = NULL; /* head pointer of the destination list, chosen by the switch below */
2428 : : int was_dirty = 0; /* set if a dirty or jbddirty bit was cleared; restored as jbddirty at the end */
2429 : : struct buffer_head *bh = jh2bh(jh);
2430 : :
2431 : 3 : J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2432 : 3 : assert_spin_locked(&transaction->t_journal->j_list_lock);
2433 : :
2434 : 3 : J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
2435 : 3 : J_ASSERT_JH(jh, jh->b_transaction == transaction ||
2436 : : jh->b_transaction == NULL);
2437 : :
2438 : 3 : if (jh->b_transaction && jh->b_jlist == jlist) /* already filed on the requested list: nothing to do */
2439 : : return;
2440 : :
2441 : 3 : if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
2442 : 3 : jlist == BJ_Shadow || jlist == BJ_Forget) {
2443 : : /*
2444 : : * For metadata buffers, we track dirty bit in buffer_jbddirty
2445 : : * instead of buffer_dirty. We should not see a dirty bit set
2446 : : * here because we clear it in do_get_write_access but e.g.
2447 : : * tune2fs can modify the sb and set the dirty bit at any time
2448 : : * so we try to gracefully handle that.
2449 : : */
2450 : 3 : if (buffer_dirty(bh))
2451 : : warn_dirty_buffer(bh);
2452 : 3 : if (test_clear_buffer_dirty(bh) ||
2453 : : test_clear_buffer_jbddirty(bh)) /* short-circuit: jbddirty is only tested (and cleared) when dirty was not set */
2454 : : was_dirty = 1;
2455 : : }
2456 : :
2457 : 3 : if (jh->b_transaction)
2458 : 3 : __jbd2_journal_temp_unlink_buffer(jh); /* moving between lists of the same transaction */
2459 : : else
2460 : 3 : jbd2_journal_grab_journal_head(bh); /* newly attached to a transaction: take a journal_head reference */
2461 : 3 : jh->b_transaction = transaction;
2462 : :
2463 : 3 : switch (jlist) { /* NOTE(review): no default case; list stays NULL for any jlist value not listed here */
2464 : : case BJ_None:
2465 : 0 : J_ASSERT_JH(jh, !jh->b_committed_data);
2466 : 0 : J_ASSERT_JH(jh, !jh->b_frozen_data);
2467 : : return; /* owned by the transaction but on no list; b_jlist is not written on this path */
2468 : : case BJ_Metadata:
2469 : 3 : transaction->t_nr_buffers++; /* only the metadata list keeps a buffer count */
2470 : 3 : list = &transaction->t_buffers;
2471 : 3 : break;
2472 : : case BJ_Forget:
2473 : 3 : list = &transaction->t_forget;
2474 : 3 : break;
2475 : : case BJ_Shadow:
2476 : 3 : list = &transaction->t_shadow_list;
2477 : 3 : break;
2478 : : case BJ_Reserved:
2479 : 3 : list = &transaction->t_reserved_list;
2480 : 3 : break;
2481 : : }
2482 : :
2483 : : __blist_add_buffer(list, jh);
2484 : 3 : jh->b_jlist = jlist;
2485 : :
2486 : 3 : if (was_dirty)
2487 : : set_buffer_jbddirty(bh); /* carry the dirtiness forward as jbddirty, not as the plain dirty bit */
2488 : : }
2489 : :
2490 : 3 : void jbd2_journal_file_buffer(struct journal_head *jh,
2491 : : transaction_t *transaction, int jlist)
2492 : : { /* Locking wrapper: takes the bh_state lock and the journal's j_list_lock around __jbd2_journal_file_buffer(). */
2493 : : jbd_lock_bh_state(jh2bh(jh));
2494 : 3 : spin_lock(&transaction->t_journal->j_list_lock);
2495 : 3 : __jbd2_journal_file_buffer(jh, transaction, jlist);
2496 : 3 : spin_unlock(&transaction->t_journal->j_list_lock);
2497 : : jbd_unlock_bh_state(jh2bh(jh)); /* released in the reverse order the locks were taken */
2498 : 3 : }
2499 : :
2500 : : /*
2501 : : * Remove a buffer from its current buffer list in preparation for
2502 : : * dropping it from its current transaction entirely. If the buffer has
2503 : : * already started to be used by a subsequent transaction, refile the
2504 : : * buffer on that transaction's metadata list.
2505 : : *
2506 : : * Called under j_list_lock
2507 : : * Called under jbd_lock_bh_state(jh2bh(jh))
2508 : : *
2509 : : * jh and bh may be already free when this function returns
2510 : : */
2511 : 3 : void __jbd2_journal_refile_buffer(struct journal_head *jh)
2512 : : {
2513 : : int was_dirty, jlist;
2514 : : struct buffer_head *bh = jh2bh(jh);
2515 : :
2516 : 3 : J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2517 : 3 : if (jh->b_transaction)
2518 : 3 : assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
2519 : :
2520 : : /* If the buffer is now unused, just drop it. */
2521 : 3 : if (jh->b_next_transaction == NULL) {
2522 : : __jbd2_journal_unfile_buffer(jh);
2523 : 3 : return;
2524 : : }
2525 : :
2526 : : /*
2527 : : * It has been modified by a later transaction: add it to the new
2528 : : * transaction's metadata list.
2529 : : */
2530 : :
2531 : : was_dirty = test_clear_buffer_jbddirty(bh);
2532 : 3 : __jbd2_journal_temp_unlink_buffer(jh);
2533 : : /*
2534 : : * We set b_transaction here because b_next_transaction will inherit
2535 : : * our jh reference and thus __jbd2_journal_file_buffer() must not
2536 : : * take a new one.
2537 : : */
2538 : 3 : WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
2539 : : WRITE_ONCE(jh->b_next_transaction, NULL);
2540 : 3 : if (buffer_freed(bh))
2541 : : jlist = BJ_Forget;
2542 : 3 : else if (jh->b_modified)
2543 : : jlist = BJ_Metadata;
2544 : : else
2545 : : jlist = BJ_Reserved;
2546 : 3 : __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
2547 : 3 : J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2548 : :
2549 : 3 : if (was_dirty)
2550 : : set_buffer_jbddirty(bh);
2551 : : }
2552 : :
2553 : : /*
2554 : : * __jbd2_journal_refile_buffer() with necessary locking added. We take our
2555 : : * bh reference so that we can safely unlock bh.
2556 : : *
2557 : : * The jh and bh may be freed by this call.
2558 : : */
2559 : 0 : void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2560 : : {
2561 : : struct buffer_head *bh = jh2bh(jh);
2562 : :
2563 : : /* Get reference so that buffer cannot be freed before we unlock it */
2564 : : get_bh(bh);
2565 : : jbd_lock_bh_state(bh);
2566 : : spin_lock(&journal->j_list_lock);
2567 : 0 : __jbd2_journal_refile_buffer(jh);
2568 : : jbd_unlock_bh_state(bh);
2569 : : spin_unlock(&journal->j_list_lock);
2570 : 0 : __brelse(bh);
2571 : 0 : }
2572 : :
2573 : : /*
2574 : : * File inode in the inode list of the handle's transaction
2575 : : */
2576 : 3 : static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
2577 : : unsigned long flags, loff_t start_byte, loff_t end_byte)
2578 : : {
2579 : 3 : transaction_t *transaction = handle->h_transaction;
2580 : : journal_t *journal;
2581 : :
2582 : 3 : if (is_handle_aborted(handle))
2583 : : return -EROFS;
2584 : 3 : journal = transaction->t_journal;
2585 : :
2586 : : jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
2587 : : transaction->t_tid);
2588 : :
2589 : : spin_lock(&journal->j_list_lock);
2590 : 3 : jinode->i_flags |= flags;
2591 : :
2592 : 3 : if (jinode->i_dirty_end) {
2593 : 3 : jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
2594 : 3 : jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
2595 : : } else {
2596 : 3 : jinode->i_dirty_start = start_byte;
2597 : 3 : jinode->i_dirty_end = end_byte;
2598 : : }
2599 : :
2600 : : /* Is inode already attached where we need it? */
2601 : 3 : if (jinode->i_transaction == transaction ||
2602 : 3 : jinode->i_next_transaction == transaction)
2603 : : goto done;
2604 : :
2605 : : /*
2606 : : * We only ever set this variable to 1 so the test is safe. Since
2607 : : * t_need_data_flush is likely to be set, we do the test to save some
2608 : : * cacheline bouncing
2609 : : */
2610 : 3 : if (!transaction->t_need_data_flush)
2611 : 3 : transaction->t_need_data_flush = 1;
2612 : : /* On some different transaction's list - should be
2613 : : * the committing one */
2614 : 3 : if (jinode->i_transaction) {
2615 : 0 : J_ASSERT(jinode->i_next_transaction == NULL);
2616 : 0 : J_ASSERT(jinode->i_transaction ==
2617 : : journal->j_committing_transaction);
2618 : 0 : jinode->i_next_transaction = transaction;
2619 : 0 : goto done;
2620 : : }
2621 : : /* Not on any transaction list... */
2622 : 3 : J_ASSERT(!jinode->i_next_transaction);
2623 : 3 : jinode->i_transaction = transaction;
2624 : 3 : list_add(&jinode->i_list, &transaction->t_inode_list);
2625 : : done:
2626 : : spin_unlock(&journal->j_list_lock);
2627 : :
2628 : 3 : return 0;
2629 : : }
2630 : :
2631 : 0 : int jbd2_journal_inode_ranged_write(handle_t *handle,
2632 : : struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
2633 : : {
2634 : 0 : return jbd2_journal_file_inode(handle, jinode,
2635 : : JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
2636 : 0 : start_byte + length - 1);
2637 : : }
2638 : :
2639 : 3 : int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
2640 : : loff_t start_byte, loff_t length)
2641 : : {
2642 : 3 : return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
2643 : 3 : start_byte, start_byte + length - 1);
2644 : : }
2645 : :
2646 : : /*
2647 : : * File truncate and transaction commit interact with each other in a
2648 : : * non-trivial way. If a transaction writing data block A is
2649 : : * committing, we cannot discard the data by truncate until we have
2650 : : * written them. Otherwise if we crashed after the transaction with
2651 : : * write has committed but before the transaction with truncate has
2652 : : * committed, we could see stale data in block A. This function is a
2653 : : * helper to solve this problem. It starts writeout of the truncated
2654 : : * part in case it is in the committing transaction.
2655 : : *
2656 : : * Filesystem code must call this function when inode is journaled in
2657 : : * ordered mode before truncation happens and after the inode has been
2658 : : * placed on orphan list with the new inode size. The second condition
2659 : : * avoids the race that someone writes new data and we start
2660 : : * committing the transaction after this function has been called but
2661 : : * before a transaction for truncate is started (and furthermore it
2662 : : * allows us to optimize the case where the addition to orphan list
2663 : : * happens in the same transaction as write --- we don't have to write
2664 : : * any data in such case).
2665 : : */
2666 : 3 : int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2667 : : struct jbd2_inode *jinode,
2668 : : loff_t new_size)
2669 : : {
2670 : : transaction_t *inode_trans, *commit_trans;
2671 : : int ret = 0;
2672 : :
2673 : : /* This is a quick check to avoid locking if not necessary */
2674 : 3 : if (!jinode->i_transaction)
2675 : : goto out;
2676 : : /* Locks are here just to force reading of recent values, it is
2677 : : * enough that the transaction was not committing before we started
2678 : : * a transaction adding the inode to orphan list */
2679 : 3 : read_lock(&journal->j_state_lock);
2680 : 3 : commit_trans = journal->j_committing_transaction;
2681 : : read_unlock(&journal->j_state_lock);
2682 : : spin_lock(&journal->j_list_lock);
2683 : 3 : inode_trans = jinode->i_transaction;
2684 : : spin_unlock(&journal->j_list_lock);
2685 : 3 : if (inode_trans == commit_trans) {
2686 : 0 : ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
2687 : : new_size, LLONG_MAX);
2688 : 0 : if (ret)
2689 : 0 : jbd2_journal_abort(journal, ret);
2690 : : }
2691 : : out:
2692 : 3 : return ret;
2693 : : }
|