1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Copyright (C) 2016 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_bit.h"
12 #include "xfs_shared.h"
13 #include "xfs_mount.h"
14 #include "xfs_defer.h"
15 #include "xfs_trans.h"
16 #include "xfs_trans_priv.h"
17 #include "xfs_buf_item.h"
18 #include "xfs_refcount_item.h"
19 #include "xfs_log.h"
20 #include "xfs_refcount.h"
21
22
23 kmem_zone_t *xfs_cui_zone;
24 kmem_zone_t *xfs_cud_zone;
25
CUI_ITEM(struct xfs_log_item * lip)26 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
27 {
28 return container_of(lip, struct xfs_cui_log_item, cui_item);
29 }
30
31 void
xfs_cui_item_free(struct xfs_cui_log_item * cuip)32 xfs_cui_item_free(
33 struct xfs_cui_log_item *cuip)
34 {
35 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
36 kmem_free(cuip);
37 else
38 kmem_zone_free(xfs_cui_zone, cuip);
39 }
40
41 /*
42 * Freeing the CUI requires that we remove it from the AIL if it has already
43 * been placed there. However, the CUI may not yet have been placed in the AIL
44 * when called by xfs_cui_release() from CUD processing due to the ordering of
45 * committed vs unpin operations in bulk insert operations. Hence the reference
46 * count to ensure only the last caller frees the CUI.
47 */
48 void
xfs_cui_release(struct xfs_cui_log_item * cuip)49 xfs_cui_release(
50 struct xfs_cui_log_item *cuip)
51 {
52 ASSERT(atomic_read(&cuip->cui_refcount) > 0);
53 if (atomic_dec_and_test(&cuip->cui_refcount)) {
54 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
55 xfs_cui_item_free(cuip);
56 }
57 }
58
59
60 STATIC void
xfs_cui_item_size(struct xfs_log_item * lip,int * nvecs,int * nbytes)61 xfs_cui_item_size(
62 struct xfs_log_item *lip,
63 int *nvecs,
64 int *nbytes)
65 {
66 struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
67
68 *nvecs += 1;
69 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
70 }
71
72 /*
73 * This is called to fill in the vector of log iovecs for the
74 * given cui log item. We use only 1 iovec, and we point that
75 * at the cui_log_format structure embedded in the cui item.
76 * It is at this point that we assert that all of the extent
77 * slots in the cui item have been filled.
78 */
79 STATIC void
xfs_cui_item_format(struct xfs_log_item * lip,struct xfs_log_vec * lv)80 xfs_cui_item_format(
81 struct xfs_log_item *lip,
82 struct xfs_log_vec *lv)
83 {
84 struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
85 struct xfs_log_iovec *vecp = NULL;
86
87 ASSERT(atomic_read(&cuip->cui_next_extent) ==
88 cuip->cui_format.cui_nextents);
89
90 cuip->cui_format.cui_type = XFS_LI_CUI;
91 cuip->cui_format.cui_size = 1;
92
93 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
94 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
95 }
96
97 /*
98 * Pinning has no meaning for an cui item, so just return.
99 */
100 STATIC void
xfs_cui_item_pin(struct xfs_log_item * lip)101 xfs_cui_item_pin(
102 struct xfs_log_item *lip)
103 {
104 }
105
106 /*
107 * The unpin operation is the last place an CUI is manipulated in the log. It is
108 * either inserted in the AIL or aborted in the event of a log I/O error. In
109 * either case, the CUI transaction has been successfully committed to make it
110 * this far. Therefore, we expect whoever committed the CUI to either construct
111 * and commit the CUD or drop the CUD's reference in the event of error. Simply
112 * drop the log's CUI reference now that the log is done with it.
113 */
114 STATIC void
xfs_cui_item_unpin(struct xfs_log_item * lip,int remove)115 xfs_cui_item_unpin(
116 struct xfs_log_item *lip,
117 int remove)
118 {
119 struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
120
121 xfs_cui_release(cuip);
122 }
123
124 /*
125 * CUI items have no locking or pushing. However, since CUIs are pulled from
126 * the AIL when their corresponding CUDs are committed to disk, their situation
127 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
128 * will eventually flush the log. This should help in getting the CUI out of
129 * the AIL.
130 */
131 STATIC uint
xfs_cui_item_push(struct xfs_log_item * lip,struct list_head * buffer_list)132 xfs_cui_item_push(
133 struct xfs_log_item *lip,
134 struct list_head *buffer_list)
135 {
136 return XFS_ITEM_PINNED;
137 }
138
139 /*
140 * The CUI has been either committed or aborted if the transaction has been
141 * cancelled. If the transaction was cancelled, an CUD isn't going to be
142 * constructed and thus we free the CUI here directly.
143 */
144 STATIC void
xfs_cui_item_unlock(struct xfs_log_item * lip)145 xfs_cui_item_unlock(
146 struct xfs_log_item *lip)
147 {
148 if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
149 xfs_cui_release(CUI_ITEM(lip));
150 }
151
152 /*
153 * The CUI is logged only once and cannot be moved in the log, so simply return
154 * the lsn at which it's been logged.
155 */
156 STATIC xfs_lsn_t
xfs_cui_item_committed(struct xfs_log_item * lip,xfs_lsn_t lsn)157 xfs_cui_item_committed(
158 struct xfs_log_item *lip,
159 xfs_lsn_t lsn)
160 {
161 return lsn;
162 }
163
164 /*
165 * The CUI dependency tracking op doesn't do squat. It can't because
166 * it doesn't know where the free extent is coming from. The dependency
167 * tracking has to be handled by the "enclosing" metadata object. For
168 * example, for inodes, the inode is locked throughout the extent freeing
169 * so the dependency should be recorded there.
170 */
171 STATIC void
xfs_cui_item_committing(struct xfs_log_item * lip,xfs_lsn_t lsn)172 xfs_cui_item_committing(
173 struct xfs_log_item *lip,
174 xfs_lsn_t lsn)
175 {
176 }
177
178 /*
179 * This is the ops vector shared by all cui log items.
180 */
181 static const struct xfs_item_ops xfs_cui_item_ops = {
182 .iop_size = xfs_cui_item_size,
183 .iop_format = xfs_cui_item_format,
184 .iop_pin = xfs_cui_item_pin,
185 .iop_unpin = xfs_cui_item_unpin,
186 .iop_unlock = xfs_cui_item_unlock,
187 .iop_committed = xfs_cui_item_committed,
188 .iop_push = xfs_cui_item_push,
189 .iop_committing = xfs_cui_item_committing,
190 };
191
192 /*
193 * Allocate and initialize an cui item with the given number of extents.
194 */
195 struct xfs_cui_log_item *
xfs_cui_init(struct xfs_mount * mp,uint nextents)196 xfs_cui_init(
197 struct xfs_mount *mp,
198 uint nextents)
199
200 {
201 struct xfs_cui_log_item *cuip;
202
203 ASSERT(nextents > 0);
204 if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
205 cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
206 KM_SLEEP);
207 else
208 cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP);
209
210 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
211 cuip->cui_format.cui_nextents = nextents;
212 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
213 atomic_set(&cuip->cui_next_extent, 0);
214 atomic_set(&cuip->cui_refcount, 2);
215
216 return cuip;
217 }
218
CUD_ITEM(struct xfs_log_item * lip)219 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
220 {
221 return container_of(lip, struct xfs_cud_log_item, cud_item);
222 }
223
224 STATIC void
xfs_cud_item_size(struct xfs_log_item * lip,int * nvecs,int * nbytes)225 xfs_cud_item_size(
226 struct xfs_log_item *lip,
227 int *nvecs,
228 int *nbytes)
229 {
230 *nvecs += 1;
231 *nbytes += sizeof(struct xfs_cud_log_format);
232 }
233
234 /*
235 * This is called to fill in the vector of log iovecs for the
236 * given cud log item. We use only 1 iovec, and we point that
237 * at the cud_log_format structure embedded in the cud item.
238 * It is at this point that we assert that all of the extent
239 * slots in the cud item have been filled.
240 */
241 STATIC void
xfs_cud_item_format(struct xfs_log_item * lip,struct xfs_log_vec * lv)242 xfs_cud_item_format(
243 struct xfs_log_item *lip,
244 struct xfs_log_vec *lv)
245 {
246 struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
247 struct xfs_log_iovec *vecp = NULL;
248
249 cudp->cud_format.cud_type = XFS_LI_CUD;
250 cudp->cud_format.cud_size = 1;
251
252 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
253 sizeof(struct xfs_cud_log_format));
254 }
255
256 /*
257 * Pinning has no meaning for an cud item, so just return.
258 */
259 STATIC void
xfs_cud_item_pin(struct xfs_log_item * lip)260 xfs_cud_item_pin(
261 struct xfs_log_item *lip)
262 {
263 }
264
265 /*
266 * Since pinning has no meaning for an cud item, unpinning does
267 * not either.
268 */
269 STATIC void
xfs_cud_item_unpin(struct xfs_log_item * lip,int remove)270 xfs_cud_item_unpin(
271 struct xfs_log_item *lip,
272 int remove)
273 {
274 }
275
276 /*
277 * There isn't much you can do to push on an cud item. It is simply stuck
278 * waiting for the log to be flushed to disk.
279 */
280 STATIC uint
xfs_cud_item_push(struct xfs_log_item * lip,struct list_head * buffer_list)281 xfs_cud_item_push(
282 struct xfs_log_item *lip,
283 struct list_head *buffer_list)
284 {
285 return XFS_ITEM_PINNED;
286 }
287
288 /*
289 * The CUD is either committed or aborted if the transaction is cancelled. If
290 * the transaction is cancelled, drop our reference to the CUI and free the
291 * CUD.
292 */
293 STATIC void
xfs_cud_item_unlock(struct xfs_log_item * lip)294 xfs_cud_item_unlock(
295 struct xfs_log_item *lip)
296 {
297 struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
298
299 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
300 xfs_cui_release(cudp->cud_cuip);
301 kmem_zone_free(xfs_cud_zone, cudp);
302 }
303 }
304
305 /*
306 * When the cud item is committed to disk, all we need to do is delete our
307 * reference to our partner cui item and then free ourselves. Since we're
308 * freeing ourselves we must return -1 to keep the transaction code from
309 * further referencing this item.
310 */
311 STATIC xfs_lsn_t
xfs_cud_item_committed(struct xfs_log_item * lip,xfs_lsn_t lsn)312 xfs_cud_item_committed(
313 struct xfs_log_item *lip,
314 xfs_lsn_t lsn)
315 {
316 struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
317
318 /*
319 * Drop the CUI reference regardless of whether the CUD has been
320 * aborted. Once the CUD transaction is constructed, it is the sole
321 * responsibility of the CUD to release the CUI (even if the CUI is
322 * aborted due to log I/O error).
323 */
324 xfs_cui_release(cudp->cud_cuip);
325 kmem_zone_free(xfs_cud_zone, cudp);
326
327 return (xfs_lsn_t)-1;
328 }
329
330 /*
331 * The CUD dependency tracking op doesn't do squat. It can't because
332 * it doesn't know where the free extent is coming from. The dependency
333 * tracking has to be handled by the "enclosing" metadata object. For
334 * example, for inodes, the inode is locked throughout the extent freeing
335 * so the dependency should be recorded there.
336 */
337 STATIC void
xfs_cud_item_committing(struct xfs_log_item * lip,xfs_lsn_t lsn)338 xfs_cud_item_committing(
339 struct xfs_log_item *lip,
340 xfs_lsn_t lsn)
341 {
342 }
343
344 /*
345 * This is the ops vector shared by all cud log items.
346 */
347 static const struct xfs_item_ops xfs_cud_item_ops = {
348 .iop_size = xfs_cud_item_size,
349 .iop_format = xfs_cud_item_format,
350 .iop_pin = xfs_cud_item_pin,
351 .iop_unpin = xfs_cud_item_unpin,
352 .iop_unlock = xfs_cud_item_unlock,
353 .iop_committed = xfs_cud_item_committed,
354 .iop_push = xfs_cud_item_push,
355 .iop_committing = xfs_cud_item_committing,
356 };
357
358 /*
359 * Allocate and initialize an cud item with the given number of extents.
360 */
361 struct xfs_cud_log_item *
xfs_cud_init(struct xfs_mount * mp,struct xfs_cui_log_item * cuip)362 xfs_cud_init(
363 struct xfs_mount *mp,
364 struct xfs_cui_log_item *cuip)
365
366 {
367 struct xfs_cud_log_item *cudp;
368
369 cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
370 xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
371 cudp->cud_cuip = cuip;
372 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
373
374 return cudp;
375 }
376
377 /*
378 * Process a refcount update intent item that was recovered from the log.
379 * We need to update the refcountbt.
380 */
381 int
xfs_cui_recover(struct xfs_trans * parent_tp,struct xfs_cui_log_item * cuip)382 xfs_cui_recover(
383 struct xfs_trans *parent_tp,
384 struct xfs_cui_log_item *cuip)
385 {
386 int i;
387 int error = 0;
388 unsigned int refc_type;
389 struct xfs_phys_extent *refc;
390 xfs_fsblock_t startblock_fsb;
391 bool op_ok;
392 struct xfs_cud_log_item *cudp;
393 struct xfs_trans *tp;
394 struct xfs_btree_cur *rcur = NULL;
395 enum xfs_refcount_intent_type type;
396 xfs_fsblock_t new_fsb;
397 xfs_extlen_t new_len;
398 struct xfs_bmbt_irec irec;
399 bool requeue_only = false;
400 struct xfs_mount *mp = parent_tp->t_mountp;
401
402 ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
403
404 /*
405 * First check the validity of the extents described by the
406 * CUI. If any are bad, then assume that all are bad and
407 * just toss the CUI.
408 */
409 for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
410 refc = &cuip->cui_format.cui_extents[i];
411 startblock_fsb = XFS_BB_TO_FSB(mp,
412 XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
413 switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
414 case XFS_REFCOUNT_INCREASE:
415 case XFS_REFCOUNT_DECREASE:
416 case XFS_REFCOUNT_ALLOC_COW:
417 case XFS_REFCOUNT_FREE_COW:
418 op_ok = true;
419 break;
420 default:
421 op_ok = false;
422 break;
423 }
424 if (!op_ok || startblock_fsb == 0 ||
425 refc->pe_len == 0 ||
426 startblock_fsb >= mp->m_sb.sb_dblocks ||
427 refc->pe_len >= mp->m_sb.sb_agblocks ||
428 (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
429 /*
430 * This will pull the CUI from the AIL and
431 * free the memory associated with it.
432 */
433 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
434 xfs_cui_release(cuip);
435 return -EIO;
436 }
437 }
438
439 /*
440 * Under normal operation, refcount updates are deferred, so we
441 * wouldn't be adding them directly to a transaction. All
442 * refcount updates manage reservation usage internally and
443 * dynamically by deferring work that won't fit in the
444 * transaction. Normally, any work that needs to be deferred
445 * gets attached to the same defer_ops that scheduled the
446 * refcount update. However, we're in log recovery here, so we
447 * we use the passed in defer_ops and to finish up any work that
448 * doesn't fit. We need to reserve enough blocks to handle a
449 * full btree split on either end of the refcount range.
450 */
451 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
452 mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
453 if (error)
454 return error;
455 /*
456 * Recovery stashes all deferred ops during intent processing and
457 * finishes them on completion. Transfer current dfops state to this
458 * transaction and transfer the result back before we return.
459 */
460 xfs_defer_move(tp, parent_tp);
461 cudp = xfs_trans_get_cud(tp, cuip);
462
463 for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
464 refc = &cuip->cui_format.cui_extents[i];
465 refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
466 switch (refc_type) {
467 case XFS_REFCOUNT_INCREASE:
468 case XFS_REFCOUNT_DECREASE:
469 case XFS_REFCOUNT_ALLOC_COW:
470 case XFS_REFCOUNT_FREE_COW:
471 type = refc_type;
472 break;
473 default:
474 error = -EFSCORRUPTED;
475 goto abort_error;
476 }
477 if (requeue_only) {
478 new_fsb = refc->pe_startblock;
479 new_len = refc->pe_len;
480 } else
481 error = xfs_trans_log_finish_refcount_update(tp, cudp,
482 type, refc->pe_startblock, refc->pe_len,
483 &new_fsb, &new_len, &rcur);
484 if (error)
485 goto abort_error;
486
487 /* Requeue what we didn't finish. */
488 if (new_len > 0) {
489 irec.br_startblock = new_fsb;
490 irec.br_blockcount = new_len;
491 switch (type) {
492 case XFS_REFCOUNT_INCREASE:
493 error = xfs_refcount_increase_extent(tp, &irec);
494 break;
495 case XFS_REFCOUNT_DECREASE:
496 error = xfs_refcount_decrease_extent(tp, &irec);
497 break;
498 case XFS_REFCOUNT_ALLOC_COW:
499 error = xfs_refcount_alloc_cow_extent(tp,
500 irec.br_startblock,
501 irec.br_blockcount);
502 break;
503 case XFS_REFCOUNT_FREE_COW:
504 error = xfs_refcount_free_cow_extent(tp,
505 irec.br_startblock,
506 irec.br_blockcount);
507 break;
508 default:
509 ASSERT(0);
510 }
511 if (error)
512 goto abort_error;
513 requeue_only = true;
514 }
515 }
516
517 xfs_refcount_finish_one_cleanup(tp, rcur, error);
518 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
519 xfs_defer_move(parent_tp, tp);
520 error = xfs_trans_commit(tp);
521 return error;
522
523 abort_error:
524 xfs_refcount_finish_one_cleanup(tp, rcur, error);
525 xfs_defer_move(parent_tp, tp);
526 xfs_trans_cancel(tp);
527 return error;
528 }
529