// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"

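/* Slab zones backing CUI and CUD log item allocations. */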
kmem_zone_t	*xfs_cui_zone;
kmem_zone_t	*xfs_cud_zone;

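/* Pull the embedded CUI log item back out of the generic log item. */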
static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cui_log_item, cui_item);
}

void
xfs_cui_item_free(
	struct xfs_cui_log_item	*cuip)
{
	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
		kmem_free(cuip);
	else
		kmem_zone_free(xfs_cui_zone, cuip);
}

/*
 * Freeing the CUI requires that we remove it from the AIL if it has already
 * been placed there. However, the CUI may not yet have been placed in the AIL
 * when called by xfs_cui_release() from CUD processing due to the ordering of
 * committed vs unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the CUI.
 */
void
xfs_cui_release(
	struct xfs_cui_log_item	*cuip)
{
	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
	if (atomic_dec_and_test(&cuip->cui_refcount)) {
		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
		xfs_cui_item_free(cuip);
	}
}

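/*
 * Report the number of log iovecs and the log space needed to log the
 * given CUI item.
 */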
STATIC void
xfs_cui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given cui log item. We use only 1 iovec, and we point that
 * at the cui_log_format structure embedded in the cui item.
 * It is at this point that we assert that all of the extent
 * slots in the cui item have been filled.
 */
STATIC void
xfs_cui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&cuip->cui_next_extent) ==
			cuip->cui_format.cui_nextents);

	cuip->cui_format.cui_type = XFS_LI_CUI;
	cuip->cui_format.cui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
			xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
}

/*
 * Pinning has no meaning for a CUI item, so just return.
 */
STATIC void
xfs_cui_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * The unpin operation is the last place a CUI is manipulated in the log. It is
 * either inserted in the AIL or aborted in the event of a log I/O error. In
 * either case, the CUI transaction has been successfully committed to make it
 * this far. Therefore, we expect whoever committed the CUI to either construct
 * and commit the CUD or drop the CUD's reference in the event of error. Simply
 * drop the log's CUI reference now that the log is done with it.
 */
STATIC void
xfs_cui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

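	/*
	 * Note that the remove flag goes unused here: dropping the
	 * log's reference suffices for both the committed and the
	 * aborted cases.
	 */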
	xfs_cui_release(cuip);
}

/*
 * CUI items have no locking or pushing.  However, since CUIs are pulled from
 * the AIL when their corresponding CUDs are committed to disk, their situation
 * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
 * will eventually flush the log.  This should help in getting the CUI out of
 * the AIL.
 */
STATIC uint
xfs_cui_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The CUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, a CUD isn't going to be
 * constructed, so we free the CUI here directly.
 */
STATIC void
xfs_cui_item_unlock(
	struct xfs_log_item	*lip)
{
	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
		xfs_cui_release(CUI_ITEM(lip));
}

/*
 * The CUI is logged only once and cannot be moved in the log, so simply return
 * the lsn at which it's been logged.
 */
STATIC xfs_lsn_t
xfs_cui_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	return lsn;
}

/*
 * The CUI dependency tracking op doesn't do squat.  It can't because
 * it doesn't know where the refcount update is coming from.  The
 * dependency tracking has to be handled by the "enclosing" metadata
 * object.  For example, for inodes, the inode is locked throughout the
 * refcount update so the dependency should be recorded there.
 */
STATIC void
xfs_cui_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all cui log items.
 */
static const struct xfs_item_ops xfs_cui_item_ops = {
	.iop_size	= xfs_cui_item_size,
	.iop_format	= xfs_cui_item_format,
	.iop_pin	= xfs_cui_item_pin,
	.iop_unpin	= xfs_cui_item_unpin,
	.iop_unlock	= xfs_cui_item_unlock,
	.iop_committed	= xfs_cui_item_committed,
	.iop_push	= xfs_cui_item_push,
	.iop_committing = xfs_cui_item_committing,
};

/*
 * Allocate and initialize a CUI item with the given number of extents.
 */
struct xfs_cui_log_item *
xfs_cui_init(
	struct xfs_mount		*mp,
	uint				nextents)
{
	struct xfs_cui_log_item		*cuip;

	ASSERT(nextents > 0);
	if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
		cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
				KM_SLEEP);
	else
		cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP);

	xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
	cuip->cui_format.cui_nextents = nextents;
	cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
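	/*
	 * A new CUI carries two references: one belonging to the log,
	 * dropped at unpin time (or at unlock on abort), and one for
	 * the CUD, dropped when the CUD is committed or cancelled.
	 */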
	atomic_set(&cuip->cui_next_extent, 0);
	atomic_set(&cuip->cui_refcount, 2);

	return cuip;
}

static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cud_log_item, cud_item);
}

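/*
 * Report the number of log iovecs and the log space needed to log the
 * given CUD item.
 */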
STATIC void
xfs_cud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_cud_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given cud log item. We use only 1 iovec, and we point that
 * at the cud_log_format structure embedded in the cud item.
 */
STATIC void
xfs_cud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	cudp->cud_format.cud_type = XFS_LI_CUD;
	cudp->cud_format.cud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
			sizeof(struct xfs_cud_log_format));
}

/*
 * Pinning has no meaning for a CUD item, so just return.
 */
STATIC void
xfs_cud_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * Since pinning has no meaning for a CUD item, unpinning does
 * not either.
 */
STATIC void
xfs_cud_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
}

/*
 * There isn't much you can do to push on a CUD item.  It is simply stuck
 * waiting for the log to be flushed to disk.
 */
STATIC uint
xfs_cud_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The CUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the CUI and free the
 * CUD.
 */
STATIC void
xfs_cud_item_unlock(
	struct xfs_log_item	*lip)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
		xfs_cui_release(cudp->cud_cuip);
		kmem_zone_free(xfs_cud_zone, cudp);
	}
}

/*
 * When the cud item is committed to disk, all we need to do is delete our
 * reference to our partner cui item and then free ourselves. Since we're
 * freeing ourselves we must return -1 to keep the transaction code from
 * further referencing this item.
 */
STATIC xfs_lsn_t
xfs_cud_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	/*
	 * Drop the CUI reference regardless of whether the CUD has been
	 * aborted. Once the CUD transaction is constructed, it is the sole
	 * responsibility of the CUD to release the CUI (even if the CUI is
	 * aborted due to log I/O error).
	 */
	xfs_cui_release(cudp->cud_cuip);
	kmem_zone_free(xfs_cud_zone, cudp);

	return (xfs_lsn_t)-1;
}

/*
 * The CUD dependency tracking op doesn't do squat.  It can't because
 * it doesn't know where the refcount update is coming from.  The
 * dependency tracking has to be handled by the "enclosing" metadata
 * object.  For example, for inodes, the inode is locked throughout the
 * refcount update so the dependency should be recorded there.
 */
STATIC void
xfs_cud_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all cud log items.
 */
static const struct xfs_item_ops xfs_cud_item_ops = {
	.iop_size	= xfs_cud_item_size,
	.iop_format	= xfs_cud_item_format,
	.iop_pin	= xfs_cud_item_pin,
	.iop_unpin	= xfs_cud_item_unpin,
	.iop_unlock	= xfs_cud_item_unlock,
	.iop_committed	= xfs_cud_item_committed,
	.iop_push	= xfs_cud_item_push,
	.iop_committing = xfs_cud_item_committing,
};

/*
 * Allocate and initialize a CUD item paired with the given CUI item.
 */
struct xfs_cud_log_item *
xfs_cud_init(
	struct xfs_mount		*mp,
	struct xfs_cui_log_item		*cuip)
{
	struct xfs_cud_log_item	*cudp;

	cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
	xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
	cudp->cud_cuip = cuip;
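	/* Record the CUI's id so that log recovery can match the two items. */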
	cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;

	return cudp;
}

/*
 * Process a refcount update intent item that was recovered from the log.
 * We need to update the refcountbt.
 */
int
xfs_cui_recover(
	struct xfs_trans		*parent_tp,
	struct xfs_cui_log_item		*cuip)
{
	int				i;
	int				error = 0;
	unsigned int			refc_type;
	struct xfs_phys_extent		*refc;
	xfs_fsblock_t			startblock_fsb;
	bool				op_ok;
	struct xfs_cud_log_item		*cudp;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*rcur = NULL;
	enum xfs_refcount_intent_type	type;
	xfs_fsblock_t			new_fsb;
	xfs_extlen_t			new_len;
	struct xfs_bmbt_irec		irec;
	bool				requeue_only = false;
	struct xfs_mount		*mp = parent_tp->t_mountp;

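	/* A CUI may be replayed at most once. */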
	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));

	/*
	 * First check the validity of the extents described by the
	 * CUI.  If any are bad, then assume that all are bad and
	 * just toss the CUI.
	 */
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
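		/*
		 * Round-trip the encoded startblock through the daddr
		 * space to get a linear fsblock number that the bounds
		 * checks below can compare against sb_dblocks.
		 */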
		startblock_fsb = XFS_BB_TO_FSB(mp,
				   XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			op_ok = true;
			break;
		default:
			op_ok = false;
			break;
		}
		if (!op_ok || startblock_fsb == 0 ||
		    refc->pe_len == 0 ||
		    startblock_fsb >= mp->m_sb.sb_dblocks ||
		    refc->pe_len >= mp->m_sb.sb_agblocks ||
		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
			/*
			 * This will pull the CUI from the AIL and
			 * free the memory associated with it.
			 */
			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
			xfs_cui_release(cuip);
			return -EIO;
		}
	}

	/*
	 * Under normal operation, refcount updates are deferred, so we
	 * wouldn't be adding them directly to a transaction.  All
	 * refcount updates manage reservation usage internally and
	 * dynamically by deferring work that won't fit in the
	 * transaction.  Normally, any work that needs to be deferred
	 * gets attached to the same defer_ops that scheduled the
	 * refcount update.  However, we're in log recovery here, so we
	 * use the passed-in defer_ops to finish up any work that
	 * doesn't fit.  We need to reserve enough blocks to handle a
	 * full btree split on either end of the refcount range.
	 */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;
	/*
	 * Recovery stashes all deferred ops during intent processing and
	 * finishes them on completion. Transfer current dfops state to this
	 * transaction and transfer the result back before we return.
	 */
	xfs_defer_move(tp, parent_tp);
	cudp = xfs_trans_get_cud(tp, cuip);

	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
		switch (refc_type) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			type = refc_type;
			break;
		default:
			error = -EFSCORRUPTED;
			goto abort_error;
		}
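		/*
		 * Once a step of the CUI has been left partially
		 * finished, requeue this and all remaining steps as
		 * new intents so that the updates are replayed in
		 * their original order.
		 */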
		if (requeue_only) {
			new_fsb = refc->pe_startblock;
			new_len = refc->pe_len;
		} else {
			error = xfs_trans_log_finish_refcount_update(tp, cudp,
				type, refc->pe_startblock, refc->pe_len,
				&new_fsb, &new_len, &rcur);
		}
		if (error)
			goto abort_error;

		/* Requeue what we didn't finish. */
		if (new_len > 0) {
			irec.br_startblock = new_fsb;
			irec.br_blockcount = new_len;
			switch (type) {
			case XFS_REFCOUNT_INCREASE:
				error = xfs_refcount_increase_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_DECREASE:
				error = xfs_refcount_decrease_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_ALLOC_COW:
				error = xfs_refcount_alloc_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			case XFS_REFCOUNT_FREE_COW:
				error = xfs_refcount_free_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			default:
				ASSERT(0);
			}
			if (error)
				goto abort_error;
			requeue_only = true;
		}
	}

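	/* Release the refcount btree cursor that was cached across the updates. */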
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
	xfs_defer_move(parent_tp, tp);
	error = xfs_trans_commit(tp);
	return error;

abort_error:
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	xfs_defer_move(parent_tp, tp);
	xfs_trans_cancel(tp);
	return error;
}