| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ | 
|  | 2 | #ifndef _RAID10_H | 
|  | 3 | #define _RAID10_H | 
|  | 4 |  | 
/* Note: raid10_info.rdev can be set to NULL asynchronously by
 * raid10_remove_disk.
 * There are three safe ways to access raid10_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
 *    that is called as part of performing resync/recovery/reshape.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */
|  | 17 |  | 
|  | 18 | struct raid10_info { | 
|  | 19 | struct md_rdev	*rdev, *replacement; | 
|  | 20 | sector_t	head_position; | 
|  | 21 | int		recovery_disabled;	/* matches | 
|  | 22 | * mddev->recovery_disabled | 
|  | 23 | * when we shouldn't try | 
|  | 24 | * recovering this device. | 
|  | 25 | */ | 
|  | 26 | }; | 
|  | 27 |  | 
|  | 28 | struct r10conf { | 
|  | 29 | struct mddev		*mddev; | 
|  | 30 | struct raid10_info	*mirrors; | 
|  | 31 | struct raid10_info	*mirrors_new, *mirrors_old; | 
|  | 32 | spinlock_t		device_lock; | 
|  | 33 |  | 
|  | 34 | /* geometry */ | 
|  | 35 | struct geom { | 
|  | 36 | int		raid_disks; | 
|  | 37 | int		near_copies;  /* number of copies laid out | 
|  | 38 | * raid0 style */ | 
|  | 39 | int		far_copies;   /* number of copies laid out | 
|  | 40 | * at large strides across drives | 
|  | 41 | */ | 
|  | 42 | int		far_offset;   /* far_copies are offset by 1 | 
|  | 43 | * stripe instead of many | 
|  | 44 | */ | 
|  | 45 | sector_t	stride;	      /* distance between far copies. | 
|  | 46 | * This is size / far_copies unless | 
|  | 47 | * far_offset, in which case it is | 
|  | 48 | * 1 stripe. | 
|  | 49 | */ | 
|  | 50 | int             far_set_size; /* The number of devices in a set, | 
|  | 51 | * where a 'set' are devices that | 
|  | 52 | * contain far/offset copies of | 
|  | 53 | * each other. | 
|  | 54 | */ | 
|  | 55 | int		chunk_shift; /* shift from chunks to sectors */ | 
|  | 56 | sector_t	chunk_mask; | 
|  | 57 | } prev, geo; | 
|  | 58 | int			copies;	      /* near_copies * far_copies. | 
|  | 59 | * must be <= raid_disks | 
|  | 60 | */ | 
|  | 61 |  | 
|  | 62 | sector_t		dev_sectors;  /* temp copy of | 
|  | 63 | * mddev->dev_sectors */ | 
|  | 64 | sector_t		reshape_progress; | 
|  | 65 | sector_t		reshape_safe; | 
|  | 66 | unsigned long		reshape_checkpoint; | 
|  | 67 | sector_t		offset_diff; | 
|  | 68 |  | 
|  | 69 | struct list_head	retry_list; | 
|  | 70 | /* A separate list of r1bio which just need raid_end_bio_io called. | 
|  | 71 | * This mustn't happen for writes which had any errors if the superblock | 
|  | 72 | * needs to be written. | 
|  | 73 | */ | 
|  | 74 | struct list_head	bio_end_io_list; | 
|  | 75 |  | 
|  | 76 | /* queue pending writes and submit them on unplug */ | 
|  | 77 | struct bio_list		pending_bio_list; | 
|  | 78 | int			pending_count; | 
|  | 79 |  | 
|  | 80 | spinlock_t		resync_lock; | 
|  | 81 | atomic_t		nr_pending; | 
|  | 82 | int			nr_waiting; | 
|  | 83 | int			nr_queued; | 
|  | 84 | int			barrier; | 
|  | 85 | int			array_freeze_pending; | 
|  | 86 | sector_t		next_resync; | 
|  | 87 | int			fullsync;  /* set to 1 if a full sync is needed, | 
|  | 88 | * (fresh device added). | 
|  | 89 | * Cleared when a sync completes. | 
|  | 90 | */ | 
|  | 91 | int			have_replacement; /* There is at least one | 
|  | 92 | * replacement device. | 
|  | 93 | */ | 
|  | 94 | wait_queue_head_t	wait_barrier; | 
|  | 95 |  | 
|  | 96 | mempool_t		r10bio_pool; | 
|  | 97 | mempool_t		r10buf_pool; | 
|  | 98 | struct page		*tmppage; | 
|  | 99 | struct bio_set		bio_split; | 
|  | 100 |  | 
|  | 101 | /* When taking over an array from a different personality, we store | 
|  | 102 | * the new thread here until we fully activate the array. | 
|  | 103 | */ | 
|  | 104 | struct md_thread	*thread; | 
|  | 105 |  | 
|  | 106 | /* | 
|  | 107 | * Keep track of cluster resync window to send to other nodes. | 
|  | 108 | */ | 
|  | 109 | sector_t		cluster_sync_low; | 
|  | 110 | sector_t		cluster_sync_high; | 
|  | 111 | }; | 
|  | 112 |  | 
|  | 113 | /* | 
|  | 114 | * this is our 'private' RAID10 bio. | 
|  | 115 | * | 
|  | 116 | * it contains information about what kind of IO operations were started | 
|  | 117 | * for this RAID10 operation, and about their status: | 
|  | 118 | */ | 
|  | 119 |  | 
|  | 120 | struct r10bio { | 
|  | 121 | atomic_t		remaining; /* 'have we finished' count, | 
|  | 122 | * used from IRQ handlers | 
|  | 123 | */ | 
|  | 124 | sector_t		sector;	/* virtual sector number */ | 
|  | 125 | int			sectors; | 
|  | 126 | unsigned long		state; | 
|  | 127 | struct mddev		*mddev; | 
|  | 128 | /* | 
|  | 129 | * original bio going to /dev/mdx | 
|  | 130 | */ | 
|  | 131 | struct bio		*master_bio; | 
|  | 132 | /* | 
|  | 133 | * if the IO is in READ direction, then this is where we read | 
|  | 134 | */ | 
|  | 135 | int			read_slot; | 
|  | 136 |  | 
|  | 137 | struct list_head	retry_list; | 
|  | 138 | /* | 
|  | 139 | * if the IO is in WRITE direction, then multiple bios are used, | 
|  | 140 | * one for each copy. | 
|  | 141 | * When resyncing we also use one for each copy. | 
|  | 142 | * When reconstructing, we use 2 bios, one for read, one for write. | 
|  | 143 | * We choose the number when they are allocated. | 
|  | 144 | * We sometimes need an extra bio to write to the replacement. | 
|  | 145 | */ | 
|  | 146 | struct r10dev { | 
|  | 147 | struct bio	*bio; | 
|  | 148 | union { | 
|  | 149 | struct bio	*repl_bio; /* used for resync and | 
|  | 150 | * writes */ | 
|  | 151 | struct md_rdev	*rdev;	   /* used for reads | 
|  | 152 | * (read_slot >= 0) */ | 
|  | 153 | }; | 
|  | 154 | sector_t	addr; | 
|  | 155 | int		devnum; | 
|  | 156 | } devs[0]; | 
|  | 157 | }; | 
|  | 158 |  | 
/*
 * Bits for r10bio.state.  Each enumerator is a bit number; the values
 * are spelled out so the numbering is visible at a glance.
 */
enum r10bio_state {
	R10BIO_Uptodate		= 0,
	R10BIO_IsSync		= 1,
	R10BIO_IsRecover	= 2,
	R10BIO_IsReshape	= 3,
	R10BIO_Degraded		= 4,
	/*
	 * Set on bios that experience a read error so that raid10d knows
	 * what to do with them.
	 */
	R10BIO_ReadError	= 5,
	/*
	 * Set when a write for this request means we can clear some
	 * known-bad-block records.
	 */
	R10BIO_MadeGood		= 6,
	R10BIO_WriteError	= 7,
	/*
	 * Set when, during a reshape, IO is being performed on the
	 * 'previous' part of the array.
	 */
	R10BIO_Previous		= 8,
	/* failfast devices did receive failfast requests. */
	R10BIO_FailFast		= 9,
};
|  | 183 | #endif |