/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26
27#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
28#define __XEN_BLKIF__BACKEND__COMMON_H__
29
30#include <linux/module.h>
31#include <linux/interrupt.h>
32#include <linux/slab.h>
33#include <linux/blkdev.h>
34#include <linux/vmalloc.h>
35#include <linux/wait.h>
36#include <linux/io.h>
37#include <asm/setup.h>
38#include <asm/pgalloc.h>
39#include <asm/hypervisor.h>
40#include <xen/grant_table.h>
41#include <xen/xenbus.h>
42#include <xen/interface/io/ring.h>
43#include <xen/interface/io/blkif.h>
44#include <xen/interface/io/protocols.h>
45
/* Prefix that identifies this driver in log output. */
#define DRV_PFX "xen-blkback:"

/*
 * Debug print helper. Forwards to pr_debug() with DRV_PFX plus the calling
 * function name and line number prepended, and ".\n" appended to the
 * caller's format string.
 */
#define DPRINTK(fmt, ...)					\
	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",			\
		 __func__, __LINE__, ##__VA_ARGS__)
50
51
/*
 * Placeholder request/response types; they do not describe a real
 * protocol.  They exist so that ring structs can be generated which
 * hold only the elements shared by every protocol.  That gives a
 * compiler-checkable way to reach those shared elements and avoids a
 * switch(protocol) in a number of places.
 */
struct blkif_common_request {
	char dummy;
};

struct blkif_common_response {
	char dummy;
};
62
63struct blkif_x86_32_request_rw {
64 uint8_t nr_segments; /* number of segments */
65 blkif_vdev_t handle; /* only for read/write requests */
66 uint64_t id; /* private guest value, echoed in resp */
67 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
68 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
69} __attribute__((__packed__));
70
71struct blkif_x86_32_request_discard {
72 uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
73 blkif_vdev_t _pad1; /* was "handle" for read/write requests */
74 uint64_t id; /* private guest value, echoed in resp */
75 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
76 uint64_t nr_sectors;
77} __attribute__((__packed__));
78
79struct blkif_x86_32_request_other {
80 uint8_t _pad1;
81 blkif_vdev_t _pad2;
82 uint64_t id; /* private guest value, echoed in resp */
83} __attribute__((__packed__));
84
85struct blkif_x86_32_request {
86 uint8_t operation; /* BLKIF_OP_??? */
87 union {
88 struct blkif_x86_32_request_rw rw;
89 struct blkif_x86_32_request_discard discard;
90 struct blkif_x86_32_request_other other;
91 } u;
92} __attribute__((__packed__));
93
/*
 * i386 protocol version of the response.
 *
 * Member alignment is capped at 4 bytes by the surrounding pack pragma,
 * so the 64-bit id does not force 8-byte alignment or tail padding.
 */
#pragma pack(push, 4)
struct blkif_x86_32_response {
	uint64_t id;		/* copied from request */
	uint8_t  operation;	/* copied from request */
	int16_t  status;	/* BLKIF_RSP_??? */
};
#pragma pack(pop)
102/* x86_64 protocol version */
103
104struct blkif_x86_64_request_rw {
105 uint8_t nr_segments; /* number of segments */
106 blkif_vdev_t handle; /* only for read/write requests */
107 uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */
108 uint64_t id;
109 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
110 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
111} __attribute__((__packed__));
112
113struct blkif_x86_64_request_discard {
114 uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
115 blkif_vdev_t _pad1; /* was "handle" for read/write requests */
116 uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */
117 uint64_t id;
118 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
119 uint64_t nr_sectors;
120} __attribute__((__packed__));
121
122struct blkif_x86_64_request_other {
123 uint8_t _pad1;
124 blkif_vdev_t _pad2;
125 uint32_t _pad3; /* offsetof(blkif_..,u.discard.id)==8 */
126 uint64_t id; /* private guest value, echoed in resp */
127} __attribute__((__packed__));
128
129struct blkif_x86_64_request {
130 uint8_t operation; /* BLKIF_OP_??? */
131 union {
132 struct blkif_x86_64_request_rw rw;
133 struct blkif_x86_64_request_discard discard;
134 struct blkif_x86_64_request_other other;
135 } u;
136} __attribute__((__packed__));
137
/*
 * x86_64 protocol version of the response.  The id is explicitly aligned
 * to 8 bytes, which also makes the whole struct 8-byte aligned.
 */
struct blkif_x86_64_response {
	uint64_t __attribute__((__aligned__(8))) id;
	uint8_t  operation;	/* copied from request */
	int16_t  status;	/* BLKIF_RSP_??? */
};
143
144DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
145 struct blkif_common_response);
146DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
147 struct blkif_x86_32_response);
148DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
149 struct blkif_x86_64_response);
150
151union blkif_back_rings {
152 struct blkif_back_ring native;
153 struct blkif_common_back_ring common;
154 struct blkif_x86_32_back_ring x86_32;
155 struct blkif_x86_64_back_ring x86_64;
156};
157
/* Ring layout selector; numbering starts at 1, so 0 is not a valid value. */
enum blkif_protocol {
	BLKIF_PROTOCOL_NATIVE = 1,
	BLKIF_PROTOCOL_X86_32,		/* == 2 */
	BLKIF_PROTOCOL_X86_64,		/* == 3 */
};
163
164struct xen_vbd {
165 /* What the domain refers to this vbd as. */
166 blkif_vdev_t handle;
167 /* Non-zero -> read-only */
168 unsigned char readonly;
169 /* VDISK_xxx */
170 unsigned char type;
171 /* phys device that this vbd maps to. */
172 u32 pdevice;
173 struct block_device *bdev;
174 /* Cached size parameter. */
175 sector_t size;
176 bool flush_support;
177 bool discard_secure;
178};
179
180struct backend_info;
181
182struct xen_blkif {
183 /* Unique identifier for this interface. */
184 domid_t domid;
185 unsigned int handle;
186 /* Physical parameters of the comms window. */
187 unsigned int irq;
188 /* Comms information. */
189 enum blkif_protocol blk_protocol;
190 union blkif_back_rings blk_rings;
191 void *blk_ring;
192 /* The VBD attached to this interface. */
193 struct xen_vbd vbd;
194 /* Back pointer to the backend_info. */
195 struct backend_info *be;
196 /* Private fields. */
197 spinlock_t blk_ring_lock;
198 atomic_t refcnt;
199
200 wait_queue_head_t wq;
201 /* for barrier (drain) requests */
202 struct completion drain_complete;
203 atomic_t drain;
204 /* One thread per one blkif. */
205 struct task_struct *xenblkd;
206 unsigned int waiting_reqs;
207
208 /* statistics */
209 unsigned long st_print;
210 int st_rd_req;
211 int st_wr_req;
212 int st_oo_req;
213 int st_f_req;
214 int st_ds_req;
215 int st_rd_sect;
216 int st_wr_sect;
217
218 wait_queue_head_t waiting_to_free;
219 /* Thread shutdown wait queue. */
220 wait_queue_head_t shutdown_wq;
221};
222
223
/*
 * Size of the device behind a vbd: the partition's nr_sects when
 * bdev->bd_part is set, otherwise get_capacity() of the whole disk.
 * _v is expanded more than once, so pass an expression without side
 * effects.
 */
#define vbd_sz(_v)						\
	((_v)->bdev->bd_part					\
		? (_v)->bdev->bd_part->nr_sects			\
		: get_capacity((_v)->bdev->bd_disk))
227
/* Take a reference on a blkif by incrementing its refcnt. */
#define xen_blkif_get(_b)	(atomic_inc(&(_b)->refcnt))

/*
 * Drop a reference on a blkif.  When the count reaches zero, wake anyone
 * sleeping on waiting_to_free.  _b is expanded twice, so pass an
 * expression without side effects.
 */
#define xen_blkif_put(_b)						\
	do {								\
		if (atomic_dec_and_test(&(_b)->refcnt))			\
			wake_up(&(_b)->waiting_to_free);		\
	} while (0)
234
235struct phys_req {
236 unsigned short dev;
237 blkif_sector_t nr_sects;
238 struct block_device *bdev;
239 blkif_sector_t sector_number;
240};
241int xen_blkif_interface_init(void);
242
243int xen_blkif_xenbus_init(void);
244
245irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
246int xen_blkif_schedule(void *arg);
247
248int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
249 struct backend_info *be, int state);
250
251int xen_blkbk_barrier(struct xenbus_transaction xbt,
252 struct backend_info *be, int state);
253struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
254
255static inline void blkif_get_x86_32_req(struct blkif_request *dst,
256 struct blkif_x86_32_request *src)
257{
258 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
259 dst->operation = src->operation;
260 switch (src->operation) {
261 case BLKIF_OP_READ:
262 case BLKIF_OP_WRITE:
263 case BLKIF_OP_WRITE_BARRIER:
264 case BLKIF_OP_FLUSH_DISKCACHE:
265 dst->u.rw.nr_segments = src->u.rw.nr_segments;
266 dst->u.rw.handle = src->u.rw.handle;
267 dst->u.rw.id = src->u.rw.id;
268 dst->u.rw.sector_number = src->u.rw.sector_number;
269 barrier();
270 if (n > dst->u.rw.nr_segments)
271 n = dst->u.rw.nr_segments;
272 for (i = 0; i < n; i++)
273 dst->u.rw.seg[i] = src->u.rw.seg[i];
274 break;
275 case BLKIF_OP_DISCARD:
276 dst->u.discard.flag = src->u.discard.flag;
277 dst->u.discard.id = src->u.discard.id;
278 dst->u.discard.sector_number = src->u.discard.sector_number;
279 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
280 break;
281 default:
282 /*
283 * Don't know how to translate this op. Only get the
284 * ID so failure can be reported to the frontend.
285 */
286 dst->u.other.id = src->u.other.id;
287 break;
288 }
289}
290
291static inline void blkif_get_x86_64_req(struct blkif_request *dst,
292 struct blkif_x86_64_request *src)
293{
294 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
295 dst->operation = src->operation;
296 switch (src->operation) {
297 case BLKIF_OP_READ:
298 case BLKIF_OP_WRITE:
299 case BLKIF_OP_WRITE_BARRIER:
300 case BLKIF_OP_FLUSH_DISKCACHE:
301 dst->u.rw.nr_segments = src->u.rw.nr_segments;
302 dst->u.rw.handle = src->u.rw.handle;
303 dst->u.rw.id = src->u.rw.id;
304 dst->u.rw.sector_number = src->u.rw.sector_number;
305 barrier();
306 if (n > dst->u.rw.nr_segments)
307 n = dst->u.rw.nr_segments;
308 for (i = 0; i < n; i++)
309 dst->u.rw.seg[i] = src->u.rw.seg[i];
310 break;
311 case BLKIF_OP_DISCARD:
312 dst->u.discard.flag = src->u.discard.flag;
313 dst->u.discard.id = src->u.discard.id;
314 dst->u.discard.sector_number = src->u.discard.sector_number;
315 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
316 break;
317 default:
318 /*
319 * Don't know how to translate this op. Only get the
320 * ID so failure can be reported to the frontend.
321 */
322 dst->u.other.id = src->u.other.id;
323 break;
324 }
325}
326
327#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */