/*
 * Copyright (c) 2009-2015 Travis Geiselbrecht
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/* libc */
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* lk */
#include <debug.h>
#include <trace.h>
#include <err.h>
#include <list.h>
#include <pow2.h>
#include <lib/bio.h>
#include <kernel/mutex.h>
#include <lk/init.h>

#define LOCAL_TRACE 0
36
/* Global registry of block devices.
 * 'lock' guards membership of 'list' only; each bdev_t is kept alive by its
 * own reference count (see bdev_inc_ref/bdev_dec_ref). */
static struct {
    struct list_node list;  // all registered devices, linked via bdev_t::node
    mutex_t lock;           // protects list add/remove/iterate
} bdevs = {
    .list = LIST_INITIAL_VALUE(bdevs.list),
    .lock = MUTEX_INITIAL_VALUE(bdevs.lock),
};
44
/* default implementation is to use the read_block hook to 'deblock' the device:
 * split an arbitrary byte-granularity read into (partial first block, middle
 * blocks, partial last block) and service each piece with bio_read_block().
 * Returns bytes read on success or a negative error code. */
static ssize_t bio_default_read(struct bdev *dev, void *_buf, off_t offset, size_t len)
{
    uint8_t *buf = (uint8_t *)_buf;
    ssize_t bytes_read = 0;
    bnum_t block;
    ssize_t err = 0;
    uint8_t *temp;

    // temporary buffer for partial block transfers
    // NOTE(review): cache-line aligned, presumably so drivers can DMA into it — confirm
    temp = memalign(CACHE_LINE, dev->block_size);
    if (temp == NULL)
        return ERR_NO_MEMORY;

    /* find the starting block */
    block = offset / dev->block_size;

    LTRACEF("buf %p, offset %lld, block %u, len %zd\n", buf, offset, block, len);
    /* handle partial first block: read the whole block into temp, then copy
     * out just the tail the caller asked for */
    if ((offset % dev->block_size) != 0) {
        /* read in the block */
        err = bio_read_block(dev, temp, block, 1);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size) {
            /* a short read of a full block is treated as an I/O error */
            err = ERR_IO;
            goto err;
        }

        /* copy what we need */
        size_t block_offset = offset % dev->block_size;
        size_t tocopy = MIN(dev->block_size - block_offset, len);
        memcpy(buf, temp + block_offset, tocopy);

        /* increment our buffers */
        buf += tocopy;
        len -= tocopy;
        bytes_read += tocopy;
        block++;
    }

    LTRACEF("buf %p, block %u, len %zd\n", buf, block, len);

    // If the device requires alignment AND our buffer is not already aligned.
    bool requires_alignment =
        (dev->flags & BIO_FLAG_CACHE_ALIGNED_READS) &&
        (IS_ALIGNED((size_t)buf, CACHE_LINE) == false);
    /* handle middle blocks */
    if (requires_alignment) {
        /* caller's buffer is unaligned: bounce each whole block through the
         * aligned temp buffer, one block at a time */
        while (len >= dev->block_size) {
            /* do the middle reads */
            err = bio_read_block(dev, temp, block, 1);
            if (err < 0) {
                goto err;
            } else if ((size_t)err != dev->block_size) {
                err = ERR_IO;
                goto err;
            }
            memcpy(buf, temp, dev->block_size);

            buf += dev->block_size;
            len -= dev->block_size;
            bytes_read += dev->block_size;
            block++;
        }
    } else {
        /* fast path: read all remaining whole blocks straight into the
         * caller's buffer in a single call (num_blocks may be 0 if less
         * than a block remains; the size check below still holds) */
        uint32_t num_blocks = divpow2(len, dev->block_shift);
        err = bio_read_block(dev, buf, block, num_blocks);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size * num_blocks) {
            err = ERR_IO;
            goto err;
        }
        buf += err;
        len -= err;
        bytes_read += err;
        block += num_blocks;
    }

    LTRACEF("buf %p, block %u, len %zd\n", buf, block, len);
    /* handle partial last block: read it fully, copy out only the head */
    if (len > 0) {
        /* read the block */
        err = bio_read_block(dev, temp, block, 1);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size) {
            err = ERR_IO;
            goto err;
        }

        /* copy the partial block from our temp buffer */
        memcpy(buf, temp, len);

        bytes_read += len;
    }

/* shared cleanup; note the 'err' label and the 'err' variable share a name —
 * legal in C since labels live in a separate namespace */
err:
    free(temp);

    /* return error or bytes read */
    return (err >= 0) ? bytes_read : err;
}
149
/* default byte-granularity write built on the block hooks: partial first and
 * last blocks are handled as read-modify-write cycles through an aligned temp
 * buffer; whole middle blocks are written directly (or bounced through temp
 * when the device requires cache-aligned buffers).
 * Returns bytes written on success or a negative error code. */
static ssize_t bio_default_write(struct bdev *dev, const void *_buf, off_t offset, size_t len)
{
    const uint8_t *buf = (const uint8_t *)_buf;
    ssize_t bytes_written = 0;
    bnum_t block;
    ssize_t err = 0;
    uint8_t *temp;

    // temporary buffer for partial block transfers
    // NOTE(review): cache-line aligned, presumably for DMA-capable drivers — confirm
    temp = memalign(CACHE_LINE, dev->block_size);
    if (temp == NULL)
        return ERR_NO_MEMORY;

    /* find the starting block */
    block = offset / dev->block_size;

    LTRACEF("buf %p, offset %lld, block %u, len %zd\n", buf, offset, block, len);
    /* handle partial first block: read-modify-write so the bytes outside the
     * requested range are preserved */
    if ((offset % dev->block_size) != 0) {
        /* read in the block */
        err = bio_read_block(dev, temp, block, 1);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size) {
            err = ERR_IO;
            goto err;
        }

        /* copy what we need */
        size_t block_offset = offset % dev->block_size;
        size_t tocopy = MIN(dev->block_size - block_offset, len);
        memcpy(temp + block_offset, buf, tocopy);

        /* write it back out */
        err = bio_write_block(dev, temp, block, 1);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size) {
            err = ERR_IO;
            goto err;
        }

        /* increment our buffers */
        buf += tocopy;
        len -= tocopy;
        bytes_written += tocopy;
        block++;
    }

    LTRACEF("buf %p, block %u, len %zd\n", buf, block, len);

    // If the device requires alignment AND our buffer is not already aligned.
    bool requires_alignment =
        (dev->flags & BIO_FLAG_CACHE_ALIGNED_WRITES) &&
        (IS_ALIGNED((size_t)buf, CACHE_LINE) == false);

    /* handle middle blocks */
    if (requires_alignment) {
        /* caller's buffer is unaligned: stage each block through temp */
        while (len >= dev->block_size) {
            /* do the middle writes */
            memcpy(temp, buf, dev->block_size);
            err = bio_write_block(dev, temp, block, 1);
            if (err < 0) {
                goto err;
            } else if ((size_t)err != dev->block_size) {
                err = ERR_IO;
                goto err;
            }

            buf += dev->block_size;
            len -= dev->block_size;
            bytes_written += dev->block_size;
            block++;
        }
    } else {
        /* fast path: write all remaining whole blocks in one call */
        uint32_t block_count = divpow2(len, dev->block_shift);
        err = bio_write_block(dev, buf, block, block_count);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size * block_count) {
            err = ERR_IO;
            goto err;
        }

        /* redundant with the check above; kept as a debug-build belt-and-braces */
        DEBUG_ASSERT((size_t)err == (block_count * dev->block_size));

        buf += err;
        len -= err;
        bytes_written += err;
        block += block_count;
    }

    LTRACEF("buf %p, block %u, len %zd\n", buf, block, len);
    /* handle partial last block: another read-modify-write cycle */
    if (len > 0) {
        /* read the block */
        err = bio_read_block(dev, temp, block, 1);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size) {
            err = ERR_IO;
            goto err;
        }

        /* copy the partial block from our temp buffer */
        memcpy(temp, buf, len);

        /* write it back out */
        err = bio_write_block(dev, temp, block, 1);
        if (err < 0) {
            goto err;
        } else if ((size_t)err != dev->block_size) {
            err = ERR_IO;
            goto err;
        }

        bytes_written += len;
    }

/* shared cleanup path ('err' label and variable legally share a name) */
err:
    free(temp);

    /* return error or bytes written */
    return (err >= 0) ? bytes_written : err;
}
275
276static ssize_t bio_default_erase(struct bdev *dev, off_t offset, size_t len)
277{
278 /* default erase operation is to just write zeros over the device */
279 uint8_t *erase_buf;
280
281 erase_buf = memalign(CACHE_LINE, dev->block_size);
282 if (erase_buf == NULL)
283 return ERR_NO_MEMORY;
284
285 memset(erase_buf, dev->erase_byte, dev->block_size);
286
287 ssize_t erased = 0;
288 size_t remaining = len;
289 off_t pos = offset;
290 while (remaining > 0) {
291 size_t towrite = MIN(remaining, dev->block_size);
292
293 ssize_t written = bio_write(dev, erase_buf, pos, towrite);
294 if (written < 0) {
295 free(erase_buf);
296 return written;
297 }
298
299 erased += written;
300 pos += written;
301 remaining -= written;
302
303 if ((size_t)written < towrite)
304 break;
305 }
306
307 free(erase_buf);
308 return erased;
309}
310
311static ssize_t bio_default_read_block(struct bdev *dev, void *buf, bnum_t block, uint count)
312{
313 return ERR_NOT_SUPPORTED;
314}
315
316static ssize_t bio_default_write_block(struct bdev *dev, const void *buf, bnum_t block, uint count)
317{
318 return ERR_NOT_SUPPORTED;
319}
320
/* Take a reference on a device. Atomic when the kernel VM (SMP) build is in
 * effect; plain increment otherwise.
 * NOTE(review): the trace reads dev->ref without the atomic, so the logged
 * before/after values may be stale under concurrency — trace-only cosmetic. */
static void bdev_inc_ref(bdev_t *dev)
{
    LTRACEF("Add ref \"%s\" %d -> %d\n", dev->name, dev->ref, dev->ref + 1);

#if WITH_KERNEL_VM
    atomic_add(&dev->ref, 1);
#else
    dev->ref++;
#endif

}
332
/* Drop a reference on a device; when the last reference is released, run the
 * driver's close hook and free the duplicated name string.
 * NOTE(review): only dev->name is freed here, not dev itself — presumably the
 * bdev_t is owned/embedded by the driver; confirm before changing. */
static void bdev_dec_ref(bdev_t *dev)
{
#if WITH_KERNEL_VM
    /* atomic_add returns the pre-decrement value */
    int oldval = atomic_add(&dev->ref, -1);
#else
    int oldval = dev->ref--;
#endif

    LTRACEF("Dec ref \"%s\" %d -> %d\n", dev->name, oldval, dev->ref);

    if (oldval == 1) {
        // last ref, remove it
        DEBUG_ASSERT(!list_in_list(&dev->node));

        TRACEF("last ref, removing (%s)\n", dev->name);

        // call the close hook if it exists
        if (dev->close)
            dev->close(dev);

        free(dev->name);
    }
}
356
357size_t bio_trim_range(const bdev_t *dev, off_t offset, size_t len)
358{
359 /* range check */
360 if (offset < 0)
361 return 0;
362 if (offset >= dev->total_size)
363 return 0;
364 if (len == 0)
365 return 0;
366 if ((off_t)(offset + len) > dev->total_size)
367 len = dev->total_size - offset;
368
369 return len;
370}
371
372uint bio_trim_block_range(const bdev_t *dev, bnum_t block, uint count)
373{
374 if (block > dev->block_count)
375 return 0;
376 if (count == 0)
377 return 0;
378 if (block + count > dev->block_count)
379 count = dev->block_count - block;
380
381 return count;
382}
383
384bdev_t *bio_open(const char *name)
385{
386 bdev_t *bdev = NULL;
387
388 LTRACEF(" '%s'\n", name);
389
390 /* see if it's in our list */
391 bdev_t *entry;
392 mutex_acquire(&bdevs.lock);
393 list_for_every_entry(&bdevs.list, entry, bdev_t, node) {
394 DEBUG_ASSERT(entry->ref > 0);
395 if (!strcmp(entry->name, name)) {
396 bdev = entry;
397 bdev_inc_ref(bdev);
398 break;
399 }
400 }
401 mutex_release(&bdevs.lock);
402
403 return bdev;
404}
405
406void bio_close(bdev_t *dev)
407{
408 DEBUG_ASSERT(dev);
409 LTRACEF(" '%s'\n", dev->name);
410 bdev_dec_ref(dev);
411}
412
413bdev_t *bio_open_by_label(const char *label)
414{
415 bdev_t *bdev = NULL;
416
417 /* see if it's in our list */
418 bdev_t *entry;
419 mutex_acquire(&bdevs.lock);
420 list_for_every_entry(&bdevs.list, entry, bdev_t, node) {
421 DEBUG_ASSERT(entry->ref > 0);
422 if (entry->label && !strcmp(entry->label, label)) {
423 bdev = entry;
424 bdev_inc_ref(bdev);
425 break;
426 }
427 }
428 mutex_release(&bdevs.lock);
429
430 return bdev;
431}
432
433ssize_t bio_read(bdev_t *dev, void *buf, off_t offset, size_t len)
434{
435 LTRACEF("dev '%s', buf %p, offset %lld, len %zd\n", dev->name, buf, offset, len);
436
437 DEBUG_ASSERT(dev && dev->ref > 0);
438 DEBUG_ASSERT(buf);
439
440 /* range check */
441 len = bio_trim_range(dev, offset, len);
442 if (len == 0)
443 return 0;
444
445 return dev->read(dev, buf, offset, len);
446}
447
448ssize_t bio_read_block(bdev_t *dev, void *buf, bnum_t block, uint count)
449{
450 LTRACEF("dev '%s', buf %p, block %d, count %u\n", dev->name, buf, block, count);
451
452 DEBUG_ASSERT(dev && dev->ref > 0);
453 DEBUG_ASSERT(buf);
454
455 /* range check */
456 count = bio_trim_block_range(dev, block, count);
457 if (count == 0)
458 return 0;
459
460 return dev->read_block(dev, buf, block, count);
461}
462
463ssize_t bio_write(bdev_t *dev, const void *buf, off_t offset, size_t len)
464{
465 LTRACEF("dev '%s', buf %p, offset %lld, len %zd\n", dev->name, buf, offset, len);
466
467 DEBUG_ASSERT(dev && dev->ref > 0);
468 DEBUG_ASSERT(buf);
469
470 /* range check */
471 len = bio_trim_range(dev, offset, len);
472 if (len == 0)
473 return 0;
474
475 return dev->write(dev, buf, offset, len);
476}
477
478ssize_t bio_write_block(bdev_t *dev, const void *buf, bnum_t block, uint count)
479{
480 LTRACEF("dev '%s', buf %p, block %d, count %u\n", dev->name, buf, block, count);
481
482 DEBUG_ASSERT(dev && dev->ref > 0);
483 DEBUG_ASSERT(buf);
484
485 /* range check */
486 count = bio_trim_block_range(dev, block, count);
487 if (count == 0)
488 return 0;
489
490 return dev->write_block(dev, buf, block, count);
491}
492
493ssize_t bio_erase(bdev_t *dev, off_t offset, size_t len)
494{
495 LTRACEF("dev '%s', offset %lld, len %zd\n", dev->name, offset, len);
496
497 DEBUG_ASSERT(dev && dev->ref > 0);
498
499 /* range check */
500 len = bio_trim_range(dev, offset, len);
501 if (len == 0)
502 return 0;
503
504 return dev->erase(dev, offset, len);
505}
506
507int bio_ioctl(bdev_t *dev, int request, void *argp)
508{
509 LTRACEF("dev '%s', request %08x, argp %p\n", dev->name, request, argp);
510
511 if (dev->ioctl == NULL) {
512 return ERR_NOT_SUPPORTED;
513 } else {
514 return dev->ioctl(dev, request, argp);
515 }
516}
517
518void bio_initialize_bdev(bdev_t *dev,
519 const char *name,
520 size_t block_size,
521 bnum_t block_count,
522 size_t geometry_count,
523 const bio_erase_geometry_info_t* geometry,
524 const uint32_t flags)
525{
526 DEBUG_ASSERT(dev);
527 DEBUG_ASSERT(name);
528
529 // Block size must be finite powers of 2
530 DEBUG_ASSERT(block_size && ispow2(block_size));
531
532 list_clear_node(&dev->node);
533 dev->name = strdup(name);
534 dev->block_size = block_size;
535 dev->block_count = block_count;
536 dev->block_shift = log2_uint(block_size);
537 dev->total_size = (off_t)block_count << dev->block_shift;
538 dev->geometry_count = geometry_count;
539 dev->geometry = geometry;
540 dev->erase_byte = 0;
541 dev->ref = 0;
542 dev->label = NULL;
543 dev->is_gpt = false;
544 dev->flags = flags;
545
546#if DEBUG
547 // If we have been supplied information about our erase geometry, sanity
548 // check it in debug bulids.
549 if (geometry_count && geometry) {
550 for (size_t i = 0; i < geometry_count; ++i) {
551 bio_erase_geometry_info_t* info = geometry + i;
552
553 // Erase sizes must be powers of two and agree with the supplied erase shift.
554 DEBUG_ASSERT(info->erase_size);
555 DEBUG_ASSERT(info->erase_size == ((size_t)1 << info->erase_shift));
556
557 info->start = desc->start;
558 info->erase_size = desc->erase_size;
559 info->erase_shift = log2_uint(desc->erase_size);
560 info->size = ((off_t)desc->block_count) << desc->block_size;
561
562 // Make sure that region is aligned on both a program and erase block boundary.
563 DEBUG_ASSERT(!(info->start & (((off_t)1 << info->block_shift) - 1)));
564 DEBUG_ASSERT(!(info->start & (((off_t)1 << info->erase_shift) - 1)));
565
566 // Make sure that region's length is an integral multiple of both the
567 // program and erase block size.
568 DEBUG_ASSERT(!(info->size & (((off_t)1 << dev->block_shift) - 1)));
569 DEBUG_ASSERT(!(info->size & (((off_t)1 << info->erase_shift) - 1)));
570 }
571
572 // Make sure that none of the regions overlap each other and that they are
573 // listed in ascending order.
574 for (size_t i = 0; (i + 1) < geometry_count; ++i) {
575 bio_geometry_info_t* r1 = dev->geometry + i;
576 bio_geometry_info_t* r2 = dev->geometry + i + 1;
577 DEBUG_ASSERT(r1->start <= r2->start);
578
579 for (size_t j = (i + 1); j < geometry_count; ++j) {
580 bio_geometry_info_t* r2 = dev->geometry + j;
581 DEBUG_ASSERT(!bio_does_overlap(r1->start, r1->size, r2->start, r2->size));
582 }
583 }
584 }
585#endif
586
587 /* set up the default hooks, the sub driver should override the block operations at least */
588 dev->read = bio_default_read;
589 dev->read_block = bio_default_read_block;
590 dev->write = bio_default_write;
591 dev->write_block = bio_default_write_block;
592 dev->erase = bio_default_erase;
593 dev->close = NULL;
594}
595
596void bio_register_device(bdev_t *dev)
597{
598 DEBUG_ASSERT(dev);
599
600 LTRACEF(" '%s'\n", dev->name);
601
602 bdev_inc_ref(dev);
603
604 mutex_acquire(&bdevs.lock);
605 list_add_tail(&bdevs.list, &dev->node);
606 mutex_release(&bdevs.lock);
607}
608
609void bio_unregister_device(bdev_t *dev)
610{
611 DEBUG_ASSERT(dev);
612
613 LTRACEF(" '%s'\n", dev->name);
614
615 // remove it from the list
616 mutex_acquire(&bdevs.lock);
617 list_delete(&dev->node);
618 mutex_release(&bdevs.lock);
619
620 bdev_dec_ref(dev); // remove the ref the list used to have
621}
622
623void bio_dump_devices(void)
624{
625 printf("block devices:\n");
626 bdev_t *entry;
627 mutex_acquire(&bdevs.lock);
628 list_for_every_entry(&bdevs.list, entry, bdev_t, node) {
629
630 printf("\t%s, size %lld, bsize %zd, ref %d, label %s",
631 entry->name, entry->total_size, entry->block_size, entry->ref, entry->label);
632
633 if (!entry->geometry_count || !entry->geometry) {
634 printf(" (no erase geometry)\n");
635 } else {
636 for (size_t i = 0; i < entry->geometry_count; ++i) {
637 const bio_erase_geometry_info_t* geo = entry->geometry + i;
638 printf("\n\t\terase_region[%zu] : start %lld size %lld erase size %zu",
639 i, geo->start, geo->size, geo->erase_size);
640
641 }
642 }
643
644 printf("\n");
645 }
646 mutex_release(&bdevs.lock);
647}