| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | Devres - Managed Device Resource | 
 | 2 | ================================ | 
 | 3 |  | 
 | 4 | Tejun Heo	<teheo@suse.de> | 
 | 5 |  | 
 | 6 | First draft	10 January 2007 | 
 | 7 |  | 
 | 8 |  | 
 | 9 | 1. Intro			: Huh? Devres? | 
 | 10 | 2. Devres			: Devres in a nutshell | 
 | 11 | 3. Devres Group			: Group devres'es and release them together | 
 | 12 | 4. Details			: Life time rules, calling context, ... | 
 | 13 | 5. Overhead			: How much do we have to pay for this? | 
 | 14 | 6. List of managed interfaces	: Currently implemented managed interfaces | 
 | 15 |  | 
 | 16 |  | 
 | 17 |   1. Intro | 
 | 18 |   -------- | 
 | 19 |  | 
 | 20 | devres came up while trying to convert libata to use iomap.  Each | 
 | 21 | iomapped address should be kept and unmapped on driver detach.  For | 
 | 22 | example, a plain SFF ATA controller (that is, good old PCI IDE) in | 
 | 23 | native mode makes use of 5 PCI BARs and all of them should be | 
 | 24 | maintained. | 
 | 25 |  | 
 | 26 | As with many other device drivers, libata low level drivers have | 
 | 27 | sufficient bugs in ->remove and ->probe failure paths.  Well, yes, | 
 | 28 | that's probably because libata low level driver developers are a lazy | 
 | 29 | bunch, but aren't all low level driver developers?  After spending a | 
 | 30 | day fiddling with braindamaged hardware with no documentation or | 
 | 31 | braindamaged documentation, if it's finally working, well, it's working. | 
 | 32 |  | 
 | 33 | For one reason or another, low level drivers don't receive as much | 
 | 34 | attention or testing as core code, and bugs on driver detach or | 
 | 35 | initialization failure don't happen often enough to be noticeable. | 
 | 36 | The init failure path is worse because it's much less travelled while | 
 | 37 | it still needs to handle multiple entry points. | 
 | 38 |  | 
 | 39 | So, many low level drivers end up leaking resources on driver detach | 
 | 40 | and having half broken failure path implementation in ->probe() which | 
 | 41 | would leak resources or even cause oops when failure occurs.  iomap | 
 | 42 | adds more to this mix.  So do msi and msix. | 
 | 43 |  | 
 | 44 |  | 
 | 45 |   2. Devres | 
 | 46 |   --------- | 
 | 47 |  | 
 | 48 | devres is basically a linked list of arbitrarily sized memory areas | 
 | 49 | associated with a struct device.  Each devres entry is associated with | 
 | 50 | a release function.  A devres can be released in several ways.  No | 
 | 51 | matter what, all devres entries are released on driver detach.  On | 
 | 52 | release, the associated release function is invoked and then the | 
 | 53 | devres entry is freed. | 
 | 54 |  | 
 | 55 | Managed interfaces are created for resources commonly used by device | 
 | 56 | drivers using devres.  For example, coherent DMA memory is acquired | 
 | 57 | using dma_alloc_coherent().  The managed version is called | 
 | 58 | dmam_alloc_coherent().  It is identical to dma_alloc_coherent() except | 
 | 59 | that the DMA memory allocated using it is managed and will be | 
 | 60 | automatically released on driver detach.  Implementation looks like | 
 | 61 | the following. | 
 | 62 |  | 
 | 63 |   struct dma_devres { | 
 | 64 | 	size_t		size; | 
 | 65 | 	void		*vaddr; | 
 | 66 | 	dma_addr_t	dma_handle; | 
 | 67 |   }; | 
 | 68 |  | 
 | 69 |   static void dmam_coherent_release(struct device *dev, void *res) | 
 | 70 |   { | 
 | 71 | 	struct dma_devres *this = res; | 
 | 72 |  | 
 | 73 | 	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle); | 
 | 74 |   } | 
 | 75 |  | 
 | 76 |   dmam_alloc_coherent(dev, size, dma_handle, gfp) | 
 | 77 |   { | 
 | 78 | 	struct dma_devres *dr; | 
 | 79 | 	void *vaddr; | 
 | 80 |  | 
 | 81 | 	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp); | 
 | 82 | 	... | 
 | 83 |  | 
 | 84 | 	/* alloc DMA memory as usual */ | 
 | 85 | 	vaddr = dma_alloc_coherent(...); | 
 | 86 | 	... | 
 | 87 |  | 
 | 88 | 	/* record size, vaddr, dma_handle in dr */ | 
 | 89 | 	dr->vaddr = vaddr; | 
 | 90 | 	... | 
 | 91 |  | 
 | 92 | 	devres_add(dev, dr); | 
 | 93 |  | 
 | 94 | 	return vaddr; | 
 | 95 |   } | 
 | 96 |  | 
 | 97 | If a driver uses dmam_alloc_coherent(), the area is guaranteed to be | 
 | 98 | freed whether initialization fails half-way or the device gets | 
 | 99 | detached.  If most resources are acquired using managed interfaces, a | 
 | 100 | driver can have much simpler init and exit code.  Init path basically | 
 | 101 | looks like the following. | 
 | 102 |  | 
 | 103 |   my_init_one() | 
 | 104 |   { | 
 | 105 | 	struct mydev *d; | 
 | 106 |  | 
 | 107 | 	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); | 
 | 108 | 	if (!d) | 
 | 109 | 		return -ENOMEM; | 
 | 110 |  | 
 | 111 | 	d->ring = dmam_alloc_coherent(...); | 
 | 112 | 	if (!d->ring) | 
 | 113 | 		return -ENOMEM; | 
 | 114 |  | 
 | 115 | 	if (check something) | 
 | 116 | 		return -EINVAL; | 
 | 117 | 	... | 
 | 118 |  | 
 | 119 | 	return register_to_upper_layer(d); | 
 | 120 |   } | 
 | 121 |  | 
 | 122 | And exit path, | 
 | 123 |  | 
 | 124 |   my_remove_one() | 
 | 125 |   { | 
 | 126 | 	unregister_from_upper_layer(d); | 
 | 127 | 	shutdown_my_hardware(); | 
 | 128 |   } | 
 | 129 |  | 
 | 130 | As shown above, low level drivers can be simplified a lot by using | 
 | 131 | devres.  Complexity is shifted from less maintained low level drivers | 
 | 132 | to the better maintained higher layer.  Also, as the init failure path is | 
 | 133 | shared with the exit path, both can get more testing. | 
 | 134 |  | 
 | 135 |  | 
 | 136 |   3. Devres group | 
 | 137 |   --------------- | 
 | 138 |  | 
 | 139 | Devres entries can be grouped using devres group.  When a group is | 
 | 140 | released, all contained normal devres entries and properly nested | 
 | 141 | groups are released.  One usage is to roll back a series of acquired | 
 | 142 | resources on failure.  For example, | 
 | 143 |  | 
 | 144 |   if (!devres_open_group(dev, NULL, GFP_KERNEL)) | 
 | 145 | 	return -ENOMEM; | 
 | 146 |  | 
 | 147 |   acquire A; | 
 | 148 |   if (failed) | 
 | 149 | 	goto err; | 
 | 150 |  | 
 | 151 |   acquire B; | 
 | 152 |   if (failed) | 
 | 153 | 	goto err; | 
 | 154 |   ... | 
 | 155 |  | 
 | 156 |   devres_remove_group(dev, NULL); | 
 | 157 |   return 0; | 
 | 158 |  | 
 | 159 |  err: | 
 | 160 |   devres_release_group(dev, NULL); | 
 | 161 |   return err_code; | 
 | 162 |  | 
 | 163 | As resource acquisition failure usually means probe failure, constructs | 
 | 164 | like the above are usually useful in midlayer drivers (e.g. libata core | 
 | 165 | layer) where an interface function shouldn't have side effects on failure. | 
 | 166 | For LLDs, just returning error code suffices in most cases. | 
 | 167 |  | 
 | 168 | Each group is identified by void *id.  It can either be explicitly | 
 | 169 | specified by @id argument to devres_open_group() or automatically | 
 | 170 | created by passing NULL as @id as in the above example.  In both | 
 | 171 | cases, devres_open_group() returns the group's id.  The returned id | 
 | 172 | can be passed to other devres functions to select the target group. | 
 | 173 | If NULL is given to those functions, the latest open group is | 
 | 174 | selected. | 
 | 175 |  | 
 | 176 | For example, you can do something like the following. | 
 | 177 |  | 
 | 178 |   int my_midlayer_create_something() | 
 | 179 |   { | 
 | 180 | 	if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL)) | 
 | 181 | 		return -ENOMEM; | 
 | 182 |  | 
 | 183 | 	... | 
 | 184 |  | 
 | 185 | 	devres_close_group(dev, my_midlayer_create_something); | 
 | 186 | 	return 0; | 
 | 187 |   } | 
 | 188 |  | 
 | 189 |   void my_midlayer_destroy_something() | 
 | 190 |   { | 
 | 191 | 	devres_release_group(dev, my_midlayer_create_something); | 
 | 192 |   } | 
 | 193 |  | 
 | 194 |  | 
 | 195 |   4. Details | 
 | 196 |   ---------- | 
 | 197 |  | 
 | 198 | Lifetime of a devres entry begins on devres allocation and finishes | 
 | 199 | when it is released or destroyed (removed and freed) - no reference | 
 | 200 | counting. | 
 | 201 |  | 
 | 202 | devres core guarantees atomicity to all basic devres operations and | 
 | 203 | has support for single-instance devres types (atomic | 
 | 204 | lookup-and-add-if-not-found).  Other than that, synchronizing | 
 | 205 | concurrent accesses to allocated devres data is caller's | 
 | 206 | responsibility.  This is usually a non-issue because bus ops and | 
 | 207 | resource allocations already do the job. | 
 | 208 |  | 
 | 209 | For an example of single-instance devres type, read pcim_iomap_table() | 
 | 210 | in lib/devres.c. | 
 | 211 |  | 
 | 212 | All devres interface functions can be called without context if the | 
 | 213 | right gfp mask is given. | 
 | 214 |  | 
 | 215 |  | 
 | 216 |   5. Overhead | 
 | 217 |   ----------- | 
 | 218 |  | 
 | 219 | Each devres's bookkeeping info is allocated together with the requested | 
 | 220 | data area.  With the debug option turned off, bookkeeping info occupies 16 | 
 | 221 | bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded | 
 | 222 | up to ull alignment).  If a singly linked list is used, it can be | 
 | 223 | reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit). | 
 | 224 |  | 
 | 225 | Each devres group occupies 8 pointers.  It can be reduced to 6 if | 
 | 226 | a singly linked list is used. | 
 | 227 |  | 
 | 228 | Memory space overhead on an ahci controller with two ports is between 300 | 
 | 229 | and 400 bytes on a 32bit machine after naive conversion (we can | 
 | 230 | certainly invest a bit more effort into libata core layer). | 
 | 231 |  | 
 | 232 |  | 
 | 233 |   6. List of managed interfaces | 
 | 234 |   ----------------------------- | 
 | 235 |  | 
 | 236 | CLOCK | 
 | 237 |   devm_clk_get() | 
 | 238 |   devm_clk_get_optional() | 
 | 239 |   devm_clk_put() | 
 | 240 |   devm_clk_hw_register() | 
 | 241 |   devm_of_clk_add_hw_provider() | 
 | 242 |  | 
 | 243 | DMA | 
 | 244 |   dmaenginem_async_device_register() | 
 | 245 |   dmam_alloc_coherent() | 
 | 246 |   dmam_alloc_attrs() | 
 | 247 |   dmam_declare_coherent_memory() | 
 | 248 |   dmam_free_coherent() | 
 | 249 |   dmam_pool_create() | 
 | 250 |   dmam_pool_destroy() | 
 | 251 |  | 
 | 252 | GPIO | 
 | 253 |   devm_gpiod_get() | 
 | 254 |   devm_gpiod_get_index() | 
 | 255 |   devm_gpiod_get_index_optional() | 
 | 256 |   devm_gpiod_get_optional() | 
 | 257 |   devm_gpiod_put() | 
 | 258 |   devm_gpiochip_add_data() | 
 | 259 |   devm_gpiochip_remove() | 
 | 260 |   devm_gpio_request() | 
 | 261 |   devm_gpio_request_one() | 
 | 262 |   devm_gpio_free() | 
 | 263 |  | 
 | 264 | IIO | 
 | 265 |   devm_iio_device_alloc() | 
 | 266 |   devm_iio_device_free() | 
 | 267 |   devm_iio_device_register() | 
 | 268 |   devm_iio_device_unregister() | 
 | 269 |   devm_iio_kfifo_allocate() | 
 | 270 |   devm_iio_kfifo_free() | 
 | 271 |   devm_iio_triggered_buffer_setup() | 
 | 272 |   devm_iio_triggered_buffer_cleanup() | 
 | 273 |   devm_iio_trigger_alloc() | 
 | 274 |   devm_iio_trigger_free() | 
 | 275 |   devm_iio_trigger_register() | 
 | 276 |   devm_iio_trigger_unregister() | 
 | 277 |   devm_iio_channel_get() | 
 | 278 |   devm_iio_channel_release() | 
 | 279 |   devm_iio_channel_get_all() | 
 | 280 |   devm_iio_channel_release_all() | 
 | 281 |  | 
 | 282 | INPUT | 
 | 283 |   devm_input_allocate_device() | 
 | 284 |  | 
 | 285 | IO region | 
 | 286 |   devm_release_mem_region() | 
 | 287 |   devm_release_region() | 
 | 288 |   devm_release_resource() | 
 | 289 |   devm_request_mem_region() | 
 | 290 |   devm_request_region() | 
 | 291 |   devm_request_resource() | 
 | 292 |  | 
 | 293 | IOMAP | 
 | 294 |   devm_ioport_map() | 
 | 295 |   devm_ioport_unmap() | 
 | 296 |   devm_ioremap() | 
 | 297 |   devm_ioremap_nocache() | 
 | 298 |   devm_ioremap_wc() | 
 | 299 |   devm_ioremap_resource() : checks resource, requests memory region, ioremaps | 
 | 300 |   devm_iounmap() | 
 | 301 |   pcim_iomap() | 
 | 302 |   pcim_iomap_regions()	: do request_region() and iomap() on multiple BARs | 
 | 303 |   pcim_iomap_table()	: array of mapped addresses indexed by BAR | 
 | 304 |   pcim_iounmap() | 
 | 305 |  | 
 | 306 | IRQ | 
 | 307 |   devm_free_irq() | 
 | 308 |   devm_request_any_context_irq() | 
 | 309 |   devm_request_irq() | 
 | 310 |   devm_request_threaded_irq() | 
 | 311 |   devm_irq_alloc_descs() | 
 | 312 |   devm_irq_alloc_desc() | 
 | 313 |   devm_irq_alloc_desc_at() | 
 | 314 |   devm_irq_alloc_desc_from() | 
 | 315 |   devm_irq_alloc_descs_from() | 
 | 316 |   devm_irq_alloc_generic_chip() | 
 | 317 |   devm_irq_setup_generic_chip() | 
 | 318 |   devm_irq_sim_init() | 
 | 319 |  | 
 | 320 | LED | 
 | 321 |   devm_led_classdev_register() | 
 | 322 |   devm_led_classdev_unregister() | 
 | 323 |  | 
 | 324 | MDIO | 
 | 325 |   devm_mdiobus_alloc() | 
 | 326 |   devm_mdiobus_alloc_size() | 
 | 327 |   devm_mdiobus_free() | 
 | 328 |  | 
 | 329 | MEM | 
 | 330 |   devm_free_pages() | 
 | 331 |   devm_get_free_pages() | 
 | 332 |   devm_kasprintf() | 
 | 333 |   devm_kcalloc() | 
 | 334 |   devm_kfree() | 
 | 335 |   devm_kmalloc() | 
 | 336 |   devm_kmalloc_array() | 
 | 337 |   devm_kmemdup() | 
 | 338 |   devm_kstrdup() | 
 | 339 |   devm_kvasprintf() | 
 | 340 |   devm_kzalloc() | 
 | 341 |  | 
 | 342 | MFD | 
 | 343 |   devm_mfd_add_devices() | 
 | 344 |  | 
 | 345 | MUX | 
 | 346 |   devm_mux_chip_alloc() | 
 | 347 |   devm_mux_chip_register() | 
 | 348 |   devm_mux_control_get() | 
 | 349 |  | 
 | 350 | PER-CPU MEM | 
 | 351 |   devm_alloc_percpu() | 
 | 352 |   devm_free_percpu() | 
 | 353 |  | 
 | 354 | PCI | 
 | 355 |   devm_pci_alloc_host_bridge()  : managed PCI host bridge allocation | 
 | 356 |   devm_pci_remap_cfgspace()	: ioremap PCI configuration space | 
 | 357 |   devm_pci_remap_cfg_resource()	: ioremap PCI configuration space resource | 
 | 358 |   pcim_enable_device()		: after success, all PCI ops become managed | 
 | 359 |   pcim_pin_device()		: keep PCI device enabled after release | 
 | 360 |  | 
 | 361 | PHY | 
 | 362 |   devm_usb_get_phy() | 
 | 363 |   devm_usb_put_phy() | 
 | 364 |  | 
 | 365 | PINCTRL | 
 | 366 |   devm_pinctrl_get() | 
 | 367 |   devm_pinctrl_put() | 
 | 368 |   devm_pinctrl_register() | 
 | 369 |   devm_pinctrl_unregister() | 
 | 370 |  | 
 | 371 | POWER | 
 | 372 |   devm_reboot_mode_register() | 
 | 373 |   devm_reboot_mode_unregister() | 
 | 374 |  | 
 | 375 | PWM | 
 | 376 |   devm_pwm_get() | 
 | 377 |   devm_pwm_put() | 
 | 378 |  | 
 | 379 | REGULATOR | 
 | 380 |   devm_regulator_bulk_get() | 
 | 381 |   devm_regulator_get() | 
 | 382 |   devm_regulator_put() | 
 | 383 |   devm_regulator_register() | 
 | 384 |  | 
 | 385 | RESET | 
 | 386 |   devm_reset_control_get() | 
 | 387 |   devm_reset_controller_register() | 
 | 388 |  | 
 | 389 | SERDEV | 
 | 390 |   devm_serdev_device_open() | 
 | 391 |  | 
 | 392 | SLAVE DMA ENGINE | 
 | 393 |   devm_acpi_dma_controller_register() | 
 | 394 |  | 
 | 395 | SPI | 
 | 396 |   devm_spi_register_master() | 
 | 397 |  | 
 | 398 | WATCHDOG | 
 | 399 |   devm_watchdog_register_device() |