/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * DMA Coherent API Notes
 *
 * I/O is inherently non-coherent on ARC, so a coherent DMA buffer is
 * implemented by accessing it via a kernel virtual address whose TLB
 * entry has the Cache bit turned off.
 *
 * The default DMA address == physical address, which is 0x8000_0000 based.
 */

#include <linux/dma-mapping.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>


static void *arc_dma_alloc(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	unsigned long order = get_order(size);
	struct page *page;
	phys_addr_t paddr;
	void *kvaddr;
	int need_coh = 1, need_kvaddr = 0;

	page = alloc_pages(gfp, order);
	if (!page)
		return NULL;

	/*
	 * IOC relies on all data (even coherent DMA data) being in cache,
	 * thus allocate normal cached memory.
	 *
	 * The gains with IOC are two pronged:
	 *   -For streaming data, elides the need for cache maintenance,
	 *    saving the cycles in flush code and the bus bandwidth otherwise
	 *    spent flushing all the lines of a buffer out to memory
	 *   -For coherent data, reads/writes to buffers terminate early in
	 *    cache (vs. always going to memory), thus are faster
	 */
	if ((is_isa_arcv2() && ioc_enable) ||
	    (attrs & DMA_ATTR_NON_CONSISTENT))
		need_coh = 0;

	/*
	 * - A coherent buffer needs an MMU mapping to enforce non-cacheability
	 * - A highmem page needs a virtual handle (hence an MMU mapping),
	 *   independent of cacheability
	 */
	if (PageHighMem(page) || need_coh)
		need_kvaddr = 1;

	/* This is the linear address (0x8000_0000 based) */
	paddr = page_to_phys(page);

	*dma_handle = plat_phys_to_dma(dev, paddr);

	/* This is the kernel virtual address (0x7000_0000 based) */
	if (need_kvaddr) {
		kvaddr = ioremap_nocache(paddr, size);
		if (kvaddr == NULL) {
			__free_pages(page, order);
			return NULL;
		}
	} else {
		kvaddr = (void *)(u32)paddr;
	}

	/*
	 * Evict any existing L1 and/or L2 lines for the backing page
	 * in case it was used earlier as a normal "cached" page.
	 * Yeah this bit us - STAR 9000898266
	 *
	 * Although the core does call flush_cache_vmap(), it gets kvaddr,
	 * hence can't be used to efficiently flush L1 and/or L2 which need
	 * paddr. Currently flush_cache_vmap() nukes the L1 cache completely,
	 * which will be optimized as a separate commit.
	 */
	if (need_coh)
		dma_cache_wback_inv(paddr, size);

	return kvaddr;
}

static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
		dma_addr_t dma_handle, unsigned long attrs)
{
	phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle);
	struct page *page = virt_to_page(paddr);
	int is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
			 (is_isa_arcv2() && ioc_enable);

	if (PageHighMem(page) || !is_non_coh)
		iounmap((void __force __iomem *)vaddr);

	__free_pages(page, get_order(size));
}
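
/*
 * Illustrative usage (not part of this file): drivers reach the two
 * routines above through the generic DMA API rather than by calling them
 * directly. A minimal sketch, assuming a hypothetical driver that already
 * holds a valid struct device *dev:
 *
 *	dma_addr_t dma_handle;
 *	void *cpu_buf;
 *
 *	cpu_buf = dma_alloc_coherent(dev, SZ_4K, &dma_handle, GFP_KERNEL);
 *	if (!cpu_buf)
 *		return -ENOMEM;
 *
 * cpu_buf is then a (typically uncached) kernel virtual address for CPU
 * use, while dma_handle is the bus address the device is programmed with.
 * The pair is released with:
 *
 *	dma_free_coherent(dev, SZ_4K, cpu_buf, dma_handle);
 */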

static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			unsigned long attrs)
{
	unsigned long user_count = vma_pages(vma);
	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr));
	unsigned long off = vma->vm_pgoff;
	int ret = -ENXIO;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < count && user_count <= (count - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      user_count << PAGE_SHIFT,
				      vma->vm_page_prot);
	}

	return ret;
}
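
/*
 * Illustrative usage (not part of this file): a driver exposing a coherent
 * buffer to userspace would typically call dma_mmap_coherent() from its
 * own .mmap file operation, which lands here via dma_map_ops. A minimal
 * sketch, assuming a hypothetical "struct mydrv_priv" that carries the
 * buffer allocated above:
 *
 *	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct mydrv_priv *priv = file->private_data;
 *
 *		return dma_mmap_coherent(priv->dev, vma, priv->cpu_buf,
 *					 priv->dma_handle, priv->size);
 *	}
 */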

/*
 * Streaming DMA Mapping API...
 * The CPU accesses the page via its normal paddr, so the buffer needs to
 * be explicitly made consistent before each use.
 */
static void _dma_cache_sync(phys_addr_t paddr, size_t size,
		enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_FROM_DEVICE:
		dma_cache_inv(paddr, size);
		break;
	case DMA_TO_DEVICE:
		dma_cache_wback(paddr, size);
		break;
	case DMA_BIDIRECTIONAL:
		dma_cache_wback_inv(paddr, size);
		break;
	default:
		pr_err("Invalid DMA dir [%d] for OP @ %pa[p]\n", dir, &paddr);
	}
}

/*
 * arc_dma_map_page - map a portion of a page for streaming DMA
 *
 * Ensure that any data held in the cache is appropriately discarded
 * or written back.
 *
 * The device owns this memory once this call has completed. The CPU
 * can regain ownership by calling dma_unmap_page().
 *
 * Note: although this takes a struct page as its argument, the caller
 * can "abuse" it to pass a region larger than PAGE_SIZE, and this still
 * works correctly provided the region is physically contiguous.
 */
static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t paddr = page_to_phys(page) + offset;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		_dma_cache_sync(paddr, size, dir);

	return plat_phys_to_dma(dev, paddr);
}

/*
 * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
 *
 * After this call, reads by the CPU to the buffer are guaranteed to see
 * whatever the device wrote there.
 *
 * Note: historically this routine was not implemented for ARC
 */
static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t paddr = plat_dma_to_phys(dev, handle);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		_dma_cache_sync(paddr, size, dir);
}
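
/*
 * Illustrative usage (not part of this file): the pair above is reached
 * through dma_map_page()/dma_unmap_page() (or the dma_map_single()
 * wrappers). A minimal sketch for a device-to-memory transfer, assuming
 * a hypothetical driver holding a valid struct device *dev and a
 * struct page *page:
 *
 *	dma_addr_t busaddr;
 *
 *	busaddr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
 *	if (dma_mapping_error(dev, busaddr))
 *		return -ENOMEM;
 *
 *	(program the device with busaddr, wait for the DMA to complete)
 *
 *	dma_unmap_page(dev, busaddr, PAGE_SIZE, DMA_FROM_DEVICE);
 */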

static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
	   int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
					      s->length, dir);

	return nents;
}
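
/*
 * Illustrative usage (not part of this file): scatter-gather mappings go
 * through dma_map_sg()/dma_unmap_sg(). A minimal sketch, assuming a
 * hypothetical driver with an already populated scatterlist "sgl" of
 * "nents" entries:
 *
 *	struct scatterlist *s;
 *	int i, count;
 *
 *	count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
 *	if (!count)
 *		return -ENOMEM;
 *
 *	for_each_sg(sgl, s, count, i)
 *		(program one descriptor from sg_dma_address(s), sg_dma_len(s))
 *
 *	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
 */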

static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			     int nents, enum dma_data_direction dir,
			     unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir,
				   attrs);
}

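/*
 * Note that the two sync_single routines below ignore the dir argument:
 * a sync for the CPU always invalidates the lines (the CPU is about to
 * read what the device wrote), while a sync for the device always writes
 * them back (the device is about to read what the CPU wrote).
 */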
static void arc_dma_sync_single_for_cpu(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	_dma_cache_sync(plat_dma_to_phys(dev, dma_handle), size,
			DMA_FROM_DEVICE);
}

static void arc_dma_sync_single_for_device(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	_dma_cache_sync(plat_dma_to_phys(dev, dma_handle), size,
			DMA_TO_DEVICE);
}
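
/*
 * Illustrative usage (not part of this file): if the CPU needs to look at
 * a buffer while it is still mapped for streaming DMA, ownership must be
 * bounced back and forth explicitly. A minimal sketch, reusing the
 * hypothetical busaddr mapping from the earlier example:
 *
 *	dma_sync_single_for_cpu(dev, busaddr, PAGE_SIZE, DMA_FROM_DEVICE);
 *	(CPU reads the data the device has produced so far)
 *	dma_sync_single_for_device(dev, busaddr, PAGE_SIZE, DMA_FROM_DEVICE);
 *	(device may resume DMA into the buffer)
 */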

static void arc_dma_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sglist, int nelems,
		enum dma_data_direction dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i)
		_dma_cache_sync(sg_phys(sg), sg->length, dir);
}

static void arc_dma_sync_sg_for_device(struct device *dev,
		struct scatterlist *sglist, int nelems,
		enum dma_data_direction dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i)
		_dma_cache_sync(sg_phys(sg), sg->length, dir);
}

static int arc_dma_supported(struct device *dev, u64 dma_mask)
{
	/* Support 32 bit DMA mask exclusively */
	return dma_mask == DMA_BIT_MASK(32);
}
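
/*
 * Illustrative usage (not part of this file): arc_dma_supported() is what
 * ultimately answers a driver's DMA mask request, e.g. from a hypothetical
 * probe routine:
 *
 *	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
 *		return -EIO;
 *
 * Anything other than a full 32-bit mask is rejected here.
 */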

const struct dma_map_ops arc_dma_ops = {
	.alloc			= arc_dma_alloc,
	.free			= arc_dma_free,
	.mmap			= arc_dma_mmap,
	.map_page		= arc_dma_map_page,
	.unmap_page		= arc_dma_unmap_page,
	.map_sg			= arc_dma_map_sg,
	.unmap_sg		= arc_dma_unmap_sg,
	.sync_single_for_device	= arc_dma_sync_single_for_device,
	.sync_single_for_cpu	= arc_dma_sync_single_for_cpu,
	.sync_sg_for_cpu	= arc_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= arc_dma_sync_sg_for_device,
	.dma_supported		= arc_dma_supported,
};
EXPORT_SYMBOL(arc_dma_ops);