| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2001-2006 Silicon Graphics, Inc.  All rights | 
|  | 3 | * reserved. | 
|  | 4 | * | 
|  | 5 | * This program is free software; you can redistribute it and/or modify it | 
|  | 6 | * under the terms of version 2 of the GNU General Public License | 
|  | 7 | * as published by the Free Software Foundation. | 
|  | 8 | */ | 
|  | 9 |  | 
|  | 10 | /* | 
|  | 11 | * SN Platform Special Memory (mspec) Support | 
|  | 12 | * | 
|  | 13 | * This driver exports the SN special memory (mspec) facility to user | 
|  | 14 | * processes. | 
 * There are three types of memory made available through this driver:
 * fetchops, uncached and cached.
|  | 17 | * | 
|  | 18 | * Fetchops are atomic memory operations that are implemented in the | 
|  | 19 | * memory controller on SGI SN hardware. | 
|  | 20 | * | 
 * Uncached pages are used for the memory write-combining feature of the
 * ia64 CPU.
|  | 23 | * | 
|  | 24 | * Cached are used for areas of memory that are used as cached addresses | 
|  | 25 | * on our partition and used as uncached addresses from other partitions. | 
|  | 26 | * Due to a design constraint of the SN2 Shub, you can not have processors | 
|  | 27 | * on the same FSB perform both a cached and uncached reference to the | 
|  | 28 | * same cache line.  These special memory cached regions prevent the | 
|  | 29 | * kernel from ever dropping in a TLB entry and therefore prevent the | 
|  | 30 | * processor from ever speculating a cache line from this page. | 
|  | 31 | */ | 
|  | 32 |  | 
|  | 33 | #include <linux/types.h> | 
|  | 34 | #include <linux/kernel.h> | 
|  | 35 | #include <linux/module.h> | 
|  | 36 | #include <linux/init.h> | 
|  | 37 | #include <linux/errno.h> | 
|  | 38 | #include <linux/miscdevice.h> | 
|  | 39 | #include <linux/spinlock.h> | 
|  | 40 | #include <linux/mm.h> | 
|  | 41 | #include <linux/fs.h> | 
|  | 42 | #include <linux/vmalloc.h> | 
|  | 43 | #include <linux/string.h> | 
|  | 44 | #include <linux/slab.h> | 
|  | 45 | #include <linux/numa.h> | 
|  | 46 | #include <asm/page.h> | 
|  | 47 | #include <asm/pgtable.h> | 
|  | 48 | #include <linux/atomic.h> | 
|  | 49 | #include <asm/tlbflush.h> | 
|  | 50 | #include <asm/uncached.h> | 
|  | 51 | #include <asm/sn/addrs.h> | 
|  | 52 | #include <asm/sn/arch.h> | 
|  | 53 | #include <asm/sn/mspec.h> | 
|  | 54 | #include <asm/sn/sn_cpuid.h> | 
|  | 55 | #include <asm/sn/io.h> | 
|  | 56 | #include <asm/sn/bte.h> | 
|  | 57 | #include <asm/sn/shubio.h> | 
|  | 58 |  | 
|  | 59 |  | 
/*
 * Labels for the three device flavours, used in the messages that
 * mspec_init() prints.
 */
#define FETCHOP_ID		"SGI Fetchop,"
#define CACHED_ID		"Cached,"
#define UNCACHED_ID		"Uncached"

/* Driver name and revision reported in the initialization banner. */
#define MSPEC_BASENAME		"mspec"
#define REVISION		"4.0"
|  | 65 |  | 
/*
 * Kind of special memory handed out by a given mspec device.  The value
 * is stored in vma_data.type when a region is mmaped and decides how a
 * faulting page is translated and whether the mapping is made
 * non-cached.
 */
enum mspec_page_type {
	MSPEC_FETCHOP	= 1,
	MSPEC_CACHED	= 2,
	MSPEC_UNCACHED	= 3
};
|  | 74 |  | 
/*
 * is_sn2 is a run-time flag (set by mspec_init()) when SN support is
 * configured, and a compile-time constant 0 otherwise so that the
 * SN2-only branches are never taken.
 */
#if defined(CONFIG_SGI_SN)
static int is_sn2;
#else
#define is_sn2		0
#endif
|  | 80 |  | 
|  | 81 | /* | 
|  | 82 | * One of these structures is allocated when an mspec region is mmaped. The | 
|  | 83 | * structure is pointed to by the vma->vm_private_data field in the vma struct. | 
|  | 84 | * This structure is used to record the addresses of the mspec pages. | 
|  | 85 | * This structure is shared by all vma's that are split off from the | 
|  | 86 | * original vma when split_vma()'s are done. | 
|  | 87 | * | 
|  | 88 | * The refcnt is incremented atomically because mm->mmap_sem does not | 
|  | 89 | * protect in fork case where multiple tasks share the vma_data. | 
|  | 90 | */ | 
|  | 91 | struct vma_data { | 
|  | 92 | atomic_t refcnt;	/* Number of vmas sharing the data. */ | 
|  | 93 | spinlock_t lock;	/* Serialize access to this structure. */ | 
|  | 94 | int count;		/* Number of pages allocated. */ | 
|  | 95 | enum mspec_page_type type; /* Type of pages allocated. */ | 
|  | 96 | int flags;		/* See VMD_xxx below. */ | 
|  | 97 | unsigned long vm_start;	/* Original (unsplit) base. */ | 
|  | 98 | unsigned long vm_end;	/* Original (unsplit) end. */ | 
|  | 99 | unsigned long maddr[0];	/* Array of MSPEC addresses. */ | 
|  | 100 | }; | 
|  | 101 |  | 
|  | 102 | #define VMD_VMALLOCED 0x1	/* vmalloc'd rather than kmalloc'd */ | 
|  | 103 |  | 
|  | 104 | /* used on shub2 to clear FOP cache in the HUB */ | 
|  | 105 | static unsigned long scratch_page[MAX_NUMNODES]; | 
|  | 106 | #define SH2_AMO_CACHE_ENTRIES	4 | 
|  | 107 |  | 
|  | 108 | static inline int | 
|  | 109 | mspec_zero_block(unsigned long addr, int len) | 
|  | 110 | { | 
|  | 111 | int status; | 
|  | 112 |  | 
|  | 113 | if (is_sn2) { | 
|  | 114 | if (is_shub2()) { | 
|  | 115 | int nid; | 
|  | 116 | void *p; | 
|  | 117 | int i; | 
|  | 118 |  | 
|  | 119 | nid = nasid_to_cnodeid(get_node_number(__pa(addr))); | 
|  | 120 | p = (void *)TO_AMO(scratch_page[nid]); | 
|  | 121 |  | 
|  | 122 | for (i=0; i < SH2_AMO_CACHE_ENTRIES; i++) { | 
|  | 123 | FETCHOP_LOAD_OP(p, FETCHOP_LOAD); | 
|  | 124 | p += FETCHOP_VAR_SIZE; | 
|  | 125 | } | 
|  | 126 | } | 
|  | 127 |  | 
|  | 128 | status = bte_copy(0, addr & ~__IA64_UNCACHED_OFFSET, len, | 
|  | 129 | BTE_WACQUIRE | BTE_ZERO_FILL, NULL); | 
|  | 130 | } else { | 
|  | 131 | memset((char *) addr, 0, len); | 
|  | 132 | status = 0; | 
|  | 133 | } | 
|  | 134 | return status; | 
|  | 135 | } | 
|  | 136 |  | 
|  | 137 | /* | 
|  | 138 | * mspec_open | 
|  | 139 | * | 
|  | 140 | * Called when a device mapping is created by a means other than mmap | 
|  | 141 | * (via fork, munmap, etc.).  Increments the reference count on the | 
|  | 142 | * underlying mspec data so it is not freed prematurely. | 
|  | 143 | */ | 
|  | 144 | static void | 
|  | 145 | mspec_open(struct vm_area_struct *vma) | 
|  | 146 | { | 
|  | 147 | struct vma_data *vdata; | 
|  | 148 |  | 
|  | 149 | vdata = vma->vm_private_data; | 
|  | 150 | atomic_inc(&vdata->refcnt); | 
|  | 151 | } | 
|  | 152 |  | 
|  | 153 | /* | 
|  | 154 | * mspec_close | 
|  | 155 | * | 
|  | 156 | * Called when unmapping a device mapping. Frees all mspec pages | 
|  | 157 | * belonging to all the vma's sharing this vma_data structure. | 
|  | 158 | */ | 
|  | 159 | static void | 
|  | 160 | mspec_close(struct vm_area_struct *vma) | 
|  | 161 | { | 
|  | 162 | struct vma_data *vdata; | 
|  | 163 | int index, last_index; | 
|  | 164 | unsigned long my_page; | 
|  | 165 |  | 
|  | 166 | vdata = vma->vm_private_data; | 
|  | 167 |  | 
|  | 168 | if (!atomic_dec_and_test(&vdata->refcnt)) | 
|  | 169 | return; | 
|  | 170 |  | 
|  | 171 | last_index = (vdata->vm_end - vdata->vm_start) >> PAGE_SHIFT; | 
|  | 172 | for (index = 0; index < last_index; index++) { | 
|  | 173 | if (vdata->maddr[index] == 0) | 
|  | 174 | continue; | 
|  | 175 | /* | 
|  | 176 | * Clear the page before sticking it back | 
|  | 177 | * into the pool. | 
|  | 178 | */ | 
|  | 179 | my_page = vdata->maddr[index]; | 
|  | 180 | vdata->maddr[index] = 0; | 
|  | 181 | if (!mspec_zero_block(my_page, PAGE_SIZE)) | 
|  | 182 | uncached_free_page(my_page, 1); | 
|  | 183 | else | 
|  | 184 | printk(KERN_WARNING "mspec_close(): " | 
|  | 185 | "failed to zero page %ld\n", my_page); | 
|  | 186 | } | 
|  | 187 |  | 
|  | 188 | if (vdata->flags & VMD_VMALLOCED) | 
|  | 189 | vfree(vdata); | 
|  | 190 | else | 
|  | 191 | kfree(vdata); | 
|  | 192 | } | 
|  | 193 |  | 
|  | 194 | /* | 
|  | 195 | * mspec_fault | 
|  | 196 | * | 
|  | 197 | * Creates a mspec page and maps it to user space. | 
|  | 198 | */ | 
|  | 199 | static int | 
|  | 200 | mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 
|  | 201 | { | 
|  | 202 | unsigned long paddr, maddr; | 
|  | 203 | unsigned long pfn; | 
|  | 204 | pgoff_t index = vmf->pgoff; | 
|  | 205 | struct vma_data *vdata = vma->vm_private_data; | 
|  | 206 |  | 
|  | 207 | maddr = (volatile unsigned long) vdata->maddr[index]; | 
|  | 208 | if (maddr == 0) { | 
|  | 209 | maddr = uncached_alloc_page(numa_node_id(), 1); | 
|  | 210 | if (maddr == 0) | 
|  | 211 | return VM_FAULT_OOM; | 
|  | 212 |  | 
|  | 213 | spin_lock(&vdata->lock); | 
|  | 214 | if (vdata->maddr[index] == 0) { | 
|  | 215 | vdata->count++; | 
|  | 216 | vdata->maddr[index] = maddr; | 
|  | 217 | } else { | 
|  | 218 | uncached_free_page(maddr, 1); | 
|  | 219 | maddr = vdata->maddr[index]; | 
|  | 220 | } | 
|  | 221 | spin_unlock(&vdata->lock); | 
|  | 222 | } | 
|  | 223 |  | 
|  | 224 | if (vdata->type == MSPEC_FETCHOP) | 
|  | 225 | paddr = TO_AMO(maddr); | 
|  | 226 | else | 
|  | 227 | paddr = maddr & ~__IA64_UNCACHED_OFFSET; | 
|  | 228 |  | 
|  | 229 | pfn = paddr >> PAGE_SHIFT; | 
|  | 230 |  | 
|  | 231 | /* | 
|  | 232 | * vm_insert_pfn can fail with -EBUSY, but in that case it will | 
|  | 233 | * be because another thread has installed the pte first, so it | 
|  | 234 | * is no problem. | 
|  | 235 | */ | 
|  | 236 | vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); | 
|  | 237 |  | 
|  | 238 | return VM_FAULT_NOPAGE; | 
|  | 239 | } | 
|  | 240 |  | 
|  | 241 | static const struct vm_operations_struct mspec_vm_ops = { | 
|  | 242 | .open = mspec_open, | 
|  | 243 | .close = mspec_close, | 
|  | 244 | .fault = mspec_fault, | 
|  | 245 | }; | 
|  | 246 |  | 
|  | 247 | /* | 
|  | 248 | * mspec_mmap | 
|  | 249 | * | 
|  | 250 | * Called when mmapping the device.  Initializes the vma with a fault handler | 
|  | 251 | * and private data structure necessary to allocate, track, and free the | 
|  | 252 | * underlying pages. | 
|  | 253 | */ | 
|  | 254 | static int | 
|  | 255 | mspec_mmap(struct file *file, struct vm_area_struct *vma, | 
|  | 256 | enum mspec_page_type type) | 
|  | 257 | { | 
|  | 258 | struct vma_data *vdata; | 
|  | 259 | int pages, vdata_size, flags = 0; | 
|  | 260 |  | 
|  | 261 | if (vma->vm_pgoff != 0) | 
|  | 262 | return -EINVAL; | 
|  | 263 |  | 
|  | 264 | if ((vma->vm_flags & VM_SHARED) == 0) | 
|  | 265 | return -EINVAL; | 
|  | 266 |  | 
|  | 267 | if ((vma->vm_flags & VM_WRITE) == 0) | 
|  | 268 | return -EPERM; | 
|  | 269 |  | 
|  | 270 | pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | 
|  | 271 | vdata_size = sizeof(struct vma_data) + pages * sizeof(long); | 
|  | 272 | if (vdata_size <= PAGE_SIZE) | 
|  | 273 | vdata = kzalloc(vdata_size, GFP_KERNEL); | 
|  | 274 | else { | 
|  | 275 | vdata = vzalloc(vdata_size); | 
|  | 276 | flags = VMD_VMALLOCED; | 
|  | 277 | } | 
|  | 278 | if (!vdata) | 
|  | 279 | return -ENOMEM; | 
|  | 280 |  | 
|  | 281 | vdata->vm_start = vma->vm_start; | 
|  | 282 | vdata->vm_end = vma->vm_end; | 
|  | 283 | vdata->flags = flags; | 
|  | 284 | vdata->type = type; | 
|  | 285 | spin_lock_init(&vdata->lock); | 
|  | 286 | atomic_set(&vdata->refcnt, 1); | 
|  | 287 | vma->vm_private_data = vdata; | 
|  | 288 |  | 
|  | 289 | vma->vm_flags |= (VM_IO | VM_RESERVED | VM_PFNMAP | VM_DONTEXPAND); | 
|  | 290 | if (vdata->type == MSPEC_FETCHOP || vdata->type == MSPEC_UNCACHED) | 
|  | 291 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | 
|  | 292 | vma->vm_ops = &mspec_vm_ops; | 
|  | 293 |  | 
|  | 294 | return 0; | 
|  | 295 | } | 
|  | 296 |  | 
|  | 297 | static int | 
|  | 298 | fetchop_mmap(struct file *file, struct vm_area_struct *vma) | 
|  | 299 | { | 
|  | 300 | return mspec_mmap(file, vma, MSPEC_FETCHOP); | 
|  | 301 | } | 
|  | 302 |  | 
|  | 303 | static int | 
|  | 304 | cached_mmap(struct file *file, struct vm_area_struct *vma) | 
|  | 305 | { | 
|  | 306 | return mspec_mmap(file, vma, MSPEC_CACHED); | 
|  | 307 | } | 
|  | 308 |  | 
|  | 309 | static int | 
|  | 310 | uncached_mmap(struct file *file, struct vm_area_struct *vma) | 
|  | 311 | { | 
|  | 312 | return mspec_mmap(file, vma, MSPEC_UNCACHED); | 
|  | 313 | } | 
|  | 314 |  | 
|  | 315 | static const struct file_operations fetchop_fops = { | 
|  | 316 | .owner = THIS_MODULE, | 
|  | 317 | .mmap = fetchop_mmap, | 
|  | 318 | .llseek = noop_llseek, | 
|  | 319 | }; | 
|  | 320 |  | 
|  | 321 | static struct miscdevice fetchop_miscdev = { | 
|  | 322 | .minor = MISC_DYNAMIC_MINOR, | 
|  | 323 | .name = "sgi_fetchop", | 
|  | 324 | .fops = &fetchop_fops | 
|  | 325 | }; | 
|  | 326 |  | 
|  | 327 | static const struct file_operations cached_fops = { | 
|  | 328 | .owner = THIS_MODULE, | 
|  | 329 | .mmap = cached_mmap, | 
|  | 330 | .llseek = noop_llseek, | 
|  | 331 | }; | 
|  | 332 |  | 
|  | 333 | static struct miscdevice cached_miscdev = { | 
|  | 334 | .minor = MISC_DYNAMIC_MINOR, | 
|  | 335 | .name = "mspec_cached", | 
|  | 336 | .fops = &cached_fops | 
|  | 337 | }; | 
|  | 338 |  | 
|  | 339 | static const struct file_operations uncached_fops = { | 
|  | 340 | .owner = THIS_MODULE, | 
|  | 341 | .mmap = uncached_mmap, | 
|  | 342 | .llseek = noop_llseek, | 
|  | 343 | }; | 
|  | 344 |  | 
|  | 345 | static struct miscdevice uncached_miscdev = { | 
|  | 346 | .minor = MISC_DYNAMIC_MINOR, | 
|  | 347 | .name = "mspec_uncached", | 
|  | 348 | .fops = &uncached_fops | 
|  | 349 | }; | 
|  | 350 |  | 
|  | 351 | /* | 
|  | 352 | * mspec_init | 
|  | 353 | * | 
|  | 354 | * Called at boot time to initialize the mspec facility. | 
|  | 355 | */ | 
|  | 356 | static int __init | 
|  | 357 | mspec_init(void) | 
|  | 358 | { | 
|  | 359 | int ret; | 
|  | 360 | int nid; | 
|  | 361 |  | 
|  | 362 | /* | 
|  | 363 | * The fetchop device only works on SN2 hardware, uncached and cached | 
|  | 364 | * memory drivers should both be valid on all ia64 hardware | 
|  | 365 | */ | 
|  | 366 | #ifdef CONFIG_SGI_SN | 
|  | 367 | if (ia64_platform_is("sn2")) { | 
|  | 368 | is_sn2 = 1; | 
|  | 369 | if (is_shub2()) { | 
|  | 370 | ret = -ENOMEM; | 
|  | 371 | for_each_node_state(nid, N_ONLINE) { | 
|  | 372 | int actual_nid; | 
|  | 373 | int nasid; | 
|  | 374 | unsigned long phys; | 
|  | 375 |  | 
|  | 376 | scratch_page[nid] = uncached_alloc_page(nid, 1); | 
|  | 377 | if (scratch_page[nid] == 0) | 
|  | 378 | goto free_scratch_pages; | 
|  | 379 | phys = __pa(scratch_page[nid]); | 
|  | 380 | nasid = get_node_number(phys); | 
|  | 381 | actual_nid = nasid_to_cnodeid(nasid); | 
|  | 382 | if (actual_nid != nid) | 
|  | 383 | goto free_scratch_pages; | 
|  | 384 | } | 
|  | 385 | } | 
|  | 386 |  | 
|  | 387 | ret = misc_register(&fetchop_miscdev); | 
|  | 388 | if (ret) { | 
|  | 389 | printk(KERN_ERR | 
|  | 390 | "%s: failed to register device %i\n", | 
|  | 391 | FETCHOP_ID, ret); | 
|  | 392 | goto free_scratch_pages; | 
|  | 393 | } | 
|  | 394 | } | 
|  | 395 | #endif | 
|  | 396 | ret = misc_register(&cached_miscdev); | 
|  | 397 | if (ret) { | 
|  | 398 | printk(KERN_ERR "%s: failed to register device %i\n", | 
|  | 399 | CACHED_ID, ret); | 
|  | 400 | if (is_sn2) | 
|  | 401 | misc_deregister(&fetchop_miscdev); | 
|  | 402 | goto free_scratch_pages; | 
|  | 403 | } | 
|  | 404 | ret = misc_register(&uncached_miscdev); | 
|  | 405 | if (ret) { | 
|  | 406 | printk(KERN_ERR "%s: failed to register device %i\n", | 
|  | 407 | UNCACHED_ID, ret); | 
|  | 408 | misc_deregister(&cached_miscdev); | 
|  | 409 | if (is_sn2) | 
|  | 410 | misc_deregister(&fetchop_miscdev); | 
|  | 411 | goto free_scratch_pages; | 
|  | 412 | } | 
|  | 413 |  | 
|  | 414 | printk(KERN_INFO "%s %s initialized devices: %s %s %s\n", | 
|  | 415 | MSPEC_BASENAME, REVISION, is_sn2 ? FETCHOP_ID : "", | 
|  | 416 | CACHED_ID, UNCACHED_ID); | 
|  | 417 |  | 
|  | 418 | return 0; | 
|  | 419 |  | 
|  | 420 | free_scratch_pages: | 
|  | 421 | for_each_node(nid) { | 
|  | 422 | if (scratch_page[nid] != 0) | 
|  | 423 | uncached_free_page(scratch_page[nid], 1); | 
|  | 424 | } | 
|  | 425 | return ret; | 
|  | 426 | } | 
|  | 427 |  | 
|  | 428 | static void __exit | 
|  | 429 | mspec_exit(void) | 
|  | 430 | { | 
|  | 431 | int nid; | 
|  | 432 |  | 
|  | 433 | misc_deregister(&uncached_miscdev); | 
|  | 434 | misc_deregister(&cached_miscdev); | 
|  | 435 | if (is_sn2) { | 
|  | 436 | misc_deregister(&fetchop_miscdev); | 
|  | 437 |  | 
|  | 438 | for_each_node(nid) { | 
|  | 439 | if (scratch_page[nid] != 0) | 
|  | 440 | uncached_free_page(scratch_page[nid], 1); | 
|  | 441 | } | 
|  | 442 | } | 
|  | 443 | } | 
|  | 444 |  | 
/* Hook mspec_init()/mspec_exit() up as the module's entry and exit points. */
module_init(mspec_init);
module_exit(mspec_exit);

/* Module metadata. */
MODULE_AUTHOR("Silicon Graphics, Inc. <linux-altix@sgi.com>");
MODULE_DESCRIPTION("Driver for SGI SN special memory operations");
MODULE_LICENSE("GPL");