blob: 40a219f5d22493637ad99e195724685cd122a314 [file] [log] [blame]
b.liue9582032025-04-17 19:18:16 +08001From: Felix Fietkau <nbd@nbd.name>
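2Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules; to achieve this, try to load modules into physically contiguous (KSEG0) memory and, if that fails, fall back to vmalloc and redirect out-of-range R_MIPS_26 calls through per-module jump trampolines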
3
4lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
5Signed-off-by: Felix Fietkau <nbd@nbd.name>
6---
7 arch/mips/Makefile | 5 +
8 arch/mips/include/asm/module.h | 5 +
9 arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++-
10 3 files changed, 284 insertions(+), 5 deletions(-)
11
12--- a/arch/mips/Makefile
13+++ b/arch/mips/Makefile
14@@ -95,8 +95,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
15 cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
16 cflags-y += -msoft-float
17 LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
18+ifdef CONFIG_64BIT
19 KBUILD_AFLAGS_MODULE += -mlong-calls
20 KBUILD_CFLAGS_MODULE += -mlong-calls
21+else
22+ ifdef CONFIG_DYNAMIC_FTRACE
23+ KBUILD_AFLAGS_MODULE += -mlong-calls
24+ KBUILD_CFLAGS_MODULE += -mlong-calls
25+ else
26+ KBUILD_AFLAGS_MODULE += -mno-long-calls
27+ KBUILD_CFLAGS_MODULE += -mno-long-calls
28+ endif
29+endif
30
31 ifeq ($(CONFIG_RELOCATABLE),y)
32 LDFLAGS_vmlinux += --emit-relocs
33--- a/arch/mips/include/asm/module.h
34+++ b/arch/mips/include/asm/module.h
35@@ -12,6 +12,11 @@ struct mod_arch_specific {
36 const struct exception_table_entry *dbe_start;
37 const struct exception_table_entry *dbe_end;
38 struct mips_hi16 *r_mips_hi16_list;
39+
40+ void *phys_plt_tbl;
41+ void *virt_plt_tbl;
42+ unsigned int phys_plt_offset;
43+ unsigned int virt_plt_offset;
44 };
45
46 typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */
47--- a/arch/mips/kernel/module.c
48+++ b/arch/mips/kernel/module.c
49@@ -32,14 +32,221 @@ struct mips_hi16 {
50 static LIST_HEAD(dbe_list);
51 static DEFINE_SPINLOCK(dbe_lock);
52
53-#ifdef MODULE_START
54+/*
55+ * Get the potential max trampolines size required of the init and
56+ * non-init sections. Only used if we cannot find enough contiguous
57+ * physically mapped memory to put the module into.
58+ */
59+static unsigned int
60+get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
61+ const char *secstrings, unsigned int symindex, bool is_init)
62+{
63+ unsigned long ret = 0;
64+ unsigned int i, j;
65+ Elf_Sym *syms;
66+
67+ /* Everything marked ALLOC (this includes the exported symbols) */
68+ for (i = 1; i < hdr->e_shnum; ++i) {
69+ unsigned int info = sechdrs[i].sh_info;
70+
71+ if (sechdrs[i].sh_type != SHT_REL
72+ && sechdrs[i].sh_type != SHT_RELA)
73+ continue;
74+
75+ /* Not a valid relocation section? */
76+ if (info >= hdr->e_shnum)
77+ continue;
78+
79+ /* Don't bother with non-allocated sections */
80+ if (!(sechdrs[info].sh_flags & SHF_ALLOC))
81+ continue;
82+
83+ /* If it's called *.init*, and we're not init, we're
84+ not interested */
85+ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
86+ != is_init)
87+ continue;
88+
89+ syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
90+ if (sechdrs[i].sh_type == SHT_REL) {
91+ Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
92+ unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
93+
94+ for (j = 0; j < size; ++j) {
95+ Elf_Sym *sym;
96+
97+ if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
98+ continue;
99+
100+ sym = syms + ELF_MIPS_R_SYM(rel[j]);
101+ if (!is_init && sym->st_shndx != SHN_UNDEF)
102+ continue;
103+
104+ ret += 4 * sizeof(int);
105+ }
106+ } else {
107+ Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
108+ unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
109+
110+ for (j = 0; j < size; ++j) {
111+ Elf_Sym *sym;
112+
113+ if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
114+ continue;
115+
116+ sym = syms + ELF_MIPS_R_SYM(rela[j]);
117+ if (!is_init && sym->st_shndx != SHN_UNDEF)
118+ continue;
119+
120+ ret += 4 * sizeof(int);
121+ }
122+ }
123+ }
124+
125+ return ret;
126+}
127+
128+#ifndef MODULE_START
129+static void *alloc_phys(unsigned long size)
130+{
131+ unsigned order;
132+ struct page *page;
133+ struct page *p;
134+
135+ size = PAGE_ALIGN(size);
136+ order = get_order(size);
137+
138+ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
139+ __GFP_THISNODE, order);
140+ if (!page)
141+ return NULL;
142+
143+ split_page(page, order);
144+
145+ /* mark all pages except for the last one */
146+ for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
147+ set_bit(PG_owner_priv_1, &p->flags);
148+
149+ for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
150+ __free_page(p);
151+
152+ return page_address(page);
153+}
154+#endif
155+
156+static void free_phys(void *ptr)
157+{
158+ struct page *page;
159+ bool free;
160+
161+ page = virt_to_page(ptr);
162+ do {
163+ free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
164+ __free_page(page);
165+ page++;
166+ } while (free);
167+}
168+
169+
170 void *module_alloc(unsigned long size)
171 {
172+#ifdef MODULE_START
173 return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
174 GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
175 __builtin_return_address(0));
176+#else
177+ void *ptr;
178+
179+ if (size == 0)
180+ return NULL;
181+
182+ ptr = alloc_phys(size);
183+
184+ /* If we failed to allocate physically contiguous memory,
185+ * fall back to regular vmalloc. The module loader code will
186+ * create jump tables to handle long jumps */
187+ if (!ptr)
188+ return vmalloc(size);
189+
190+ return ptr;
191+#endif
192 }
193+
194+static inline bool is_phys_addr(void *ptr)
195+{
196+#ifdef CONFIG_64BIT
197+ return (KSEGX((unsigned long)ptr) == CKSEG0);
198+#else
199+ return (KSEGX(ptr) == KSEG0);
200 #endif
201+}
202+
203+/* Free memory returned from module_alloc */
204+void module_memfree(void *module_region)
205+{
206+ if (is_phys_addr(module_region))
207+ free_phys(module_region);
208+ else
209+ vfree(module_region);
210+}
211+
212+static void *__module_alloc(int size, bool phys)
213+{
214+ void *ptr;
215+
216+ if (phys)
217+ ptr = kmalloc(size, GFP_KERNEL);
218+ else
219+ ptr = vmalloc(size);
220+ return ptr;
221+}
222+
223+static void __module_free(void *ptr)
224+{
225+ if (is_phys_addr(ptr))
226+ kfree(ptr);
227+ else
228+ vfree(ptr);
229+}
230+
231+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
232+ char *secstrings, struct module *mod)
233+{
234+ unsigned int symindex = 0;
235+ unsigned int core_size, init_size;
236+ int i;
237+
238+ mod->arch.phys_plt_offset = 0;
239+ mod->arch.virt_plt_offset = 0;
240+ mod->arch.phys_plt_tbl = NULL;
241+ mod->arch.virt_plt_tbl = NULL;
242+
243+ if (IS_ENABLED(CONFIG_64BIT))
244+ return 0;
245+
246+ for (i = 1; i < hdr->e_shnum; i++)
247+ if (sechdrs[i].sh_type == SHT_SYMTAB)
248+ symindex = i;
249+
250+ core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
251+ init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
252+
253+ if ((core_size + init_size) == 0)
254+ return 0;
255+
256+ mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
257+ if (!mod->arch.phys_plt_tbl)
258+ return -ENOMEM;
259+
260+ mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
261+ if (!mod->arch.virt_plt_tbl) {
262+ __module_free(mod->arch.phys_plt_tbl);
263+ mod->arch.phys_plt_tbl = NULL;
264+ return -ENOMEM;
265+ }
266+
267+ return 0;
268+}
269
270 static int apply_r_mips_none(struct module *me, u32 *location,
271 u32 base, Elf_Addr v, bool rela)
272@@ -55,9 +262,40 @@ static int apply_r_mips_32(struct module
273 return 0;
274 }
275
276+static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
277+ void *start, Elf_Addr v)
278+{
279+ unsigned *tramp = start + *plt_offset;
280+ *plt_offset += 4 * sizeof(int);
281+
282+ /* adjust carry for addiu */
283+ if (v & 0x00008000)
284+ v += 0x10000;
285+
286+ tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */
287+ tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */
288+ tramp[2] = 0x03200008; /* jr t9 */
289+ tramp[3] = 0x00000000; /* nop */
290+
291+ return (Elf_Addr) tramp;
292+}
293+
294+static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
295+{
296+ if (is_phys_addr(location))
297+ return add_plt_entry_to(&me->arch.phys_plt_offset,
298+ me->arch.phys_plt_tbl, v);
299+ else
300+ return add_plt_entry_to(&me->arch.virt_plt_offset,
301+ me->arch.virt_plt_tbl, v);
302+
303+}
304+
305 static int apply_r_mips_26(struct module *me, u32 *location,
306 u32 base, Elf_Addr v, bool rela)
307 {
308+ u32 ofs = base & 0x03ffffff;
309+
310 if (v % 4) {
311 pr_err("module %s: dangerous R_MIPS_26 relocation\n",
312 me->name);
313@@ -65,13 +303,17 @@ static int apply_r_mips_26(struct module
314 }
315
316 if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
317- pr_err("module %s: relocation overflow\n",
318- me->name);
319- return -ENOEXEC;
320+ v = add_plt_entry(me, location, v + (ofs << 2));
321+ if (!v) {
322+ pr_err("module %s: relocation overflow\n",
323+ me->name);
324+ return -ENOEXEC;
325+ }
326+ ofs = 0;
327 }
328
329 *location = (*location & ~0x03ffffff) |
330- ((base + (v >> 2)) & 0x03ffffff);
331+ ((ofs + (v >> 2)) & 0x03ffffff);
332
333 return 0;
334 }
335@@ -447,9 +689,36 @@ int module_finalize(const Elf_Ehdr *hdr,
336 list_add(&me->arch.dbe_list, &dbe_list);
337 spin_unlock_irq(&dbe_lock);
338 }
339+
340+ /* Get rid of the fixup trampoline if we're running the module
341+ * from physically mapped address space */
342+ if (me->arch.phys_plt_offset == 0) {
343+ __module_free(me->arch.phys_plt_tbl);
344+ me->arch.phys_plt_tbl = NULL;
345+ }
346+ if (me->arch.virt_plt_offset == 0) {
347+ __module_free(me->arch.virt_plt_tbl);
348+ me->arch.virt_plt_tbl = NULL;
349+ }
350+
351 return 0;
352 }
353
354+void module_arch_freeing_init(struct module *mod)
355+{
356+ if (mod->state == MODULE_STATE_LIVE)
357+ return;
358+
359+ if (mod->arch.phys_plt_tbl) {
360+ __module_free(mod->arch.phys_plt_tbl);
361+ mod->arch.phys_plt_tbl = NULL;
362+ }
363+ if (mod->arch.virt_plt_tbl) {
364+ __module_free(mod->arch.virt_plt_tbl);
365+ mod->arch.virt_plt_tbl = NULL;
366+ }
367+}
368+
369 void module_arch_cleanup(struct module *mod)
370 {
371 spin_lock_irq(&dbe_lock);