From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:37 +0200
Subject: [PATCH] wireguard: allowedips: allocate nodes in kmem_cache

commit dc680de28ca849dfe589dc15ac56d22505f0ef11 upstream.

The previous commit moved from O(n) to O(1) for removal, but in the
process introduced an additional pointer member to a struct that
increased the size from 60 to 68 bytes, putting nodes in the 128-byte
slab. With deployed systems having as many as 2 million nodes, this
represents a significant doubling in memory usage (128 MiB -> 256 MiB).
Fix this by using our own kmem_cache, that's sized exactly right. This
also makes wireguard's memory usage more transparent in tools like
slabtop and /proc/slabinfo.

Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/net/wireguard/allowedips.c | 31 ++++++++++++++++++++++++------
 drivers/net/wireguard/allowedips.h |  5 ++++-
 drivers/net/wireguard/main.c       | 10 +++++++++-
 3 files changed, 38 insertions(+), 8 deletions(-)

--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -6,6 +6,8 @@
 #include "allowedips.h"
 #include "peer.h"
 
+static struct kmem_cache *node_cache;
+
 static void swap_endian(u8 *dst, const u8 *src, u8 bits)
 {
 	if (bits == 32) {
@@ -40,6 +42,11 @@ static void push_rcu(struct allowedips_n
 	}
 }
 
+static void node_free_rcu(struct rcu_head *rcu)
+{
+	kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu));
+}
+
 static void root_free_rcu(struct rcu_head *rcu)
 {
 	struct allowedips_node *node, *stack[128] = {
@@ -49,7 +56,7 @@ static void root_free_rcu(struct rcu_hea
 	while (len > 0 && (node = stack[--len])) {
 		push_rcu(stack, node->bit[0], &len);
 		push_rcu(stack, node->bit[1], &len);
-		kfree(node);
+		kmem_cache_free(node_cache, node);
 	}
 }
 
@@ -164,7 +171,7 @@ static int add(struct allowedips_node __
 		return -EINVAL;
 
 	if (!rcu_access_pointer(*trie)) {
-		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
 		if (unlikely(!node))
 			return -ENOMEM;
 		RCU_INIT_POINTER(node->peer, peer);
@@ -180,7 +187,7 @@ static int add(struct allowedips_node __
 		return 0;
 	}
 
-	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
+	newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
 	if (unlikely(!newnode))
 		return -ENOMEM;
 	RCU_INIT_POINTER(newnode->peer, peer);
@@ -213,10 +220,10 @@ static int add(struct allowedips_node __
 		return 0;
 	}
 
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
 	if (unlikely(!node)) {
 		list_del(&newnode->peer_list);
-		kfree(newnode);
+		kmem_cache_free(node_cache, newnode);
 		return -ENOMEM;
 	}
 	INIT_LIST_HEAD(&node->peer_list);
@@ -306,7 +313,7 @@ void wg_allowedips_remove_by_peer(struct
 		if (child)
 			child->parent_bit = node->parent_bit;
 		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
-		kfree_rcu(node, rcu);
+		call_rcu(&node->rcu, node_free_rcu);
 
 		/* TODO: Note that we currently don't walk up and down in order to
 		 * free any potential filler nodes. This means that this function
@@ -350,4 +357,16 @@ struct wg_peer *wg_allowedips_lookup_src
 	return NULL;
 }
 
+int __init wg_allowedips_slab_init(void)
+{
+	node_cache = KMEM_CACHE(allowedips_node, 0);
+	return node_cache ? 0 : -ENOMEM;
+}
+
+void wg_allowedips_slab_uninit(void)
+{
+	rcu_barrier();
+	kmem_cache_destroy(node_cache);
+}
+
 #include "selftest/allowedips.c"
--- a/drivers/net/wireguard/allowedips.h
+++ b/drivers/net/wireguard/allowedips.h
@@ -19,7 +19,7 @@ struct allowedips_node {
 	u8 bits[16] __aligned(__alignof(u64));
 
 	/* Keep rarely used members at bottom to be beyond cache line. */
-	struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
+	struct allowedips_node *__rcu *parent_bit;
 	union {
 		struct list_head peer_list;
 		struct rcu_head rcu;
@@ -53,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src
 bool wg_allowedips_selftest(void);
 #endif
 
+int wg_allowedips_slab_init(void);
+void wg_allowedips_slab_uninit(void);
+
 #endif /* _WG_ALLOWEDIPS_H */
--- a/drivers/net/wireguard/main.c
+++ b/drivers/net/wireguard/main.c
@@ -21,10 +21,15 @@ static int __init mod_init(void)
 {
 	int ret;
 
+	ret = wg_allowedips_slab_init();
+	if (ret < 0)
+		goto err_allowedips;
+
 #ifdef DEBUG
+	ret = -ENOTRECOVERABLE;
 	if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
 	    !wg_ratelimiter_selftest())
-		return -ENOTRECOVERABLE;
+		goto err_peer;
 #endif
 	wg_noise_init();
 
@@ -50,6 +55,8 @@ err_netlink:
 err_device:
 	wg_peer_uninit();
 err_peer:
+	wg_allowedips_slab_uninit();
+err_allowedips:
 	return ret;
 }
 
@@ -58,6 +65,7 @@ static void __exit mod_exit(void)
 	wg_genetlink_uninit();
 	wg_device_uninit();
 	wg_peer_uninit();
+	wg_allowedips_slab_uninit();
 }
 
 module_init(mod_init);
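
As context for the figures in the commit message above, the sketch below is illustrative only and is not part of the patch: a small userspace C program, using the node count and byte sizes quoted in the commit message, that models generic kmalloc() allocations as rounding up to power-of-two size classes and contrasts that with a dedicated kmem_cache sized to the object (as KMEM_CACHE(allowedips_node, 0) provides). The power-of-two model is a simplification; real kernels also offer a few non-power-of-two kmalloc caches such as 96 and 192 bytes, and a real slab cache still adds alignment and per-slab overhead.

/* Illustrative sketch only -- not part of the patch. */
#include <stdio.h>
#include <stddef.h>

/* Smallest power-of-two size class that can hold "size" bytes. */
static size_t size_class(size_t size)
{
	size_t class = 8;

	while (class < size)
		class <<= 1;
	return class;
}

static void report(const char *what, size_t obj, size_t per_obj, size_t nodes)
{
	printf("%s: %zu-byte object, %zu bytes per allocation, ~%zu MiB total\n",
	       what, obj, per_obj, nodes * per_obj >> 20);
}

int main(void)
{
	const size_t nodes = 2UL << 20;	/* "as many as 2 million nodes" */
	const size_t old_node = 60;	/* before the parent_bit pointer was added */
	const size_t new_node = 68;	/* after the parent_bit pointer was added */

	report("old node via kmalloc", old_node, size_class(old_node), nodes);
	report("new node via kmalloc", new_node, size_class(new_node), nodes);
	report("new node via dedicated cache", new_node, new_node, nodes);
	return 0;
}

With the commit message's numbers this prints roughly 128 MiB, 256 MiB, and 136 MiB: the doubling the patch avoids by giving allowedips nodes their own exactly-sized cache, which also shows up by name in slabtop and /proc/slabinfo.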