b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame] | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | From: "Jason A. Donenfeld" <Jason@zx2c4.com> |
| 3 | Date: Tue, 4 Feb 2020 22:17:26 +0100 |
| 4 | Subject: [PATCH] wireguard: noise: reject peers with low order public keys |
| 5 | |
| 6 | commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream. |
| 7 | |
| 8 | Our static-static calculation returns a failure if the public key is of |
| 9 | low order. We check for this when peers are added, and don't allow them |
| 10 | to be added if they're low order, except in the case where we haven't |
| 11 | yet been given a private key. In that case, we would defer the removal |
| 12 | of the peer until we're given a private key, since at that point we're |
| 13 | doing new static-static calculations which incur failures we can act on. |
| 14 | This meant, however, that we wound up removing peers rather late in the |
| 15 | configuration flow. |
| 16 | |
| 17 | Syzkaller points out that peer_remove calls flush_workqueue, which in |
| 18 | turn might then wait for sending a handshake initiation to complete. |
| 19 | Since handshake initiation needs the static identity lock, holding the |
| 20 | static identity lock while calling peer_remove can result in a rare |
| 21 | deadlock. We have precisely this case in this situation of late-stage |
| 22 | peer removal based on an invalid public key. We can't drop the lock when |
| 23 | removing, because then incoming handshakes might interact with a bogus |
| 24 | static-static calculation. |
| 25 | |
| 26 | While the band-aid patch for this would involve breaking up the peer |
| 27 | removal into two steps like wg_peer_remove_all does, in order to solve |
| 28 | the locking issue, there's actually a much more elegant way of fixing |
| 29 | this: |
| 30 | |
| 31 | If the static-static calculation succeeds with one private key, it |
| 32 | *must* succeed with all others, because all 32-byte strings map to valid |
| 33 | private keys, thanks to clamping. That means we can get rid of this |
| 34 | silly dance and locking headaches of removing peers late in the |
| 35 | configuration flow, and instead just reject them early on, regardless of |
| 36 | whether the device has yet been assigned a private key. For the case |
| 37 | where the device doesn't yet have a private key, we safely use zeros |
| 38 | just for the purposes of checking for low order points by way of |
| 39 | checking the output of the calculation. |
| 40 | |
| 41 | The following PoC will trigger the deadlock: |
| 42 | |
| 43 | ip link add wg0 type wireguard |
| 44 | ip addr add 10.0.0.1/24 dev wg0 |
| 45 | ip link set wg0 up |
| 46 | ping -f 10.0.0.2 & |
| 47 | while true; do |
| 48 | wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234 |
| 49 | wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=) |
| 50 | done |
| 51 | |
| 52 | [ 0.949105] ====================================================== |
| 53 | [ 0.949550] WARNING: possible circular locking dependency detected |
| 54 | [ 0.950143] 5.5.0-debug+ #18 Not tainted |
| 55 | [ 0.950431] ------------------------------------------------------ |
| 56 | [ 0.950959] wg/89 is trying to acquire lock: |
| 57 | [ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0 |
| 58 | [ 0.951865] |
| 59 | [ 0.951865] but task is already holding lock: |
| 60 | [ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0 |
| 61 | [ 0.953011] |
| 62 | [ 0.953011] which lock already depends on the new lock. |
| 63 | [ 0.953011] |
| 64 | [ 0.953651] |
| 65 | [ 0.953651] the existing dependency chain (in reverse order) is: |
| 66 | [ 0.954292] |
| 67 | [ 0.954292] -> #2 (&wg->static_identity.lock){++++}: |
| 68 | [ 0.954804] lock_acquire+0x127/0x350 |
| 69 | [ 0.955133] down_read+0x83/0x410 |
| 70 | [ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700 |
| 71 | [ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280 |
| 72 | [ 0.956401] wg_packet_handshake_send_worker+0x10/0x20 |
| 73 | [ 0.956841] process_one_work+0x806/0x1500 |
| 74 | [ 0.957167] worker_thread+0x8c/0xcb0 |
| 75 | [ 0.957549] kthread+0x2ee/0x3b0 |
| 76 | [ 0.957792] ret_from_fork+0x24/0x30 |
| 77 | [ 0.958234] |
| 78 | [ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}: |
| 79 | [ 0.958808] lock_acquire+0x127/0x350 |
| 80 | [ 0.959075] process_one_work+0x7ab/0x1500 |
| 81 | [ 0.959369] worker_thread+0x8c/0xcb0 |
| 82 | [ 0.959639] kthread+0x2ee/0x3b0 |
| 83 | [ 0.959896] ret_from_fork+0x24/0x30 |
| 84 | [ 0.960346] |
| 85 | [ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}: |
| 86 | [ 0.960945] check_prev_add+0x167/0x1e20 |
| 87 | [ 0.961351] __lock_acquire+0x2012/0x3170 |
| 88 | [ 0.961725] lock_acquire+0x127/0x350 |
| 89 | [ 0.961990] flush_workqueue+0x106/0x12f0 |
| 90 | [ 0.962280] peer_remove_after_dead+0x160/0x220 |
| 91 | [ 0.962600] wg_set_device+0xa24/0xcc0 |
| 92 | [ 0.962994] genl_rcv_msg+0x52f/0xe90 |
| 93 | [ 0.963298] netlink_rcv_skb+0x111/0x320 |
| 94 | [ 0.963618] genl_rcv+0x1f/0x30 |
| 95 | [ 0.963853] netlink_unicast+0x3f6/0x610 |
| 96 | [ 0.964245] netlink_sendmsg+0x700/0xb80 |
| 97 | [ 0.964586] __sys_sendto+0x1dd/0x2c0 |
| 98 | [ 0.964854] __x64_sys_sendto+0xd8/0x1b0 |
| 99 | [ 0.965141] do_syscall_64+0x90/0xd9a |
| 100 | [ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| 101 | [ 0.965769] |
| 102 | [ 0.965769] other info that might help us debug this: |
| 103 | [ 0.965769] |
| 104 | [ 0.966337] Chain exists of: |
| 105 | [ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock |
| 106 | [ 0.966337] |
| 107 | [ 0.967417] Possible unsafe locking scenario: |
| 108 | [ 0.967417] |
| 109 | [ 0.967836] CPU0 CPU1 |
| 110 | [ 0.968155] ---- ---- |
| 111 | [ 0.968497] lock(&wg->static_identity.lock); |
| 112 | [ 0.968779] lock((work_completion)(&peer->transmit_handshake_work)); |
| 113 | [ 0.969345] lock(&wg->static_identity.lock); |
| 114 | [ 0.969809] lock((wq_completion)wg-kex-wg0); |
| 115 | [ 0.970146] |
| 116 | [ 0.970146] *** DEADLOCK *** |
| 117 | [ 0.970146] |
| 118 | [ 0.970531] 5 locks held by wg/89: |
| 119 | [ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30 |
| 120 | [ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90 |
| 121 | [ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0 |
| 122 | [ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0 |
| 123 | [ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0 |
| 124 | [ 0.973653] |
| 125 | [ 0.973653] stack backtrace: |
| 126 | [ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18 |
| 127 | [ 0.974476] Call Trace: |
| 128 | [ 0.974638] dump_stack+0x97/0xe0 |
| 129 | [ 0.974869] check_noncircular+0x312/0x3e0 |
| 130 | [ 0.975132] ? print_circular_bug+0x1f0/0x1f0 |
| 131 | [ 0.975410] ? __kernel_text_address+0x9/0x30 |
| 132 | [ 0.975727] ? unwind_get_return_address+0x51/0x90 |
| 133 | [ 0.976024] check_prev_add+0x167/0x1e20 |
| 134 | [ 0.976367] ? graph_lock+0x70/0x160 |
| 135 | [ 0.976682] __lock_acquire+0x2012/0x3170 |
| 136 | [ 0.976998] ? register_lock_class+0x1140/0x1140 |
| 137 | [ 0.977323] lock_acquire+0x127/0x350 |
| 138 | [ 0.977627] ? flush_workqueue+0xe3/0x12f0 |
| 139 | [ 0.977890] flush_workqueue+0x106/0x12f0 |
| 140 | [ 0.978147] ? flush_workqueue+0xe3/0x12f0 |
| 141 | [ 0.978410] ? find_held_lock+0x2c/0x110 |
| 142 | [ 0.978662] ? lock_downgrade+0x6e0/0x6e0 |
| 143 | [ 0.978919] ? queue_rcu_work+0x60/0x60 |
| 144 | [ 0.979166] ? netif_napi_del+0x151/0x3b0 |
| 145 | [ 0.979501] ? peer_remove_after_dead+0x160/0x220 |
| 146 | [ 0.979871] peer_remove_after_dead+0x160/0x220 |
| 147 | [ 0.980232] wg_set_device+0xa24/0xcc0 |
| 148 | [ 0.980516] ? deref_stack_reg+0x8e/0xc0 |
| 149 | [ 0.980801] ? set_peer+0xe10/0xe10 |
| 150 | [ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150 |
| 151 | [ 0.981430] ? __nla_validate_parse+0x163/0x270 |
| 152 | [ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310 |
| 153 | [ 0.982078] genl_rcv_msg+0x52f/0xe90 |
| 154 | [ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310 |
| 155 | [ 0.982690] ? register_lock_class+0x1140/0x1140 |
| 156 | [ 0.983049] netlink_rcv_skb+0x111/0x320 |
| 157 | [ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310 |
| 158 | [ 0.983645] ? netlink_ack+0x880/0x880 |
| 159 | [ 0.983888] genl_rcv+0x1f/0x30 |
| 160 | [ 0.984168] netlink_unicast+0x3f6/0x610 |
| 161 | [ 0.984443] ? netlink_detachskb+0x60/0x60 |
| 162 | [ 0.984729] ? find_held_lock+0x2c/0x110 |
| 163 | [ 0.984976] netlink_sendmsg+0x700/0xb80 |
| 164 | [ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60 |
| 165 | [ 0.985533] __sys_sendto+0x1dd/0x2c0 |
| 166 | [ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0 |
| 167 | [ 0.986039] ? sockfd_lookup_light+0x17/0x160 |
| 168 | [ 0.986397] ? __sys_recvmsg+0x8c/0xf0 |
| 169 | [ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0 |
| 170 | [ 0.987018] __x64_sys_sendto+0xd8/0x1b0 |
| 171 | [ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0 |
| 172 | [ 0.987666] do_syscall_64+0x90/0xd9a |
| 173 | [ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| 174 | [ 0.988223] RIP: 0033:0x7fe77c12003e |
| 175 | [ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4 |
| 176 | [ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c |
| 177 | [ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e |
| 178 | [ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004 |
| 179 | [ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c |
| 180 | [ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c |
| 181 | [ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001 |
| 182 | |
| 183 | Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| 184 | Reported-by: syzbot <syzkaller@googlegroups.com> |
| 185 | Signed-off-by: David S. Miller <davem@davemloft.net> |
| 186 | Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| 187 | --- |
| 188 | drivers/net/wireguard/netlink.c | 6 ++---- |
| 189 | drivers/net/wireguard/noise.c | 10 +++++++--- |
| 190 | 2 files changed, 9 insertions(+), 7 deletions(-) |
| 191 | |
| 192 | --- a/drivers/net/wireguard/netlink.c |
| 193 | +++ b/drivers/net/wireguard/netlink.c |
| 194 | @@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff |
| 195 | private_key); |
| 196 | list_for_each_entry_safe(peer, temp, &wg->peer_list, |
| 197 | peer_list) { |
| 198 | - if (wg_noise_precompute_static_static(peer)) |
| 199 | - wg_noise_expire_current_peer_keypairs(peer); |
| 200 | - else |
| 201 | - wg_peer_remove(peer); |
| 202 | + BUG_ON(!wg_noise_precompute_static_static(peer)); |
| 203 | + wg_noise_expire_current_peer_keypairs(peer); |
| 204 | } |
| 205 | wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); |
| 206 | up_write(&wg->static_identity.lock); |
| 207 | --- a/drivers/net/wireguard/noise.c |
| 208 | +++ b/drivers/net/wireguard/noise.c |
| 209 | @@ -46,17 +46,21 @@ void __init wg_noise_init(void) |
| 210 | /* Must hold peer->handshake.static_identity->lock */ |
| 211 | bool wg_noise_precompute_static_static(struct wg_peer *peer) |
| 212 | { |
| 213 | - bool ret = true; |
| 214 | + bool ret; |
| 215 | |
| 216 | down_write(&peer->handshake.lock); |
| 217 | - if (peer->handshake.static_identity->has_identity) |
| 218 | + if (peer->handshake.static_identity->has_identity) { |
| 219 | ret = curve25519( |
| 220 | peer->handshake.precomputed_static_static, |
| 221 | peer->handshake.static_identity->static_private, |
| 222 | peer->handshake.remote_static); |
| 223 | - else |
| 224 | + } else { |
| 225 | + u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 }; |
| 226 | + |
| 227 | + ret = curve25519(empty, empty, peer->handshake.remote_static); |
| 228 | memset(peer->handshake.precomputed_static_static, 0, |
| 229 | NOISE_PUBLIC_KEY_LEN); |
| 230 | + } |
| 231 | up_write(&peer->handshake.lock); |
| 232 | return ret; |
| 233 | } |