From 263e463358d38c0f594a158791005054b2522626 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 26 Aug 2020 19:48:10 +0300 Subject: [PATCH 1/9] ipv4: Silence suspicious RCU usage warning [ Upstream commit 7f6f32bb7d3355cd78ebf1dece9a6ea7a0ca8158 ] fib_info_notify_update() is always called with RTNL held, but not from an RCU read-side critical section. This leads to the following warning [1] when the FIB table list is traversed with hlist_for_each_entry_rcu(), but without a proper lockdep expression. Since modification of the list is protected by RTNL, silence the warning by adding a lockdep expression which verifies RTNL is held. [1] ============================= WARNING: suspicious RCU usage 5.9.0-rc1-custom-14233-g2f26e122d62f #129 Not tainted ----------------------------- net/ipv4/fib_trie.c:2124 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ip/834: #0: ffffffff85a3b6b0 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x49a/0xbd0 stack backtrace: CPU: 0 PID: 834 Comm: ip Not tainted 5.9.0-rc1-custom-14233-g2f26e122d62f #129 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x100/0x184 lockdep_rcu_suspicious+0x143/0x14d fib_info_notify_update+0x8d1/0xa60 __nexthop_replace_notify+0xd2/0x290 rtm_new_nexthop+0x35e2/0x5946 rtnetlink_rcv_msg+0x4f7/0xbd0 netlink_rcv_skb+0x17a/0x480 rtnetlink_rcv+0x22/0x30 netlink_unicast+0x5ae/0x890 netlink_sendmsg+0x98a/0xf40 ____sys_sendmsg+0x879/0xa00 ___sys_sendmsg+0x122/0x190 __sys_sendmsg+0x103/0x1d0 __x64_sys_sendmsg+0x7d/0xb0 do_syscall_64+0x32/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fde28c3be57 Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007ffc09330028 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fde28c3be57 RDX: 0000000000000000 RSI: 00007ffc09330090 RDI: 0000000000000003 RBP: 000000005f45f911 R08: 0000000000000001 R09: 00007ffc0933012c R10: 0000000000000076 R11: 0000000000000246 R12: 0000000000000001 R13: 00007ffc09330290 R14: 00007ffc09330eee R15: 00005610e48ed020 Fixes: 1bff1a0c9bbd ("ipv4: Add function to send route updates") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 47b6d73d30e55a..51673d00bbeac2 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2010,7 +2010,8 @@ void fib_info_notify_update(struct net *net, struct nl_info *info) struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, head, tb_hlist) + hlist_for_each_entry_rcu(tb, head, tb_hlist, + lockdep_rtnl_is_held()) __fib_info_notify_update(net, tb, info); } } From 3f73dbf94f8fbe006f252bed2093354f55055387 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Sep 2020 16:16:59 +0300 Subject: [PATCH 2/9] ipv6: Fix sysctl max for fib_multipath_hash_policy [ Upstream commit 05d4487197b2b71d5363623c28924fd58c71c0b6 ] Cited commit added the possible value of '2', but it cannot be set. Fix it by adjusting the maximum value to '2'. This is consistent with the corresponding IPv4 sysctl. Before: # sysctl -w net.ipv6.fib_multipath_hash_policy=2 sysctl: setting key "net.ipv6.fib_multipath_hash_policy": Invalid argument net.ipv6.fib_multipath_hash_policy = 2 # sysctl net.ipv6.fib_multipath_hash_policy net.ipv6.fib_multipath_hash_policy = 0 After: # sysctl -w net.ipv6.fib_multipath_hash_policy=2 net.ipv6.fib_multipath_hash_policy = 2 # sysctl net.ipv6.fib_multipath_hash_policy net.ipv6.fib_multipath_hash_policy = 2 Fixes: d8f74f0975d8 ("ipv6: Support multipath hashing on inner IP pkts") Signed-off-by: Ido Schimmel Reviewed-by: Stephen Suryaputra Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sysctl_net_ipv6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index ec8fcfc60a27ba..73842054bfe68c 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -21,6 +21,7 @@ #include #endif +static int two = 2; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; @@ -151,7 +152,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = &two, }, { .procname = "seg6_flowlabel", From 346fefa823259c86eaef64102796f1be284ac41a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 21 Aug 2020 16:34:52 -0400 Subject: [PATCH 3/9] netlabel: fix problems with mapping removal [ Upstream commit d3b990b7f327e2afa98006e7666fb8ada8ed8683 ] This patch fixes two main problems seen when removing NetLabel mappings: memory leaks and potentially extra audit noise. The memory leaks are caused by not properly free'ing the mapping's address selector struct when free'ing the entire entry as well as not properly cleaning up a temporary mapping entry when adding new address selectors to an existing entry. This patch fixes both these problems such that kmemleak reports no NetLabel associated leaks after running the SELinux test suite. The potentially extra audit noise was caused by the auditing code in netlbl_domhsh_remove_entry() being called regardless of the entry's validity. If another thread had already marked the entry as invalid, but not removed/free'd it from the list of mappings, then it was possible that an additional mapping removal audit record would be generated. This patch fixes this by returning early from the removal function when the entry was previously marked invalid. This change also had the side benefit of improving the code by decreasing the indentation level of large chunk of code by one (accounting for most of the diffstat). Fixes: 63c416887437 ("netlabel: Add network address selectors to the NetLabel/LSM domain mapping") Reported-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlabel/netlabel_domainhash.c | 59 +++++++++++++++--------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index f5d34da0646eda..12aa803b2f6892 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -85,6 +85,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) kfree(netlbl_domhsh_addr6_entry(iter6)); } #endif /* IPv6 */ + kfree(ptr->def.addrsel); } kfree(ptr->domain); kfree(ptr); @@ -536,6 +537,8 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, goto add_return; } #endif /* IPv6 */ + /* cleanup the new entry since we've moved everything over */ + netlbl_domhsh_free_entry(&entry->rcu); } else ret_val = -EINVAL; @@ -579,6 +582,12 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, { int ret_val = 0; struct audit_buffer *audit_buf; + struct netlbl_af4list *iter4; + struct netlbl_domaddr4_map *map4; +#if IS_ENABLED(CONFIG_IPV6) + struct netlbl_af6list *iter6; + struct netlbl_domaddr6_map *map6; +#endif /* IPv6 */ if (entry == NULL) return -ENOENT; @@ -596,6 +605,9 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); + if (ret_val) + return ret_val; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); if (audit_buf != NULL) { audit_log_format(audit_buf, @@ -605,40 +617,29 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, audit_log_end(audit_buf); } - if (ret_val == 0) { - struct netlbl_af4list *iter4; - struct netlbl_domaddr4_map *map4; -#if IS_ENABLED(CONFIG_IPV6) - struct netlbl_af6list *iter6; - struct netlbl_domaddr6_map *map6; -#endif /* IPv6 */ - - switch (entry->def.type) { - case NETLBL_NLTYPE_ADDRSELECT: - netlbl_af4list_foreach_rcu(iter4, - &entry->def.addrsel->list4) { - map4 = netlbl_domhsh_addr4_entry(iter4); - cipso_v4_doi_putdef(map4->def.cipso); - } + switch (entry->def.type) { + case NETLBL_NLTYPE_ADDRSELECT: + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) { + map4 = netlbl_domhsh_addr4_entry(iter4); + cipso_v4_doi_putdef(map4->def.cipso); + } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->def.addrsel->list6) { - map6 = netlbl_domhsh_addr6_entry(iter6); - calipso_doi_putdef(map6->def.calipso); - } + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { + map6 = netlbl_domhsh_addr6_entry(iter6); + calipso_doi_putdef(map6->def.calipso); + } #endif /* IPv6 */ - break; - case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_putdef(entry->def.cipso); - break; + break; + case NETLBL_NLTYPE_CIPSOV4: + cipso_v4_doi_putdef(entry->def.cipso); + break; #if IS_ENABLED(CONFIG_IPV6) - case NETLBL_NLTYPE_CALIPSO: - calipso_doi_putdef(entry->def.calipso); - break; + case NETLBL_NLTYPE_CALIPSO: + calipso_doi_putdef(entry->def.calipso); + break; #endif /* IPv6 */ - } - call_rcu(&entry->rcu, netlbl_domhsh_free_entry); } + call_rcu(&entry->rcu, netlbl_domhsh_free_entry); return ret_val; } From 8b61bb0b2d57b58f59eb218c049f2f4c2a8bf7a7 Mon Sep 17 00:00:00 2001 From: Kamil Lorenc Date: Tue, 1 Sep 2020 10:57:38 +0200 Subject: [PATCH 4/9] net: usb: dm9601: Add USB ID of Keenetic Plus DSL [ Upstream commit a609d0259183a841621f252e067f40f8cc25d6f6 ] Keenetic Plus DSL is a xDSL modem that uses dm9620 as its USB interface. Signed-off-by: Kamil Lorenc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/dm9601.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index b91f92e4e5f22d..915ac75b55fc7c 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -625,6 +625,10 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0a46, 0x1269), /* DM9621A USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, + { + USB_DEVICE(0x0586, 0x3427), /* ZyXEL Keenetic Plus DSL xDSL modem */ + .driver_info = (unsigned long)&dm9601_info, + }, {}, // END }; From 20f8c874789a80fbc1ab44451274bbde52bfa4b8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 21 Aug 2020 14:59:38 +0800 Subject: [PATCH 5/9] sctp: not disable bh in the whole sctp_get_port_local() [ Upstream commit 3106ecb43a05dc3e009779764b9da245a5d082de ] With disabling bh in the whole sctp_get_port_local(), when snum == 0 and too many ports have been used, the do-while loop will take the cpu for a long time and cause cpu stuck: [ ] watchdog: BUG: soft lockup - CPU#11 stuck for 22s! [ ] RIP: 0010:native_queued_spin_lock_slowpath+0x4de/0x940 [ ] Call Trace: [ ] _raw_spin_lock+0xc1/0xd0 [ ] sctp_get_port_local+0x527/0x650 [sctp] [ ] sctp_do_bind+0x208/0x5e0 [sctp] [ ] sctp_autobind+0x165/0x1e0 [sctp] [ ] sctp_connect_new_asoc+0x355/0x480 [sctp] [ ] __sctp_connect+0x360/0xb10 [sctp] There's no need to disable bh in the whole function of sctp_get_port_local. So fix this cpu stuck by removing local_bh_disable() called at the beginning, and using spin_lock_bh() instead. The same thing was actually done for inet_csk_get_port() in Commit ea8add2b1903 ("tcp/dccp: better use of ephemeral ports in bind()"). Thanks to Marcelo for pointing the buggy code out. v1->v2: - use cond_resched() to yield cpu to other tasks if needed, as Eric noticed. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 58fe6556cdf5bb..3a11212bb4c0e5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8176,8 +8176,6 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) pr_debug("%s: begins, snum:%d\n", __func__, snum); - local_bh_disable(); - if (snum == 0) { /* Search for an available port. */ int low, high, remaining, index; @@ -8196,20 +8194,21 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) continue; index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; - spin_lock(&head->lock); + spin_lock_bh(&head->lock); sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && net_eq(sock_net(sk), pp->net)) goto next; break; next: - spin_unlock(&head->lock); + spin_unlock_bh(&head->lock); + cond_resched(); } while (--remaining > 0); /* Exhausted local port range during search? */ ret = 1; if (remaining <= 0) - goto fail; + return ret; /* OK, here is the one we will use. HEAD (the port * hash table list entry) is non-NULL and we hold it's @@ -8224,7 +8223,7 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) * port iterator, pp being NULL. */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; - spin_lock(&head->lock); + spin_lock_bh(&head->lock); sctp_for_each_hentry(pp, &head->chain) { if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; @@ -8324,10 +8323,7 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) ret = 0; fail_unlock: - spin_unlock(&head->lock); - -fail: - local_bh_enable(); + spin_unlock_bh(&head->lock); return ret; } From b95eb482c288c4f2ca198b62a272038429a0c521 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 25 Aug 2020 10:44:04 -0700 Subject: [PATCH 6/9] taprio: Fix using wrong queues in gate mask [ Upstream commit 09e31cf0c528dac3358a081dc4e773d1b3de1bc9 ] Since commit 9c66d1564676 ("taprio: Add support for hardware offloading") there's a bit of inconsistency when offloading schedules to the hardware: In software mode, the gate masks are specified in terms of traffic classes, so if say "sched-entry S 03 20000", it means that the traffic classes 0 and 1 are open for 20us; when taprio is offloaded to hardware, the gate masks are specified in terms of hardware queues. The idea here is to fix hardware offloading, so schedules in hardware and software mode have the same behavior. What's needed to do is to map traffic classes to queues when applying the offload to the driver. Fixes: 9c66d1564676 ("taprio: Add support for hardware offloading") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_taprio.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b1eb12d33b9a6c..6a5086e586efb3 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1177,9 +1177,27 @@ static void taprio_offload_config_changed(struct taprio_sched *q) spin_unlock(&q->current_entry_lock); } -static void taprio_sched_to_offload(struct taprio_sched *q, +static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask) +{ + u32 i, queue_mask = 0; + + for (i = 0; i < dev->num_tc; i++) { + u32 offset, count; + + if (!(tc_mask & BIT(i))) + continue; + + offset = dev->tc_to_txq[i].offset; + count = dev->tc_to_txq[i].count; + + queue_mask |= GENMASK(offset + count - 1, offset); + } + + return queue_mask; +} + +static void taprio_sched_to_offload(struct net_device *dev, struct sched_gate_list *sched, - const struct tc_mqprio_qopt *mqprio, struct tc_taprio_qopt_offload *offload) { struct sched_entry *entry; @@ -1194,7 +1212,8 @@ static void taprio_sched_to_offload(struct taprio_sched *q, e->command = entry->command; e->interval = entry->interval; - e->gate_mask = entry->gate_mask; + e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask); + i++; } @@ -1202,7 +1221,6 @@ static void taprio_sched_to_offload(struct taprio_sched *q, } static int taprio_enable_offload(struct net_device *dev, - struct tc_mqprio_qopt *mqprio, struct taprio_sched *q, struct sched_gate_list *sched, struct netlink_ext_ack *extack) @@ -1224,7 +1242,7 @@ static int taprio_enable_offload(struct net_device *dev, return -ENOMEM; } offload->enable = 1; - taprio_sched_to_offload(q, sched, mqprio, offload); + taprio_sched_to_offload(dev, sched, offload); err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { @@ -1486,7 +1504,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } if (FULL_OFFLOAD_IS_ENABLED(q->flags)) - err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); + err = taprio_enable_offload(dev, q, new_admin, extack); else err = taprio_disable_offload(dev, q, extack); if (err) From 09c45065257b5b194e08a1b4749daeebb961675e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 2 Sep 2020 22:44:16 +0900 Subject: [PATCH 7/9] tipc: fix shutdown() of connectionless socket [ Upstream commit 2a63866c8b51a3f72cea388dfac259d0e14c4ba6 ] syzbot is reporting hung task at nbd_ioctl() [1], for there are two problems regarding TIPC's connectionless socket's shutdown() operation. ---------- #include #include #include #include #include int main(int argc, char *argv[]) { const int fd = open("/dev/nbd0", 3); alarm(5); ioctl(fd, NBD_SET_SOCK, socket(PF_TIPC, SOCK_DGRAM, 0)); ioctl(fd, NBD_DO_IT, 0); /* To be interrupted by SIGALRM. */ return 0; } ---------- One problem is that wait_for_completion() from flush_workqueue() from nbd_start_device_ioctl() from nbd_ioctl() cannot be completed when nbd_start_device_ioctl() received a signal at wait_event_interruptible(), for tipc_shutdown() from kernel_sock_shutdown(SHUT_RDWR) from nbd_mark_nsock_dead() from sock_shutdown() from nbd_start_device_ioctl() is failing to wake up a WQ thread sleeping at wait_woken() from tipc_wait_for_rcvmsg() from sock_recvmsg() from sock_xmit() from nbd_read_stat() from recv_work() scheduled by nbd_start_device() from nbd_start_device_ioctl(). Fix this problem by always invoking sk->sk_state_change() (like inet_shutdown() does) when tipc_shutdown() is called. The other problem is that tipc_wait_for_rcvmsg() cannot return when tipc_shutdown() is called, for tipc_shutdown() sets sk->sk_shutdown to SEND_SHUTDOWN (despite "how" is SHUT_RDWR) while tipc_wait_for_rcvmsg() needs sk->sk_shutdown set to RCV_SHUTDOWN or SHUTDOWN_MASK. Fix this problem by setting sk->sk_shutdown to SHUTDOWN_MASK (like inet_shutdown() does) when the socket is connectionless. [1] https://syzkaller.appspot.com/bug?id=3fe51d307c1f0a845485cf1798aa059d12bf18b2 Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index aea951a1f805b5..5318bb6611abc2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2616,18 +2616,21 @@ static int tipc_shutdown(struct socket *sock, int how) trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); - sk->sk_shutdown = SEND_SHUTDOWN; + if (tipc_sk_type_connectionless(sk)) + sk->sk_shutdown = SHUTDOWN_MASK; + else + sk->sk_shutdown = SEND_SHUTDOWN; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ __skb_queue_purge(&sk->sk_receive_queue); - /* Wake up anyone sleeping in poll */ - sk->sk_state_change(sk); res = 0; } else { res = -ENOTCONN; } + /* Wake up anyone sleeping in poll. */ + sk->sk_state_change(sk); release_sock(sk); return res; From ddb279d64b724b197dec829c016b5d386314b970 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Aug 2020 12:40:06 -0700 Subject: [PATCH 8/9] net: disable netpoll on fresh napis [ Upstream commit 96e97bc07e90f175a8980a22827faf702ca4cb30 ] napi_disable() makes sure to set the NAPI_STATE_NPSVC bit to prevent netpoll from accessing rings before init is complete. However, the same is not done for fresh napi instances in netif_napi_add(), even though we expect NAPI instances to be added as disabled. This causes crashes during driver reconfiguration (enabling XDP, changing the channel count) - if there is any printk() after netif_napi_add() but before napi_enable(). To ensure memory ordering is correct we need to use RCU accessors. Reported-by: Rob Sherwood Fixes: 2d8bff12699a ("netpoll: Close race condition between poll_one_napi and napi_disable") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- net/core/netpoll.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 56cd7b83a38293..cdc1c3a144e1f1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6231,12 +6231,13 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, netdev_err_once(dev, "%s() called with weight %d\n", __func__, weight); napi->weight = weight; - list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state); + set_bit(NAPI_STATE_NPSVC, &napi->state); + list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); } EXPORT_SYMBOL(netif_napi_add); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 849380a622ef9d..cb67d36f3adb03 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -161,7 +161,7 @@ static void poll_napi(struct net_device *dev) struct napi_struct *napi; int cpu = smp_processor_id(); - list_for_each_entry(napi, &dev->napi_list, dev_list) { + list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) { poll_one_napi(napi); smp_store_release(&napi->poll_owner, -1); From 6c3d34dea2fc9b02c4419af4e368e8b73f566c88 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 12 Sep 2020 14:18:56 +0200 Subject: [PATCH 9/9] Linux 5.4.65 Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7bdfb21bb92692..4cb68164b79eed 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 64 +SUBLEVEL = 65 EXTRAVERSION = NAME = Kleptomaniac Octopus