From 2b92079db1888ed6b5fb2d0af08da86f993afcc6 Mon Sep 17 00:00:00 2001
From: Jonathan Maple
Date: Fri, 27 Jun 2025 18:56:46 -0400
Subject: [PATCH 01/53] security/keys: fix slab-out-of-bounds in key_task_permission

jira LE-3467
cve CVE-2024-50301
Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10
commit-author Chen Ridong
commit 4a74da044ec9ec8679e6beccc4306b936b62873f

KASAN reports an out-of-bounds read:

BUG: KASAN: slab-out-of-bounds in __kuid_val include/linux/uidgid.h:36
BUG: KASAN: slab-out-of-bounds in uid_eq include/linux/uidgid.h:63 [inline]
BUG: KASAN: slab-out-of-bounds in key_task_permission+0x394/0x410 security/keys/permission.c:54
Read of size 4 at addr ffff88813c3ab618 by task stress-ng/4362

CPU: 2 PID: 4362 Comm: stress-ng Not tainted 5.10.0-14930-gafbffd6c3ede #15
Call Trace:
 __dump_stack lib/dump_stack.c:82 [inline]
 dump_stack+0x107/0x167 lib/dump_stack.c:123
 print_address_description.constprop.0+0x19/0x170 mm/kasan/report.c:400
 __kasan_report.cold+0x6c/0x84 mm/kasan/report.c:560
 kasan_report+0x3a/0x50 mm/kasan/report.c:585
 __kuid_val include/linux/uidgid.h:36 [inline]
 uid_eq include/linux/uidgid.h:63 [inline]
 key_task_permission+0x394/0x410 security/keys/permission.c:54
 search_nested_keyrings+0x90e/0xe90 security/keys/keyring.c:793

This issue was also reported by syzbot. It can be reproduced by
following these steps (more details in [1]):

1. Obtain more than 32 inputs that have similar hashes, which end with
   the pattern '0xxxxxxxe6'.
2. Reboot and add the keys obtained in step 1.

The reproducer demonstrates how this issue happened:

1. In the search_nested_keyrings function, when it iterates through the
   slots in a node (below the ascend_to_node tag), if the slot pointer
   is meta and node->back_pointer != NULL (i.e. the node is not the
   root), it proceeds to descend_to_node. However, there is an
   exception: if the node is the root and one of its slots points to a
   shortcut, that slot is treated as a keyring.

2. Whether the ptr is a keyring is decided by the
   keyring_ptr_is_keyring() function. However, KEYRING_PTR_SUBTYPE is
   0x2UL, the same as ASSOC_ARRAY_PTR_SUBTYPE_MASK.

3. When 32 keys with similar hashes are added to the tree, the ROOT has
   keys with hashes that are not similar (e.g. slot 0) and it splits
   NODE A without using a shortcut. When NODE A is filled with keys
   whose hashes are all xxe6, i.e. similar, NODE A splits with a
   shortcut. Finally, it forms the tree shown below, where slot 6
   points to a shortcut.

                              NODE A
                    +------>+---+
            ROOT    |       | 0 | xxe6
            +---+   |       +---+
    xxxx    | 0 | shortcut  :   : xxe6
            +---+   |       +---+
    xxe6    :   :   |       |   | xxe6
            +---+   |       +---+
            | 6 |---+       :   : xxe6
            +---+           +---+
    xxe6    :   :           | f | xxe6
            +---+           +---+
    xxe6    | f |
            +---+

4. As mentioned above, if a slot of the root (slot 6) points to a
   shortcut, it may be mistakenly treated as a key*, leading to an
   out-of-bounds read.

To fix this issue, jump to descend_to_node if the ptr is a shortcut,
regardless of whether the node is the root or not.

[1] https://lore.kernel.org/linux-kernel/1cfa878e-8c7b-4570-8606-21daf5e13ce7@huaweicloud.com/

[jarkko: tweaked the commit message a bit to have an appropriate closes tag.]
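A stand-alone illustration of the bit collision described in point 2 above: the keyring test looks only at the 0x2 bit, which a shortcut-tagged meta pointer also carries. The 0x2 values come from the commit message itself; the 0x1 meta bit and all helper names below are simplified stand-ins for this sketch, not the kernel's own definitions.

    #include <stdio.h>
    #include <stdint.h>

    #define PTR_META_TYPE       0x1UL  /* set: node or shortcut; clear: leaf */
    #define PTR_SUBTYPE_MASK    0x2UL  /* on a meta ptr: set means shortcut  */
    #define KEYRING_PTR_SUBTYPE 0x2UL  /* on a leaf ptr: set means keyring   */

    static int is_meta(uintptr_t p)     { return !!(p & PTR_META_TYPE); }
    static int is_shortcut(uintptr_t p) { return is_meta(p) && (p & PTR_SUBTYPE_MASK); }
    /* naive test: looks at the 0x2 bit only, like the keyring subtype check */
    static int looks_like_keyring(uintptr_t p) { return !!(p & KEYRING_PTR_SUBTYPE); }

    int main(void)
    {
        /* a fake shortcut pointer: meta bit and shortcut subtype bit set */
        uintptr_t shortcut = 0x1000 | PTR_META_TYPE | PTR_SUBTYPE_MASK;

        /* prints 1: the bare 0x2 test also fires for a shortcut, which is
         * the misclassification that let it be dereferenced as a key */
        printf("looks_like_keyring(shortcut) = %d\n", looks_like_keyring(shortcut));

        /* the fix checks the meta bit first and descends for shortcuts,
         * so the keyring test is never applied to a meta pointer */
        if (is_meta(shortcut) && is_shortcut(shortcut))
            printf("descend_to_node\n");
        return 0;
    }

This mirrors the shape of the hunk below, which descends for any meta pointer that is either below the root or a shortcut, instead of letting a root-level shortcut fall through to the keyring check.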
Fixes: b2a4df200d57 ("KEYS: Expand the capacity of a keyring")
 Reported-by: syzbot+5b415c07907a2990d1a3@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/000000000000cbb7860611f61147@google.com/T/
 Signed-off-by: Chen Ridong
 Reviewed-by: Jarkko Sakkinen
 Signed-off-by: Jarkko Sakkinen
(cherry picked from commit 4a74da044ec9ec8679e6beccc4306b936b62873f)
 Signed-off-by: Jonathan Maple
---
 security/keys/keyring.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 408bff68e4e97..565f5d41058b2 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -742,8 +742,11 @@ static bool search_nested_keyrings(struct key *keyring,
 	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
 		ptr = READ_ONCE(node->slots[slot]);

-		if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
-			goto descend_to_node;
+		if (assoc_array_ptr_is_meta(ptr)) {
+			if (node->back_pointer ||
+			    assoc_array_ptr_is_shortcut(ptr))
+				goto descend_to_node;
+		}

 		if (!keyring_ptr_is_keyring(ptr))
 			continue;

From 949da0e7e5ea93e8187af71c5371bec30df802a8 Mon Sep 17 00:00:00 2001
From: Jonathan Maple
Date: Fri, 27 Jun 2025 18:56:47 -0400
Subject: [PATCH 02/53] cifs: fix double free race when mount fails in cifs_get_root()

jira LE-3467
cve CVE-2022-48919
Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10
commit-author Ronnie Sahlberg
commit 3d6cc9898efdfb062efb74dc18cfc700e082f5d5

When cifs_get_root() fails during cifs_smb3_do_mount() we call
deactivate_locked_super() which eventually will call delayed_free()
which will free the context.

In this situation we should not proceed to enter the out: section in
cifs_smb3_do_mount() and free the same resources a second time.

[Thu Feb 10 12:59:06 2022] BUG: KASAN: use-after-free in rcu_cblist_dequeue+0x32/0x60
[Thu Feb 10 12:59:06 2022] Read of size 8 at addr ffff888364f4d110 by task swapper/1/0
[Thu Feb 10 12:59:06 2022] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G OE 5.17.0-rc3+ #4
[Thu Feb 10 12:59:06 2022] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.0 12/17/2019
[Thu Feb 10 12:59:06 2022] Call Trace:
[Thu Feb 10 12:59:06 2022]
[Thu Feb 10 12:59:06 2022] dump_stack_lvl+0x5d/0x78
[Thu Feb 10 12:59:06 2022] print_address_description.constprop.0+0x24/0x150
[Thu Feb 10 12:59:06 2022] ? rcu_cblist_dequeue+0x32/0x60
[Thu Feb 10 12:59:06 2022] kasan_report.cold+0x7d/0x117
[Thu Feb 10 12:59:06 2022] ? rcu_cblist_dequeue+0x32/0x60
[Thu Feb 10 12:59:06 2022] __asan_load8+0x86/0xa0
[Thu Feb 10 12:59:06 2022] rcu_cblist_dequeue+0x32/0x60
[Thu Feb 10 12:59:06 2022] rcu_core+0x547/0xca0
[Thu Feb 10 12:59:06 2022] ? call_rcu+0x3c0/0x3c0
[Thu Feb 10 12:59:06 2022] ? __this_cpu_preempt_check+0x13/0x20
[Thu Feb 10 12:59:06 2022] ? lock_is_held_type+0xea/0x140
[Thu Feb 10 12:59:06 2022] rcu_core_si+0xe/0x10
[Thu Feb 10 12:59:06 2022] __do_softirq+0x1d4/0x67b
[Thu Feb 10 12:59:06 2022] __irq_exit_rcu+0x100/0x150
[Thu Feb 10 12:59:06 2022] irq_exit_rcu+0xe/0x30
[Thu Feb 10 12:59:06 2022] sysvec_hyperv_stimer0+0x9d/0xc0
...
[Thu Feb 10 12:59:07 2022] Freed by task 58179:
[Thu Feb 10 12:59:07 2022] kasan_save_stack+0x26/0x50
[Thu Feb 10 12:59:07 2022] kasan_set_track+0x25/0x30
[Thu Feb 10 12:59:07 2022] kasan_set_free_info+0x24/0x40
[Thu Feb 10 12:59:07 2022] ____kasan_slab_free+0x137/0x170
[Thu Feb 10 12:59:07 2022] __kasan_slab_free+0x12/0x20
[Thu Feb 10 12:59:07 2022] slab_free_freelist_hook+0xb3/0x1d0
[Thu Feb 10 12:59:07 2022] kfree+0xcd/0x520
[Thu Feb 10 12:59:07 2022] cifs_smb3_do_mount+0x149/0xbe0 [cifs]
[Thu Feb 10 12:59:07 2022] smb3_get_tree+0x1a0/0x2e0 [cifs]
[Thu Feb 10 12:59:07 2022] vfs_get_tree+0x52/0x140
[Thu Feb 10 12:59:07 2022] path_mount+0x635/0x10c0
[Thu Feb 10 12:59:07 2022] __x64_sys_mount+0x1bf/0x210
[Thu Feb 10 12:59:07 2022] do_syscall_64+0x5c/0xc0
[Thu Feb 10 12:59:07 2022] entry_SYSCALL_64_after_hwframe+0x44/0xae
[Thu Feb 10 12:59:07 2022] Last potentially related work creation:
[Thu Feb 10 12:59:07 2022] kasan_save_stack+0x26/0x50
[Thu Feb 10 12:59:07 2022] __kasan_record_aux_stack+0xb6/0xc0
[Thu Feb 10 12:59:07 2022] kasan_record_aux_stack_noalloc+0xb/0x10
[Thu Feb 10 12:59:07 2022] call_rcu+0x76/0x3c0
[Thu Feb 10 12:59:07 2022] cifs_umount+0xce/0xe0 [cifs]
[Thu Feb 10 12:59:07 2022] cifs_kill_sb+0xc8/0xe0 [cifs]
[Thu Feb 10 12:59:07 2022] deactivate_locked_super+0x5d/0xd0
[Thu Feb 10 12:59:07 2022] cifs_smb3_do_mount+0xab9/0xbe0 [cifs]
[Thu Feb 10 12:59:07 2022] smb3_get_tree+0x1a0/0x2e0 [cifs]
[Thu Feb 10 12:59:07 2022] vfs_get_tree+0x52/0x140
[Thu Feb 10 12:59:07 2022] path_mount+0x635/0x10c0
[Thu Feb 10 12:59:07 2022] __x64_sys_mount+0x1bf/0x210
[Thu Feb 10 12:59:07 2022] do_syscall_64+0x5c/0xc0
[Thu Feb 10 12:59:07 2022] entry_SYSCALL_64_after_hwframe+0x44/0xae

 Reported-by: Shyam Prasad N
 Reviewed-by: Shyam Prasad N
 Signed-off-by: Ronnie Sahlberg
 Signed-off-by: Steve French
(cherry picked from commit 3d6cc9898efdfb062efb74dc18cfc700e082f5d5)
 Signed-off-by: Jonathan Maple
---
 fs/cifs/cifsfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cf323b776d497..98d41705a094d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -900,6 +900,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,

 out_super:
 	deactivate_locked_super(sb);
+	return root;
 out:
 	if (cifs_sb) {
 		if (!sb || IS_ERR(sb)) { /* otherwise kill_sb will handle */

From 6bd016b86f50a58099e001af182fa8e5a4f9ff41 Mon Sep 17 00:00:00 2001
From: Jonathan Maple
Date: Fri, 27 Jun 2025 18:56:47 -0400
Subject: [PATCH 03/53] smb: client: fix warning in cifs_smb3_do_mount()

jira LE-3467
Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10
commit-author Paulo Alcantara
commit 12c30f33cc6769bf411088a2872843c4f9ea32f9

This fixes the following warning reported by kernel test robot

  fs/smb/client/cifsfs.c:982 cifs_smb3_do_mount() warn: possible memory
  leak of 'cifs_sb'

Link: https://lore.kernel.org/all/202306170124.CtQqzf0I-lkp@intel.com/
 Signed-off-by: Paulo Alcantara (SUSE)
 Signed-off-by: Steve French
(cherry picked from commit 12c30f33cc6769bf411088a2872843c4f9ea32f9)
 Signed-off-by: Jonathan Maple
---
 fs/cifs/cifsfs.c | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 98d41705a094d..f5278aec90bbc 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -809,11 +809,11 @@ struct dentry *
 cifs_smb3_do_mount(struct file_system_type *fs_type,
 	      int flags, struct smb3_fs_context *old_ctx)
 {
-	int rc;
-	struct super_block *sb = NULL;
-	struct cifs_sb_info *cifs_sb = NULL;
 	struct cifs_mnt_data mnt_data;
+	struct cifs_sb_info *cifs_sb;
+	struct super_block *sb;
 	struct dentry *root;
+	int rc;

 	/*
 	 * Prints in Kernel / CIFS log the attempted mount operation
@@ -824,11 +824,9 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
 	else
 		cifs_info("Attempting to mount %s\n", old_ctx->UNC);

-	cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
-	if (cifs_sb == NULL) {
-		root = ERR_PTR(-ENOMEM);
-		goto out;
-	}
+	cifs_sb = kzalloc(sizeof(*cifs_sb), GFP_KERNEL);
+	if (!cifs_sb)
+		return ERR_PTR(-ENOMEM);

 	cifs_sb->ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
 	if (!cifs_sb->ctx) {
@@ -871,10 +869,8 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,

 	sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data);
 	if (IS_ERR(sb)) {
-		root = ERR_CAST(sb);
 		cifs_umount(cifs_sb);
-		cifs_sb = NULL;
-		goto out;
+		return ERR_CAST(sb);
 	}

 	if (sb->s_root) {
@@ -902,13 +898,9 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,

 	deactivate_locked_super(sb);
 	return root;
 out:
-	if (cifs_sb) {
-		if (!sb || IS_ERR(sb)) { /* otherwise kill_sb will handle */
-			kfree(cifs_sb->prepath);
-			smb3_cleanup_fs_context(cifs_sb->ctx);
-			kfree(cifs_sb);
-		}
-	}
+	kfree(cifs_sb->prepath);
+	smb3_cleanup_fs_context(cifs_sb->ctx);
+	kfree(cifs_sb);
 	return root;
 }

From d8d6896b7a4f5d1cf87103363f812fcfd19c487d Mon Sep 17 00:00:00 2001
From: Jonathan Maple
Date: Fri, 27 Jun 2025 18:56:47 -0400
Subject: [PATCH 04/53] s390/pci: rename lock member in struct zpci_dev

jira LE-3467
Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10
commit-author Gerd Bayer
commit 0d48566d4b58946c8e1b0baac0347616060a81c9
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/0d48566d.failed

Since this guards only the Function Measurement Block, rename from
generic lock to fmb_lock in preparation to introduce another lock
that guards the state member

 Signed-off-by: Gerd Bayer
 Reviewed-by: Niklas Schnelle
 Signed-off-by: Heiko Carstens
(cherry picked from commit 0d48566d4b58946c8e1b0baac0347616060a81c9)
 Signed-off-by: Jonathan Maple

# Conflicts:
#	arch/s390/pci/pci.c
---
 .../0d48566d.failed | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/0d48566d.failed

diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/0d48566d.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/0d48566d.failed
new file mode 100644
index 0000000000000..8e0e00557e07e
--- /dev/null
+++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/0d48566d.failed
@@ -0,0 +1,129 @@
+s390/pci: rename lock member in struct zpci_dev
+
+jira LE-3467
+Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10
+commit-author Gerd Bayer
+commit 0d48566d4b58946c8e1b0baac0347616060a81c9
+Empty-Commit: Cherry-Pick Conflicts during history rebuild.
+Will be included in final tarball splat.
Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/0d48566d.failed + +Since this guards only the Function Measurement Block, rename from +generic lock to fmb_lock in preparation to introduce another lock +that guards the state member + + Signed-off-by: Gerd Bayer + Reviewed-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit 0d48566d4b58946c8e1b0baac0347616060a81c9) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci.c +diff --cc arch/s390/pci/pci.c +index 661c4e000c5b,dff609e8a2a0..000000000000 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@@ -885,33 -805,10 +885,38 @@@ struct zpci_dev *zpci_create_device(u3 + goto error; + zdev->state = state; + +++<<<<<<< HEAD + + mutex_init(&zdev->lock); +++======= ++ kref_init(&zdev->kref); ++ mutex_init(&zdev->fmb_lock); +++>>>>>>> 0d48566d4b58 (s390/pci: rename lock member in struct zpci_dev) + mutex_init(&zdev->kzdev_lock); + + + return zdev; + + + +error: + + zpci_dbg(0, "crt fid:%x, rc:%d\n", fid, rc); + + kfree(zdev); + + return ERR_PTR(rc); + +} + + + +/** + + * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem + + * @zdev: The zPCI device to be added + + * + + * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus creating + + * a new one as necessary. A hotplug slot is created and events start to be handled. + + * If successful from this point on zpci_zdev_get() and zpci_zdev_put() must be used. + + * If adding the struct zpci_dev fails the device was not added and should be freed. + + * + + * Return: 0 on success, or an error code otherwise + + */ + +int zpci_add_device(struct zpci_dev *zdev) + +{ + + int rc; + + + + zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); + rc = zpci_init_iommu(zdev); + if (rc) + goto error; +diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h +index 6652630be19e..820b3fe33129 100644 +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -146,7 +146,6 @@ struct zpci_dev { + u8 reserved : 1; + unsigned int devfn; /* DEVFN part of the RID*/ + +- struct mutex lock; + u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ + u32 uid; /* user defined id */ + u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ +@@ -182,6 +181,7 @@ struct zpci_dev { + u64 dma_mask; /* DMA address space mask */ + + /* Function measurement block */ ++ struct mutex fmb_lock; + struct zpci_fmb *fmb; + u16 fmb_update; /* update interval */ + u16 fmb_length; +* Unmerged path arch/s390/pci/pci.c +diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c +index 2dec8136cc16..ddb539044a69 100644 +--- a/arch/s390/pci/pci_debug.c ++++ b/arch/s390/pci/pci_debug.c +@@ -85,9 +85,9 @@ static int pci_perf_show(struct seq_file *m, void *v) + if (!zdev) + return 0; + +- mutex_lock(&zdev->lock); ++ mutex_lock(&zdev->fmb_lock); + if (!zdev->fmb) { +- mutex_unlock(&zdev->lock); ++ mutex_unlock(&zdev->fmb_lock); + seq_puts(m, "FMB statistics disabled\n"); + return 0; + } +@@ -124,7 +124,7 @@ static int pci_perf_show(struct seq_file *m, void *v) + } + + pci_sw_counter_show(m); +- mutex_unlock(&zdev->lock); ++ mutex_unlock(&zdev->fmb_lock); + return 0; + } + +@@ -142,7 +142,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, + if (rc) + return rc; + +- mutex_lock(&zdev->lock); ++ mutex_lock(&zdev->fmb_lock); + switch (val) { + case 0: + rc = zpci_fmb_disable_device(zdev); +@@ -151,7 +151,7 @@ static ssize_t 
pci_perf_seq_write(struct file *file, const char __user *ubuf, + rc = zpci_fmb_enable_device(zdev); + break; + } +- mutex_unlock(&zdev->lock); ++ mutex_unlock(&zdev->fmb_lock); + return rc ? rc : count; + } + From 25900119ca9642f3e3cc1c1f2782ea0f5a32a7b2 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:48 -0400 Subject: [PATCH 05/53] s390/pci: introduce lock to synchronize state of zpci_dev's jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Gerd Bayer commit bcb5d6c769039c8358a2359e7c3ea5d97ce93108 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bcb5d6c7.failed There's a number of tasks that need the state of a zpci device to be stable. Other tasks need to be synchronized as they change the state. State changes could be generated by the system as availability or error events, or be requested by the user through manipulations in sysfs. Some other actions accessible through sysfs - like device resets - need the state to be stable. Unsynchronized state handling could lead to unusable devices. This has been observed in cases of concurrent state changes through systemd udev rules and DPM boot control. Some breakage can be provoked by artificial tests, e.g. through repetitively injecting "recover" on a PCI function through sysfs while running a "hotplug remove/add" in a loop through a PCI slot's "power" attribute in sysfs. After a few iterations this could result in a kernel oops. So introduce a new mutex "state_lock" to guard the state property of the struct zpci_dev. Acquire this lock in all task that modify the state: - hotplug add and remove, through the PCI hotplug slot entry, - avaiability events, as reported by the platform, - error events, as reported by the platform, - during device resets, explicit through sysfs requests or implict through the common PCI layer. Break out an inner _do_recover() routine out of recover_store() to separte the necessary synchronizations from the actual manipulations of the zpci_dev required for the reset. With the following changes I was able to run the inject loops for hours without hitting an error. Signed-off-by: Gerd Bayer Reviewed-by: Niklas Schnelle Signed-off-by: Heiko Carstens (cherry picked from commit bcb5d6c769039c8358a2359e7c3ea5d97ce93108) Signed-off-by: Jonathan Maple # Conflicts: # arch/s390/pci/pci.c # arch/s390/pci/pci_sysfs.c --- .../bcb5d6c7.failed | 325 ++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bcb5d6c7.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bcb5d6c7.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bcb5d6c7.failed new file mode 100644 index 0000000000000..86fb2bb2fb6d0 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bcb5d6c7.failed @@ -0,0 +1,325 @@ +s390/pci: introduce lock to synchronize state of zpci_dev's + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Gerd Bayer +commit bcb5d6c769039c8358a2359e7c3ea5d97ce93108 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bcb5d6c7.failed + +There's a number of tasks that need the state of a zpci device +to be stable. Other tasks need to be synchronized as they change the state. 
+ +State changes could be generated by the system as availability or error +events, or be requested by the user through manipulations in sysfs. +Some other actions accessible through sysfs - like device resets - need the +state to be stable. + +Unsynchronized state handling could lead to unusable devices. This has +been observed in cases of concurrent state changes through systemd udev +rules and DPM boot control. Some breakage can be provoked by artificial +tests, e.g. through repetitively injecting "recover" on a PCI function +through sysfs while running a "hotplug remove/add" in a loop through a +PCI slot's "power" attribute in sysfs. After a few iterations this could +result in a kernel oops. + +So introduce a new mutex "state_lock" to guard the state property of the +struct zpci_dev. Acquire this lock in all task that modify the state: + +- hotplug add and remove, through the PCI hotplug slot entry, +- avaiability events, as reported by the platform, +- error events, as reported by the platform, +- during device resets, explicit through sysfs requests or + implict through the common PCI layer. + +Break out an inner _do_recover() routine out of recover_store() to +separte the necessary synchronizations from the actual manipulations of +the zpci_dev required for the reset. + +With the following changes I was able to run the inject loops for hours +without hitting an error. + + Signed-off-by: Gerd Bayer + Reviewed-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit bcb5d6c769039c8358a2359e7c3ea5d97ce93108) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci.c +# arch/s390/pci/pci_sysfs.c +diff --cc arch/s390/pci/pci.c +index 661c4e000c5b,17267f659d22..000000000000 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@@ -27,7 -27,8 +27,12 @@@ + #include + #include + #include +++<<<<<<< HEAD + +#include +++======= ++ #include ++ #include +++>>>>>>> bcb5d6c76903 (s390/pci: introduce lock to synchronize state of zpci_dev's) + + #include + #include +@@@ -885,33 -807,11 +891,39 @@@ struct zpci_dev *zpci_create_device(u3 + goto error; + zdev->state = state; + +++<<<<<<< HEAD + + mutex_init(&zdev->lock); +++======= ++ kref_init(&zdev->kref); ++ mutex_init(&zdev->state_lock); ++ mutex_init(&zdev->fmb_lock); +++>>>>>>> bcb5d6c76903 (s390/pci: introduce lock to synchronize state of zpci_dev's) + mutex_init(&zdev->kzdev_lock); + + + return zdev; + + + +error: + + zpci_dbg(0, "crt fid:%x, rc:%d\n", fid, rc); + + kfree(zdev); + + return ERR_PTR(rc); + +} + + + +/** + + * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem + + * @zdev: The zPCI device to be added + + * + + * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus creating + + * a new one as necessary. A hotplug slot is created and events start to be handled. + + * If successful from this point on zpci_zdev_get() and zpci_zdev_put() must be used. + + * If adding the struct zpci_dev fails the device was not added and should be freed. 
+ + * + + * Return: 0 on success, or an error code otherwise + + */ + +int zpci_add_device(struct zpci_dev *zdev) + +{ + + int rc; + + + + zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); + rc = zpci_init_iommu(zdev); + if (rc) + goto error; +diff --cc arch/s390/pci/pci_sysfs.c +index cae280e5c047,a0b872b74fe3..000000000000 +--- a/arch/s390/pci/pci_sysfs.c ++++ b/arch/s390/pci/pci_sysfs.c +@@@ -81,39 -120,13 +120,48 @@@ static ssize_t recover_store(struct dev + */ + pci_lock_rescan_remove(); + if (pci_dev_is_added(pdev)) { +++<<<<<<< HEAD + + pci_stop_and_remove_bus_device(pdev); + + if (zdev->dma_table) { + + ret = zpci_dma_exit_device(zdev); + + if (ret) + + goto out; + + } + + + + if (zdev_enabled(zdev)) { + + ret = zpci_disable_device(zdev); + + /* + + * Due to a z/VM vs LPAR inconsistency in the error + + * state the FH may indicate an enabled device but + + * disable says the device is already disabled don't + + * treat it as an error here. + + */ + + if (ret == -EINVAL) + + ret = 0; + + if (ret) + + goto out; + + } + + + + ret = zpci_enable_device(zdev); + + if (ret) + + goto out; + + ret = zpci_dma_init_device(zdev); + + if (ret) { + + zpci_disable_device(zdev); + + goto out; + + } + + pci_rescan_bus(zdev->zbus->bus); + + } + +out: +++======= ++ ret = _do_recover(pdev, zdev); ++ } ++ pci_rescan_bus(zdev->zbus->bus); +++>>>>>>> bcb5d6c76903 (s390/pci: introduce lock to synchronize state of zpci_dev's) + pci_unlock_rescan_remove(); ++ ++ out: ++ mutex_unlock(&zdev->state_lock); + if (kn) + sysfs_unbreak_active_protection(kn); + return ret ? ret : count; +diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h +index 6652630be19e..9419eb1a7c43 100644 +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -123,6 +123,7 @@ struct zpci_dev { + struct kref kref; + struct hotplug_slot hotplug_slot; + ++ struct mutex state_lock; /* protect state changes */ + enum zpci_state state; + u32 fid; /* function ID, used by sclp */ + u32 fh; /* function handle, used by insn's */ +* Unmerged path arch/s390/pci/pci.c +diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c +index dcebe4118195..42c3c40cb1dd 100644 +--- a/arch/s390/pci/pci_event.c ++++ b/arch/s390/pci/pci_event.c +@@ -260,6 +260,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) + zpci_err_hex(ccdf, sizeof(*ccdf)); + + if (zdev) { ++ mutex_lock(&zdev->state_lock); + zpci_update_fh(zdev, ccdf->fh); + if (zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); +@@ -288,6 +289,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) + } + pci_dev_put(pdev); + no_pdev: ++ if (zdev) ++ mutex_unlock(&zdev->state_lock); + zpci_zdev_put(zdev); + } + +@@ -322,6 +325,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) + + zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", + ccdf->fid, ccdf->fh, ccdf->pec); ++ ++ if (existing_zdev) ++ mutex_lock(&zdev->state_lock); ++ + switch (ccdf->pec) { + case 0x0301: /* Reserved|Standby -> Configured */ + if (!zdev) { +@@ -390,8 +397,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) + default: + break; + } +- if (existing_zdev) ++ if (existing_zdev) { ++ mutex_unlock(&zdev->state_lock); + zpci_zdev_put(zdev); ++ } + } + + void zpci_event_availability(void *data) +* Unmerged path arch/s390/pci/pci_sysfs.c +diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c +index a89b7de72dcf..7333b305f2a5 100644 +--- 
a/drivers/pci/hotplug/s390_pci_hpc.c ++++ b/drivers/pci/hotplug/s390_pci_hpc.c +@@ -26,58 +26,79 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) + hotplug_slot); + int rc; + +- if (zdev->state != ZPCI_FN_STATE_STANDBY) +- return -EIO; ++ mutex_lock(&zdev->state_lock); ++ if (zdev->state != ZPCI_FN_STATE_STANDBY) { ++ rc = -EIO; ++ goto out; ++ } + + rc = sclp_pci_configure(zdev->fid); + zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc); + if (rc) +- return rc; ++ goto out; + zdev->state = ZPCI_FN_STATE_CONFIGURED; + +- return zpci_scan_configured_device(zdev, zdev->fh); ++ rc = zpci_scan_configured_device(zdev, zdev->fh); ++out: ++ mutex_unlock(&zdev->state_lock); ++ return rc; + } + + static int disable_slot(struct hotplug_slot *hotplug_slot) + { + struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, + hotplug_slot); +- struct pci_dev *pdev; ++ struct pci_dev *pdev = NULL; ++ int rc; + +- if (zdev->state != ZPCI_FN_STATE_CONFIGURED) +- return -EIO; ++ mutex_lock(&zdev->state_lock); ++ if (zdev->state != ZPCI_FN_STATE_CONFIGURED) { ++ rc = -EIO; ++ goto out; ++ } + + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev && pci_num_vf(pdev)) { + pci_dev_put(pdev); +- return -EBUSY; ++ rc = -EBUSY; ++ goto out; + } +- pci_dev_put(pdev); + +- return zpci_deconfigure_device(zdev); ++ rc = zpci_deconfigure_device(zdev); ++out: ++ mutex_unlock(&zdev->state_lock); ++ if (pdev) ++ pci_dev_put(pdev); ++ return rc; + } + + static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) + { + struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, + hotplug_slot); ++ int rc = -EIO; + +- if (zdev->state != ZPCI_FN_STATE_CONFIGURED) +- return -EIO; + /* +- * We can't take the zdev->lock as reset_slot may be called during +- * probing and/or device removal which already happens under the +- * zdev->lock. Instead the user should use the higher level +- * pci_reset_function() or pci_bus_reset() which hold the PCI device +- * lock preventing concurrent removal. If not using these functions +- * holding the PCI device lock is required. ++ * If we can't get the zdev->state_lock the device state is ++ * currently undergoing a transition and we bail out - just ++ * the same as if the device's state is not configured at all. + */ ++ if (!mutex_trylock(&zdev->state_lock)) ++ return rc; + +- /* As long as the function is configured we can reset */ +- if (probe) +- return 0; ++ /* We can reset only if the function is configured */ ++ if (zdev->state != ZPCI_FN_STATE_CONFIGURED) ++ goto out; ++ ++ if (probe) { ++ rc = 0; ++ goto out; ++ } + +- return zpci_hot_reset_device(zdev); ++ rc = zpci_hot_reset_device(zdev); ++out: ++ mutex_unlock(&zdev->state_lock); ++ return rc; + } + + static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) From 63104b2516cbab69fb1e86cf8462e88870a10bc1 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:49 -0400 Subject: [PATCH 06/53] s390/pci: remove hotplug slot when releasing the device jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Gerd Bayer commit 6ee600bfbe0f818ffb7748d99e9b0c89d0d9f02a Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6ee600bf.failed Centralize the removal so all paths are covered and the hotplug slot will remain active until the device is really destroyed. 
Signed-off-by: Gerd Bayer Reviewed-by: Niklas Schnelle Signed-off-by: Heiko Carstens (cherry picked from commit 6ee600bfbe0f818ffb7748d99e9b0c89d0d9f02a) Signed-off-by: Jonathan Maple # Conflicts: # arch/s390/pci/pci.c --- .../6ee600bf.failed | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6ee600bf.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6ee600bf.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6ee600bf.failed new file mode 100644 index 0000000000000..f14a40f61f706 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6ee600bf.failed @@ -0,0 +1,59 @@ +s390/pci: remove hotplug slot when releasing the device + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Gerd Bayer +commit 6ee600bfbe0f818ffb7748d99e9b0c89d0d9f02a +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6ee600bf.failed + +Centralize the removal so all paths are covered and the hotplug slot +will remain active until the device is really destroyed. + + Signed-off-by: Gerd Bayer + Reviewed-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit 6ee600bfbe0f818ffb7748d99e9b0c89d0d9f02a) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci.c +diff --cc arch/s390/pci/pci.c +index 661c4e000c5b,c87b8aff5285..000000000000 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@@ -1020,6 -895,29 +1020,32 @@@ int zpci_deconfigure_device(struct zpci + return 0; + } + +++<<<<<<< HEAD +++======= ++ /** ++ * zpci_device_reserved() - Mark device as resverved ++ * @zdev: the zpci_dev that was reserved ++ * ++ * Handle the case that a given zPCI function was reserved by another system. ++ * After a call to this function the zpci_dev can not be found via ++ * get_zdev_by_fid() anymore but may still be accessible via existing ++ * references though it will not be functional anymore. ++ */ ++ void zpci_device_reserved(struct zpci_dev *zdev) ++ { ++ /* ++ * Remove device from zpci_list as it is going away. This also ++ * makes sure we ignore subsequent zPCI events for this device. ++ */ ++ spin_lock(&zpci_list_lock); ++ list_del(&zdev->entry); ++ spin_unlock(&zpci_list_lock); ++ zdev->state = ZPCI_FN_STATE_RESERVED; ++ zpci_dbg(3, "rsv fid:%x\n", zdev->fid); ++ zpci_zdev_put(zdev); ++ } ++ +++>>>>>>> 6ee600bfbe0f (s390/pci: remove hotplug slot when releasing the device) + void zpci_release_device(struct kref *kref) + { + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); +* Unmerged path arch/s390/pci/pci.c From 4b0e6940f8a666edde9629f6352e74e05cddfa86 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:49 -0400 Subject: [PATCH 07/53] s390/pci: Fix potential double remove of hotplug slot jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Niklas Schnelle commit c4a585e952ca403a370586d3f16e8331a7564901 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. 
Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/c4a585e9.failed In commit 6ee600bfbe0f ("s390/pci: remove hotplug slot when releasing the device") the zpci_exit_slot() was moved from zpci_device_reserved() to zpci_release_device() with the intention of keeping the hotplug slot around until the device is actually removed. Now zpci_release_device() is only called once all references are dropped. Since the zPCI subsystem only drops its reference once the device is in the reserved state it follows that zpci_release_device() must only deal with devices in the reserved state. Despite that it contains code to tear down from both configured and standby state. For the standby case this already includes the removal of the hotplug slot so would cause a double removal if a device was ever removed in either configured or standby state. Instead of causing a potential double removal in a case that should never happen explicitly WARN_ON() if a device in non-reserved state is released and get rid of the dead code cases. Fixes: 6ee600bfbe0f ("s390/pci: remove hotplug slot when releasing the device") Reviewed-by: Matthew Rosato Reviewed-by: Gerd Bayer Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens (cherry picked from commit c4a585e952ca403a370586d3f16e8331a7564901) Signed-off-by: Jonathan Maple # Conflicts: # arch/s390/pci/pci.c --- .../c4a585e9.failed | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/c4a585e9.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/c4a585e9.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/c4a585e9.failed new file mode 100644 index 0000000000000..5f5856705bb42 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/c4a585e9.failed @@ -0,0 +1,102 @@ +s390/pci: Fix potential double remove of hotplug slot + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Niklas Schnelle +commit c4a585e952ca403a370586d3f16e8331a7564901 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/c4a585e9.failed + +In commit 6ee600bfbe0f ("s390/pci: remove hotplug slot when releasing the +device") the zpci_exit_slot() was moved from zpci_device_reserved() to +zpci_release_device() with the intention of keeping the hotplug slot +around until the device is actually removed. + +Now zpci_release_device() is only called once all references are +dropped. Since the zPCI subsystem only drops its reference once the +device is in the reserved state it follows that zpci_release_device() +must only deal with devices in the reserved state. Despite that it +contains code to tear down from both configured and standby state. For +the standby case this already includes the removal of the hotplug slot +so would cause a double removal if a device was ever removed in +either configured or standby state. + +Instead of causing a potential double removal in a case that should +never happen explicitly WARN_ON() if a device in non-reserved state is +released and get rid of the dead code cases. 
+ +Fixes: 6ee600bfbe0f ("s390/pci: remove hotplug slot when releasing the device") + Reviewed-by: Matthew Rosato + Reviewed-by: Gerd Bayer + Tested-by: Gerd Bayer + Signed-off-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit c4a585e952ca403a370586d3f16e8331a7564901) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci.c +diff --cc arch/s390/pci/pci.c +index 661c4e000c5b,7aeab522f28d..000000000000 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@@ -1023,8 -914,32 +1023,12 @@@ int zpci_deconfigure_device(struct zpci + void zpci_release_device(struct kref *kref) + { + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); +- int ret; + +++<<<<<<< HEAD +++======= ++ WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); ++ +++>>>>>>> c4a585e952ca (s390/pci: Fix potential double remove of hotplug slot) + if (zdev->zbus->bus) + zpci_bus_remove_device(zdev, false); + +@@@ -1033,31 -946,16 +1037,42 @@@ + if (zdev_enabled(zdev)) + zpci_disable_device(zdev); + +++<<<<<<< HEAD + + switch (zdev->state) { + + case ZPCI_FN_STATE_CONFIGURED: + + ret = sclp_pci_deconfigure(zdev->fid); + + zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); + + fallthrough; + + case ZPCI_FN_STATE_STANDBY: + + if (zdev->has_hp_slot) + + zpci_exit_slot(zdev); + + spin_lock(&zpci_list_lock); + + list_del(&zdev->entry); + + spin_unlock(&zpci_list_lock); + + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + + fallthrough; + + case ZPCI_FN_STATE_RESERVED: + + if (zdev->has_resources) + + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + + zpci_destroy_iommu(zdev); + + /* fallthrough */ + + default: + + break; + + } + + +++======= ++ if (zdev->has_hp_slot) ++ zpci_exit_slot(zdev); ++ ++ if (zdev->has_resources) ++ zpci_cleanup_bus_resources(zdev); ++ ++ zpci_bus_device_unregister(zdev); ++ zpci_destroy_iommu(zdev); +++>>>>>>> c4a585e952ca (s390/pci: Fix potential double remove of hotplug slot) + zpci_dbg(3, "rem fid:%x\n", zdev->fid); + - kfree_rcu(zdev, rcu); + + kfree(zdev); + } + + int zpci_report_error(struct pci_dev *pdev, +* Unmerged path arch/s390/pci/pci.c From 223f875e49aa274765fa2db69f862a707259326e Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:49 -0400 Subject: [PATCH 08/53] s390/pci: Fix missing check for zpci_create_device() error return jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Niklas Schnelle commit 42420c50c68f3e95e90de2479464f420602229fc The zpci_create_device() function returns an error pointer that needs to be checked before dereferencing it as a struct zpci_dev pointer. Add the missing check in __clp_add() where it was missed when adding the scan_list in the fixed commit. Simply not adding the device to the scan list results in the previous behavior. 
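The added check relies on the kernel's error-pointer convention from <linux/err.h>, where ERR_PTR() encodes a negative errno in the top MAX_ERRNO values of the pointer range and IS_ERR() detects such values. A minimal user-space sketch of that idiom follows, with zpci_create_device() replaced by a hypothetical stub; the stub and struct names are illustrative only, and the helpers are re-implemented here for the sketch rather than taken from the kernel:

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095          /* same bound <linux/err.h> uses */

    static void *ERR_PTR(long err)        { return (void *)err; }
    static long  PTR_ERR(const void *ptr) { return (long)ptr; }
    static int   IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    struct zdev_stub { int fid; };  /* stand-in for struct zpci_dev */

    /* stand-in for zpci_create_device(): returns ERR_PTR() on failure */
    static void *create_device_stub(int fail)
    {
        static struct zdev_stub zdev = { .fid = 0x42 };
        return fail ? ERR_PTR(-ENODEV) : (void *)&zdev;
    }

    int main(void)
    {
        void *zdev = create_device_stub(1);

        /* without this check the caller would treat the encoded errno as a
         * valid struct pointer and dereference it, the bug fixed below */
        if (IS_ERR(zdev)) {
            printf("creation failed: %ld\n", PTR_ERR(zdev));
            return 1;
        }
        return 0;
    }

Skipping the IS_ERR() test is exactly the failure mode the two added lines in this patch close: the encoded errno would be dereferenced as a struct zpci_dev and then linked onto the scan list.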
Cc: stable@vger.kernel.org Fixes: 0467cdde8c43 ("s390/pci: Sort PCI functions prior to creating virtual busses") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens (cherry picked from commit 42420c50c68f3e95e90de2479464f420602229fc) Signed-off-by: Jonathan Maple --- arch/s390/pci/pci_clp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 060523557b8e3..92b6a05d3a037 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -421,6 +421,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } From 086841e19d4fa48dbe7b0161223ac3d55ae49447 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:50 -0400 Subject: [PATCH 09/53] s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Niklas Schnelle commit 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/05a2538f.failed With commit bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") the code to ignore power off of a PF that has child VFs was changed from a direct return to a goto to the unlock and pci_dev_put() section. The change however left the existing pci_dev_put() untouched resulting in a doubple put. This can subsequently cause a use after free if the struct pci_dev is released in an unexpected state. Fix this by removing the extra pci_dev_put(). Cc: stable@vger.kernel.org Fixes: bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens (cherry picked from commit 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1) Signed-off-by: Jonathan Maple # Conflicts: # drivers/pci/hotplug/s390_pci_hpc.c --- .../05a2538f.failed | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/05a2538f.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/05a2538f.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/05a2538f.failed new file mode 100644 index 0000000000000..efb4a3da8db34 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/05a2538f.failed @@ -0,0 +1,57 @@ +s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Niklas Schnelle +commit 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/05a2538f.failed + +With commit bcb5d6c76903 ("s390/pci: introduce lock to synchronize state +of zpci_dev's") the code to ignore power off of a PF that has child VFs +was changed from a direct return to a goto to the unlock and +pci_dev_put() section. The change however left the existing pci_dev_put() +untouched resulting in a doubple put. This can subsequently cause a use +after free if the struct pci_dev is released in an unexpected state. +Fix this by removing the extra pci_dev_put(). 
+ + Cc: stable@vger.kernel.org +Fixes: bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") + Signed-off-by: Niklas Schnelle + Reviewed-by: Gerd Bayer + Signed-off-by: Heiko Carstens +(cherry picked from commit 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/pci/hotplug/s390_pci_hpc.c +diff --cc drivers/pci/hotplug/s390_pci_hpc.c +index a89b7de72dcf,e9e9aaa91770..000000000000 +--- a/drivers/pci/hotplug/s390_pci_hpc.c ++++ b/drivers/pci/hotplug/s390_pci_hpc.c +@@@ -49,12 -59,16 +49,17 @@@ static int disable_slot(struct hotplug_ + + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev && pci_num_vf(pdev)) { +++<<<<<<< HEAD + + pci_dev_put(pdev); + + return -EBUSY; +++======= ++ rc = -EBUSY; ++ goto out; +++>>>>>>> 05a2538f2b48 (s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs) + } + + pci_dev_put(pdev); + + - rc = zpci_deconfigure_device(zdev); + -out: + - mutex_unlock(&zdev->state_lock); + - if (pdev) + - pci_dev_put(pdev); + - return rc; + + return zpci_deconfigure_device(zdev); + } + + static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) +* Unmerged path drivers/pci/hotplug/s390_pci_hpc.c From 07223cacf7a71da97fd92a5fb8efbd480c886b24 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:50 -0400 Subject: [PATCH 10/53] s390/pci: Remove redundant bus removal and disable from zpci_release_device() jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Niklas Schnelle commit d76f9633296785343d45f85199f4138cb724b6d2 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/d76f9633.failed Remove zpci_bus_remove_device() and zpci_disable_device() calls from zpci_release_device(). These calls were done when the device transitioned into the ZPCI_FN_STATE_STANDBY state which is guaranteed to happen before it enters the ZPCI_FN_STATE_RESERVED state. When zpci_release_device() is called the device is known to be in the ZPCI_FN_STATE_RESERVED state which is also checked by a WARN_ON(). Cc: stable@vger.kernel.org Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer Reviewed-by: Julian Ruess Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens (cherry picked from commit d76f9633296785343d45f85199f4138cb724b6d2) Signed-off-by: Jonathan Maple # Conflicts: # arch/s390/pci/pci.c --- .../d76f9633.failed | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/d76f9633.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/d76f9633.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/d76f9633.failed new file mode 100644 index 0000000000000..e0c3e520c5a2f --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/d76f9633.failed @@ -0,0 +1,88 @@ +s390/pci: Remove redundant bus removal and disable from zpci_release_device() + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Niklas Schnelle +commit d76f9633296785343d45f85199f4138cb724b6d2 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. 
Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/d76f9633.failed + +Remove zpci_bus_remove_device() and zpci_disable_device() calls from +zpci_release_device(). These calls were done when the device +transitioned into the ZPCI_FN_STATE_STANDBY state which is guaranteed to +happen before it enters the ZPCI_FN_STATE_RESERVED state. When +zpci_release_device() is called the device is known to be in the +ZPCI_FN_STATE_RESERVED state which is also checked by a WARN_ON(). + + Cc: stable@vger.kernel.org +Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") + Reviewed-by: Gerd Bayer + Reviewed-by: Julian Ruess + Tested-by: Gerd Bayer + Signed-off-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit d76f9633296785343d45f85199f4138cb724b6d2) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci.c +diff --cc arch/s390/pci/pci.c +index 661c4e000c5b,9fcc6d3180f2..000000000000 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@@ -1023,41 -923,42 +1023,46 @@@ int zpci_deconfigure_device(struct zpci + void zpci_release_device(struct kref *kref) + { + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + + int ret; + + - WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); +++<<<<<<< HEAD + + if (zdev->zbus->bus) + + zpci_bus_remove_device(zdev, false); + + + + if (zdev->dma_table) + + zpci_dma_exit_device(zdev); + + if (zdev_enabled(zdev)) + + zpci_disable_device(zdev); + + + switch (zdev->state) { + + case ZPCI_FN_STATE_CONFIGURED: + + ret = sclp_pci_deconfigure(zdev->fid); + + zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); + + fallthrough; + + case ZPCI_FN_STATE_STANDBY: + + if (zdev->has_hp_slot) + + zpci_exit_slot(zdev); + + spin_lock(&zpci_list_lock); + + list_del(&zdev->entry); + + spin_unlock(&zpci_list_lock); + + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + + fallthrough; + + case ZPCI_FN_STATE_RESERVED: + + if (zdev->has_resources) + + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + + zpci_destroy_iommu(zdev); + + /* fallthrough */ + + default: + + break; + + } +++======= ++ if (zdev->has_hp_slot) ++ zpci_exit_slot(zdev); +++>>>>>>> d76f96332967 (s390/pci: Remove redundant bus removal and disable from zpci_release_device()) + + - if (zdev->has_resources) + - zpci_cleanup_bus_resources(zdev); + - + - zpci_bus_device_unregister(zdev); + - zpci_destroy_iommu(zdev); + zpci_dbg(3, "rem fid:%x\n", zdev->fid); + - kfree_rcu(zdev, rcu); + + kfree(zdev); + } + + int zpci_report_error(struct pci_dev *pdev, +* Unmerged path arch/s390/pci/pci.c From 6b2bcc69e7ae438a80c3049bb92f3d1a801e559c Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:51 -0400 Subject: [PATCH 11/53] s390/pci: Prevent self deletion in disable_slot() jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Niklas Schnelle commit 47c397844869ad0e6738afb5879c7492f4691122 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/47c39784.failed As disable_slot() takes a struct zpci_dev from the Configured to the Standby state. In Standby there is still a hotplug slot so this is not usually a case of sysfs self deletion. This is important because self deletion gets very hairy in terms of locking (see for example recover_store() in arch/s390/pci/pci_sysfs.c). 
Because the pci_dev_put() is not within the critical section of the zdev->state_lock however, disable_slot() can turn into a case of self deletion if zPCI device event handling slips between the mutex_unlock() and the pci_dev_put(). If the latter is the last put and zpci_release_device() is called this then tries to remove the hotplug slot via zpci_exit_slot() which will try to remove the hotplug slot directory the disable_slot() is part of i.e. self deletion. Prevent this by widening the zdev->state_lock critical section to include the pci_dev_put() which is then guaranteed to happen with the struct zpci_dev still in Standby state ensuring it will not lead to a zpci_release_device() call as at least the zPCI event handling code still holds a reference. Cc: stable@vger.kernel.org Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens (cherry picked from commit 47c397844869ad0e6738afb5879c7492f4691122) Signed-off-by: Jonathan Maple # Conflicts: # drivers/pci/hotplug/s390_pci_hpc.c --- .../47c39784.failed | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/47c39784.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/47c39784.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/47c39784.failed new file mode 100644 index 0000000000000..45a36ecc4bc61 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/47c39784.failed @@ -0,0 +1,69 @@ +s390/pci: Prevent self deletion in disable_slot() + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Niklas Schnelle +commit 47c397844869ad0e6738afb5879c7492f4691122 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/47c39784.failed + +As disable_slot() takes a struct zpci_dev from the Configured to the +Standby state. In Standby there is still a hotplug slot so this is not +usually a case of sysfs self deletion. This is important because self +deletion gets very hairy in terms of locking (see for example +recover_store() in arch/s390/pci/pci_sysfs.c). + +Because the pci_dev_put() is not within the critical section of the +zdev->state_lock however, disable_slot() can turn into a case of self +deletion if zPCI device event handling slips between the mutex_unlock() +and the pci_dev_put(). If the latter is the last put and +zpci_release_device() is called this then tries to remove the hotplug +slot via zpci_exit_slot() which will try to remove the hotplug slot +directory the disable_slot() is part of i.e. self deletion. + +Prevent this by widening the zdev->state_lock critical section to +include the pci_dev_put() which is then guaranteed to happen with the +struct zpci_dev still in Standby state ensuring it will not lead to +a zpci_release_device() call as at least the zPCI event handling code +still holds a reference. 
+ + Cc: stable@vger.kernel.org +Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") + Reviewed-by: Gerd Bayer + Tested-by: Gerd Bayer + Signed-off-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit 47c397844869ad0e6738afb5879c7492f4691122) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/pci/hotplug/s390_pci_hpc.c +diff --cc drivers/pci/hotplug/s390_pci_hpc.c +index a89b7de72dcf,3d26d273f29d..000000000000 +--- a/drivers/pci/hotplug/s390_pci_hpc.c ++++ b/drivers/pci/hotplug/s390_pci_hpc.c +@@@ -50,11 -60,16 +50,20 @@@ static int disable_slot(struct hotplug_ + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev && pci_num_vf(pdev)) { + pci_dev_put(pdev); + - rc = -EBUSY; + - goto out; + + return -EBUSY; + } + + pci_dev_put(pdev); + +++<<<<<<< HEAD + + return zpci_deconfigure_device(zdev); +++======= ++ rc = zpci_deconfigure_device(zdev); ++ out: ++ if (pdev) ++ pci_dev_put(pdev); ++ mutex_unlock(&zdev->state_lock); ++ return rc; +++>>>>>>> 47c397844869 (s390/pci: Prevent self deletion in disable_slot()) + } + + static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) +* Unmerged path drivers/pci/hotplug/s390_pci_hpc.c From 0eb23c178d12ef84e559c59e8aca0da452839c72 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:51 -0400 Subject: [PATCH 12/53] s390/pci: Allow re-add of a reserved but not yet removed device jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Niklas Schnelle commit 4b1815a52d7eb03b3e0e6742c6728bc16a4b2d1d Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4b1815a5.failed The architecture assumes that PCI functions can be removed synchronously as PCI events are processed. This however clashes with the reference counting of struct pci_dev which allows device drivers to hold on to a struct pci_dev reference even as the underlying device is removed. To bridge this gap commit 2a671f77ee49 ("s390/pci: fix use after free of zpci_dev") keeps the struct zpci_dev in ZPCI_FN_STATE_RESERVED state until common code releases the struct pci_dev. Only when all references are dropped, the struct zpci_dev can be removed and freed. Later commit a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") moved the deletion of the struct zpci_dev from the zpci_list in zpci_release_device() to the point where the device is reserved. This was done to prevent handling events for a device that is already being removed, e.g. when the platform generates both PCI event codes 0x304 and 0x308. In retrospect, deletion from the zpci_list in the release function without holding the zpci_list_lock was also racy. A side effect of this handling is that if the underlying device re-appears while the struct zpci_dev is in the ZPCI_FN_STATE_RESERVED state, the new and old instances of the struct zpci_dev and/or struct pci_dev may clash. For example when trying to create the IOMMU sysfs files for the new instance. In this case, re-adding the new instance is aborted. The old instance is removed, and the device will remain absent until the platform issues another event. Fix this by allowing the struct zpci_dev to be brought back up right until it is finally removed. To this end also keep the struct zpci_dev in the zpci_list until it is finally released when all references have been dropped. 
Deletion from the zpci_list from within the release function is made safe by using kref_put_lock() with the zpci_list_lock. This ensures that the releasing code holds the last reference. Cc: stable@vger.kernel.org Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens (cherry picked from commit 4b1815a52d7eb03b3e0e6742c6728bc16a4b2d1d) Signed-off-by: Jonathan Maple # Conflicts: # arch/s390/pci/pci.c --- .../4b1815a5.failed | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4b1815a5.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4b1815a5.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4b1815a5.failed new file mode 100644 index 0000000000000..401295e9f26c1 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4b1815a5.failed @@ -0,0 +1,213 @@ +s390/pci: Allow re-add of a reserved but not yet removed device + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Niklas Schnelle +commit 4b1815a52d7eb03b3e0e6742c6728bc16a4b2d1d +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4b1815a5.failed + +The architecture assumes that PCI functions can be removed synchronously +as PCI events are processed. This however clashes with the reference +counting of struct pci_dev which allows device drivers to hold on to a +struct pci_dev reference even as the underlying device is removed. To +bridge this gap commit 2a671f77ee49 ("s390/pci: fix use after free of +zpci_dev") keeps the struct zpci_dev in ZPCI_FN_STATE_RESERVED state +until common code releases the struct pci_dev. Only when all references +are dropped, the struct zpci_dev can be removed and freed. + +Later commit a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") +moved the deletion of the struct zpci_dev from the zpci_list in +zpci_release_device() to the point where the device is reserved. This +was done to prevent handling events for a device that is already being +removed, e.g. when the platform generates both PCI event codes 0x304 +and 0x308. In retrospect, deletion from the zpci_list in the release +function without holding the zpci_list_lock was also racy. + +A side effect of this handling is that if the underlying device +re-appears while the struct zpci_dev is in the ZPCI_FN_STATE_RESERVED +state, the new and old instances of the struct zpci_dev and/or struct +pci_dev may clash. For example when trying to create the IOMMU sysfs +files for the new instance. In this case, re-adding the new instance is +aborted. The old instance is removed, and the device will remain absent +until the platform issues another event. + +Fix this by allowing the struct zpci_dev to be brought back up right +until it is finally removed. To this end also keep the struct zpci_dev +in the zpci_list until it is finally released when all references have +been dropped. + +Deletion from the zpci_list from within the release function is made +safe by using kref_put_lock() with the zpci_list_lock. This ensures that +the releasing code holds the last reference. 
+ + Cc: stable@vger.kernel.org +Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") + Reviewed-by: Gerd Bayer + Tested-by: Gerd Bayer + Signed-off-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit 4b1815a52d7eb03b3e0e6742c6728bc16a4b2d1d) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci.c +diff --cc arch/s390/pci/pci.c +index 661c4e000c5b,4602abd0c6f1..000000000000 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@@ -1020,44 -927,50 +1027,79 @@@ int zpci_deconfigure_device(struct zpci + return 0; + } + +++<<<<<<< HEAD +++======= ++ /** ++ * zpci_device_reserved() - Mark device as reserved ++ * @zdev: the zpci_dev that was reserved ++ * ++ * Handle the case that a given zPCI function was reserved by another system. ++ */ ++ void zpci_device_reserved(struct zpci_dev *zdev) ++ { ++ lockdep_assert_held(&zdev->state_lock); ++ /* We may declare the device reserved multiple times */ ++ if (zdev->state == ZPCI_FN_STATE_RESERVED) ++ return; ++ zdev->state = ZPCI_FN_STATE_RESERVED; ++ zpci_dbg(3, "rsv fid:%x\n", zdev->fid); ++ /* ++ * The underlying device is gone. Allow the zdev to be freed ++ * as soon as all other references are gone by accounting for ++ * the removal as a dropped reference. ++ */ ++ zpci_zdev_put(zdev); ++ } ++ +++>>>>>>> 4b1815a52d7e (s390/pci: Allow re-add of a reserved but not yet removed device) + void zpci_release_device(struct kref *kref) + { + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + + int ret; + +++<<<<<<< HEAD + + if (zdev->zbus->bus) + + zpci_bus_remove_device(zdev, false); +++======= ++ WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); ++ /* ++ * We already hold zpci_list_lock thanks to kref_put_lock(). ++ * This makes sure no new reference can be taken from the list. 
++ */ ++ list_del(&zdev->entry); ++ spin_unlock(&zpci_list_lock); +++>>>>>>> 4b1815a52d7e (s390/pci: Allow re-add of a reserved but not yet removed device) + + - if (zdev->has_hp_slot) + - zpci_exit_slot(zdev); + + if (zdev->dma_table) + + zpci_dma_exit_device(zdev); + + if (zdev_enabled(zdev)) + + zpci_disable_device(zdev); + + - if (zdev->has_resources) + - zpci_cleanup_bus_resources(zdev); + + switch (zdev->state) { + + case ZPCI_FN_STATE_CONFIGURED: + + ret = sclp_pci_deconfigure(zdev->fid); + + zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); + + fallthrough; + + case ZPCI_FN_STATE_STANDBY: + + if (zdev->has_hp_slot) + + zpci_exit_slot(zdev); + + spin_lock(&zpci_list_lock); + + list_del(&zdev->entry); + + spin_unlock(&zpci_list_lock); + + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + + fallthrough; + + case ZPCI_FN_STATE_RESERVED: + + if (zdev->has_resources) + + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + + zpci_destroy_iommu(zdev); + + /* fallthrough */ + + default: + + break; + + } + + - zpci_bus_device_unregister(zdev); + - zpci_destroy_iommu(zdev); + zpci_dbg(3, "rem fid:%x\n", zdev->fid); + - kfree_rcu(zdev, rcu); + + kfree(zdev); + } + + int zpci_report_error(struct pci_dev *pdev, +* Unmerged path arch/s390/pci/pci.c +diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h +index af9f0ac79a1b..3febb3b297c0 100644 +--- a/arch/s390/pci/pci_bus.h ++++ b/arch/s390/pci/pci_bus.h +@@ -17,11 +17,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev); + void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); + + void zpci_release_device(struct kref *kref); +-static inline void zpci_zdev_put(struct zpci_dev *zdev) +-{ +- if (zdev) +- kref_put(&zdev->kref, zpci_release_device); +-} ++ ++void zpci_zdev_put(struct zpci_dev *zdev); + + static inline void zpci_zdev_get(struct zpci_dev *zdev) + { +diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c +index dcebe4118195..9491d3e7a312 100644 +--- a/arch/s390/pci/pci_event.c ++++ b/arch/s390/pci/pci_event.c +@@ -314,6 +314,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) + zdev->state = ZPCI_FN_STATE_STANDBY; + } + ++static void zpci_event_reappear(struct zpci_dev *zdev) ++{ ++ lockdep_assert_held(&zdev->state_lock); ++ /* ++ * The zdev is in the reserved state. This means that it was presumed to ++ * go away but there are still undropped references. Now, the platform ++ * announced its availability again. Bring back the lingering zdev ++ * to standby. This is safe because we hold a temporary reference ++ * now so that it won't go away. Account for the re-appearance of the ++ * underlying device by incrementing the reference count. 
++ */ ++ zdev->state = ZPCI_FN_STATE_STANDBY; ++ zpci_zdev_get(zdev); ++ zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh); ++} ++ + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) + { + struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); +@@ -333,8 +349,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) + break; + } + } else { ++ if (zdev->state == ZPCI_FN_STATE_RESERVED) ++ zpci_event_reappear(zdev); + /* the configuration request may be stale */ +- if (zdev->state != ZPCI_FN_STATE_STANDBY) ++ else if (zdev->state != ZPCI_FN_STATE_STANDBY) + break; + zdev->state = ZPCI_FN_STATE_CONFIGURED; + } +@@ -350,6 +368,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) + break; + } + } else { ++ if (zdev->state == ZPCI_FN_STATE_RESERVED) ++ zpci_event_reappear(zdev); + zpci_update_fh(zdev, ccdf->fh); + } + break; From 1e3532b209944b30fe3ce1091b6e4a7b3d6d5042 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:52 -0400 Subject: [PATCH 13/53] s390/pci: Serialize device addition and removal jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Niklas Schnelle commit 774a1fa880bc949d88b5ddec9494a13be733dfa8 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/774a1fa8.failed Prior changes ensured that when zpci_release_device() is called and it removed the zdev from the zpci_list this instance can not be found via the zpci_list anymore even while allowing re-add of reserved devices. This only accounts for the overall lifetime and zpci_list addition and removal, it does not yet prevent concurrent add of a new instance for the same underlying device. Such concurrent add would subsequently cause issues such as attempted re-use of the same IOMMU sysfs directory and is generally undesired. Introduce a new zpci_add_remove_lock mutex to serialize adding a new device with removal. Together this ensures that if a struct zpci_dev is not found in the zpci_list it was either already removed and torn down, or its removal and tear down is in progress with the zpci_add_remove_lock held. Cc: stable@vger.kernel.org Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens (cherry picked from commit 774a1fa880bc949d88b5ddec9494a13be733dfa8) Signed-off-by: Jonathan Maple # Conflicts: # arch/s390/pci/pci.c --- .../774a1fa8.failed | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/774a1fa8.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/774a1fa8.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/774a1fa8.failed new file mode 100644 index 0000000000000..9ecb947af2b4e --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/774a1fa8.failed @@ -0,0 +1,120 @@ +s390/pci: Serialize device addition and removal + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Niklas Schnelle +commit 774a1fa880bc949d88b5ddec9494a13be733dfa8 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. 
Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/774a1fa8.failed + +Prior changes ensured that when zpci_release_device() is called and it +removed the zdev from the zpci_list this instance can not be found via +the zpci_list anymore even while allowing re-add of reserved devices. +This only accounts for the overall lifetime and zpci_list addition and +removal, it does not yet prevent concurrent add of a new instance for +the same underlying device. Such concurrent add would subsequently cause +issues such as attempted re-use of the same IOMMU sysfs directory and is +generally undesired. + +Introduce a new zpci_add_remove_lock mutex to serialize adding a new +device with removal. Together this ensures that if a struct zpci_dev is +not found in the zpci_list it was either already removed and torn down, +or its removal and tear down is in progress with the +zpci_add_remove_lock held. + + Cc: stable@vger.kernel.org +Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") + Reviewed-by: Gerd Bayer + Tested-by: Gerd Bayer + Signed-off-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit 774a1fa880bc949d88b5ddec9494a13be733dfa8) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci.c +diff --cc arch/s390/pci/pci.c +index 661c4e000c5b,cd6676c2d602..000000000000 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@@ -67,6 -71,15 +68,18 @@@ EXPORT_SYMBOL_GPL(zpci_aipb) + struct airq_iv *zpci_aif_sbv; + EXPORT_SYMBOL_GPL(zpci_aif_sbv); + +++<<<<<<< HEAD +++======= ++ void zpci_zdev_put(struct zpci_dev *zdev) ++ { ++ if (!zdev) ++ return; ++ mutex_lock(&zpci_add_remove_lock); ++ kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); ++ mutex_unlock(&zpci_add_remove_lock); ++ } ++ +++>>>>>>> 774a1fa880bc (s390/pci: Serialize device addition and removal) + struct zpci_dev *get_zdev_by_fid(u32 fid) + { + struct zpci_dev *tmp, *zdev = NULL; +@@@ -1023,41 -936,48 +1039,52 @@@ int zpci_deconfigure_device(struct zpci + void zpci_release_device(struct kref *kref) + { + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + + int ret; + +++<<<<<<< HEAD + + if (zdev->zbus->bus) + + zpci_bus_remove_device(zdev, false); +++======= ++ lockdep_assert_held(&zpci_add_remove_lock); ++ WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); ++ /* ++ * We already hold zpci_list_lock thanks to kref_put_lock(). ++ * This makes sure no new reference can be taken from the list. 
++ */ ++ list_del(&zdev->entry); ++ spin_unlock(&zpci_list_lock); +++>>>>>>> 774a1fa880bc (s390/pci: Serialize device addition and removal) + + - if (zdev->has_hp_slot) + - zpci_exit_slot(zdev); + + if (zdev->dma_table) + + zpci_dma_exit_device(zdev); + + if (zdev_enabled(zdev)) + + zpci_disable_device(zdev); + + - if (zdev->has_resources) + - zpci_cleanup_bus_resources(zdev); + + switch (zdev->state) { + + case ZPCI_FN_STATE_CONFIGURED: + + ret = sclp_pci_deconfigure(zdev->fid); + + zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); + + fallthrough; + + case ZPCI_FN_STATE_STANDBY: + + if (zdev->has_hp_slot) + + zpci_exit_slot(zdev); + + spin_lock(&zpci_list_lock); + + list_del(&zdev->entry); + + spin_unlock(&zpci_list_lock); + + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + + fallthrough; + + case ZPCI_FN_STATE_RESERVED: + + if (zdev->has_resources) + + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + + zpci_destroy_iommu(zdev); + + /* fallthrough */ + + default: + + break; + + } + + - zpci_bus_device_unregister(zdev); + - zpci_destroy_iommu(zdev); + zpci_dbg(3, "rem fid:%x\n", zdev->fid); + - kfree_rcu(zdev, rcu); + + kfree(zdev); + } + + int zpci_report_error(struct pci_dev *pdev, +* Unmerged path arch/s390/pci/pci.c From ac859789f23340674974c745c85a27a4679012df Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:52 -0400 Subject: [PATCH 14/53] idpf: make virtchnl2.h self-contained jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit c00d33f1fc7958e6e7f461c994fa025aa2273c13 To ease maintaining of virtchnl2.h, which already is messy enough, make it self-contained by adding missing if_ether.h include due to %ETH_ALEN usage. At the same time, virtchnl2_lan_desc.h is not used anywhere in the file, so move this include to idpf_txrx.h to speed up C preprocessing. Acked-by: Kees Cook Acked-by: Gustavo A. R. Silva Signed-off-by: Alexander Lobakin Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240327142241.1745989-3-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski (cherry picked from commit c00d33f1fc7958e6e7f461c994fa025aa2273c13) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 2 ++ drivers/net/ethernet/intel/idpf/virtchnl2.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index dca84031bcec5..06842178f741b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -8,6 +8,8 @@ #include #include +#include "virtchnl2_lan_desc.h" + #define IDPF_LARGE_MAX_Q 256 #define IDPF_MAX_Q 16 #define IDPF_MIN_Q 2 diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 4a3c4454d25ab..29419211b3d9c 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -4,6 +4,8 @@ #ifndef _VIRTCHNL2_H_ #define _VIRTCHNL2_H_ +#include + /* All opcodes associated with virtchnl2 are prefixed with virtchnl2 or * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures, * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion. @@ -17,8 +19,6 @@ * must remain unchanged over time, so we specify explicit values for all enums. */ -#include "virtchnl2_lan_desc.h" - /* This macro is used to generate compilation errors if a structure * is not exactly the correct length. 
*/ From d6ef534f1350ba83475e332a9b7329196390e03d Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:52 -0400 Subject: [PATCH 15/53] idpf: Interpret .set_channels() input differently jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Larysa Zaremba commit 5e7695e0219bf6acb96081af3ba0ca08b1829656 Unlike ice, idpf does not check, if user has requested at least 1 combined channel. Instead, it relies on a check in the core code. Unfortunately, the check does not trigger for us because of the hacky .set_channels() interpretation logic that is not consistent with the core code. This naturally leads to user being able to trigger a crash with an invalid input. This is how: 1. ethtool -l -> combined: 40 2. ethtool -L rx 0 tx 0 combined number is not specified, so command becomes {rx_count = 0, tx_count = 0, combined_count = 40}. 3. ethnl_set_channels checks, if there is at least 1 RX and 1 TX channel, comparing (combined_count + rx_count) and (combined_count + tx_count) to zero. Obviously, (40 + 0) is greater than zero, so the core code deems the input OK. 4. idpf interprets `rx 0 tx 0` as 0 channels and tries to proceed with such configuration. The issue has to be solved fundamentally, as current logic is also known to cause AF_XDP problems in ice [0]. Interpret the command in a way that is more consistent with ethtool manual [1] (--show-channels and --set-channels) and new ice logic. Considering that in the idpf driver only the difference between RX and TX queues forms dedicated channels, change the correct way to set number of channels to: ethtool -L combined 10 /* For symmetric queues */ ethtool -L combined 8 tx 2 rx 0 /* For asymmetric queues */ [0] https://lore.kernel.org/netdev/20240418095857.2827-1-larysa.zaremba@intel.com/ [1] https://man7.org/linux/man-pages/man8/ethtool.8.html Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Reviewed-by: Przemek Kitszel Reviewed-by: Igor Bagnucki Signed-off-by: Larysa Zaremba Tested-by: Krishneil Singh Reviewed-by: Simon Horman Signed-off-by: Jacob Keller Signed-off-by: Paolo Abeni (cherry picked from commit 5e7695e0219bf6acb96081af3ba0ca08b1829656) Signed-off-by: Jonathan Maple --- .../net/ethernet/intel/idpf/idpf_ethtool.c | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 5e6777ea55ca6..bf40a9cce50ed 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -224,14 +224,19 @@ static int idpf_set_channels(struct net_device *netdev, struct ethtool_channels *ch) { struct idpf_vport_config *vport_config; - u16 combined, num_txq, num_rxq; unsigned int num_req_tx_q; unsigned int num_req_rx_q; struct idpf_vport *vport; + u16 num_txq, num_rxq; struct device *dev; int err = 0; u16 idx; + if (ch->rx_count && ch->tx_count) { + netdev_err(netdev, "Dedicated RX or TX channels cannot be used simultaneously\n"); + return -EINVAL; + } + idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); @@ -241,20 +246,6 @@ static int idpf_set_channels(struct net_device *netdev, num_txq = vport_config->user_config.num_req_tx_qs; num_rxq = vport_config->user_config.num_req_rx_qs; - combined = min(num_txq, num_rxq); - - /* these checks are for cases where user didn't specify a particular - * value on cmd line but we get non-zero value anyway via - * get_channels(); look at ethtool.c in ethtool repository (the user - 
* space part), particularly, do_schannels() routine - */ - if (ch->combined_count == combined) - ch->combined_count = 0; - if (ch->combined_count && ch->rx_count == num_rxq - combined) - ch->rx_count = 0; - if (ch->combined_count && ch->tx_count == num_txq - combined) - ch->tx_count = 0; - num_req_tx_q = ch->combined_count + ch->tx_count; num_req_rx_q = ch->combined_count + ch->rx_count; From 7863cd7ecc59df0bb3e4d8dffb868157c30a9c5a Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:53 -0400 Subject: [PATCH 16/53] idpf: don't enable NAPI and interrupts prior to allocating Rx buffers jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit d514c8b54209de7a95ab37259fe32c7406976bd9 Currently, idpf enables NAPI and interrupts prior to allocating Rx buffers. This may lead to frame loss (there are no buffers to place incoming frames) and even crashes on quick ifup-ifdown. Interrupts must be enabled only after all the resources are here and available. Split interrupt init into two phases: initialization and enabling, and perform the second only after the queues are fully initialized. Note that we can't just move interrupt initialization down the init process, as the queues must have correct a ::q_vector pointer set and NAPI already added in order to allocate buffers correctly. Also, during the deinit process, disable HW interrupts first and only then disable NAPI. Otherwise, there can be a HW event leading to napi_schedule(), but the NAPI will already be unavailable. Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Reported-by: Michal Kubiak Reviewed-by: Wojciech Drewek Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20240523-net-2024-05-23-intel-net-fixes-v1-1-17a923e0bb5f@intel.com Signed-off-by: Jakub Kicinski (cherry picked from commit d514c8b54209de7a95ab37259fe32c7406976bd9) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 1 + drivers/net/ethernet/intel/idpf/idpf_txrx.c | 12 +++++++----- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5d3532c27d57f..ae8a48c480708 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1394,6 +1394,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) } idpf_rx_init_buf_tail(vport); + idpf_vport_intr_ena(vport); err = idpf_send_config_queues_msg(vport); if (err) { diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index fed316fe1a1b7..32617d5ca9299 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3760,9 +3760,9 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport) */ void idpf_vport_intr_deinit(struct idpf_vport *vport) { + idpf_vport_intr_dis_irq_all(vport); idpf_vport_intr_napi_dis_all(vport); idpf_vport_intr_napi_del_all(vport); - idpf_vport_intr_dis_irq_all(vport); idpf_vport_intr_rel_irq(vport); } @@ -4197,7 +4197,6 @@ int idpf_vport_intr_init(struct idpf_vport *vport) idpf_vport_intr_map_vector_to_qs(vport); idpf_vport_intr_napi_add_all(vport); - idpf_vport_intr_napi_ena_all(vport); err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport); if (err) @@ -4211,17 +4210,20 @@ 
int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - idpf_vport_intr_ena_irq_all(vport); - return 0; unroll_vectors_alloc: - idpf_vport_intr_napi_dis_all(vport); idpf_vport_intr_napi_del_all(vport); return err; } +void idpf_vport_intr_ena(struct idpf_vport *vport) +{ + idpf_vport_intr_napi_ena_all(vport); + idpf_vport_intr_ena_irq_all(vport); +} + /** * idpf_config_rss - Send virtchnl messages to configure RSS * @vport: virtual port diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 06842178f741b..2f7b297f5db45 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1020,6 +1020,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport); void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector); void idpf_vport_intr_deinit(struct idpf_vport *vport); int idpf_vport_intr_init(struct idpf_vport *vport); +void idpf_vport_intr_ena(struct idpf_vport *vport); enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded); int idpf_config_rss(struct idpf_vport *vport); int idpf_init_rss(struct idpf_vport *vport); From 9e1a24d4919c9152dc37c947a7ff12509b38a578 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:53 -0400 Subject: [PATCH 17/53] idpf: stop using macros for accessing queue descriptors jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit 66c27e3b19d5aae58d7f0145113de61d6fba5e09 In C, we have structures and unions. Casting `void *` via macros is not only error-prone, but also looks confusing and awful in general. In preparation for splitting the queue structs, replace it with a union and direct array dereferences. 
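As a hedged illustration of that pattern (simplified, with hypothetical my_rx_desc/my_tx_desc layouts rather than the real idpf descriptors), the change boils down to replacing a cast-macro accessor with a union of typed ring pointers:

  #include <linux/types.h>

  struct my_rx_desc { __le64 qword[2]; };
  struct my_tx_desc { __le64 qword[2]; };

  /* Before: an untyped ring that every accessor macro has to cast. */
  struct my_queue_old {
          void *desc_ring;
          u16 desc_count;
  };
  #define MY_RX_DESC(q, i) \
          (&(((struct my_rx_desc *)((q)->desc_ring))[i]))

  /* After: one union member per descriptor layout, indexed directly. */
  struct my_queue {
          union {
                  struct my_rx_desc *rx;
                  struct my_tx_desc *tx;
                  void *desc_ring;        /* kept for ring alloc/free */
          };
          u16 desc_count;
  };

  static inline struct my_rx_desc *my_queue_rx_desc(struct my_queue *q, u16 i)
  {
          return &q->rx[i];       /* type-checked, no cast */
  }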
Reviewed-by: Przemek Kitszel Reviewed-by: Mina Almasry Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen (cherry picked from commit 66c27e3b19d5aae58d7f0145113de61d6fba5e09) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf.h | 1 - .../net/ethernet/intel/idpf/idpf_lan_txrx.h | 2 + .../ethernet/intel/idpf/idpf_singleq_txrx.c | 20 ++++---- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 32 ++++++------- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 47 ++++++++++--------- 5 files changed, 52 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 25cde08a355ae..8e39fae179a6a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -20,7 +20,6 @@ struct idpf_vport_max_q; #include #include "virtchnl2.h" -#include "idpf_lan_txrx.h" #include "idpf_txrx.h" #include "idpf_controlq.h" diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h index a5752dcab8887..8c7f8ef8f1a15 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h @@ -4,6 +4,8 @@ #ifndef _IDPF_LAN_TXRX_H_ #define _IDPF_LAN_TXRX_H_ +#include + enum idpf_rss_hash { IDPF_HASH_INVALID = 0, /* Values 1 - 28 are reserved for future use */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index fe39412fece7c..5c3d34d3de8ac 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -205,7 +205,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, data_len = skb->data_len; size = skb_headlen(skb); - tx_desc = IDPF_BASE_TX_DESC(tx_q, i); + tx_desc = &tx_q->base_tx[i]; dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); @@ -239,7 +239,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, i++; if (i == tx_q->desc_count) { - tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + tx_desc = &tx_q->base_tx[0]; i = 0; } @@ -259,7 +259,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, i++; if (i == tx_q->desc_count) { - tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + tx_desc = &tx_q->base_tx[0]; i = 0; } @@ -307,7 +307,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf)); txq->tx_buf[ntu].ctx_entry = true; - ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu); + ctx_desc = &txq->base_ctx[ntu]; IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu); txq->next_to_use = ntu; @@ -455,7 +455,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, struct netdev_queue *nq; bool dont_wake; - tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc); + tx_desc = &tx_q->base_tx[ntc]; tx_buf = &tx_q->tx_buf[ntc]; ntc -= tx_q->desc_count; @@ -517,7 +517,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, if (unlikely(!ntc)) { ntc -= tx_q->desc_count; tx_buf = tx_q->tx_buf; - tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + tx_desc = &tx_q->base_tx[0]; } /* unmap any remaining paged data */ @@ -540,7 +540,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, if (unlikely(!ntc)) { ntc -= tx_q->desc_count; tx_buf = tx_q->tx_buf; - tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + tx_desc = &tx_q->base_tx[0]; } } while (likely(budget)); @@ -895,7 +895,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, if (!cleaned_count) return false; - desc = 
IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta); + desc = &rx_q->single_buf[nta]; buf = &rx_q->rx_buf.buf[nta]; do { @@ -915,7 +915,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, buf++; nta++; if (unlikely(nta == rx_q->desc_count)) { - desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0); + desc = &rx_q->single_buf[0]; buf = rx_q->rx_buf.buf; nta = 0; } @@ -1016,7 +1016,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) struct idpf_rx_buf *rx_buf; /* get the Rx desc from Rx queue based on 'next_to_clean' */ - rx_desc = IDPF_RX_DESC(rx_q, ntc); + rx_desc = &rx_q->rx[ntc]; /* status_error_ptype_len will always be zero for unused * descriptors because it's cleared in cleanup, and overlaps diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 32617d5ca9299..4a7062c55fcb6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -531,7 +531,7 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) struct idpf_rx_buf *buf; dma_addr_t addr; - splitq_rx_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, nta); + splitq_rx_desc = &bufq->split_buf[nta]; buf = &bufq->rx_buf.buf[buf_id]; if (bufq->rx_hsplit_en) { @@ -1586,7 +1586,7 @@ do { \ if (unlikely(!(ntc))) { \ ntc -= (txq)->desc_count; \ buf = (txq)->tx_buf; \ - desc = IDPF_FLEX_TX_DESC(txq, 0); \ + desc = &(txq)->flex_tx[0]; \ } else { \ (buf)++; \ (desc)++; \ @@ -1619,8 +1619,8 @@ static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, s16 ntc = tx_q->next_to_clean; struct idpf_tx_buf *tx_buf; - tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc); - next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end); + tx_desc = &tx_q->flex_tx[ntc]; + next_pending_desc = &tx_q->flex_tx[end]; tx_buf = &tx_q->tx_buf[ntc]; ntc -= tx_q->desc_count; @@ -1816,7 +1816,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, int i; complq_budget = vport->compln_clean_budget; - tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, ntc); + tx_desc = &complq->comp[ntc]; ntc -= complq->desc_count; do { @@ -1881,7 +1881,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, ntc++; if (unlikely(!ntc)) { ntc -= complq->desc_count; - tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, 0); + tx_desc = &complq->comp[0]; change_bit(__IDPF_Q_GEN_CHK, complq->flags); } @@ -2145,7 +2145,7 @@ void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, * used one additional descriptor for a context * descriptor. Reset that here. 
*/ - tx_desc = IDPF_FLEX_TX_DESC(txq, idx); + tx_desc = &txq->flex_tx[idx]; memset(tx_desc, 0, sizeof(struct idpf_flex_tx_ctx_desc)); if (idx == 0) idx = txq->desc_count; @@ -2204,7 +2204,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, data_len = skb->data_len; size = skb_headlen(skb); - tx_desc = IDPF_FLEX_TX_DESC(tx_q, i); + tx_desc = &tx_q->flex_tx[i]; dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); @@ -2277,7 +2277,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, i++; if (i == tx_q->desc_count) { - tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); + tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); @@ -2322,7 +2322,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, i++; if (i == tx_q->desc_count) { - tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); + tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } @@ -2555,7 +2555,7 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; /* grab the next descriptor */ - desc = IDPF_FLEX_TX_CTX_DESC(txq, i); + desc = &txq->flex_ctx[i]; txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); return desc; @@ -3130,7 +3130,6 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) struct idpf_sw_queue *refillq = NULL; struct idpf_rxq_set *rxq_set = NULL; struct idpf_rx_buf *rx_buf = NULL; - union virtchnl2_rx_desc *desc; unsigned int pkt_len = 0; unsigned int hdr_len = 0; u16 gen_id, buf_id = 0; @@ -3140,8 +3139,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) u8 rxdid; /* get the Rx desc from Rx queue based on 'next_to_clean' */ - desc = IDPF_RX_DESC(rxq, ntc); - rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)desc; + rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb; /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc @@ -3324,11 +3322,11 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq, int cleaned = 0; u16 gen; - buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, bufq_nta); + buf_desc = &bufq->split_buf[bufq_nta]; /* make sure we stop at ring wrap in the unlikely case ring is full */ while (likely(cleaned < refillq->desc_count)) { - u16 refill_desc = IDPF_SPLITQ_RX_BI_DESC(refillq, ntc); + u16 refill_desc = refillq->ring[ntc]; bool failure; gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc); @@ -3346,7 +3344,7 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq, } if (unlikely(++bufq_nta == bufq->desc_count)) { - buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, 0); + buf_desc = &bufq->split_buf[0]; bufq_nta = 0; } else { buf_desc++; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 2f7b297f5db45..1669bf01ba1db 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -8,6 +8,7 @@ #include #include +#include "idpf_lan_txrx.h" #include "virtchnl2_lan_desc.h" #define IDPF_LARGE_MAX_Q 256 @@ -117,24 +118,6 @@ do { \ #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M -#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i) \ - (&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i])) -#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i) \ - (&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i])) -#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i]) - -#define IDPF_BASE_TX_DESC(txq, i) \ - (&(((struct 
idpf_base_tx_desc *)((txq)->desc_ring))[i])) -#define IDPF_BASE_TX_CTX_DESC(txq, i) \ - (&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i])) -#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i) \ - (&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i])) - -#define IDPF_FLEX_TX_DESC(txq, i) \ - (&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i])) -#define IDPF_FLEX_TX_CTX_DESC(txq, i) \ - (&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i])) - #define IDPF_DESC_UNUSED(txq) \ ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) @@ -317,8 +300,6 @@ struct idpf_rx_extracted { #define IDPF_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) -#define IDPF_RX_DESC(rxq, i) \ - (&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i])) struct idpf_rx_buf { struct page *page; @@ -668,7 +649,15 @@ union idpf_queue_stats { * @q_vector: Backreference to associated vector * @size: Length of descriptor ring in bytes * @dma: Physical address of ring - * @desc_ring: Descriptor ring memory + * @rx: universal receive descriptor array + * @single_buf: Rx buffer descriptor array in singleq + * @split_buf: Rx buffer descriptor array in splitq + * @base_tx: basic Tx descriptor array + * @base_ctx: basic Tx context descriptor array + * @flex_tx: flex Tx descriptor array + * @flex_ctx: flex Tx context descriptor array + * @comp: completion descriptor array + * @desc_ring: virtual descriptor ring address * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @tx_min_pkt_len: Min supported packet length * @num_completions: Only relevant for TX completion queue. It tracks the @@ -746,7 +735,21 @@ struct idpf_queue { struct idpf_q_vector *q_vector; unsigned int size; dma_addr_t dma; - void *desc_ring; + union { + union virtchnl2_rx_desc *rx; + + struct virtchnl2_singleq_rx_buf_desc *single_buf; + struct virtchnl2_splitq_rx_buf_desc *split_buf; + + struct idpf_base_tx_desc *base_tx; + struct idpf_base_tx_ctx_desc *base_ctx; + union idpf_tx_flex_desc *flex_tx; + struct idpf_flex_tx_ctx_desc *flex_ctx; + + struct idpf_splitq_tx_compl_desc *comp; + + void *desc_ring; + }; u16 tx_max_bufs; u8 tx_min_pkt_len; From 85049fdedfd8d24bc7842d0dfc0c75f9788dc6db Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:54 -0400 Subject: [PATCH 18/53] net: remove gfp_mask from napi_alloc_skb() jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 Rebuild_CHGLOG: - net: remove gfp_mask from napi_alloc_skb() [idpf] (Michal Schmidt) [RHEL-71182] Rebuild_FUZZ: 92.31% commit-author Jakub Kicinski commit 6e9b01909a811555ff3326cf80a5847169c57806 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6e9b0190.failed __napi_alloc_skb() is napi_alloc_skb() with the added flexibility of choosing gfp_mask. This is a NAPI function, so GFP_ATOMIC is implied. The only practical choice the caller has is whether to set __GFP_NOWARN. But that's a false choice, too, allocation failures in atomic context will happen, and printing warnings in logs, effectively for a packet drop, is both too much and very likely non-actionable. This leads me to a conclusion that most uses of napi_alloc_skb() are simply misguided, and should use __GFP_NOWARN in the first place. We also have a "standard" way of reporting allocation failures via the queue stat API (qstats::rx-alloc-fail). 
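A hypothetical call site (not taken from any driver touched by this patch) shows what the API looks like after the change, with the failure reported through a queue counter rather than an allocation warning:

  #include <linux/netdevice.h>
  #include <linux/skbuff.h>

  #define MY_RX_HDR_SIZE 256

  struct my_rx_queue {
          struct napi_struct napi;
          u64 rx_alloc_fail;      /* surfaced as qstats::rx-alloc-fail */
  };

  static struct sk_buff *my_rx_build_skb(struct my_rx_queue *q)
  {
          /* GFP_ATOMIC | __GFP_NOWARN is now implied by napi_alloc_skb(). */
          struct sk_buff *skb = napi_alloc_skb(&q->napi, MY_RX_HDR_SIZE);

          if (unlikely(!skb))
                  q->rx_alloc_fail++;     /* count the drop, no log splat */

          return skb;
  }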
The direct motivation for this patch is that one of the drivers used at Meta calls napi_alloc_skb() (so prior to this patch without __GFP_NOWARN), and the resulting OOM warning is the top networking warning in our fleet. Reviewed-by: Alexander Lobakin Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240327040213.3153864-1-kuba@kernel.org Signed-off-by: Jakub Kicinski (cherry picked from commit 6e9b01909a811555ff3326cf80a5847169c57806) Signed-off-by: Jonathan Maple # Conflicts: # Documentation/translations/zh_CN/mm/page_frags.rst # Documentation/vm/page_frags.rst # drivers/net/ethernet/intel/ice/ice_txrx.c # drivers/net/ethernet/stmicro/stmmac/stmmac_main.c --- .../6e9b0190.failed | 779 ++++++++++++++++++ 1 file changed, 779 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6e9b0190.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6e9b0190.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6e9b0190.failed new file mode 100644 index 0000000000000..1bdbcac2a650a --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6e9b0190.failed @@ -0,0 +1,779 @@ +net: remove gfp_mask from napi_alloc_skb() + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +Rebuild_CHGLOG: - net: remove gfp_mask from napi_alloc_skb() [idpf] (Michal Schmidt) [RHEL-71182] +Rebuild_FUZZ: 92.31% +commit-author Jakub Kicinski +commit 6e9b01909a811555ff3326cf80a5847169c57806 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/6e9b0190.failed + +__napi_alloc_skb() is napi_alloc_skb() with the added flexibility +of choosing gfp_mask. This is a NAPI function, so GFP_ATOMIC is +implied. The only practical choice the caller has is whether to +set __GFP_NOWARN. But that's a false choice, too, allocation failures +in atomic context will happen, and printing warnings in logs, +effectively for a packet drop, is both too much and very likely +non-actionable. + +This leads me to a conclusion that most uses of napi_alloc_skb() +are simply misguided, and should use __GFP_NOWARN in the first +place. We also have a "standard" way of reporting allocation +failures via the queue stat API (qstats::rx-alloc-fail). + +The direct motivation for this patch is that one of the drivers +used at Meta calls napi_alloc_skb() (so prior to this patch without +__GFP_NOWARN), and the resulting OOM warning is the top networking +warning in our fleet. + + Reviewed-by: Alexander Lobakin + Reviewed-by: Simon Horman + Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240327040213.3153864-1-kuba@kernel.org + Signed-off-by: Jakub Kicinski +(cherry picked from commit 6e9b01909a811555ff3326cf80a5847169c57806) + Signed-off-by: Jonathan Maple + +# Conflicts: +# Documentation/translations/zh_CN/mm/page_frags.rst +# Documentation/vm/page_frags.rst +# drivers/net/ethernet/intel/ice/ice_txrx.c +# drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +diff --cc Documentation/vm/page_frags.rst +index 637cc49d1b2f,503ca6cdb804..000000000000 +--- a/Documentation/vm/page_frags.rst ++++ b/Documentation/vm/page_frags.rst +@@@ -26,8 -24,8 +26,13 @@@ to be disabled when executing the fragm + + The network stack uses two separate caches per CPU to handle fragment + allocation. 
The netdev_alloc_cache is used by callers making use of the +++<<<<<<< HEAD:Documentation/vm/page_frags.rst + +__netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is + +used by callers of the __napi_alloc_frag and __napi_alloc_skb calls. The +++======= ++ netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is ++ used by callers of the __napi_alloc_frag and napi_alloc_skb calls. The +++>>>>>>> 6e9b01909a81 (net: remove gfp_mask from napi_alloc_skb()):Documentation/mm/page_frags.rst + main difference between these two calls is the context in which they may be + called. The "netdev" prefixed functions are usable in any context as these + functions will disable interrupts, while the "napi" prefixed functions are +diff --cc drivers/net/ethernet/intel/ice/ice_txrx.c +index b62c095891da,8bb743f78fcb..000000000000 +--- a/drivers/net/ethernet/intel/ice/ice_txrx.c ++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c +@@@ -990,12 -1043,15 +990,16 @@@ ice_construct_skb(struct ice_rx_ring *r + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + - net_prefetch(xdp->data); + - + - if (unlikely(xdp_buff_has_frags(xdp))) { + - sinfo = xdp_get_shared_info_from_buff(xdp); + - nr_frags = sinfo->nr_frags; + - } + + net_prefetch(xdp->data_meta); + + /* allocate a skb to store the frags */ +++<<<<<<< HEAD + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + + ICE_RX_HDR_SIZE + metasize, + + GFP_ATOMIC | __GFP_NOWARN); +++======= ++ skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE); +++>>>>>>> 6e9b01909a81 (net: remove gfp_mask from napi_alloc_skb()) + if (unlikely(!skb)) + return NULL; + +diff --cc drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 34009ae26aed,bcdde68a099a..000000000000 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@@ -4144,6 -4911,513 +4144,516 @@@ static unsigned int stmmac_rx_buf2_len( + return plen - len; + } + +++<<<<<<< HEAD +++======= ++ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, ++ struct xdp_frame *xdpf, bool dma_map) ++ { ++ struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; ++ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; ++ unsigned int entry = tx_q->cur_tx; ++ struct dma_desc *tx_desc; ++ dma_addr_t dma_addr; ++ bool set_ic; ++ ++ if (stmmac_tx_avail(priv, queue) < STMMAC_TX_THRESH(priv)) ++ return STMMAC_XDP_CONSUMED; ++ ++ if (priv->plat->est && priv->plat->est->enable && ++ priv->plat->est->max_sdu[queue] && ++ xdpf->len > priv->plat->est->max_sdu[queue]) { ++ priv->xstats.max_sdu_txq_drop[queue]++; ++ return STMMAC_XDP_CONSUMED; ++ } ++ ++ if (likely(priv->extend_desc)) ++ tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry); ++ else if (tx_q->tbs & STMMAC_TBS_AVAIL) ++ tx_desc = &tx_q->dma_entx[entry].basic; ++ else ++ tx_desc = tx_q->dma_tx + entry; ++ ++ if (dma_map) { ++ dma_addr = dma_map_single(priv->device, xdpf->data, ++ xdpf->len, DMA_TO_DEVICE); ++ if (dma_mapping_error(priv->device, dma_addr)) ++ return STMMAC_XDP_CONSUMED; ++ ++ tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO; ++ } else { ++ struct page *page = virt_to_page(xdpf->data); ++ ++ dma_addr = page_pool_get_dma_addr(page) + sizeof(*xdpf) + ++ xdpf->headroom; ++ dma_sync_single_for_device(priv->device, dma_addr, ++ xdpf->len, DMA_BIDIRECTIONAL); ++ ++ tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX; ++ } ++ ++ tx_q->tx_skbuff_dma[entry].buf = dma_addr; ++ 
tx_q->tx_skbuff_dma[entry].map_as_page = false; ++ tx_q->tx_skbuff_dma[entry].len = xdpf->len; ++ tx_q->tx_skbuff_dma[entry].last_segment = true; ++ tx_q->tx_skbuff_dma[entry].is_jumbo = false; ++ ++ tx_q->xdpf[entry] = xdpf; ++ ++ stmmac_set_desc_addr(priv, tx_desc, dma_addr); ++ ++ stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len, ++ true, priv->mode, true, true, ++ xdpf->len); ++ ++ tx_q->tx_count_frames++; ++ ++ if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0) ++ set_ic = true; ++ else ++ set_ic = false; ++ ++ if (set_ic) { ++ tx_q->tx_count_frames = 0; ++ stmmac_set_tx_ic(priv, tx_desc); ++ u64_stats_update_begin(&txq_stats->q_syncp); ++ u64_stats_inc(&txq_stats->q.tx_set_ic_bit); ++ u64_stats_update_end(&txq_stats->q_syncp); ++ } ++ ++ stmmac_enable_dma_transmission(priv, priv->ioaddr); ++ ++ entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); ++ tx_q->cur_tx = entry; ++ ++ return STMMAC_XDP_TX; ++ } ++ ++ static int stmmac_xdp_get_tx_queue(struct stmmac_priv *priv, ++ int cpu) ++ { ++ int index = cpu; ++ ++ if (unlikely(index < 0)) ++ index = 0; ++ ++ while (index >= priv->plat->tx_queues_to_use) ++ index -= priv->plat->tx_queues_to_use; ++ ++ return index; ++ } ++ ++ static int stmmac_xdp_xmit_back(struct stmmac_priv *priv, ++ struct xdp_buff *xdp) ++ { ++ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); ++ int cpu = smp_processor_id(); ++ struct netdev_queue *nq; ++ int queue; ++ int res; ++ ++ if (unlikely(!xdpf)) ++ return STMMAC_XDP_CONSUMED; ++ ++ queue = stmmac_xdp_get_tx_queue(priv, cpu); ++ nq = netdev_get_tx_queue(priv->dev, queue); ++ ++ __netif_tx_lock(nq, cpu); ++ /* Avoids TX time-out as we are sharing with slow path */ ++ txq_trans_cond_update(nq); ++ ++ res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf, false); ++ if (res == STMMAC_XDP_TX) ++ stmmac_flush_tx_descriptors(priv, queue); ++ ++ __netif_tx_unlock(nq); ++ ++ return res; ++ } ++ ++ static int __stmmac_xdp_run_prog(struct stmmac_priv *priv, ++ struct bpf_prog *prog, ++ struct xdp_buff *xdp) ++ { ++ u32 act; ++ int res; ++ ++ act = bpf_prog_run_xdp(prog, xdp); ++ switch (act) { ++ case XDP_PASS: ++ res = STMMAC_XDP_PASS; ++ break; ++ case XDP_TX: ++ res = stmmac_xdp_xmit_back(priv, xdp); ++ break; ++ case XDP_REDIRECT: ++ if (xdp_do_redirect(priv->dev, xdp, prog) < 0) ++ res = STMMAC_XDP_CONSUMED; ++ else ++ res = STMMAC_XDP_REDIRECT; ++ break; ++ default: ++ bpf_warn_invalid_xdp_action(priv->dev, prog, act); ++ fallthrough; ++ case XDP_ABORTED: ++ trace_xdp_exception(priv->dev, prog, act); ++ fallthrough; ++ case XDP_DROP: ++ res = STMMAC_XDP_CONSUMED; ++ break; ++ } ++ ++ return res; ++ } ++ ++ static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv, ++ struct xdp_buff *xdp) ++ { ++ struct bpf_prog *prog; ++ int res; ++ ++ prog = READ_ONCE(priv->xdp_prog); ++ if (!prog) { ++ res = STMMAC_XDP_PASS; ++ goto out; ++ } ++ ++ res = __stmmac_xdp_run_prog(priv, prog, xdp); ++ out: ++ return ERR_PTR(-res); ++ } ++ ++ static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv, ++ int xdp_status) ++ { ++ int cpu = smp_processor_id(); ++ int queue; ++ ++ queue = stmmac_xdp_get_tx_queue(priv, cpu); ++ ++ if (xdp_status & STMMAC_XDP_TX) ++ stmmac_tx_timer_arm(priv, queue); ++ ++ if (xdp_status & STMMAC_XDP_REDIRECT) ++ xdp_do_flush(); ++ } ++ ++ static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch, ++ struct xdp_buff *xdp) ++ { ++ unsigned int metasize = xdp->data - xdp->data_meta; ++ unsigned int datasize = xdp->data_end - xdp->data; ++ struct sk_buff *skb; ++ 
++ skb = napi_alloc_skb(&ch->rxtx_napi, ++ xdp->data_end - xdp->data_hard_start); ++ if (unlikely(!skb)) ++ return NULL; ++ ++ skb_reserve(skb, xdp->data - xdp->data_hard_start); ++ memcpy(__skb_put(skb, datasize), xdp->data, datasize); ++ if (metasize) ++ skb_metadata_set(skb, metasize); ++ ++ return skb; ++ } ++ ++ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, ++ struct dma_desc *p, struct dma_desc *np, ++ struct xdp_buff *xdp) ++ { ++ struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; ++ struct stmmac_channel *ch = &priv->channel[queue]; ++ unsigned int len = xdp->data_end - xdp->data; ++ enum pkt_hash_types hash_type; ++ int coe = priv->hw->rx_csum; ++ struct sk_buff *skb; ++ u32 hash; ++ ++ skb = stmmac_construct_skb_zc(ch, xdp); ++ if (!skb) { ++ priv->xstats.rx_dropped++; ++ return; ++ } ++ ++ stmmac_get_rx_hwtstamp(priv, p, np, skb); ++ if (priv->hw->hw_vlan_en) ++ /* MAC level stripping. */ ++ stmmac_rx_hw_vlan(priv, priv->hw, p, skb); ++ else ++ /* Driver level stripping. */ ++ stmmac_rx_vlan(priv->dev, skb); ++ skb->protocol = eth_type_trans(skb, priv->dev); ++ ++ if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb)) ++ skb_checksum_none_assert(skb); ++ else ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++ if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type)) ++ skb_set_hash(skb, hash, hash_type); ++ ++ skb_record_rx_queue(skb, queue); ++ napi_gro_receive(&ch->rxtx_napi, skb); ++ ++ u64_stats_update_begin(&rxq_stats->napi_syncp); ++ u64_stats_inc(&rxq_stats->napi.rx_pkt_n); ++ u64_stats_add(&rxq_stats->napi.rx_bytes, len); ++ u64_stats_update_end(&rxq_stats->napi_syncp); ++ } ++ ++ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) ++ { ++ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; ++ unsigned int entry = rx_q->dirty_rx; ++ struct dma_desc *rx_desc = NULL; ++ bool ret = true; ++ ++ budget = min(budget, stmmac_rx_dirty(priv, queue)); ++ ++ while (budget-- > 0 && entry != rx_q->cur_rx) { ++ struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; ++ dma_addr_t dma_addr; ++ bool use_rx_wd; ++ ++ if (!buf->xdp) { ++ buf->xdp = xsk_buff_alloc(rx_q->xsk_pool); ++ if (!buf->xdp) { ++ ret = false; ++ break; ++ } ++ } ++ ++ if (priv->extend_desc) ++ rx_desc = (struct dma_desc *)(rx_q->dma_erx + entry); ++ else ++ rx_desc = rx_q->dma_rx + entry; ++ ++ dma_addr = xsk_buff_xdp_get_dma(buf->xdp); ++ stmmac_set_desc_addr(priv, rx_desc, dma_addr); ++ stmmac_set_desc_sec_addr(priv, rx_desc, 0, false); ++ stmmac_refill_desc3(priv, rx_q, rx_desc); ++ ++ rx_q->rx_count_frames++; ++ rx_q->rx_count_frames += priv->rx_coal_frames[queue]; ++ if (rx_q->rx_count_frames > priv->rx_coal_frames[queue]) ++ rx_q->rx_count_frames = 0; ++ ++ use_rx_wd = !priv->rx_coal_frames[queue]; ++ use_rx_wd |= rx_q->rx_count_frames > 0; ++ if (!priv->use_riwt) ++ use_rx_wd = false; ++ ++ dma_wmb(); ++ stmmac_set_rx_owner(priv, rx_desc, use_rx_wd); ++ ++ entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size); ++ } ++ ++ if (rx_desc) { ++ rx_q->dirty_rx = entry; ++ rx_q->rx_tail_addr = rx_q->dma_rx_phy + ++ (rx_q->dirty_rx * sizeof(struct dma_desc)); ++ stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); ++ } ++ ++ return ret; ++ } ++ ++ static struct stmmac_xdp_buff *xsk_buff_to_stmmac_ctx(struct xdp_buff *xdp) ++ { ++ /* In XDP zero copy data path, xdp field in struct xdp_buff_xsk is used ++ * to represent incoming packet, whereas cb field in the same structure ++ * is used to store driver specific info. 
Thus, struct stmmac_xdp_buff ++ * is laid on top of xdp and cb fields of struct xdp_buff_xsk. ++ */ ++ return (struct stmmac_xdp_buff *)xdp; ++ } ++ ++ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) ++ { ++ struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; ++ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; ++ unsigned int count = 0, error = 0, len = 0; ++ int dirty = stmmac_rx_dirty(priv, queue); ++ unsigned int next_entry = rx_q->cur_rx; ++ u32 rx_errors = 0, rx_dropped = 0; ++ unsigned int desc_size; ++ struct bpf_prog *prog; ++ bool failure = false; ++ int xdp_status = 0; ++ int status = 0; ++ ++ if (netif_msg_rx_status(priv)) { ++ void *rx_head; ++ ++ netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); ++ if (priv->extend_desc) { ++ rx_head = (void *)rx_q->dma_erx; ++ desc_size = sizeof(struct dma_extended_desc); ++ } else { ++ rx_head = (void *)rx_q->dma_rx; ++ desc_size = sizeof(struct dma_desc); ++ } ++ ++ stmmac_display_ring(priv, rx_head, priv->dma_conf.dma_rx_size, true, ++ rx_q->dma_rx_phy, desc_size); ++ } ++ while (count < limit) { ++ struct stmmac_rx_buffer *buf; ++ struct stmmac_xdp_buff *ctx; ++ unsigned int buf1_len = 0; ++ struct dma_desc *np, *p; ++ int entry; ++ int res; ++ ++ if (!count && rx_q->state_saved) { ++ error = rx_q->state.error; ++ len = rx_q->state.len; ++ } else { ++ rx_q->state_saved = false; ++ error = 0; ++ len = 0; ++ } ++ ++ if (count >= limit) ++ break; ++ ++ read_again: ++ buf1_len = 0; ++ entry = next_entry; ++ buf = &rx_q->buf_pool[entry]; ++ ++ if (dirty >= STMMAC_RX_FILL_BATCH) { ++ failure = failure || ++ !stmmac_rx_refill_zc(priv, queue, dirty); ++ dirty = 0; ++ } ++ ++ if (priv->extend_desc) ++ p = (struct dma_desc *)(rx_q->dma_erx + entry); ++ else ++ p = rx_q->dma_rx + entry; ++ ++ /* read the status of the incoming frame */ ++ status = stmmac_rx_status(priv, &priv->xstats, p); ++ /* check if managed by the DMA otherwise go ahead */ ++ if (unlikely(status & dma_own)) ++ break; ++ ++ /* Prefetch the next RX descriptor */ ++ rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, ++ priv->dma_conf.dma_rx_size); ++ next_entry = rx_q->cur_rx; ++ ++ if (priv->extend_desc) ++ np = (struct dma_desc *)(rx_q->dma_erx + next_entry); ++ else ++ np = rx_q->dma_rx + next_entry; ++ ++ prefetch(np); ++ ++ /* Ensure a valid XSK buffer before proceed */ ++ if (!buf->xdp) ++ break; ++ ++ if (priv->extend_desc) ++ stmmac_rx_extended_status(priv, &priv->xstats, ++ rx_q->dma_erx + entry); ++ if (unlikely(status == discard_frame)) { ++ xsk_buff_free(buf->xdp); ++ buf->xdp = NULL; ++ dirty++; ++ error = 1; ++ if (!priv->hwts_rx_en) ++ rx_errors++; ++ } ++ ++ if (unlikely(error && (status & rx_not_ls))) ++ goto read_again; ++ if (unlikely(error)) { ++ count++; ++ continue; ++ } ++ ++ /* XSK pool expects RX frame 1:1 mapped to XSK buffer */ ++ if (likely(status & rx_not_ls)) { ++ xsk_buff_free(buf->xdp); ++ buf->xdp = NULL; ++ dirty++; ++ count++; ++ goto read_again; ++ } ++ ++ ctx = xsk_buff_to_stmmac_ctx(buf->xdp); ++ ctx->priv = priv; ++ ctx->desc = p; ++ ctx->ndesc = np; ++ ++ /* XDP ZC Frame only support primary buffers for now */ ++ buf1_len = stmmac_rx_buf1_len(priv, p, status, len); ++ len += buf1_len; ++ ++ /* ACS is disabled; strip manually. 
*/ ++ if (likely(!(status & rx_not_ls))) { ++ buf1_len -= ETH_FCS_LEN; ++ len -= ETH_FCS_LEN; ++ } ++ ++ /* RX buffer is good and fit into a XSK pool buffer */ ++ buf->xdp->data_end = buf->xdp->data + buf1_len; ++ xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool); ++ ++ prog = READ_ONCE(priv->xdp_prog); ++ res = __stmmac_xdp_run_prog(priv, prog, buf->xdp); ++ ++ switch (res) { ++ case STMMAC_XDP_PASS: ++ stmmac_dispatch_skb_zc(priv, queue, p, np, buf->xdp); ++ xsk_buff_free(buf->xdp); ++ break; ++ case STMMAC_XDP_CONSUMED: ++ xsk_buff_free(buf->xdp); ++ rx_dropped++; ++ break; ++ case STMMAC_XDP_TX: ++ case STMMAC_XDP_REDIRECT: ++ xdp_status |= res; ++ break; ++ } ++ ++ buf->xdp = NULL; ++ dirty++; ++ count++; ++ } ++ ++ if (status & rx_not_ls) { ++ rx_q->state_saved = true; ++ rx_q->state.error = error; ++ rx_q->state.len = len; ++ } ++ ++ stmmac_finalize_xdp_rx(priv, xdp_status); ++ ++ u64_stats_update_begin(&rxq_stats->napi_syncp); ++ u64_stats_add(&rxq_stats->napi.rx_pkt_n, count); ++ u64_stats_update_end(&rxq_stats->napi_syncp); ++ ++ priv->xstats.rx_dropped += rx_dropped; ++ priv->xstats.rx_errors += rx_errors; ++ ++ if (xsk_uses_need_wakeup(rx_q->xsk_pool)) { ++ if (failure || stmmac_rx_dirty(priv, queue) > 0) ++ xsk_set_rx_need_wakeup(rx_q->xsk_pool); ++ else ++ xsk_clear_rx_need_wakeup(rx_q->xsk_pool); ++ ++ return (int)count; ++ } ++ ++ return failure ? limit : (int)count; ++ } ++ +++>>>>>>> 6e9b01909a81 (net: remove gfp_mask from napi_alloc_skb()) + /** + * stmmac_rx - manage the receive process + * @priv: driver private structure +* Unmerged path Documentation/translations/zh_CN/mm/page_frags.rst +* Unmerged path Documentation/translations/zh_CN/mm/page_frags.rst +* Unmerged path Documentation/vm/page_frags.rst +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +index 44c8a3b7fd14..674efe554b7b 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +@@ -2134,9 +2134,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, + */ + + /* allocate a skb to store the frags */ +- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, +- I40E_RX_HDR_SIZE, +- GFP_ATOMIC | __GFP_NOWARN); ++ skb = napi_alloc_skb(&rx_ring->q_vector->napi, I40E_RX_HDR_SIZE); + if (unlikely(!skb)) + return NULL; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +index 51ce422d0e5d..9dfbba1dc8eb 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +@@ -302,8 +302,7 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, + net_prefetch(xdp->data_meta); + + /* allocate a skb to store the frags */ +- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, +- GFP_ATOMIC | __GFP_NOWARN); ++ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); + if (unlikely(!skb)) + goto out; + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +index d64c4997136b..ef37886b222a 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +@@ -1335,9 +1335,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, + net_prefetch(va); + + /* allocate a skb to store the frags */ +- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, +- IAVF_RX_HDR_SIZE, +- GFP_ATOMIC | __GFP_NOWARN); ++ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IAVF_RX_HDR_SIZE); + if 
(unlikely(!skb)) + return NULL; + +* Unmerged path drivers/net/ethernet/intel/ice/ice_txrx.c +diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c +index 360de5b754a7..725c52d6f29e 100644 +--- a/drivers/net/ethernet/intel/ice/ice_xsk.c ++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c +@@ -579,8 +579,7 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) + + net_prefetch(xdp->data_meta); + +- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, +- GFP_ATOMIC | __GFP_NOWARN); ++ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); + if (unlikely(!skb)) + return NULL; + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 4a7062c55fcb..2fd55d454984 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -3007,8 +3007,7 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + /* prefetch first cache line of first page */ + net_prefetch(va); + /* allocate a skb to store the frags */ +- skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE, +- GFP_ATOMIC); ++ skb = napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE); + if (unlikely(!skb)) { + idpf_rx_put_page(rx_buf); + +@@ -3062,7 +3061,7 @@ static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq, + struct sk_buff *skb; + + /* allocate a skb to store the frags */ +- skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC); ++ skb = napi_alloc_skb(&rxq->q_vector->napi, size); + if (unlikely(!skb)) + return NULL; + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index eaa4005ef616..409b7f1eebad 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -2681,8 +2681,7 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, + + net_prefetch(xdp->data_meta); + +- skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, +- GFP_ATOMIC | __GFP_NOWARN); ++ skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); + if (unlikely(!skb)) + return NULL; + +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +index 1703c640a434..4a90a1380be4 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +@@ -220,8 +220,7 @@ static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, + net_prefetch(xdp->data_meta); + + /* allocate a skb to store the frags */ +- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, +- GFP_ATOMIC | __GFP_NOWARN); ++ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); + if (unlikely(!skb)) + return NULL; + +* Unmerged path drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 197c9f9ded5f..d1c9fb9e898d 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2975,13 +2975,7 @@ static inline void *napi_alloc_frag_align(unsigned int fragsz, + return __napi_alloc_frag_align(fragsz, -align); + } + +-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, +- unsigned int length, gfp_t gfp_mask); +-static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, +- unsigned int length) +-{ +- return __napi_alloc_skb(napi, length, GFP_ATOMIC); +-} ++struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); + void napi_consume_skb(struct sk_buff *skb, int 
budget); + + void napi_skb_free_stolen_head(struct sk_buff *skb); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index c2c53c1c31aa..aca6313cb05d 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -605,10 +605,9 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, + EXPORT_SYMBOL(__netdev_alloc_skb); + + /** +- * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance ++ * napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance + * @napi: napi instance this buffer was allocated for + * @len: length to allocate +- * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages + * + * Allocate a new sk_buff for use in NAPI receive. This buffer will + * attempt to allocate the head from a special reserved region used +@@ -617,9 +616,9 @@ EXPORT_SYMBOL(__netdev_alloc_skb); + * + * %NULL is returned if there is no free memory. + */ +-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, +- gfp_t gfp_mask) ++struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) + { ++ gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN; + struct napi_alloc_cache *nc; + struct sk_buff *skb; + bool pfmemalloc; +@@ -693,7 +692,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, + skb_fail: + return skb; + } +-EXPORT_SYMBOL(__napi_alloc_skb); ++EXPORT_SYMBOL(napi_alloc_skb); + + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size, unsigned int truesize) From c3c1c148c0314a3cff12616d167d70d744bf6549 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:55 -0400 Subject: [PATCH 19/53] idpf: remove legacy Page Pool Ethtool stats jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit 4309363f19598999b25a1e55fccf688daa4cc220 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4309363f.failed Page Pool Ethtool stats are deprecated since the Netlink Page Pool interface introduction. idpf receives big changes in Rx buffer management, including &page_pool layout, so keeping these deprecated stats does only harm, not speaking of that CONFIG_IDPF selects CONFIG_PAGE_POOL_STATS unconditionally, while the latter is often turned off for better performance. Remove all the references to PP stats from the Ethtool code. The stats are still available in their full via the generic Netlink interface. 
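For context, a minimal standalone sketch (not the driver code itself) of the
stat-count arithmetic the conflicted idpf_get_sset_count() hunk below resolves
toward: with the page-pool entries gone, the ethtool string-set size is just
the port stats plus one fixed-size block per possible Tx and Rx queue. The
IDPF_*_STATS_LEN values and the queue maxima used here are placeholders, not
the driver's real constants.

#include <stdio.h>

/* Placeholder stat-group sizes; the real IDPF_*_STATS_LEN macros differ. */
#define IDPF_PORT_STATS_LEN      10
#define IDPF_TX_QUEUE_STATS_LEN   5
#define IDPF_RX_QUEUE_STATS_LEN   6

/* Post-patch shape of the count: no page-pool stat entries, only port stats
 * plus a fixed block per potential Tx/Rx queue.
 */
static unsigned int sset_count_sketch(unsigned int max_txq, unsigned int max_rxq)
{
	return IDPF_PORT_STATS_LEN +
	       IDPF_TX_QUEUE_STATS_LEN * max_txq +
	       IDPF_RX_QUEUE_STATS_LEN * max_rxq;
}

int main(void)
{
	/* e.g. 16 Tx and 16 Rx queues -> 10 + 80 + 96 = 186 entries */
	printf("%u\n", sset_count_sketch(16, 16));
	return 0;
}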
Reviewed-by: Przemek Kitszel Reviewed-by: Jacob Keller Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen (cherry picked from commit 4309363f19598999b25a1e55fccf688daa4cc220) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/Kconfig # drivers/net/ethernet/intel/idpf/idpf_ethtool.c --- .../4309363f.failed | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4309363f.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4309363f.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4309363f.failed new file mode 100644 index 0000000000000..87a0c8e965ffa --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4309363f.failed @@ -0,0 +1,63 @@ +idpf: remove legacy Page Pool Ethtool stats + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Alexander Lobakin +commit 4309363f19598999b25a1e55fccf688daa4cc220 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/4309363f.failed + +Page Pool Ethtool stats are deprecated since the Netlink Page Pool +interface introduction. +idpf receives big changes in Rx buffer management, including &page_pool +layout, so keeping these deprecated stats does only harm, not speaking +of that CONFIG_IDPF selects CONFIG_PAGE_POOL_STATS unconditionally, +while the latter is often turned off for better performance. +Remove all the references to PP stats from the Ethtool code. The stats +are still available in their full via the generic Netlink interface. + + Reviewed-by: Przemek Kitszel + Reviewed-by: Jacob Keller + Signed-off-by: Alexander Lobakin + Signed-off-by: Tony Nguyen +(cherry picked from commit 4309363f19598999b25a1e55fccf688daa4cc220) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/Kconfig +# drivers/net/ethernet/intel/idpf/idpf_ethtool.c +diff --cc drivers/net/ethernet/intel/idpf/idpf_ethtool.c +index bf40a9cce50e,3806ddd3ce4a..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +@@@ -616,10 -616,8 +615,13 @@@ static int idpf_get_sset_count(struct n + max_txq = vport_config->max_q.max_txq; + max_rxq = vport_config->max_q.max_rxq; + +- size = IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + ++ return IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + + (IDPF_RX_QUEUE_STATS_LEN * max_rxq); +++<<<<<<< HEAD + + + + return size; +++======= +++>>>>>>> 4309363f1959 (idpf: remove legacy Page Pool Ethtool stats) + } + + /** +@@@ -935,7 -938,7 +937,11 @@@ static void idpf_get_ethtool_stats(stru + if (!rxq) + idpf_add_empty_queue_stats(&data, qtype); + else +++<<<<<<< HEAD + + idpf_add_queue_stats(&data, rxq); +++======= ++ idpf_add_queue_stats(&data, rxq, qtype); +++>>>>>>> 4309363f1959 (idpf: remove legacy Page Pool Ethtool stats) + } + } + +* Unmerged path drivers/net/ethernet/intel/idpf/Kconfig +* Unmerged path drivers/net/ethernet/intel/idpf/Kconfig +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_ethtool.c From 27f5fbf59f2f5f3b620a4e8d375a41ca0ba653f3 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:55 -0400 Subject: [PATCH 20/53] idpf: split &idpf_queue into 4 strictly-typed queue structures jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit 
e4891e4687c8dd136d80d6c1b857a02931ed6fc8 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4891e46.failed Currently, sizeof(struct idpf_queue) is 32 Kb. This is due to the 12-bit hashtable declaration at the end of the queue. This HT is needed only for Tx queues when the flow scheduling mode is enabled. But &idpf_queue is unified for all of the queue types, provoking excessive memory usage. The unified structure in general makes the code less effective via suboptimal fields placement. You can't avoid that unless you make unions each 2 fields. Even then, different field alignment etc., doesn't allow you to optimize things to the limit. Split &idpf_queue into 4 structures corresponding to the queue types: RQ (Rx queue), SQ (Tx queue), FQ (buffer queue), and CQ (completion queue). Place only needed fields there and shortcuts handy for hotpath. Allocate the abovementioned hashtable dynamically and only when needed, keeping &idpf_tx_queue relatively short (192 bytes, same as Rx). This HT is used only for OOO completions, which aren't really hotpath anyway. Note that this change must be done atomically, otherwise it's really easy to get lost and miss something. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen (cherry picked from commit e4891e4687c8dd136d80d6c1b857a02931ed6fc8) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf.h # drivers/net/ethernet/intel/idpf/idpf_ethtool.c # drivers/net/ethernet/intel/idpf/idpf_txrx.c # drivers/net/ethernet/intel/idpf/idpf_txrx.h --- .../e4891e46.failed | 946 ++++++++++++++++++ 1 file changed, 946 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4891e46.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4891e46.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4891e46.failed new file mode 100644 index 0000000000000..9eb9cca097db4 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4891e46.failed @@ -0,0 +1,946 @@ +idpf: split &idpf_queue into 4 strictly-typed queue structures + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Alexander Lobakin +commit e4891e4687c8dd136d80d6c1b857a02931ed6fc8 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4891e46.failed + +Currently, sizeof(struct idpf_queue) is 32 Kb. +This is due to the 12-bit hashtable declaration at the end of the queue. +This HT is needed only for Tx queues when the flow scheduling mode is +enabled. But &idpf_queue is unified for all of the queue types, +provoking excessive memory usage. +The unified structure in general makes the code less effective via +suboptimal fields placement. You can't avoid that unless you make unions +each 2 fields. Even then, different field alignment etc., doesn't allow +you to optimize things to the limit. +Split &idpf_queue into 4 structures corresponding to the queue types: +RQ (Rx queue), SQ (Tx queue), FQ (buffer queue), and CQ (completion +queue). Place only needed fields there and shortcuts handy for hotpath. +Allocate the abovementioned hashtable dynamically and only when needed, +keeping &idpf_tx_queue relatively short (192 bytes, same as Rx). This HT +is used only for OOO completions, which aren't really hotpath anyway. 
+Note that this change must be done atomically, otherwise it's really +easy to get lost and miss something. + + Signed-off-by: Alexander Lobakin + Signed-off-by: Tony Nguyen +(cherry picked from commit e4891e4687c8dd136d80d6c1b857a02931ed6fc8) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf.h +# drivers/net/ethernet/intel/idpf/idpf_ethtool.c +# drivers/net/ethernet/intel/idpf/idpf_txrx.c +# drivers/net/ethernet/intel/idpf/idpf_txrx.h +diff --cc drivers/net/ethernet/intel/idpf/idpf.h +index 8e39fae179a6,f9e43d171f17..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf.h ++++ b/drivers/net/ethernet/intel/idpf/idpf.h +@@@ -16,8 -16,7 +16,12 @@@ struct idpf_vport_max_q + #include + #include + #include +++<<<<<<< HEAD + +#include + +#include +++======= ++ #include +++>>>>>>> e4891e4687c8 (idpf: split &idpf_queue into 4 strictly-typed queue structures) + + #include "virtchnl2.h" + #include "idpf_txrx.h" +diff --cc drivers/net/ethernet/intel/idpf/idpf_ethtool.c +index bf40a9cce50e,e933fed16c7e..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +@@@ -935,7 -945,25 +942,29 @@@ static void idpf_get_ethtool_stats(stru + if (!rxq) + idpf_add_empty_queue_stats(&data, qtype); + else +++<<<<<<< HEAD + + idpf_add_queue_stats(&data, rxq); +++======= ++ idpf_add_queue_stats(&data, rxq, qtype); ++ ++ /* In splitq mode, don't get page pool stats here since ++ * the pools are attached to the buffer queues ++ */ ++ if (is_splitq) ++ continue; ++ ++ if (rxq) ++ page_pool_get_stats(rxq->pp, &pp_stats); ++ } ++ } ++ ++ for (i = 0; i < vport->num_rxq_grp; i++) { ++ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { ++ struct idpf_buf_queue *rxbufq = ++ &vport->rxq_grps[i].splitq.bufq_sets[j].bufq; ++ ++ page_pool_get_stats(rxbufq->pp, &pp_stats); +++>>>>>>> e4891e4687c8 (idpf: split &idpf_queue into 4 strictly-typed queue structures) + } + } + +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 4a7062c55fcb,7be5a723f558..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@@ -3007,8 -3178,7 +3180,12 @@@ struct sk_buff *idpf_rx_construct_skb(c + /* prefetch first cache line of first page */ + net_prefetch(va); + /* allocate a skb to store the frags */ +++<<<<<<< HEAD + + skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE, + + GFP_ATOMIC); +++======= ++ skb = napi_alloc_skb(rxq->napi, IDPF_RX_HDR_SIZE); +++>>>>>>> e4891e4687c8 (idpf: split &idpf_queue into 4 strictly-typed queue structures) + if (unlikely(!skb)) { + idpf_rx_put_page(rx_buf); + +@@@ -3062,7 -3232,7 +3239,11 @@@ idpf_rx_hdr_construct_skb(const struct + struct sk_buff *skb; + + /* allocate a skb to store the frags */ +++<<<<<<< HEAD + + skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC); +++======= ++ skb = napi_alloc_skb(rxq->napi, size); +++>>>>>>> e4891e4687c8 (idpf: split &idpf_queue into 4 strictly-typed queue structures) + if (unlikely(!skb)) + return NULL; + +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 1669bf01ba1d,704aec5c383b..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@@ -4,7 -4,9 +4,13 @@@ + #ifndef _IDPF_TXRX_H_ + #define _IDPF_TXRX_H_ + +++<<<<<<< HEAD + +#include +++======= ++ #include ++ ++ #include +++>>>>>>> e4891e4687c8 (idpf: split &idpf_queue into 4 strictly-typed queue structures) + #include + #include + +@@@ -543,10 -553,9 +567,10 
@@@ struct idpf_q_vector + u32 rx_itr_idx; + + u16 num_bufq; +- struct idpf_queue **bufq; ++ struct idpf_buf_queue **bufq; + + u16 total_events; + + bool wb_on_itr; + char *name; + }; + +* Unmerged path drivers/net/ethernet/intel/idpf/idpf.h +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_ethtool.c +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c +index ae8a48c48070..99db4f8c27c3 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@ -1318,14 +1318,14 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) + + if (idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { +- struct idpf_queue *q = ++ const struct idpf_buf_queue *q = + &grp->splitq.bufq_sets[j].bufq; + + writel(q->next_to_alloc, q->tail); + } + } else { + for (j = 0; j < grp->singleq.num_rxq; j++) { +- struct idpf_queue *q = ++ const struct idpf_rx_queue *q = + grp->singleq.rxqs[j]; + + writel(q->next_to_alloc, q->tail); +@@ -1855,7 +1855,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, + enum idpf_vport_state current_state = np->state; + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport *new_vport; +- int err, i; ++ int err; + + /* If the system is low on memory, we can end up in bad state if we + * free all the memory for queue resources and try to allocate them +@@ -1929,46 +1929,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, + */ + memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps)); + +- /* Since idpf_vport_queues_alloc was called with new_port, the queue +- * back pointers are currently pointing to the local new_vport. Reset +- * the backpointers to the original vport here +- */ +- for (i = 0; i < vport->num_txq_grp; i++) { +- struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; +- int j; +- +- tx_qgrp->vport = vport; +- for (j = 0; j < tx_qgrp->num_txq; j++) +- tx_qgrp->txqs[j]->vport = vport; +- +- if (idpf_is_queue_model_split(vport->txq_model)) +- tx_qgrp->complq->vport = vport; +- } +- +- for (i = 0; i < vport->num_rxq_grp; i++) { +- struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; +- struct idpf_queue *q; +- u16 num_rxq; +- int j; +- +- rx_qgrp->vport = vport; +- for (j = 0; j < vport->num_bufqs_per_qgrp; j++) +- rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport; +- +- if (idpf_is_queue_model_split(vport->rxq_model)) +- num_rxq = rx_qgrp->splitq.num_rxq_sets; +- else +- num_rxq = rx_qgrp->singleq.num_rxq; +- +- for (j = 0; j < num_rxq; j++) { +- if (idpf_is_queue_model_split(vport->rxq_model)) +- q = &rx_qgrp->splitq.rxq_sets[j]->rxq; +- else +- q = rx_qgrp->singleq.rxqs[j]; +- q->vport = vport; +- } +- } +- + if (reset_cause == IDPF_SR_Q_CHANGE) + idpf_vport_alloc_vec_indexes(vport); + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index 5c3d34d3de8a..58510a95b163 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@ -186,7 +186,7 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb, + * and gets a physical address for each memory location and programs + * it and the length into the transmit base mode descriptor. 
+ */ +-static void idpf_tx_singleq_map(struct idpf_queue *tx_q, ++static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, + struct idpf_tx_buf *first, + struct idpf_tx_offload_params *offloads) + { +@@ -210,7 +210,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + /* write each descriptor with CRC bit */ +- if (tx_q->vport->crc_enable) ++ if (idpf_queue_has(CRC_EN, tx_q)) + td_cmd |= IDPF_TX_DESC_CMD_ICRC; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { +@@ -285,7 +285,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + +- nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +@@ -299,7 +299,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + * ring entry to reflect that this index is a context descriptor + */ + static struct idpf_base_tx_ctx_desc * +-idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) ++idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq) + { + struct idpf_base_tx_ctx_desc *ctx_desc; + int ntu = txq->next_to_use; +@@ -320,7 +320,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) + * @txq: queue to send buffer on + * @offload: offload parameter structure + **/ +-static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, ++static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq, + struct idpf_tx_offload_params *offload) + { + struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq); +@@ -333,7 +333,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss); + + u64_stats_update_begin(&txq->stats_sync); +- u64_stats_inc(&txq->q_stats.tx.lso_pkts); ++ u64_stats_inc(&txq->q_stats.lso_pkts); + u64_stats_update_end(&txq->stats_sync); + } + +@@ -352,7 +352,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, + * Returns NETDEV_TX_OK if sent, else an error code + */ + static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, +- struct idpf_queue *tx_q) ++ struct idpf_tx_queue *tx_q) + { + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; +@@ -419,7 +419,7 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev) + { + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); +- struct idpf_queue *tx_q; ++ struct idpf_tx_queue *tx_q; + + tx_q = vport->txqs[skb_get_queue_mapping(skb)]; + +@@ -442,16 +442,15 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + * @cleaned: returns number of packets cleaned + * + */ +-static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, ++static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget, + int *cleaned) + { +- unsigned int budget = tx_q->vport->compln_clean_budget; + unsigned int total_bytes = 0, total_pkts = 0; + struct idpf_base_tx_desc *tx_desc; ++ u32 budget = tx_q->clean_budget; + s16 ntc = tx_q->next_to_clean; + struct idpf_netdev_priv *np; + struct idpf_tx_buf *tx_buf; +- struct idpf_vport *vport; + struct netdev_queue *nq; + bool dont_wake; + +@@ -550,16 +549,15 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, + *cleaned += total_pkts; + + u64_stats_update_begin(&tx_q->stats_sync); +- 
u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts); +- u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes); ++ u64_stats_add(&tx_q->q_stats.packets, total_pkts); ++ u64_stats_add(&tx_q->q_stats.bytes, total_bytes); + u64_stats_update_end(&tx_q->stats_sync); + +- vport = tx_q->vport; +- np = netdev_priv(vport->netdev); +- nq = netdev_get_tx_queue(vport->netdev, tx_q->idx); ++ np = netdev_priv(tx_q->netdev); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + + dont_wake = np->state != __IDPF_VPORT_UP || +- !netif_carrier_ok(vport->netdev); ++ !netif_carrier_ok(tx_q->netdev); + __netif_txq_completed_wake(nq, total_pkts, total_bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, + dont_wake); +@@ -584,7 +582,7 @@ static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + + budget_per_q = num_txq ? max(budget / num_txq, 1) : 0; + for (i = 0; i < num_txq; i++) { +- struct idpf_queue *q; ++ struct idpf_tx_queue *q; + + q = q_vec->tx[i]; + clean_complete &= idpf_tx_singleq_clean(q, budget_per_q, +@@ -614,14 +612,9 @@ static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc, + + /** + * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers +- * @rxq: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer +- * @skb: Current socket buffer containing buffer in progress +- * @ntc: next to clean + */ +-static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, +- union virtchnl2_rx_desc *rx_desc, +- struct sk_buff *skb, u16 ntc) ++static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc) + { + /* if we are the last buffer then there is nothing else to do */ + if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ))) +@@ -639,7 +632,7 @@ static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, + * + * skb->protocol must be set before this function is called + */ +-static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, ++static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, + struct idpf_rx_csum_decoded *csum_bits, + u16 ptype) + { +@@ -647,14 +640,14 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ +- if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM))) ++ if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM))) + return; + + /* check if HW has decoded the packet and checksum */ + if (unlikely(!(csum_bits->l3l4p))) + return; + +- decoded = rxq->vport->rx_ptype_lkup[ptype]; ++ decoded = rxq->rx_ptype_lkup[ptype]; + if (unlikely(!(decoded.known && decoded.outer_ip))) + return; + +@@ -707,7 +700,7 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + + checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); ++ u64_stats_inc(&rxq->q_stats.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); + } + +@@ -721,9 +714,9 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. 
+ **/ +-static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_base_csum(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + u16 ptype) + { + struct idpf_rx_csum_decoded csum_bits; +@@ -761,9 +754,9 @@ static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +-static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_flex_csum(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + u16 ptype) + { + struct idpf_rx_csum_decoded csum_bits; +@@ -801,14 +794,14 @@ static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + **/ +-static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) + { + u64 mask, qw1; + +- if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) ++ if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH))) + return; + + mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; +@@ -831,12 +824,12 @@ static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +-static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) + { +- if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) ++ if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH))) + return; + + if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, +@@ -857,16 +850,16 @@ static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. 
+ */ +-static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, +- struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, +- u16 ptype) ++static void ++idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, ++ struct sk_buff *skb, ++ const union virtchnl2_rx_desc *rx_desc, ++ u16 ptype) + { +- struct idpf_rx_ptype_decoded decoded = +- rx_q->vport->rx_ptype_lkup[ptype]; ++ struct idpf_rx_ptype_decoded decoded = rx_q->rx_ptype_lkup[ptype]; + + /* modifies the skb - consumes the enet header */ +- skb->protocol = eth_type_trans(skb, rx_q->vport->netdev); ++ skb->protocol = eth_type_trans(skb, rx_q->netdev); + + /* Check if we're using base mode descriptor IDs */ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { +@@ -878,6 +871,22 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, + } + } + ++/** ++ * idpf_rx_buf_hw_update - Store the new tail and head values ++ * @rxq: queue to bump ++ * @val: new head index ++ */ ++static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val) ++{ ++ rxq->next_to_use = val; ++ ++ if (unlikely(!rxq->tail)) ++ return; ++ ++ /* writel has an implicit memory barrier */ ++ writel(val, rxq->tail); ++} ++ + /** + * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers + * @rx_q: queue for which the hw buffers are allocated +@@ -885,7 +894,7 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, + * + * Returns false if all allocations were successful, true if any fail + */ +-bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, ++bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, + u16 cleaned_count) + { + struct virtchnl2_singleq_rx_buf_desc *desc; +@@ -896,7 +905,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + return false; + + desc = &rx_q->single_buf[nta]; +- buf = &rx_q->rx_buf.buf[nta]; ++ buf = &rx_q->rx_buf[nta]; + + do { + dma_addr_t addr; +@@ -916,7 +925,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + nta++; + if (unlikely(nta == rx_q->desc_count)) { + desc = &rx_q->single_buf[0]; +- buf = rx_q->rx_buf.buf; ++ buf = rx_q->rx_buf; + nta = 0; + } + +@@ -933,7 +942,6 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + + /** + * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor +- * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * +@@ -943,9 +951,9 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + */ +-static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, +- union virtchnl2_rx_desc *rx_desc, +- struct idpf_rx_extracted *fields) ++static void ++idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, ++ struct idpf_rx_extracted *fields) + { + u64 qword; + +@@ -957,7 +965,6 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, + + /** + * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor +- * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * +@@ -967,9 +974,9 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. 
+ */ +-static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, +- union virtchnl2_rx_desc *rx_desc, +- struct idpf_rx_extracted *fields) ++static void ++idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, ++ struct idpf_rx_extracted *fields) + { + fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, + le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); +@@ -984,14 +991,15 @@ static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, + * @fields: storage for extracted values + * + */ +-static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, +- union virtchnl2_rx_desc *rx_desc, +- struct idpf_rx_extracted *fields) ++static void ++idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, ++ const union virtchnl2_rx_desc *rx_desc, ++ struct idpf_rx_extracted *fields) + { + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) +- idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields); ++ idpf_rx_singleq_extract_base_fields(rx_desc, fields); + else +- idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields); ++ idpf_rx_singleq_extract_flex_fields(rx_desc, fields); + } + + /** +@@ -1001,7 +1009,7 @@ static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +-static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) ++static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) + { + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + struct sk_buff *skb = rx_q->skb; +@@ -1036,7 +1044,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + + idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); + +- rx_buf = &rx_q->rx_buf.buf[ntc]; ++ rx_buf = &rx_q->rx_buf[ntc]; + if (!fields.size) { + idpf_rx_put_page(rx_buf); + goto skip_data; +@@ -1058,7 +1066,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + cleaned_count++; + + /* skip if it is non EOP desc */ +- if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc)) ++ if (idpf_rx_singleq_is_non_eop(rx_desc)) + continue; + + #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ +@@ -1084,7 +1092,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + rx_desc, fields.rx_ptype); + + /* send completed skb up the stack */ +- napi_gro_receive(&rx_q->q_vector->napi, skb); ++ napi_gro_receive(rx_q->pp->p.napi, skb); + skb = NULL; + + /* update budget accounting */ +@@ -1099,8 +1107,8 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); + + u64_stats_update_begin(&rx_q->stats_sync); +- u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts); +- u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes); ++ u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts); ++ u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes); + u64_stats_update_end(&rx_q->stats_sync); + + /* guarantee a trip back through this routine if there was a failure */ +@@ -1127,7 +1135,7 @@ static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + */ + budget_per_q = num_rxq ? 
max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { +- struct idpf_queue *rxq = q_vec->rx[i]; ++ struct idpf_rx_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q); +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.c +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.h +diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +index 3b849b668ede..8f51d7213ccb 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +@@ -761,7 +761,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport) + int i; + + for (i = 0; i < vport->num_txq; i++) +- set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags); ++ idpf_queue_set(SW_MARKER, vport->txqs[i]); + + event = wait_event_timeout(vport->sw_marker_wq, + test_and_clear_bit(IDPF_VPORT_SW_MARKER, +@@ -769,7 +769,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport) + msecs_to_jiffies(500)); + + for (i = 0; i < vport->num_txq; i++) +- clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); ++ idpf_queue_clear(POLL_MODE, vport->txqs[i]); + + if (event) + return 0; +@@ -1103,7 +1103,6 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + int num_regs, u32 q_type) + { + struct idpf_adapter *adapter = vport->adapter; +- struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { +@@ -1122,6 +1121,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + u16 num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_regs; j++, k++) { ++ struct idpf_rx_queue *q; ++ + q = rx_qgrp->singleq.rxqs[j]; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); +@@ -1134,6 +1135,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_regs; j++, k++) { ++ struct idpf_buf_queue *q; ++ + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); +@@ -1460,19 +1463,19 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) + qi[k].model = + cpu_to_le16(vport->txq_model); + qi[k].type = +- cpu_to_le32(tx_qgrp->txqs[j]->q_type); ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + qi[k].ring_len = + cpu_to_le16(tx_qgrp->txqs[j]->desc_count); + qi[k].dma_ring_addr = + cpu_to_le64(tx_qgrp->txqs[j]->dma); + if (idpf_is_queue_model_split(vport->txq_model)) { +- struct idpf_queue *q = tx_qgrp->txqs[j]; ++ struct idpf_tx_queue *q = tx_qgrp->txqs[j]; + + qi[k].tx_compl_queue_id = + cpu_to_le16(tx_qgrp->complq->q_id); + qi[k].relative_queue_id = cpu_to_le16(j); + +- if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags)) ++ if (idpf_queue_has(FLOW_SCH_EN, q)) + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW); + else +@@ -1489,11 +1492,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) + + qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qi[k].model = cpu_to_le16(vport->txq_model); +- qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type); ++ qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma); + +- if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags)) ++ if (idpf_queue_has(FLOW_SCH_EN, tx_qgrp->complq)) + sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; + else + sched_mode = 
VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; +@@ -1578,17 +1581,18 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + goto setup_rxqs; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { +- struct idpf_queue *bufq = ++ struct idpf_buf_queue *bufq = + &rx_qgrp->splitq.bufq_sets[j].bufq; + + qi[k].queue_id = cpu_to_le32(bufq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); +- qi[k].type = cpu_to_le32(bufq->q_type); ++ qi[k].type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + qi[k].ring_len = cpu_to_le16(bufq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(bufq->dma); + qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size); +- qi[k].buffer_notif_stride = bufq->rx_buf_stride; ++ qi[k].buffer_notif_stride = IDPF_RX_BUF_STRIDE; + qi[k].rx_buffer_low_watermark = + cpu_to_le16(bufq->rx_buffer_low_watermark); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) +@@ -1602,7 +1606,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { +- struct idpf_queue *rxq; ++ struct idpf_rx_queue *rxq; + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + rxq = rx_qgrp->singleq.rxqs[j]; +@@ -1610,11 +1614,11 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + } + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + qi[k].rx_bufq1_id = +- cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id); ++ cpu_to_le16(rxq->bufq_sets[0].bufq.q_id); + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { + qi[k].bufq2_ena = IDPF_BUFQ2_ENA; + qi[k].rx_bufq2_id = +- cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id); ++ cpu_to_le16(rxq->bufq_sets[1].bufq.q_id); + } + qi[k].rx_buffer_low_watermark = + cpu_to_le16(rxq->rx_buffer_low_watermark); +@@ -1622,7 +1626,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + + common_qi_fields: +- if (rxq->rx_hsplit_en) { ++ if (idpf_queue_has(HSPLIT_EN, rxq)) { + qi[k].qflags |= + cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi[k].hdr_buffer_size = +@@ -1630,7 +1634,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + } + qi[k].queue_id = cpu_to_le32(rxq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); +- qi[k].type = cpu_to_le32(rxq->q_type); ++ qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + qi[k].ring_len = cpu_to_le16(rxq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(rxq->dma); + qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size); +@@ -1717,7 +1721,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { +- qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); ++ qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1731,7 +1735,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + for (i = 0; i < vport->num_txq_grp; i++, k++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + +- qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type); ++ qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1752,12 +1756,12 @@ static int 
idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id); + qc[k].type = +- cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type); ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + } else { + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id); + qc[k].type = +- cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type); ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + } + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1772,10 +1776,11 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { +- struct idpf_queue *q; ++ const struct idpf_buf_queue *q; + + q = &rx_qgrp->splitq.bufq_sets[j].bufq; +- qc[k].type = cpu_to_le32(q->q_type); ++ qc[k].type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + qc[k].start_queue_id = cpu_to_le32(q->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1860,7 +1865,8 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { +- vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); ++ vqv[k].queue_type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + + if (idpf_is_queue_model_split(vport->txq_model)) { +@@ -1890,14 +1896,15 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { +- struct idpf_queue *rxq; ++ struct idpf_rx_queue *rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + rxq = rx_qgrp->singleq.rxqs[j]; + +- vqv[k].queue_type = cpu_to_le32(rxq->q_type); ++ vqv[k].queue_type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + vqv[k].queue_id = cpu_to_le32(rxq->q_id); + vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx); + vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx); +@@ -1986,7 +1993,7 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) + * queues virtchnl message is sent + */ + for (i = 0; i < vport->num_txq; i++) +- set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); ++ idpf_queue_set(POLL_MODE, vport->txqs[i]); + + /* schedule the napi to receive all the marker packets */ + local_bh_disable(); +@@ -3253,7 +3260,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + int num_qids, + u32 q_type) + { +- struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { +@@ -3261,11 +3267,8 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + +- for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) { ++ for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) + tx_qgrp->txqs[j]->q_id = qids[k]; +- tx_qgrp->txqs[j]->q_type = +- VIRTCHNL2_QUEUE_TYPE_TX; +- } + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX: +@@ -3279,12 +3282,13 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_qids; j++, k++) { ++ struct idpf_rx_queue *q; ++ + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->q_id = qids[k]; +- q->q_type = VIRTCHNL2_QUEUE_TYPE_RX; + } + } + 
break; +@@ -3293,8 +3297,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + tx_qgrp->complq->q_id = qids[k]; +- tx_qgrp->complq->q_type = +- VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: +@@ -3303,9 +3305,10 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_qids; j++, k++) { ++ struct idpf_buf_queue *q; ++ + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->q_id = qids[k]; +- q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + } + } + break; From 5e7e55713773821c30a0458a2d7977bde806152b Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:56 -0400 Subject: [PATCH 21/53] idpf: avoid bloating &idpf_q_vector with big %NR_CPUS jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit bf9bf7042a38ebd2485592467772db50605bd4a2 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bf9bf704.failed With CONFIG_MAXSMP, sizeof(cpumask_t) is 1 Kb. The queue vector structure has them embedded, which means 1 additional Kb of not really hotpath data. We have cpumask_var_t, which is either an embedded cpumask or a pointer for allocating it dynamically when it's big. Use it instead of plain cpumasks and put &idpf_q_vector on a good diet. Also remove redundant pointer to the interrupt name from the structure. request_irq() saves it and free_irq() returns it on deinit, so that you can free the memory. Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen (cherry picked from commit bf9bf7042a38ebd2485592467772db50605bd4a2) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf_txrx.c # drivers/net/ethernet/intel/idpf/idpf_txrx.h --- .../bf9bf704.failed | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bf9bf704.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bf9bf704.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bf9bf704.failed new file mode 100644 index 0000000000000..2c7fca3a53062 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bf9bf704.failed @@ -0,0 +1,136 @@ +idpf: avoid bloating &idpf_q_vector with big %NR_CPUS + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Alexander Lobakin +commit bf9bf7042a38ebd2485592467772db50605bd4a2 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/bf9bf704.failed + +With CONFIG_MAXSMP, sizeof(cpumask_t) is 1 Kb. The queue vector +structure has them embedded, which means 1 additional Kb of not +really hotpath data. +We have cpumask_var_t, which is either an embedded cpumask or a pointer +for allocating it dynamically when it's big. Use it instead of plain +cpumasks and put &idpf_q_vector on a good diet. +Also remove redundant pointer to the interrupt name from the structure. +request_irq() saves it and free_irq() returns it on deinit, so that you +can free the memory. 
+ + Reviewed-by: Przemek Kitszel + Signed-off-by: Alexander Lobakin + Signed-off-by: Tony Nguyen +(cherry picked from commit bf9bf7042a38ebd2485592467772db50605bd4a2) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf_txrx.c +# drivers/net/ethernet/intel/idpf/idpf_txrx.h +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 4a7062c55fcb,f569ea389b04..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@@ -4142,21 -4302,18 +4145,28 @@@ int idpf_vport_intr_alloc(struct idpf_v + q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC; + q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0; + +++<<<<<<< HEAD + + q_vector->tx = kcalloc(txqs_per_vector, + + sizeof(struct idpf_queue *), +++======= ++ if (!zalloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL)) ++ goto error; ++ ++ q_vector->tx = kcalloc(txqs_per_vector, sizeof(*q_vector->tx), +++>>>>>>> bf9bf7042a38 (idpf: avoid bloating &idpf_q_vector with big %NR_CPUS) + GFP_KERNEL); + - if (!q_vector->tx) + + if (!q_vector->tx) { + + err = -ENOMEM; + goto error; + + } + + - q_vector->rx = kcalloc(rxqs_per_vector, sizeof(*q_vector->rx), + + q_vector->rx = kcalloc(rxqs_per_vector, + + sizeof(struct idpf_queue *), + GFP_KERNEL); + - if (!q_vector->rx) + + if (!q_vector->rx) { + + err = -ENOMEM; + goto error; + + } + + if (!idpf_is_queue_model_split(vport->rxq_model)) + continue; +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 1669bf01ba1d,5daa8f905f86..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@@ -518,8 -525,7 +517,12 @@@ struct idpf_intr_reg + * @num_bufq: Number of buffer queues + * @bufq: Array of buffer queues to service + * @total_events: Number of interrupts processed +++<<<<<<< HEAD + + * @wb_on_itr: whether WB on ITR is enabled + + * @name: Queue vector name +++======= ++ * @affinity_mask: CPU affinity mask +++>>>>>>> bf9bf7042a38 (idpf: avoid bloating &idpf_q_vector with big %NR_CPUS) + */ + struct idpf_q_vector { + struct idpf_vport *vport; +@@@ -543,11 -551,11 +545,16 @@@ + u32 rx_itr_idx; + + u16 num_bufq; + - struct idpf_buf_queue **bufq; + + struct idpf_queue **bufq; + + u16 total_events; +++<<<<<<< HEAD + + bool wb_on_itr; + + char *name; +++======= ++ ++ cpumask_var_t affinity_mask; +++>>>>>>> bf9bf7042a38 (idpf: avoid bloating &idpf_q_vector with big %NR_CPUS) + }; + + struct idpf_rx_queue_stats { +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c +index ae8a48c48070..4c8fe567f702 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@ -69,7 +69,7 @@ static void idpf_deinit_vector_stack(struct idpf_adapter *adapter) + static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter) + { + clear_bit(IDPF_MB_INTR_MODE, adapter->flags); +- free_irq(adapter->msix_entries[0].vector, adapter); ++ kfree(free_irq(adapter->msix_entries[0].vector, adapter)); + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); + } + +@@ -124,15 +124,14 @@ static void idpf_mb_irq_enable(struct idpf_adapter *adapter) + */ + static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter) + { +- struct idpf_q_vector *mb_vector = &adapter->mb_vector; + int irq_num, mb_vidx = 0, err; ++ char *name; + + irq_num = adapter->msix_entries[mb_vidx].vector; +- mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", +- dev_driver_string(&adapter->pdev->dev), +- "Mailbox", mb_vidx); +- err = 
request_irq(irq_num, adapter->irq_mb_handler, 0, +- mb_vector->name, adapter); ++ name = kasprintf(GFP_KERNEL, "%s-%s-%d", ++ dev_driver_string(&adapter->pdev->dev), ++ "Mailbox", mb_vidx); ++ err = request_irq(irq_num, adapter->irq_mb_handler, 0, name, adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "IRQ request for mailbox failed, error: %d\n", err); +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.c +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.h From aa35c23918a3d524e2f199c9af8f01a4d4b9e4d7 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:57 -0400 Subject: [PATCH 22/53] idpf: merge singleq and splitq &net_device_ops jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit 14f662b43bf8c765114f73d184af2702b2280436 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/14f662b4.failed It makes no sense to have a second &net_device_ops struct (800 bytes of rodata) with only one difference in .ndo_start_xmit, which can easily be just one `if`. This `if` is a drop in the ocean and you won't see any difference. Define unified idpf_xmit_start(). The preparation for sending is the same, just call either idpf_tx_splitq_frame() or idpf_tx_singleq_frame() depending on the active model to actually map and send the skb. Reviewed-by: Przemek Kitszel Reviewed-by: Jacob Keller Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen (cherry picked from commit 14f662b43bf8c765114f73d184af2702b2280436) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c # drivers/net/ethernet/intel/idpf/idpf_txrx.h --- .../14f662b4.failed | 239 ++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/14f662b4.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/14f662b4.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/14f662b4.failed new file mode 100644 index 0000000000000..c5129b61035d9 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/14f662b4.failed @@ -0,0 +1,239 @@ +idpf: merge singleq and splitq &net_device_ops + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Alexander Lobakin +commit 14f662b43bf8c765114f73d184af2702b2280436 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/14f662b4.failed + +It makes no sense to have a second &net_device_ops struct (800 bytes of +rodata) with only one difference in .ndo_start_xmit, which can easily +be just one `if`. This `if` is a drop in the ocean and you won't see +any difference. +Define unified idpf_xmit_start(). The preparation for sending is the +same, just call either idpf_tx_splitq_frame() or idpf_tx_singleq_frame() +depending on the active model to actually map and send the skb. 
+ + Reviewed-by: Przemek Kitszel + Reviewed-by: Jacob Keller + Signed-off-by: Alexander Lobakin + Signed-off-by: Tony Nguyen +(cherry picked from commit 14f662b43bf8c765114f73d184af2702b2280436) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +# drivers/net/ethernet/intel/idpf/idpf_txrx.h +diff --cc drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index 5c3d34d3de8a,8630db24f63a..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@@ -351,8 -351,8 +351,13 @@@ static void idpf_tx_singleq_build_ctx_d + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +++<<<<<<< HEAD + +static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + + struct idpf_queue *tx_q) +++======= ++ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, ++ struct idpf_tx_queue *tx_q) +++>>>>>>> 14f662b43bf8 (idpf: merge singleq and splitq &net_device_ops) + { + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; +@@@ -409,33 -409,6 +414,36 @@@ out_drop + } + + /** +++<<<<<<< HEAD + + * idpf_tx_singleq_start - Selects the right Tx queue to send buffer + + * @skb: send buffer + + * @netdev: network interface device structure + + * + + * Returns NETDEV_TX_OK if sent, else an error code + + */ + +netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + + struct net_device *netdev) + +{ + + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + + struct idpf_queue *tx_q; + + + + tx_q = vport->txqs[skb_get_queue_mapping(skb)]; + + + + /* hardware can't handle really short frames, hardware padding works + + * beyond this point + + */ + + if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) { + + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + + + return NETDEV_TX_OK; + + } + + + + return idpf_tx_singleq_frame(skb, tx_q); + +} + + + +/** +++======= +++>>>>>>> 14f662b43bf8 (idpf: merge singleq and splitq &net_device_ops) + * idpf_tx_singleq_clean - Reclaim resources from queue + * @tx_q: Tx queue to clean + * @napi_budget: Used to determine if we are in netpoll +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 1669bf01ba1d,b2bf58146484..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@@ -1031,28 -1187,23 +1031,37 @@@ void idpf_deinit_rss(struct idpf_vport + int idpf_rx_bufs_init_all(struct idpf_vport *vport); + void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size); + -struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq, + +struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + struct idpf_rx_buf *rx_buf, + unsigned int size); + -void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, + +bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf); + +void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val); + +void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, + bool xmit_more); + unsigned int idpf_size_to_txd_count(unsigned int size); + -netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); + -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, + +netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb); + +void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 ring_idx); + -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, + +unsigned 
int idpf_tx_desc_count_required(struct idpf_queue *txq, + struct sk_buff *skb); +++<<<<<<< HEAD + +bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + + unsigned int count); + +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size); + +void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); + +netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + + struct net_device *netdev); + +netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + + struct net_device *netdev); + +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq, +++======= ++ int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size); ++ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); ++ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, ++ struct idpf_tx_queue *tx_q); ++ netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev); ++ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, +++>>>>>>> 14f662b43bf8 (idpf: merge singleq and splitq &net_device_ops) + u16 cleaned_count); + int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c +index ae8a48c48070..0964cfb4633c 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@ -4,8 +4,7 @@ + #include "idpf.h" + #include "idpf_virtchnl.h" + +-static const struct net_device_ops idpf_netdev_ops_splitq; +-static const struct net_device_ops idpf_netdev_ops_singleq; ++static const struct net_device_ops idpf_netdev_ops; + + /** + * idpf_init_vector_stack - Fill the MSIX vector stack with vector index +@@ -765,10 +764,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) + } + + /* assign netdev_ops */ +- if (idpf_is_queue_model_split(vport->txq_model)) +- netdev->netdev_ops = &idpf_netdev_ops_splitq; +- else +- netdev->netdev_ops = &idpf_netdev_ops_singleq; ++ netdev->netdev_ops = &idpf_netdev_ops; + + /* setup watchdog timeout value to be 5 second */ + netdev->watchdog_timeo = 5 * HZ; +@@ -2393,24 +2389,10 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) + mem->pa = 0; + } + +-static const struct net_device_ops idpf_netdev_ops_splitq = { +- .ndo_open = idpf_open, +- .ndo_stop = idpf_stop, +- .ndo_start_xmit = idpf_tx_splitq_start, +- .ndo_features_check = idpf_features_check, +- .ndo_set_rx_mode = idpf_set_rx_mode, +- .ndo_validate_addr = eth_validate_addr, +- .ndo_set_mac_address = idpf_set_mac, +- .ndo_change_mtu = idpf_change_mtu, +- .ndo_get_stats64 = idpf_get_stats64, +- .ndo_set_features = idpf_set_features, +- .ndo_tx_timeout = idpf_tx_timeout, +-}; +- +-static const struct net_device_ops idpf_netdev_ops_singleq = { ++static const struct net_device_ops idpf_netdev_ops = { + .ndo_open = idpf_open, + .ndo_stop = idpf_stop, +- .ndo_start_xmit = idpf_tx_singleq_start, ++ .ndo_start_xmit = idpf_tx_start, + .ndo_features_check = idpf_features_check, + .ndo_set_rx_mode = idpf_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 4a7062c55fcb..1634a59120fd 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -4,6 +4,9 @@ + #include "idpf.h" + #include "idpf_virtchnl.h" + ++static bool idpf_chk_linearize(struct sk_buff *skb, 
unsigned int max_bufs, ++ unsigned int count); ++ + /** + * idpf_buf_lifo_push - push a buffer pointer onto stack + * @stack: pointer to stack struct +@@ -2527,8 +2530,8 @@ static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) + * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO + * header, 1 for segment payload, and then 7 for the fragments. + */ +-bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, +- unsigned int count) ++static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, ++ unsigned int count) + { + if (likely(count < max_bufs)) + return false; +@@ -2674,14 +2677,13 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, + } + + /** +- * idpf_tx_splitq_start - Selects the right Tx queue to send buffer ++ * idpf_tx_start - Selects the right Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +-netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, +- struct net_device *netdev) ++netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev) + { + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_queue *tx_q; +@@ -2703,7 +2705,10 @@ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + return NETDEV_TX_OK; + } + +- return idpf_tx_splitq_frame(skb, tx_q); ++ if (idpf_is_queue_model_split(vport->txq_model)) ++ return idpf_tx_splitq_frame(skb, tx_q); ++ else ++ return idpf_tx_singleq_frame(skb, tx_q); + } + + /** +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.h From 012ac8aacc98d77355e01974c78c65c79024906e Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:57 -0400 Subject: [PATCH 23/53] idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit f771314d6b75181de7079c3c7d666293e4ed2b22 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/f771314d.failed Currently, all HW supporting idpf supports the singleq model, but none of it advertises it by default, as splitq is supported and preferred for multiple reasons. Still, this almost dead code often times adds hotpath branches and redundant cacheline accesses. While it can't currently be removed, add CONFIG_IDPF_SINGLEQ and build the singleq code only when it's enabled manually. This corresponds to -10 Kb of object code size and a good bunch of hotpath checks. idpf_is_queue_model_split() works as a gate and compiles out to `true` when the config option is disabled. 
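To make the gating concrete, here is a minimal sketch of the IS_ENABLED() pattern this commit relies on; the helper name and the queue-model constant are placeholders for illustration, not the driver's exact code:

  #include <linux/kconfig.h>
  #include <linux/types.h>

  #define EXAMPLE_QUEUE_MODEL_SPLIT	1	/* assumed value, sketch only */

  /* With CONFIG_IDPF_SINGLEQ=n, IS_ENABLED() is the compile-time constant 0,
   * so this helper folds to 'return true' and every singleq branch guarded
   * by it becomes dead code the compiler can drop.
   */
  static inline bool example_is_queue_model_split(u16 q_model)
  {
          return !IS_ENABLED(CONFIG_IDPF_SINGLEQ) ||
                 q_model == EXAMPLE_QUEUE_MODEL_SPLIT;
  }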
Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen (cherry picked from commit f771314d6b75181de7079c3c7d666293e4ed2b22) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/Kconfig --- .../f771314d.failed | 185 ++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/f771314d.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/f771314d.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/f771314d.failed new file mode 100644 index 0000000000000..d70a4fc8bdc04 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/f771314d.failed @@ -0,0 +1,185 @@ +idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Alexander Lobakin +commit f771314d6b75181de7079c3c7d666293e4ed2b22 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/f771314d.failed + +Currently, all HW supporting idpf supports the singleq model, but none +of it advertises it by default, as splitq is supported and preferred +for multiple reasons. Still, this almost dead code often times adds +hotpath branches and redundant cacheline accesses. +While it can't currently be removed, add CONFIG_IDPF_SINGLEQ and build +the singleq code only when it's enabled manually. This corresponds to +-10 Kb of object code size and a good bunch of hotpath checks. +idpf_is_queue_model_split() works as a gate and compiles out to `true` +when the config option is disabled. + + Reviewed-by: Przemek Kitszel + Signed-off-by: Alexander Lobakin + Signed-off-by: Tony Nguyen +(cherry picked from commit f771314d6b75181de7079c3c7d666293e4ed2b22) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/Kconfig +diff --cc drivers/net/ethernet/intel/Kconfig +index ea88ce9ff84f,0375c7448a57..000000000000 +--- a/drivers/net/ethernet/intel/Kconfig ++++ b/drivers/net/ethernet/intel/Kconfig +@@@ -354,17 -375,15 +354,30 @@@ config IG + + To compile this driver as a module, choose M here. The module + will be called igc. +++<<<<<<< HEAD + +config IDPF + + tristate "Intel(R) Infrastructure Data Path Function Support" + + depends on PCI_MSI + + select DIMLIB + + select PAGE_POOL + + select PAGE_POOL_STATS + + help + + This driver supports Intel(R) Infrastructure Data Path Function + + devices. + + + + To compile this driver as a module, choose M here. The module + + will be called idpf. +++======= ++ ++ config IGC_LEDS ++ def_bool LEDS_TRIGGER_NETDEV ++ depends on IGC && LEDS_CLASS ++ depends on LEDS_CLASS=y || IGC=m ++ help ++ Optional support for controlling the NIC LED's with the netdev ++ LED trigger. 
++ ++ source "drivers/net/ethernet/intel/idpf/Kconfig" +++>>>>>>> f771314d6b75 (idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ) + + endif # NET_VENDOR_INTEL +* Unmerged path drivers/net/ethernet/intel/Kconfig +diff --git a/drivers/net/ethernet/intel/idpf/Kconfig b/drivers/net/ethernet/intel/idpf/Kconfig +new file mode 100644 +index 000000000000..9082c16edb7e +--- /dev/null ++++ b/drivers/net/ethernet/intel/idpf/Kconfig +@@ -0,0 +1,27 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++# Copyright (C) 2024 Intel Corporation ++ ++config IDPF ++ tristate "Intel(R) Infrastructure Data Path Function Support" ++ depends on PCI_MSI ++ select DIMLIB ++ select PAGE_POOL ++ select PAGE_POOL_STATS ++ help ++ This driver supports Intel(R) Infrastructure Data Path Function ++ devices. ++ ++ To compile this driver as a module, choose M here. The module ++ will be called idpf. ++ ++if IDPF ++ ++config IDPF_SINGLEQ ++ bool "idpf singleq support" ++ help ++ This option enables support for legacy single Rx/Tx queues w/no ++ completion and fill queues. Only enable if you have hardware which ++ wants to work in this mode as it increases the driver size and adds ++ runtme checks on hotpath. ++ ++endif # IDPF +diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile +index 6844ead2f3ac..2ce01a0b5898 100644 +--- a/drivers/net/ethernet/intel/idpf/Makefile ++++ b/drivers/net/ethernet/intel/idpf/Makefile +@@ -12,7 +12,8 @@ idpf-y := \ + idpf_ethtool.o \ + idpf_lib.o \ + idpf_main.o \ +- idpf_singleq_txrx.o \ + idpf_txrx.o \ + idpf_virtchnl.o \ + idpf_vf_dev.o ++ ++idpf-$(CONFIG_IDPF_SINGLEQ) += idpf_singleq_txrx.o +diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h +index 8e39fae179a6..67b7a10fb685 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf.h ++++ b/drivers/net/ethernet/intel/idpf/idpf.h +@@ -600,7 +600,8 @@ struct idpf_adapter { + */ + static inline int idpf_is_queue_model_split(u16 q_model) + { +- return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; ++ return !IS_ENABLED(CONFIG_IDPF_SINGLEQ) || ++ q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; + } + + #define idpf_is_cap_ena(adapter, field, flag) \ +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 4a7062c55fcb..28be16ae7190 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -1141,7 +1141,7 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, + */ + static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q) + { +- if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { ++ if (idpf_is_queue_model_split(vport->rxq_model)) { + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; + } else { + if (vport->base_rxd) +diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +index 3b849b668ede..6dc810dac975 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +@@ -1264,12 +1264,12 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter, + vport_msg->vport_type = cpu_to_le16(VIRTCHNL2_VPORT_TYPE_DEFAULT); + vport_msg->vport_index = cpu_to_le16(idx); + +- if (adapter->req_tx_splitq) ++ if (adapter->req_tx_splitq || !IS_ENABLED(CONFIG_IDPF_SINGLEQ)) + vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); + else + vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); + +- if 
(adapter->req_rx_splitq) ++ if (adapter->req_rx_splitq || !IS_ENABLED(CONFIG_IDPF_SINGLEQ)) + vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); + else + vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); +@@ -1331,10 +1331,17 @@ int idpf_check_supported_desc_ids(struct idpf_vport *vport) + + vport_msg = adapter->vport_params_recvd[vport->idx]; + ++ if (!IS_ENABLED(CONFIG_IDPF_SINGLEQ) && ++ (vport_msg->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE || ++ vport_msg->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)) { ++ pci_err(adapter->pdev, "singleq mode requested, but not compiled-in\n"); ++ return -EOPNOTSUPP; ++ } ++ + rx_desc_ids = le64_to_cpu(vport_msg->rx_desc_ids); + tx_desc_ids = le64_to_cpu(vport_msg->tx_desc_ids); + +- if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { ++ if (idpf_is_queue_model_split(vport->rxq_model)) { + if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M)) { + dev_info(&adapter->pdev->dev, "Minimum RX descriptor support not provided, using the default\n"); + vport_msg->rx_desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); +@@ -1344,7 +1351,7 @@ int idpf_check_supported_desc_ids(struct idpf_vport *vport) + vport->base_rxd = true; + } + +- if (vport->txq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT) ++ if (!idpf_is_queue_model_split(vport->txq_model)) + return 0; + + if ((tx_desc_ids & MIN_SUPPORT_TXDID) != MIN_SUPPORT_TXDID) { From 05ab4cde7847ef626b25fd8e623f9c7a8ed94920 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:57 -0400 Subject: [PATCH 24/53] idpf: fix memory leaks and crashes while performing a soft reset jira LE-3467 cve CVE-2024-44964 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit f01032a2ca099ec8d619aaa916c3762aa62495df The second tagged commit introduced a UAF, as it removed restoring q_vector->vport pointers after reinitializating the structures. This is due to that all queue allocation functions are performed here with the new temporary vport structure and those functions rewrite the backpointers to the vport. Then, this new struct is freed and the pointers start leading to nowhere. But generally speaking, the current logic is very fragile. It claims to be more reliable when the system is low on memory, but in fact, it consumes two times more memory as at the moment of running this function, there are two vports allocated with their queues and vectors. Moreover, it claims to prevent the driver from running into "bad state", but in fact, any error during the rebuild leaves the old vport in the partially allocated state. Finally, if the interface is down when the function is called, it always allocates a new queue set, but when the user decides to enable the interface later on, vport_open() allocates them once again, IOW there's a clear memory leak here. Just don't allocate a new queue set when performing a reset, that solves crashes and memory leaks. Readd the old queue number and reopen the interface on rollback - that solves limbo states when the device is left disabled and/or without HW queues enabled. 
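The rollback described above can be sketched as follows; the helpers are hypothetical stand-ins for the driver's queue/virtchnl routines, not the actual patch:

  #include <linux/types.h>

  struct example_vport;					/* opaque, illustration only */

  int example_send_add_queues(struct example_vport *vport);	/* restore the old HW queue set */
  int example_vport_open(struct example_vport *vport);

  /* On a failed soft reset, re-add the previously negotiated queues and
   * reopen the interface if it was up, so the device is never left in a
   * disabled, queue-less limbo state.
   */
  static int example_soft_reset_rollback(struct example_vport *vport, bool was_up)
  {
          int err;

          err = example_send_add_queues(vport);
          if (err)
                  return err;

          return was_up ? example_vport_open(vport) : 0;
  }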
Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Fixes: e4891e4687c8 ("idpf: split &idpf_queue into 4 strictly-typed queue structures") Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski (cherry picked from commit f01032a2ca099ec8d619aaa916c3762aa62495df) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 30 +++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index ae8a48c480708..7a9c321d228a9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1337,9 +1337,8 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up - * @alloc_res: allocate queue resources */ -static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +static int idpf_vport_open(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1352,11 +1351,9 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - if (alloc_res) { - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - } + err = idpf_vport_queues_alloc(vport); + if (err) + return err; err = idpf_vport_intr_alloc(vport); if (err) { @@ -1541,7 +1538,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); + idpf_vport_open(vport); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1900,9 +1897,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto free_vport; } - err = idpf_vport_queues_alloc(new_vport); - if (err) - goto free_vport; if (current_state <= __IDPF_VPORT_DOWN) { idpf_send_delete_queues_msg(vport); } else { @@ -1974,17 +1968,23 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, err = idpf_set_real_num_queues(vport); if (err) - goto err_reset; + goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport, false); + err = idpf_vport_open(vport); kfree(new_vport); return err; err_reset: - idpf_vport_queues_rel(new_vport); + idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, + vport->num_rxq, vport->num_bufq); + +err_open: + if (current_state == __IDPF_VPORT_UP) + idpf_vport_open(vport); + free_vport: kfree(new_vport); @@ -2213,7 +2213,7 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - err = idpf_vport_open(vport, true); + err = idpf_vport_open(vport); idpf_vport_ctrl_unlock(netdev); From c64728dd07d47d3fdd7d80ea711b56571876248e Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:58 -0400 Subject: [PATCH 25/53] idpf: fix memleak in vport interrupt configuration jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Michal Kubiak commit 3cc88e8405b8d55e0ff035e31971aadd6baee2b6 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. 
Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/3cc88e84.failed The initialization of vport interrupt consists of two functions: 1) idpf_vport_intr_init() where a generic configuration is done 2) idpf_vport_intr_req_irq() where the irq for each q_vector is requested. The first function used to create a base name for each interrupt using "kasprintf()" call. Unfortunately, although that call allocated memory for a text buffer, that memory was never released. Fix this by removing creating the interrupt base name in 1). Instead, always create a full interrupt name in the function 2), because there is no need to create a base name separately, considering that the function 2) is never called out of idpf_vport_intr_init() context. Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Cc: stable@vger.kernel.org # 6.7 Signed-off-by: Michal Kubiak Reviewed-by: Pavan Kumar Linga Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski (cherry picked from commit 3cc88e8405b8d55e0ff035e31971aadd6baee2b6) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf_txrx.c --- .../3cc88e84.failed | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/3cc88e84.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/3cc88e84.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/3cc88e84.failed new file mode 100644 index 0000000000000..adbe5db1619e7 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/3cc88e84.failed @@ -0,0 +1,62 @@ +idpf: fix memleak in vport interrupt configuration + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Michal Kubiak +commit 3cc88e8405b8d55e0ff035e31971aadd6baee2b6 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/3cc88e84.failed + +The initialization of vport interrupt consists of two functions: + 1) idpf_vport_intr_init() where a generic configuration is done + 2) idpf_vport_intr_req_irq() where the irq for each q_vector is + requested. + +The first function used to create a base name for each interrupt using +"kasprintf()" call. Unfortunately, although that call allocated memory +for a text buffer, that memory was never released. + +Fix this by removing creating the interrupt base name in 1). +Instead, always create a full interrupt name in the function 2), because +there is no need to create a base name separately, considering that the +function 2) is never called out of idpf_vport_intr_init() context. 
+ +Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") + Cc: stable@vger.kernel.org # 6.7 + Signed-off-by: Michal Kubiak + Reviewed-by: Pavan Kumar Linga + Signed-off-by: Alexander Lobakin + Reviewed-by: Simon Horman + Tested-by: Krishneil Singh + Signed-off-by: Tony Nguyen +Link: https://patch.msgid.link/20240806220923.3359860-3-anthony.l.nguyen@intel.com + Signed-off-by: Jakub Kicinski +(cherry picked from commit 3cc88e8405b8d55e0ff035e31971aadd6baee2b6) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf_txrx.c +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 4a7062c55fcb,a2f9f252694a..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@@ -3671,11 -3806,11 +3673,16 @@@ static int idpf_vport_intr_req_irq(stru + else + continue; + +++<<<<<<< HEAD + + q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", + + basename, vec_name, vidx); +++======= ++ name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, ++ vec_name, vidx); +++>>>>>>> 3cc88e8405b8 (idpf: fix memleak in vport interrupt configuration) + + err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, + - name, q_vector); + + q_vector->name, q_vector); + if (err) { + netdev_err(vport->netdev, + "Request_irq failed, error: %d\n", err); +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.c From 7188d787b2077d9a23c56501e5248a9eb0bbf1de Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:58 -0400 Subject: [PATCH 26/53] idpf: fix UAFs when destroying the queues jira LE-3467 cve CVE-2024-44932 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Alexander Lobakin commit 290f1c033281c1a502a3cd1c53c3a549259c491f The second tagged commit started sometimes (very rarely, but possible) throwing WARNs from net/core/page_pool.c:page_pool_disable_direct_recycling(). Turned out idpf frees interrupt vectors with embedded NAPIs *before* freeing the queues making page_pools' NAPI pointers lead to freed memory before these pools are destroyed by libeth. It's not clear whether there are other accesses to the freed vectors when destroying the queues, but anyway, we usually free queue/interrupt vectors only when the queues are destroyed and the NAPIs are guaranteed to not be referenced anywhere. Invert the allocation and freeing logic making queue/interrupt vectors be allocated first and freed last. Vectors don't require queues to be present, so this is safe. Additionally, this change allows to remove that useless queue->q_vector pointer cleanup, as vectors are still valid when freeing the queues (+ both are freed within one function, so it's not clear why nullify the pointers at all). 
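The resulting ordering can be sketched like this, with hypothetical names standing in for the driver's helpers: vectors are allocated before the queues that reference them and released only after those queues are gone:

  #include <linux/types.h>

  struct example_vport;					/* opaque, illustration only */

  int example_intr_alloc(struct example_vport *vport);	/* vectors + embedded NAPIs */
  void example_intr_rel(struct example_vport *vport);
  int example_queues_alloc(struct example_vport *vport);	/* queues keep vector pointers */
  void example_queues_rel(struct example_vport *vport);

  static int example_vport_open(struct example_vport *vport)
  {
          int err;

          err = example_intr_alloc(vport);
          if (err)
                  return err;

          err = example_queues_alloc(vport);
          if (err)
                  example_intr_rel(vport);

          return err;
  }

  static void example_vport_stop(struct example_vport *vport)
  {
          example_queues_rel(vport);	/* queues first... */
          example_intr_rel(vport);	/* ...vectors (and their NAPIs) last */
  }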
Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues") Fixes: 90912f9f4f2d ("idpf: convert header split mode to libeth + napi_build_skb()") Reported-by: Michal Kubiak Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski (cherry picked from commit 290f1c033281c1a502a3cd1c53c3a549259c491f) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 24 ++++++++++----------- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 24 +-------------------- 2 files changed, 13 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 7a9c321d228a9..094e6428fc954 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -905,8 +905,8 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); - idpf_vport_intr_rel(vport); idpf_vport_queues_rel(vport); + idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; } @@ -1351,43 +1351,43 @@ static int idpf_vport_open(struct idpf_vport *vport) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - err = idpf_vport_intr_alloc(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + return err; } + err = idpf_vport_queues_alloc(vport); + if (err) + goto intr_rel; + err = idpf_vport_queue_ids_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_vport_intr_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_queue_reg_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } idpf_rx_init_buf_tail(vport); @@ -1454,10 +1454,10 @@ static int idpf_vport_open(struct idpf_vport *vport) idpf_send_map_unmap_queue_vector_msg(vport, false); intr_deinit: idpf_vport_intr_deinit(vport); -intr_rel: - idpf_vport_intr_rel(vport); queues_rel: idpf_vport_queues_rel(vport); +intr_rel: + idpf_vport_intr_rel(vport); return err; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 4a7062c55fcb6..c176fd614edad 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3438,9 +3438,7 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - int i, j, v_idx; - - for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; kfree(q_vector->bufq); @@ -3451,26 +3449,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) q_vector->rx = NULL; } - /* Clean up the mapping of queues to vectors */ - for (i = 0; 
i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - - if (idpf_is_queue_model_split(vport->rxq_model)) - for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) - rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; - else - for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - rx_qgrp->singleq.rxqs[j]->q_vector = NULL; - } - - if (idpf_is_queue_model_split(vport->txq_model)) - for (i = 0; i < vport->num_txq_grp; i++) - vport->txq_grps[i].complq->q_vector = NULL; - else - for (i = 0; i < vport->num_txq_grp; i++) - for (j = 0; j < vport->txq_grps[i].num_txq; j++) - vport->txq_grps[i].txqs[j]->q_vector = NULL; - kfree(vport->q_vectors); vport->q_vectors = NULL; } From 4e139cf40749299d0a82c88fd4f7172b8c8fc987 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:59 -0400 Subject: [PATCH 27/53] idpf: fix netdev Tx queue stop/wake jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Michal Kubiak commit e4b398dd82f5d5867bc5f442c43abc8fba30ed2c Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4b398dd.failed netif_txq_maybe_stop() returns -1, 0, or 1, while idpf_tx_maybe_stop_common() says it returns 0 or -EBUSY. As a result, there sometimes are Tx queue timeout warnings despite that the queue is empty or there is at least enough space to restart it. Make idpf_tx_maybe_stop_common() inline and returning true or false, handling the return of netif_txq_maybe_stop() properly. Use a correct goto in idpf_tx_maybe_stop_splitq() to avoid stopping the queue or incrementing the stops counter twice. Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit") Fixes: a5ab9ee0df0b ("idpf: add singleq start_xmit and napi poll") Cc: stable@vger.kernel.org # 6.7+ Signed-off-by: Michal Kubiak Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen (cherry picked from commit e4b398dd82f5d5867bc5f442c43abc8fba30ed2c) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf_txrx.c # drivers/net/ethernet/intel/idpf/idpf_txrx.h --- .../e4b398dd.failed | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4b398dd.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4b398dd.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4b398dd.failed new file mode 100644 index 0000000000000..7d91e8b75b828 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4b398dd.failed @@ -0,0 +1,166 @@ +idpf: fix netdev Tx queue stop/wake + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Michal Kubiak +commit e4b398dd82f5d5867bc5f442c43abc8fba30ed2c +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/e4b398dd.failed + +netif_txq_maybe_stop() returns -1, 0, or 1, while +idpf_tx_maybe_stop_common() says it returns 0 or -EBUSY. As a result, +there sometimes are Tx queue timeout warnings despite that the queue +is empty or there is at least enough space to restart it. +Make idpf_tx_maybe_stop_common() inline and returning true or false, +handling the return of netif_txq_maybe_stop() properly. Use a correct +goto in idpf_tx_maybe_stop_splitq() to avoid stopping the queue or +incrementing the stops counter twice. 
+ +Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit") +Fixes: a5ab9ee0df0b ("idpf: add singleq start_xmit and napi poll") + Cc: stable@vger.kernel.org # 6.7+ + Signed-off-by: Michal Kubiak + Reviewed-by: Przemek Kitszel + Signed-off-by: Alexander Lobakin + Signed-off-by: Tony Nguyen +(cherry picked from commit e4b398dd82f5d5867bc5f442c43abc8fba30ed2c) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf_txrx.c +# drivers/net/ethernet/intel/idpf/idpf_txrx.h +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.c +index c176fd614eda,5d74f324bcd4..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@@ -1972,29 -2133,6 +1972,32 @@@ void idpf_tx_splitq_build_flow_desc(uni + } + + /** +++<<<<<<< HEAD + + * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions + + * @tx_q: the queue to be checked + + * @size: number of descriptors we want to assure is available + + * + + * Returns 0 if stop is not needed + + */ + +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) + +{ + + struct netdev_queue *nq; + + + + if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) + + return 0; + + + + u64_stats_update_begin(&tx_q->stats_sync); + + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + + u64_stats_update_end(&tx_q->stats_sync); + + + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + + + + return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); + +} + + + +/** +++======= +++>>>>>>> e4b398dd82f5 (idpf: fix netdev Tx queue stop/wake) + * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions + * @tx_q: the queue to be checked + * @descs_needed: number of descriptors required for this packet +@@@ -2024,10 -2162,12 +2027,16 @@@ static int idpf_tx_maybe_stop_splitq(st + return 0; + + splitq_stop: ++ netif_stop_subqueue(tx_q->netdev, tx_q->idx); ++ ++ out: + u64_stats_update_begin(&tx_q->stats_sync); + - u64_stats_inc(&tx_q->q_stats.q_busy); + + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + u64_stats_update_end(&tx_q->stats_sync); +++<<<<<<< HEAD + + netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx); +++======= +++>>>>>>> e4b398dd82f5 (idpf: fix netdev Tx queue stop/wake) + + return -EBUSY; + } +@@@ -2047,10 -2187,14 +2056,14 @@@ void idpf_tx_buf_hw_update(struct idpf_ + { + struct netdev_queue *nq; + + - nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + tx_q->next_to_use = val; + +- idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); ++ if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) { ++ u64_stats_update_begin(&tx_q->stats_sync); ++ u64_stats_inc(&tx_q->q_stats.q_busy); ++ u64_stats_update_end(&tx_q->stats_sync); ++ } + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. 
(Only +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 1669bf01ba1d,33305de06975..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@@ -1031,28 -1009,20 +1031,31 @@@ void idpf_deinit_rss(struct idpf_vport + int idpf_rx_bufs_init_all(struct idpf_vport *vport); + void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size); + -struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size); + -void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, + +struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + + struct idpf_rx_buf *rx_buf, + + unsigned int size); + +bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf); + +void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val); + +void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, + bool xmit_more); + unsigned int idpf_size_to_txd_count(unsigned int size); + -netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); + -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, + +netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb); + +void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 ring_idx); + -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, + +unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, + struct sk_buff *skb); +++<<<<<<< HEAD + +bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + + unsigned int count); + +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size); +++======= +++>>>>>>> e4b398dd82f5 (idpf: fix netdev Tx queue stop/wake) + void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); + -netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + - struct idpf_tx_queue *tx_q); + -netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev); + -bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, + +netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + + struct net_device *netdev); + +netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + + struct net_device *netdev); + +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq, + u16 cleaned_count); + int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index 5c3d34d3de8a..729830b5f531 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@ -369,6 +369,10 @@ static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + IDPF_TX_DESCS_FOR_CTX)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + ++ u64_stats_update_begin(&tx_q->stats_sync); ++ u64_stats_inc(&tx_q->q_stats.q_busy); ++ u64_stats_update_end(&tx_q->stats_sync); ++ + return NETDEV_TX_BUSY; + } + +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.c +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.h From a053968a61b94ea0303eab34514b72d394e3740d Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:59 -0400 Subject: [PATCH 28/53] idpf: avoid vport access in idpf_get_link_ksettings jira LE-3467 cve CVE-2024-50274 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Pavan Kumar Linga commit 81d2fb4c7c18a3b36ba3e00b9d5b753107472d75 When the device 
control plane is removed or the platform running device control plane is rebooted, a reset is detected on the driver. On driver reset, it releases the resources and waits for the reset to complete. If the reset fails, it takes the error path and releases the vport lock. At this time if the monitoring tools tries to access link settings, it call traces for accessing released vport pointer. To avoid it, move link_speed_mbps to netdev_priv structure which removes the dependency on vport pointer and the vport lock in idpf_get_link_ksettings. Also use netif_carrier_ok() to check the link status and adjust the offsetof to use link_up instead of link_speed_mbps. Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Cc: stable@vger.kernel.org # 6.7+ Reviewed-by: Tarun K Singh Signed-off-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit 81d2fb4c7c18a3b36ba3e00b9d5b753107472d75) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf.h | 4 ++-- drivers/net/ethernet/intel/idpf/idpf_ethtool.c | 11 +++-------- drivers/net/ethernet/intel/idpf/idpf_lib.c | 4 ++-- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 8e39fae179a6a..afcf5f6da1ff3 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -142,6 +142,7 @@ enum idpf_vport_state { * @adapter: Adapter back pointer * @vport: Vport back pointer * @vport_id: Vport identifier + * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index * @state: See enum idpf_vport_state * @netstats: Packet and byte stats @@ -151,6 +152,7 @@ struct idpf_netdev_priv { struct idpf_adapter *adapter; struct idpf_vport *vport; u32 vport_id; + u32 link_speed_mbps; u16 vport_idx; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; @@ -289,7 +291,6 @@ struct idpf_port_stats { * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation * @port_stats: per port csum, header split, and other offload stats * @link_up: True if link is up - * @link_speed_mbps: Link speed in mbps * @sw_marker_wq: workqueue for marker packets */ struct idpf_vport { @@ -334,7 +335,6 @@ struct idpf_vport { struct idpf_port_stats port_stats; bool link_up; - u32 link_speed_mbps; wait_queue_head_t sw_marker_wq; }; diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index bf40a9cce50ed..bbb3e263638e6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -1287,24 +1287,19 @@ static void idpf_set_msglevel(struct net_device *netdev, u32 data) static int idpf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct idpf_vport *vport; - - idpf_vport_ctrl_lock(netdev); - vport = idpf_netdev_to_vport(netdev); + struct idpf_netdev_priv *np = netdev_priv(netdev); ethtool_link_ksettings_zero_link_mode(cmd, supported); cmd->base.autoneg = AUTONEG_DISABLE; cmd->base.port = PORT_NONE; - if (vport->link_up) { + if (netif_carrier_ok(netdev)) { cmd->base.duplex = DUPLEX_FULL; - cmd->base.speed = vport->link_speed_mbps; + cmd->base.speed = np->link_speed_mbps; } else { cmd->base.duplex = DUPLEX_UNKNOWN; cmd->base.speed = SPEED_UNKNOWN; } - idpf_vport_ctrl_unlock(netdev); - return 0; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c 
index 094e6428fc954..9084bb3e48162 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1875,7 +1875,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, * mess with. Nothing below should use those variables from new_vport * and should instead always refer to them in vport if they need to. */ - memcpy(new_vport, vport, offsetof(struct idpf_vport, link_speed_mbps)); + memcpy(new_vport, vport, offsetof(struct idpf_vport, link_up)); /* Adjust resource parameters prior to reallocating resources */ switch (reset_cause) { @@ -1921,7 +1921,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, /* Same comment as above regarding avoiding copying the wait_queues and * mutexes applies here. We do not want to mess with those if possible. */ - memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps)); + memcpy(vport, new_vport, offsetof(struct idpf_vport, link_up)); /* Since idpf_vport_queues_alloc was called with new_port, the queue * back pointers are currently pointing to the local new_vport. Reset diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 3b849b668eded..6b14cb7991f6a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -150,7 +150,7 @@ static void idpf_handle_event_link(struct idpf_adapter *adapter, } np = netdev_priv(vport->netdev); - vport->link_speed_mbps = le32_to_cpu(v2e->link_speed); + np->link_speed_mbps = le32_to_cpu(v2e->link_speed); if (vport->link_up == v2e->link_status) return; From d560396bf9827061cebb764fe01ddf4143f4526c Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:56:59 -0400 Subject: [PATCH 29/53] idpf: fix idpf_vc_core_init error path jira LE-3467 cve CVE-2024-53064 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Pavan Kumar Linga commit 9b58031ff96b84a38d7b73b23c7ecfb2e0557f43 In an event where the platform running the device control plane is rebooted, reset is detected on the driver. It releases all the resources and waits for the reset to complete. Once the reset is done, it tries to build the resources back. At this time if the device control plane is not yet started, then the driver timeouts on the virtchnl message and retries to establish the mailbox again. In the retry flow, mailbox is deinitialized but the mailbox workqueue is still alive and polling for the mailbox message. This results in accessing the released control queue leading to null-ptr-deref. Fix it by unrolling the work queue cancellation and mailbox deinitialization in the reverse order which they got initialized. 
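The unroll rule applied here can be sketched with a simplified adapter layout (the structure and the deinit helper are assumptions; cancel_delayed_work_sync() is the stock kernel API): stop the deferred work that polls the mailbox before the mailbox it polls is torn down:

  #include <linux/workqueue.h>

  struct example_adapter {
          struct delayed_work mbx_task;		/* polls the mailbox */
  };

  void example_deinit_dflt_mbx(struct example_adapter *adapter);	/* frees the control queue */

  static void example_mbx_error_unroll(struct example_adapter *adapter)
  {
          /* Reverse order of init: quiesce the poller first so it can never
           * touch the control queue that is about to be freed.
           */
          cancel_delayed_work_sync(&adapter->mbx_task);
          example_deinit_dflt_mbx(adapter);
  }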
Fixes: 4930fbf419a7 ("idpf: add core init and interrupt request") Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Cc: stable@vger.kernel.org # 6.9+ Reviewed-by: Tarun K Singh Signed-off-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit 9b58031ff96b84a38d7b73b23c7ecfb2e0557f43) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 1 + drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 9084bb3e48162..0717ef504d791 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1801,6 +1801,7 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter) */ err = idpf_vc_core_init(adapter); if (err) { + cancel_delayed_work_sync(&adapter->mbx_task); idpf_deinit_dflt_mbx(adapter); goto unlock_mutex; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 6b14cb7991f6a..475984c8c471f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3033,7 +3033,6 @@ int idpf_vc_core_init(struct idpf_adapter *adapter) adapter->state = __IDPF_VER_CHECK; if (adapter->vcxn_mngr) idpf_vc_xn_shutdown(adapter->vcxn_mngr); - idpf_deinit_dflt_mbx(adapter); set_bit(IDPF_HR_DRV_LOAD, adapter->flags); queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task, msecs_to_jiffies(task_delay)); From 0f7c5759b6ce6b8e5f719f2d5eea60b5d5804e02 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:00 -0400 Subject: [PATCH 30/53] idpf: call set_real_num_queues in idpf_open jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Joshua Hay commit 52c11d31b5a1d1c747bb5f36cc4808e93e2348f4 On initial driver load, alloc_etherdev_mqs is called with whatever max queue values are provided by the control plane. However, if the driver is loaded on a system where num_online_cpus() returns less than the max queues, the netdev will think there are more queues than are actually available. Only num_online_cpus() will be allocated, but skb_get_queue_mapping(skb) could possibly return an index beyond the range of allocated queues. Consequently, the packet is silently dropped and it appears as if TX is broken. Set the real number of queues during open so the netdev knows how many queues will be allocated. 
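For illustration, a minimal sketch of keeping the stack's view in sync with what was actually allocated; the wrapper and its parameters are assumptions, while netif_set_real_num_*_queues() are the standard netdev helpers:

  #include <linux/netdevice.h>

  static int example_sync_real_num_queues(struct net_device *netdev,
                                          unsigned int num_txq,
                                          unsigned int num_rxq)
  {
          int err;

          /* After this, skb_get_queue_mapping() cannot return an index
           * beyond the queues that really exist.
           */
          err = netif_set_real_num_tx_queues(netdev, num_txq);
          if (err)
                  return err;

          return netif_set_real_num_rx_queues(netdev, num_rxq);
  }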
Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues") Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 52c11d31b5a1d1c747bb5f36cc4808e93e2348f4) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 0717ef504d791..32ae06008fe52 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2214,8 +2214,13 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); + err = idpf_set_real_num_queues(vport); + if (err) + goto unlock; + err = idpf_vport_open(vport); +unlock: idpf_vport_ctrl_unlock(netdev); return err; From e7ce76e9e6cb31e5d176dd0331327ccc6f30e82a Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:00 -0400 Subject: [PATCH 31/53] idpf: use actual mbx receive payload length jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Joshua Hay commit 640f70063e6d3a76a63f57e130fba43ba8c7e980 When a mailbox message is received, the driver is checking for a non 0 datalen in the controlq descriptor. If it is valid, the payload is attached to the ctlq message to give to the upper layer. However, the payload response size given to the upper layer was taken from the buffer metadata which is _always_ the max buffer size. This meant the API was returning 4K as the payload size for all messages. This went unnoticed since the virtchnl exchange response logic was checking for a response size less than 0 (error), not less than exact size, or not greater than or equal to the max mailbox buffer size (4K). All of these checks will pass in the success case since the size provided is always 4K. However, this breaks anyone that wants to validate the exact response size. Fetch the actual payload length from the value provided in the descriptor data_len field (instead of the buffer metadata). Unfortunately, this means we lose some extra error parsing for variable sized virtchnl responses such as create vport and get ptypes. However, the original checks weren't really helping anyways since the size was _always_ 4K. 
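The sizing rule can be sketched with a simplified stand-in for the control-queue message (not the real layout): the size handed upward comes from the descriptor's data_len, never from the buffer's capacity:

  #include <linux/stddef.h>
  #include <linux/types.h>

  struct example_ctlq_msg {
          u16 data_len;			/* actual payload length from the descriptor */
          struct {
                  void *va;
                  u32 size;		/* buffer capacity: always the 4K maximum */
          } payload;
  };

  static void example_get_reply(const struct example_ctlq_msg *msg,
                                void **payload, size_t *payload_size)
  {
          *payload = NULL;
          *payload_size = 0;

          if (!msg->data_len)
                  return;

          *payload = msg->payload.va;
          *payload_size = msg->data_len;	/* not msg->payload.size */
  }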
Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Cc: stable@vger.kernel.org # 6.9+ Signed-off-by: Joshua Hay Reviewed-by: Przemek Kitszel Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit 640f70063e6d3a76a63f57e130fba43ba8c7e980) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 475984c8c471f..4a60d30e98521 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -675,7 +675,7 @@ idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, if (ctlq_msg->data_len) { payload = ctlq_msg->ctx.indirect.payload->va; - payload_size = ctlq_msg->ctx.indirect.payload->size; + payload_size = ctlq_msg->data_len; } xn->reply_sz = payload_size; @@ -1301,10 +1301,6 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter, err = reply_sz; goto free_vport_params; } - if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN) { - err = -EIO; - goto free_vport_params; - } return 0; @@ -2567,9 +2563,6 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) if (reply_sz < 0) return reply_sz; - if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN) - return -EIO; - ptypes_recvd += le16_to_cpu(ptype_info->num_ptypes); if (ptypes_recvd > max_ptype) return -EINVAL; From cd0ad4b769b243ac283365e6cbd3a7e8b921e4c4 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:00 -0400 Subject: [PATCH 32/53] idpf: deinit virtchnl transaction manager after vport and vectors jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Larysa Zaremba commit 09d0fb5cb30ebcaed4a33028ae383f5a1463e2b2 When the device is removed, idpf is supposed to make certain virtchnl requests e.g. VIRTCHNL2_OP_DEALLOC_VECTORS and VIRTCHNL2_OP_DESTROY_VPORT. However, this does not happen due to the referenced commit introducing virtchnl transaction manager and placing its deinitialization before those messages are sent. Then the sending is impossible due to no transactions being available. Lack of cleanup can lead to the FW becoming unresponsive from e.g. unloading-loading the driver and creating-destroying VFs afterwards. Move transaction manager deinitialization to after other virtchnl-related cleanup is done. 
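Sketched with hypothetical names, the intended teardown order is "senders before transport": everything that still issues virtchnl messages runs before the transaction manager is shut down (a later patch in this series conditions the shutdown on whether a device remove is in progress):

  struct example_adapter;					/* opaque, illustration only */

  void example_deinit_task(struct example_adapter *adapter);	/* destroys vports (DESTROY_VPORT) */
  void example_intr_rel(struct example_adapter *adapter);		/* releases vectors (DEALLOC_VECTORS) */
  void example_vc_xn_shutdown(struct example_adapter *adapter);	/* retires pending transactions */

  static void example_vc_core_deinit(struct example_adapter *adapter)
  {
          example_deinit_task(adapter);
          example_intr_rel(adapter);
          example_vc_xn_shutdown(adapter);	/* last: its users are gone now */
  }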
Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Reviewed-by: Przemek Kitszel Signed-off-by: Larysa Zaremba Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit 09d0fb5cb30ebcaed4a33028ae383f5a1463e2b2) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 4a60d30e98521..b138b2c2dacb5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3043,9 +3043,9 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter) if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags)) return; - idpf_vc_xn_shutdown(adapter->vcxn_mngr); idpf_deinit_task(adapter); idpf_intr_rel(adapter); + idpf_vc_xn_shutdown(adapter->vcxn_mngr); cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->mbx_task); From 5b6a1ba15535a4d581876fe59976db7c381b3521 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:00 -0400 Subject: [PATCH 33/53] idpf: add read memory barrier when checking descriptor done bit jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Emil Tantilov commit 396f0165672c6a74d7379027d344b83b5f05948c Add read memory barrier to ensure the order of operations when accessing control queue descriptors. Specifically, we want to avoid cases where loads can be reordered: 1. Load #1 is dispatched to read descriptor flags. 2. Load #2 is dispatched to read some other field from the descriptor. 3. Load #2 completes, accessing memory/cache at a point in time when the DD flag is zero. 4. NIC DMA overwrites the descriptor, now the DD flag is one. 5. Any fields loaded before step 4 are now inconsistent with the actual descriptor state. Add read memory barrier between steps 1 and 2, so that load #2 is not executed until load #1 has completed. 
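The required load ordering can be sketched as below; the descriptor layout and flag value are placeholders, while dma_rmb() and READ_ONCE() are the real kernel primitives this fix relies on:

  #include <asm/barrier.h>
  #include <asm/byteorder.h>
  #include <linux/bits.h>
  #include <linux/compiler.h>
  #include <linux/types.h>

  #define EXAMPLE_FLAG_DD		BIT(0)	/* assumed bit position, sketch only */

  struct example_desc {
          __le16 flags;			/* DD bit is written last by the device */
          __le16 ret_val;			/* only meaningful once DD is set */
  };

  static bool example_desc_done(const struct example_desc *desc, u16 *ret_val)
  {
          if (!(le16_to_cpu(READ_ONCE(desc->flags)) & EXAMPLE_FLAG_DD))
                  return false;

          /* Step 1 -> step 2 ordering: no other descriptor field may be
           * loaded until the DD flag load above has completed.
           */
          dma_rmb();

          *ret_val = le16_to_cpu(desc->ret_val);
          return true;
  }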
Fixes: 8077c727561a ("idpf: add controlq init and reset checks") Reviewed-by: Przemek Kitszel Reviewed-by: Sridhar Samudrala Suggested-by: Lance Richardson Signed-off-by: Emil Tantilov Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit 396f0165672c6a74d7379027d344b83b5f05948c) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_controlq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index 4849590a5591f..b28991dd18703 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -376,6 +376,9 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD)) break; + /* Ensure no other fields are read until DD flag is checked */ + dma_rmb(); + /* strip off FW internal code */ desc_err = le16_to_cpu(desc->ret_val) & 0xff; @@ -563,6 +566,9 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, if (!(flags & IDPF_CTLQ_FLAG_DD)) break; + /* Ensure no other fields are read until DD flag is checked */ + dma_rmb(); + q_msg[i].vmvf_type = (flags & (IDPF_CTLQ_FLAG_FTYPE_VM | IDPF_CTLQ_FLAG_FTYPE_PF)) >> From 006feb0bd8100758f570fe8841753882ec1c459c Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:01 -0400 Subject: [PATCH 34/53] idpf: fix transaction timeouts on reset jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Emil Tantilov commit 137da75ba72593598898a4e79da34f4b2da5d151 Restore the call to idpf_vc_xn_shutdown() at the beginning of idpf_vc_core_deinit() provided the function is not called on remove. In the reset path the mailbox is destroyed, leading to all transactions timing out. 
Fixes: 09d0fb5cb30e ("idpf: deinit virtchnl transaction manager after vport and vectors") Reviewed-by: Larysa Zaremba Signed-off-by: Emil Tantilov Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit 137da75ba72593598898a4e79da34f4b2da5d151) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index b138b2c2dacb5..f3555d90c34c1 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3040,12 +3040,21 @@ int idpf_vc_core_init(struct idpf_adapter *adapter) */ void idpf_vc_core_deinit(struct idpf_adapter *adapter) { + bool remove_in_prog; + if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags)) return; + /* Avoid transaction timeouts when called during reset */ + remove_in_prog = test_bit(IDPF_REMOVE_IN_PROG, adapter->flags); + if (!remove_in_prog) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); + idpf_deinit_task(adapter); idpf_intr_rel(adapter); - idpf_vc_xn_shutdown(adapter->vcxn_mngr); + + if (remove_in_prog) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->mbx_task); From c557981b2f2ceef92ce824a8d5dc7d0a047d59eb Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:01 -0400 Subject: [PATCH 35/53] idpf: Acquire the lock before accessing the xn->salt jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Manoj Vishwanathan commit d15fe4edd7decdf14d8ad2b78df100ea23302065 The transaction salt was being accessed before acquiring the idpf_vc_xn_lock when idpf has to forward the virtchnl reply. 
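The rule being restored can be sketched with a simplified transaction structure (names and the lock type are assumptions for illustration): the salt is only read and compared while holding the lock that protects it:

  #include <linux/errno.h>
  #include <linux/mutex.h>
  #include <linux/types.h>

  struct example_xn {
          struct mutex lock;		/* protects salt and state */
          u8 salt;
  };

  static int example_match_salt(struct example_xn *xn, u8 salt)
  {
          int ret = 0;

          mutex_lock(&xn->lock);		/* take the lock first ... */
          if (xn->salt != salt)		/* ... then read the protected field */
                  ret = -EINVAL;
          mutex_unlock(&xn->lock);

          return ret;
  }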
Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Signed-off-by: Manoj Vishwanathan Signed-off-by: David Decotigny Signed-off-by: Brian Vazquez Reviewed-by: Jacob Keller Reviewed-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit d15fe4edd7decdf14d8ad2b78df100ea23302065) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index f3555d90c34c1..32dfa6c0667a6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -621,14 +621,15 @@ idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, return -EINVAL; } xn = &adapter->vcxn_mngr->ring[xn_idx]; + idpf_vc_xn_lock(xn); salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info); if (xn->salt != salt) { dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n", xn->salt, salt); + idpf_vc_xn_unlock(xn); return -EINVAL; } - idpf_vc_xn_lock(xn); switch (xn->state) { case IDPF_VC_XN_WAITING: /* success */ From 17492690279e9c55450fbfea08698c7647afa9bc Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:01 -0400 Subject: [PATCH 36/53] idpf: convert workqueues to unbound jira LE-3467 cve CVE-2024-58057 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Marco Leogrande commit 9a5b021cb8186f1854bac2812bd4f396bb1e881c When a workqueue is created with `WQ_UNBOUND`, its work items are served by special worker-pools, whose host workers are not bound to any specific CPU. In the default configuration (i.e. when `queue_delayed_work` and friends do not specify which CPU to run the work item on), `WQ_UNBOUND` allows the work item to be executed on any CPU in the same node of the CPU it was enqueued on. While this solution potentially sacrifices locality, it avoids contention with other processes that might dominate the CPU time of the processor the work item was scheduled on. This is not just a theoretical problem: in a particular scenario misconfigured process was hogging most of the time from CPU0, leaving less than 0.5% of its CPU time to the kworker. The IDPF workqueues that were using the kworker on CPU0 suffered large completion delays as a result, causing performance degradation, timeouts and eventual system crash. Tested: * I have also run a manual test to gauge the performance improvement. The test consists of an antagonist process (`./stress --cpu 2`) consuming as much of CPU 0 as possible. This process is run under `taskset 01` to bind it to CPU0, and its priority is changed with `chrt -pQ 9900 10000 ${pid}` and `renice -n -20 ${pid}` after start. Then, the IDPF driver is forced to prefer CPU0 by editing all calls to `queue_delayed_work`, `mod_delayed_work`, etc... to use CPU 0. Finally, `ktraces` for the workqueue events are collected. Without the current patch, the antagonist process can force arbitrary delays between `workqueue_queue_work` and `workqueue_execute_start`, that in my tests were as high as `30ms`. With the current patch applied, the workqueue can be migrated to another unloaded CPU in the same node, and, keeping everything else equal, the maximum delay I could see was `6us`. 
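For reference, a minimal sketch of the allocation pattern; the function and its parameters are placeholders, while alloc_workqueue() and the WQ_* flags are the stock workqueue API:

  #include <linux/workqueue.h>

  static struct workqueue_struct *example_alloc_service_wq(const char *drv,
                                                           const char *dev_inst)
  {
          /* WQ_UNBOUND lets the work migrate to any CPU in the node instead
           * of staying pinned to a possibly overloaded CPU; WQ_MEM_RECLAIM
           * guarantees forward progress under memory pressure.
           */
          return alloc_workqueue("%s-%s-service",
                                 WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
                                 drv, dev_inst);
  }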
Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Signed-off-by: Marco Leogrande Signed-off-by: Manoj Vishwanathan Signed-off-by: Brian Vazquez Reviewed-by: Jacob Keller Reviewed-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit 9a5b021cb8186f1854bac2812bd4f396bb1e881c) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_main.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index 05115b6398489..8e889086aa570 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -183,7 +183,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); pci_set_drvdata(pdev, adapter); - adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0, + adapter->init_wq = alloc_workqueue("%s-%s-init", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->init_wq) { @@ -192,7 +193,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free; } - adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0, + adapter->serv_wq = alloc_workqueue("%s-%s-service", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->serv_wq) { @@ -201,7 +203,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_serv_wq_alloc; } - adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0, + adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->mbx_wq) { @@ -210,7 +213,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_mbx_wq_alloc; } - adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0, + adapter->stats_wq = alloc_workqueue("%s-%s-stats", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->stats_wq) { @@ -219,7 +223,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_stats_wq_alloc; } - adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0, + adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->vc_event_wq) { From dd47c6b38f0935a52e455631c22e4a087409331a Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:02 -0400 Subject: [PATCH 37/53] idpf: add more info during virtchnl transaction timeout/salt mismatch jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Manoj Vishwanathan commit d0ea9ebac3e7a5b53bc259e51f54043aa98696ad Add more information related to the transaction like cookie, vc_op, salt when transaction times out and include similar information when transaction salt does not match. 
Info output for transaction timeout: ------------------- (op:5015 cookie:45fe vc_op:5015 salt:45 timeout:60000ms) ------------------- before it was: ------------------- (op 5015, 60000ms) ------------------- Signed-off-by: Manoj Vishwanathan Signed-off-by: Brian Vazquez Reviewed-by: Jacob Keller Reviewed-by: Pavan Kumar Linga Reviewed-by: Paul Menzel Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen (cherry picked from commit d0ea9ebac3e7a5b53bc259e51f54043aa98696ad) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 32dfa6c0667a6..2e0c41883ff59 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -526,8 +526,10 @@ static ssize_t idpf_vc_xn_exec(struct idpf_adapter *adapter, retval = -ENXIO; goto only_unlock; case IDPF_VC_XN_WAITING: - dev_notice_ratelimited(&adapter->pdev->dev, "Transaction timed-out (op %d, %dms)\n", - params->vc_op, params->timeout_ms); + dev_notice_ratelimited(&adapter->pdev->dev, + "Transaction timed-out (op:%d cookie:%04x vc_op:%d salt:%02x timeout:%dms)\n", + params->vc_op, cookie, xn->vc_op, + xn->salt, params->timeout_ms); retval = -ETIME; break; case IDPF_VC_XN_COMPLETED_SUCCESS: @@ -624,8 +626,9 @@ idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, idpf_vc_xn_lock(xn); salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info); if (xn->salt != salt) { - dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n", - xn->salt, salt); + dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (exp:%d@%02x(%d) != got:%d@%02x)\n", + xn->vc_op, xn->salt, xn->state, + ctlq_msg->cookie.mbx.chnl_opcode, salt); idpf_vc_xn_unlock(xn); return -EINVAL; } From b228e7d97b15a4d45f9fb41622558cb5aaea2e6f Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:02 -0400 Subject: [PATCH 38/53] idpf: fix handling rsc packet with a single segment jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Sridhar Samudrala commit 69ab25a74e2df53edc2de4acfce0a484bdb88155 Handle rsc packet with a single segment same as a multi segment rsc packet so that CHECKSUM_PARTIAL is set in the skb->ip_summed field. The current code is passing CHECKSUM_NONE resulting in TCP GRO layer doing checksum in SW and hiding the issue. This will fail when using dmabufs as payload buffers as skb frag would be unreadable. 
Fixes: 3a8845af66ed ("idpf: add RX splitq napi poll support") Signed-off-by: Sridhar Samudrala Reviewed-by: Przemek Kitszel Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 69ab25a74e2df53edc2de4acfce0a484bdb88155) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index c176fd614edad..29fc923bd4c58 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2884,8 +2884,6 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, return -EINVAL; rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len); - if (unlikely(rsc_segments == 1)) - return 0; NAPI_GRO_CB(skb)->count = rsc_segments; skb_shinfo(skb)->gso_size = rsc_seg_len; From 60dd9c3fcd243dff50cbdd3ae3f9c8d9e0d7de58 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:02 -0400 Subject: [PATCH 39/53] idpf: fix checksums set in idpf_rx_rsc() jira LE-3467 cve CVE-2025-21890 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Eric Dumazet commit 674fcb4f4a7e3e277417a01788cc6daae47c3804 idpf_rx_rsc() uses skb_transport_offset(skb) while the transport header is not set yet. This triggers the following warning for CONFIG_DEBUG_NET=y builds. DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)) [ 69.261620] WARNING: CPU: 7 PID: 0 at ./include/linux/skbuff.h:3020 idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261629] Modules linked in: vfat fat dummy bridge intel_uncore_frequency_tpmi intel_uncore_frequency_common intel_vsec_tpmi idpf intel_vsec cdc_ncm cdc_eem cdc_ether usbnet mii xhci_pci xhci_hcd ehci_pci ehci_hcd libeth [ 69.261644] CPU: 7 UID: 0 PID: 0 Comm: swapper/7 Tainted: G S W 6.14.0-smp-DEV #1697 [ 69.261648] Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN [ 69.261650] RIP: 0010:idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261677] ? __warn (kernel/panic.c:242 kernel/panic.c:748) [ 69.261682] ? idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261687] ? report_bug (lib/bug.c:?) [ 69.261690] ? handle_bug (arch/x86/kernel/traps.c:285) [ 69.261694] ? exc_invalid_op (arch/x86/kernel/traps.c:309) [ 69.261697] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:621) [ 69.261700] ? __pfx_idpf_vport_splitq_napi_poll (drivers/net/ethernet/intel/idpf/idpf_txrx.c:4011) idpf [ 69.261704] ? idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261708] ? idpf_vport_splitq_napi_poll (drivers/net/ethernet/intel/idpf/idpf_txrx.c:3072) idpf [ 69.261712] __napi_poll (net/core/dev.c:7194) [ 69.261716] net_rx_action (net/core/dev.c:7265) [ 69.261718] ? __qdisc_run (net/sched/sch_generic.c:293) [ 69.261721] ? 
sched_clock (arch/x86/include/asm/preempt.h:84 arch/x86/kernel/tsc.c:288) [ 69.261726] handle_softirqs (kernel/softirq.c:561) Fixes: 3a8845af66edb ("idpf: add RX splitq napi poll support") Signed-off-by: Eric Dumazet Cc: Alan Brady Cc: Joshua Hay Cc: Willem de Bruijn Acked-by: Przemek Kitszel Link: https://patch.msgid.link/20250226221253.1927782-1-edumazet@google.com Signed-off-by: Jakub Kicinski (cherry picked from commit 674fcb4f4a7e3e277417a01788cc6daae47c3804) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 29fc923bd4c58..7501a74f8dd92 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2889,7 +2889,6 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, skb_shinfo(skb)->gso_size = rsc_seg_len; skb_reset_network_header(skb); - len = skb->len - skb_transport_offset(skb); if (ipv4) { struct iphdr *ipv4h = ip_hdr(skb); @@ -2898,6 +2897,7 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, /* Reset and set transport header offset in skb */ skb_set_transport_header(skb, sizeof(struct iphdr)); + len = skb->len - skb_transport_offset(skb); /* Compute the TCP pseudo header checksum*/ tcp_hdr(skb)->check = @@ -2907,6 +2907,7 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + len = skb->len - skb_transport_offset(skb); tcp_hdr(skb)->check = ~tcp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0); } From 8bce3d7d8a2cea2593f575b1d930197749e379bf Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:02 -0400 Subject: [PATCH 40/53] idpf: fix adapter NULL pointer dereference on reboot jira LE-3467 cve CVE-2025-22065 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Emil Tantilov commit 4c9106f4906a85f6b13542d862e423bcdc118cc3 With SRIOV enabled, idpf ends up calling into idpf_remove() twice. First via idpf_shutdown() and then again when idpf_remove() calls into sriov_disable(), because the VF devices use the idpf driver, hence the same remove routine. When that happens, it is possible for the adapter to be NULL from the first call to idpf_remove(), leading to a NULL pointer dereference. echo 1 > /sys/class/net//device/sriov_numvfs reboot BUG: kernel NULL pointer dereference, address: 0000000000000020 ... RIP: 0010:idpf_remove+0x22/0x1f0 [idpf] ... ? idpf_remove+0x22/0x1f0 [idpf] ? idpf_remove+0x1e4/0x1f0 [idpf] pci_device_remove+0x3f/0xb0 device_release_driver_internal+0x19f/0x200 pci_stop_bus_device+0x6d/0x90 pci_stop_and_remove_bus_device+0x12/0x20 pci_iov_remove_virtfn+0xbe/0x120 sriov_disable+0x34/0xe0 idpf_sriov_configure+0x58/0x140 [idpf] idpf_remove+0x1b9/0x1f0 [idpf] idpf_shutdown+0x12/0x30 [idpf] pci_device_shutdown+0x35/0x60 device_shutdown+0x156/0x200 ... Replace the direct idpf_remove() call in idpf_shutdown() with idpf_vc_core_deinit() and idpf_deinit_dflt_mbx(), which perform the bulk of the cleanup, such as stopping the init task, freeing IRQs, destroying the vports and freeing the mailbox. This avoids the calls to sriov_disable() in addition to a small netdev cleanup, and destroying workqueues, which don't seem to be required on shutdown. 
Reported-by: Yuying Ma Fixes: e850efed5e15 ("idpf: add module register and probe functionality") Reviewed-by: Madhu Chittim Signed-off-by: Emil Tantilov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 4c9106f4906a85f6b13542d862e423bcdc118cc3) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index 8e889086aa570..fdbb06dc94d07 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -91,7 +91,11 @@ static void idpf_remove(struct pci_dev *pdev) */ static void idpf_shutdown(struct pci_dev *pdev) { - idpf_remove(pdev); + struct idpf_adapter *adapter = pci_get_drvdata(pdev); + + cancel_delayed_work_sync(&adapter->vc_event_task); + idpf_vc_core_deinit(adapter); + idpf_deinit_dflt_mbx(adapter); if (system_state == SYSTEM_POWER_OFF) pci_set_power_state(pdev, PCI_D3hot); From 09bd57455ac02676b1c62c7e8b00378a0628aa22 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:03 -0400 Subject: [PATCH 41/53] idpf: fix offloads support for encapsulated packets jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Madhu Chittim commit 713dd6c2deca88cba0596b1e2576f7b7a8e5c59e Split offloads into csum, tso and other offloads so that tunneled packets do not by default have all the offloads enabled. Stateless offloads for encapsulated packets are not yet supported in firmware/software but in the driver we were setting the features same as non encapsulated features. Fixed naming to clarify CSUM bits are being checked for Tx. Inherit netdev features to VLAN interfaces as well. 
Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Reviewed-by: Sridhar Samudrala Signed-off-by: Madhu Chittim Tested-by: Zachary Goldstein Tested-by: Samuel Salin Signed-off-by: Tony Nguyen Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250425222636.3188441-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski (cherry picked from commit 713dd6c2deca88cba0596b1e2576f7b7a8e5c59e) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf.h | 18 +++---- drivers/net/ethernet/intel/idpf/idpf_lib.c | 57 ++++++++-------------- 2 files changed, 27 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index afcf5f6da1ff3..99c7b6c015a2a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -631,13 +631,13 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |\ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6) -#define IDPF_CAP_RX_CSUM_L4V4 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP) +#define IDPF_CAP_TX_CSUM_L4V4 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP) -#define IDPF_CAP_RX_CSUM_L4V6 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) +#define IDPF_CAP_TX_CSUM_L4V6 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP) #define IDPF_CAP_RX_CSUM (\ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |\ @@ -646,11 +646,9 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) -#define IDPF_CAP_SCTP_CSUM (\ +#define IDPF_CAP_TX_SCTP_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP) + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP) #define IDPF_CAP_TUNNEL_TX_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |\ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 32ae06008fe52..6509987f5077b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -718,8 +718,10 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; struct idpf_vport_config *vport_config; + netdev_features_t other_offloads = 0; + netdev_features_t csum_offloads = 0; + netdev_features_t tso_offloads = 0; netdev_features_t dflt_features; - netdev_features_t offloads = 0; struct idpf_netdev_priv *np; struct net_device *netdev; u16 idx = vport->idx; @@ -784,53 +786,32 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) dflt_features |= NETIF_F_RXHASH; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4)) - dflt_features |= NETIF_F_IP_CSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6)) - dflt_features |= NETIF_F_IPV6_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V4)) + csum_offloads |= NETIF_F_IP_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V6)) + csum_offloads |= NETIF_F_IPV6_CSUM; if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM)) - dflt_features |= NETIF_F_RXCSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM)) - dflt_features |= NETIF_F_SCTP_CRC; + csum_offloads |= NETIF_F_RXCSUM; + if 
(idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_SCTP_CSUM)) + csum_offloads |= NETIF_F_SCTP_CRC; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP)) - dflt_features |= NETIF_F_TSO; + tso_offloads |= NETIF_F_TSO; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP)) - dflt_features |= NETIF_F_TSO6; + tso_offloads |= NETIF_F_TSO6; if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_UDP | VIRTCHNL2_CAP_SEG_IPV6_UDP)) - dflt_features |= NETIF_F_GSO_UDP_L4; + tso_offloads |= NETIF_F_GSO_UDP_L4; if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC)) - offloads |= NETIF_F_GRO_HW; - /* advertise to stack only if offloads for encapsulated packets is - * supported - */ - if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS, - VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) { - offloads |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_PARTIAL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_IPXIP4 | - NETIF_F_GSO_IPXIP6 | - 0; - - if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS, - IDPF_CAP_TUNNEL_TX_CSUM)) - netdev->gso_partial_features |= - NETIF_F_GSO_UDP_TUNNEL_CSUM; - - netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; - offloads |= NETIF_F_TSO_MANGLEID; - } + other_offloads |= NETIF_F_GRO_HW; if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK)) - offloads |= NETIF_F_LOOPBACK; + other_offloads |= NETIF_F_LOOPBACK; - netdev->features |= dflt_features; - netdev->hw_features |= dflt_features | offloads; - netdev->hw_enc_features |= dflt_features | offloads; + netdev->features |= dflt_features | csum_offloads | tso_offloads; + netdev->hw_features |= netdev->features | other_offloads; + netdev->vlan_features |= netdev->features | other_offloads; + netdev->hw_enc_features |= dflt_features | other_offloads; idpf_set_ethtool_ops(netdev); SET_NETDEV_DEV(netdev, &adapter->pdev->dev); From 3ac747e0b95e3e07e103fda927bb42a216092137 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:03 -0400 Subject: [PATCH 42/53] idpf: fix potential memory leak on kcalloc() failure jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Michal Swiatkowski commit 8a558cbda51bef09773c72bf74a32047479110c7 In case of failing on rss_data->rss_key allocation the function is freeing vport without freeing earlier allocated q_vector_idxs. Fix it. Move from freeing in error branch to goto scheme. 
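The fix applies the usual kernel error-unwind idiom: a single exit ladder of labels in reverse allocation order, instead of ad-hoc frees in each failing branch. A condensed sketch of that shape (structure and field names are simplified; in the driver the RSS key actually lives in the per-vport config rather than the vport itself):

    #include <linux/slab.h>
    #include <linux/types.h>

    struct vport_stub {              /* illustrative, not the driver's struct */
            u16 *q_vector_idxs;
            u8  *rss_key;
    };

    /* Two-step allocation with goto-based unwind: each label frees exactly
     * what had been allocated successfully before the failing step.
     */
    static struct vport_stub *vport_stub_alloc(size_t num_q, size_t key_len)
    {
            struct vport_stub *vport;

            vport = kzalloc(sizeof(*vport), GFP_KERNEL);
            if (!vport)
                    return NULL;

            vport->q_vector_idxs = kcalloc(num_q, sizeof(u16), GFP_KERNEL);
            if (!vport->q_vector_idxs)
                    goto free_vport;

            vport->rss_key = kzalloc(key_len, GFP_KERNEL);
            if (!vport->rss_key)
                    goto free_vector_idxs;

            return vport;

    free_vector_idxs:
            kfree(vport->q_vector_idxs);
    free_vport:
            kfree(vport);

            return NULL;
    }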
Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Reviewed-by: Pavan Kumar Linga Reviewed-by: Aleksandr Loktionov Suggested-by: Pavan Kumar Linga Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 8a558cbda51bef09773c72bf74a32047479110c7) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 6509987f5077b..a7b0ea596064e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1123,11 +1123,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, num_max_q = max(max_q->max_txq, max_q->max_rxq); vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL); - if (!vport->q_vector_idxs) { - kfree(vport); + if (!vport->q_vector_idxs) + goto free_vport; - return NULL; - } idpf_vport_init(vport, max_q); /* This alloc is done separate from the LUT because it's not strictly @@ -1137,11 +1135,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, */ rss_data = &adapter->vport_config[idx]->user_config.rss_data; rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL); - if (!rss_data->rss_key) { - kfree(vport); + if (!rss_data->rss_key) + goto free_vector_idxs; - return NULL; - } /* Initialize default rss key */ netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size); @@ -1154,6 +1150,13 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, adapter->next_vport = idpf_get_free_slot(adapter); return vport; + +free_vector_idxs: + kfree(vport->q_vector_idxs); +free_vport: + kfree(vport); + + return NULL; } /** From cb281976404144af3f1c945975efb3ddb5a4726b Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:03 -0400 Subject: [PATCH 43/53] idpf: protect shutdown from reset jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Larysa Zaremba commit ed375b182140eeb9c73609b17939c8a29b27489e Before the referenced commit, the shutdown just called idpf_remove(), this way IDPF_REMOVE_IN_PROG was protecting us from the serv_task rescheduling reset. Without this flag set the shutdown process is vulnerable to HW reset or any other triggering conditions (such as default mailbox being destroyed). When one of conditions checked in idpf_service_task becomes true, vc_event_task can be rescheduled during shutdown, this leads to accessing freed memory e.g. idpf_req_rel_vector_indexes() trying to read vport->q_vector_idxs. This in turn causes the system to become defunct during e.g. systemctl kexec. Considering using IDPF_REMOVE_IN_PROG would lead to more heavy shutdown process, instead just cancel the serv_task before cancelling adapter->serv_task before cancelling adapter->vc_event_task to ensure that reset will not be scheduled while we are doing a shutdown. 
Fixes: 4c9106f4906a ("idpf: fix adapter NULL pointer dereference on reboot") Reviewed-by: Michal Swiatkowski Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Reviewed-by: Emil Tantilov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit ed375b182140eeb9c73609b17939c8a29b27489e) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index fdbb06dc94d07..610bc32fa2e76 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -93,6 +93,7 @@ static void idpf_shutdown(struct pci_dev *pdev) { struct idpf_adapter *adapter = pci_get_drvdata(pdev); + cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->vc_event_task); idpf_vc_core_deinit(adapter); idpf_deinit_dflt_mbx(adapter); From 740c452211076ca4be00fce63b4646e92ac725df Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:04 -0400 Subject: [PATCH 44/53] idpf: fix null-ptr-deref in idpf_features_check jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Pavan Kumar Linga commit 2dabe349f7882ff1407a784d54d8541909329088 idpf_features_check is used to validate the TX packet. skb header length is compared with the hardware supported value received from the device control plane. The value is stored in the adapter structure and to access it, vport pointer is used. During reset all the vports are released and the vport pointer that the netdev private structure points to is NULL. To avoid null-ptr-deref, store the max header length value in netdev private structure. This also helps to cache the value and avoid accessing adapter pointer in hot path. BUG: kernel NULL pointer dereference, address: 0000000000000068 ... RIP: 0010:idpf_features_check+0x6d/0xe0 [idpf] Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x154/0x520 ? exc_page_fault+0x76/0x190 ? asm_exc_page_fault+0x26/0x30 ? idpf_features_check+0x6d/0xe0 [idpf] netif_skb_features+0x88/0x310 validate_xmit_skb+0x2a/0x2b0 validate_xmit_skb_list+0x4c/0x70 sch_direct_xmit+0x19d/0x3a0 __dev_queue_xmit+0xb74/0xe70 ... 
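The shape of the fix generalizes: anything a hot-path ndo callback needs should be cached in the netdev private area at configuration time, so the callback never chases a vport/adapter pointer that a reset may have cleared. A minimal sketch of a features_check handler built that way (the private struct, field and the dropped-feature policy are illustrative, not the driver's exact logic):

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    struct example_priv {                  /* illustrative netdev private data */
            u16 max_tx_hdr_size;           /* cached when the netdev is set up */
    };

    /* Hot-path .ndo_features_check handler that only touches data owned by
     * the netdev itself; nothing a reset can tear down is dereferenced.
     */
    static netdev_features_t
    example_features_check(struct sk_buff *skb, struct net_device *netdev,
                           netdev_features_t features)
    {
            struct example_priv *np = netdev_priv(netdev);

            if (skb_network_header_len(skb) > np->max_tx_hdr_size)
                    features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

            return features;
    }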
Fixes: a251eee62133 ("idpf: add SRIOV support and other ndo_ops") Reviewed-by: Madhu Chititm Signed-off-by: Pavan Kumar Linga Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 2dabe349f7882ff1407a784d54d8541909329088) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf.h | 2 ++ drivers/net/ethernet/intel/idpf/idpf_lib.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 99c7b6c015a2a..8f793b642fac5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -144,6 +144,7 @@ enum idpf_vport_state { * @vport_id: Vport identifier * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index + * @max_tx_hdr_size: Max header length hardware can support * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -154,6 +155,7 @@ struct idpf_netdev_priv { u32 vport_id; u32 link_speed_mbps; u16 vport_idx; + u16 max_tx_hdr_size; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index a7b0ea596064e..c32685cfbcdb4 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -738,6 +738,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport = vport; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); vport->netdev = netdev; return idpf_init_mac_addr(vport, netdev); @@ -755,6 +756,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->adapter = adapter; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); spin_lock_init(&np->stats_lock); @@ -2244,8 +2246,8 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_adapter *adapter = vport->adapter; + struct idpf_netdev_priv *np = netdev_priv(netdev); + u16 max_tx_hdr_size = np->max_tx_hdr_size; size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -2268,7 +2270,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, goto unsupported; len = skb_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; if (!skb->encapsulation) @@ -2281,7 +2283,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, /* IPLEN can support at most 127 dwords */ len = skb_inner_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; /* No need to validate L4LEN as TCP is the only protocol with a From f286fee442a0b0640289865505813f06df3f7977 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:04 -0400 Subject: [PATCH 45/53] idpf: fix idpf_vport_splitq_napi_poll() jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Eric Dumazet commit 407e0efdf8baf1672876d5948b75049860a93e59 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. 
Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/407e0efd.failed idpf_vport_splitq_napi_poll() can incorrectly return @budget after napi_complete_done() has been called. This violates NAPI rules, because after napi_complete_done(), current thread lost napi ownership. Move the test against POLL_MODE before the napi_complete_done(). Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Reported-by: Peter Newman Closes: https://lore.kernel.org/netdev/20250520121908.1805732-1-edumazet@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Joshua Hay Cc: Alan Brady Cc: Madhu Chittim Cc: Phani Burra Cc: Pavan Kumar Linga Link: https://patch.msgid.link/20250520124030.1983936-1-edumazet@google.com Signed-off-by: Jakub Kicinski (cherry picked from commit 407e0efdf8baf1672876d5948b75049860a93e59) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf_txrx.c --- .../407e0efd.failed | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/407e0efd.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/407e0efd.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/407e0efd.failed new file mode 100644 index 0000000000000..3f324e10dc1f3 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/407e0efd.failed @@ -0,0 +1,59 @@ +idpf: fix idpf_vport_splitq_napi_poll() + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Eric Dumazet +commit 407e0efdf8baf1672876d5948b75049860a93e59 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/407e0efd.failed + +idpf_vport_splitq_napi_poll() can incorrectly return @budget +after napi_complete_done() has been called. + +This violates NAPI rules, because after napi_complete_done(), +current thread lost napi ownership. + +Move the test against POLL_MODE before the napi_complete_done(). 
+ +Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") + Reported-by: Peter Newman +Closes: https://lore.kernel.org/netdev/20250520121908.1805732-1-edumazet@google.com/T/#u + Signed-off-by: Eric Dumazet + Cc: Joshua Hay + Cc: Alan Brady + Cc: Madhu Chittim + Cc: Phani Burra + Cc: Pavan Kumar Linga +Link: https://patch.msgid.link/20250520124030.1983936-1-edumazet@google.com + Signed-off-by: Jakub Kicinski +(cherry picked from commit 407e0efdf8baf1672876d5948b75049860a93e59) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf_txrx.c +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 7501a74f8dd9,2d5f5c9f91ce..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@@ -3926,15 -4043,7 +3934,19 @@@ static int idpf_vport_splitq_napi_poll( + else + idpf_vport_intr_set_wb_on_itr(q_vector); + +++<<<<<<< HEAD + + /* Switch to poll mode in the tear-down path after sending disable + + * queues virtchnl message, as the interrupts will be disabled after + + * that + + */ + + if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE, + + q_vector->tx[0]->flags))) + + return budget; + + else + + return work_done; +++======= ++ return work_done; +++>>>>>>> 407e0efdf8ba (idpf: fix idpf_vport_splitq_napi_poll()) + } + + /** +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.c From de41e8057493d86748aa463dea01016cbc854d0a Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:05 -0400 Subject: [PATCH 46/53] idpf: fix a race in txq wakeup jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Brian Vazquez commit 7292af042bcf22e2c18b96ed250f78498a5b28ab Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/7292af04.failed Add a helper function to correctly handle the lockless synchronization when the sender needs to block. The paradigm is if (no_resources()) { stop_queue(); barrier(); if (!no_resources()) restart_queue(); } netif_subqueue_maybe_stop already handles the paradigm correctly, but the code split the check for resources in three parts, the first one (descriptors) followed the protocol, but the other two (completions and tx_buf) were only doing the first part and so race prone. Luckily netif_subqueue_maybe_stop macro already allows you to use a function to evaluate the start/stop conditions so the fix only requires the right helper function to evaluate all the conditions at once. The patch removes idpf_tx_maybe_stop_common since it's no longer needed and instead adjusts separately the conditions for singleq and splitq. Note that idpf_tx_buf_hw_update doesn't need to check for resources since that will be covered in idpf_tx_splitq_frame. 
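To make the role of the barrier in that paradigm explicit, here is a hand-rolled sketch using the generic txq helpers. It is only an illustration of what netif_subqueue_maybe_stop() already provides; has_room() stands in for a single combined check of all the resources (descriptors, completions, buffers):

    #include <asm/barrier.h>
    #include <linux/errno.h>
    #include <linux/netdevice.h>

    static int example_maybe_stop(struct netdev_queue *nq,
                                  bool (*has_room)(void *ctx), void *ctx)
    {
            if (likely(has_room(ctx)))
                    return 0;

            netif_tx_stop_queue(nq);

            /* Pairs with a barrier on the completion side: either we see
             * the resources it just freed, or it sees the stopped queue
             * and restarts it.
             */
            smp_mb();

            if (unlikely(has_room(ctx))) {
                    netif_tx_start_queue(nq);
                    return 0;
            }

            return -EBUSY;
    }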
To reproduce: Reduce the threshold for pending completions to increase the chances of hitting this pause by changing your kernel: drivers/net/ethernet/intel/idpf/idpf_txrx.h -#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) +#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 4) Use pktgen to force the host to push small pkts very aggressively: ./pktgen_sample02_multiqueue.sh -i eth1 -s 100 -6 -d $IP -m $MAC \ -p 10000-10000 -t 16 -n 0 -v -x -c 64 Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit") Reviewed-by: Jacob Keller Reviewed-by: Madhu Chittim Signed-off-by: Josh Hay Signed-off-by: Brian Vazquez Signed-off-by: Luigi Rizzo Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 7292af042bcf22e2c18b96ed250f78498a5b28ab) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf_txrx.c --- .../7292af04.failed | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/7292af04.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/7292af04.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/7292af04.failed new file mode 100644 index 0000000000000..cf6827f28f5b9 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/7292af04.failed @@ -0,0 +1,199 @@ +idpf: fix a race in txq wakeup + +jira LE-3467 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Brian Vazquez +commit 7292af042bcf22e2c18b96ed250f78498a5b28ab +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/7292af04.failed + +Add a helper function to correctly handle the lockless +synchronization when the sender needs to block. The paradigm is + + if (no_resources()) { + stop_queue(); + barrier(); + if (!no_resources()) + restart_queue(); + } + +netif_subqueue_maybe_stop already handles the paradigm correctly, but +the code split the check for resources in three parts, the first one +(descriptors) followed the protocol, but the other two (completions and +tx_buf) were only doing the first part and so race prone. + +Luckily netif_subqueue_maybe_stop macro already allows you to use a +function to evaluate the start/stop conditions so the fix only requires +the right helper function to evaluate all the conditions at once. + +The patch removes idpf_tx_maybe_stop_common since it's no longer needed +and instead adjusts separately the conditions for singleq and splitq. + +Note that idpf_tx_buf_hw_update doesn't need to check for resources +since that will be covered in idpf_tx_splitq_frame. 
+ +To reproduce: + +Reduce the threshold for pending completions to increase the chances of +hitting this pause by changing your kernel: + +drivers/net/ethernet/intel/idpf/idpf_txrx.h + +-#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) ++#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 4) + +Use pktgen to force the host to push small pkts very aggressively: + +./pktgen_sample02_multiqueue.sh -i eth1 -s 100 -6 -d $IP -m $MAC \ + -p 10000-10000 -t 16 -n 0 -v -x -c 64 + +Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit") + Reviewed-by: Jacob Keller + Reviewed-by: Madhu Chittim + Signed-off-by: Josh Hay + Signed-off-by: Brian Vazquez + Signed-off-by: Luigi Rizzo + Reviewed-by: Simon Horman + Tested-by: Samuel Salin + Signed-off-by: Tony Nguyen +(cherry picked from commit 7292af042bcf22e2c18b96ed250f78498a5b28ab) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf_txrx.c +diff --cc drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 7501a74f8dd9,5cf440e09d0a..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@@ -1971,29 -2184,19 +1971,42 @@@ void idpf_tx_splitq_build_flow_desc(uni + desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); + } + ++ /* Global conditions to tell whether the txq (and related resources) ++ * has room to allow the use of "size" descriptors. ++ */ ++ static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) ++ { ++ if (IDPF_DESC_UNUSED(tx_q) < size || ++ IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > ++ IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || ++ IDPF_TX_BUF_RSV_LOW(tx_q)) ++ return 0; ++ return 1; ++ } ++ + +/** + + * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions + + * @tx_q: the queue to be checked + + * @size: number of descriptors we want to assure is available + + * + + * Returns 0 if stop is not needed + + */ + +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) + +{ + + struct netdev_queue *nq; + + + + if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) + + return 0; + + + + u64_stats_update_begin(&tx_q->stats_sync); + + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + + u64_stats_update_end(&tx_q->stats_sync); + + + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + + + + return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); + +} + + + /** + * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions + * @tx_q: the queue to be checked +@@@ -2001,33 -2204,17 +2014,41 @@@ + * + * Returns 0 if stop is not needed + */ + -static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, + +static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, + unsigned int descs_needed) + { +++<<<<<<< HEAD + + if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) + + goto splitq_stop; + + + + /* If there are too many outstanding completions expected on the + + * completion queue, stop the TX queue to give the device some time to + + * catch up + + */ + + if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > + + IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq))) + + goto splitq_stop; + + + + /* Also check for available book keeping buffers; if we are low, stop + + * the queue to wait for more completions + + */ + + if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q))) + + goto splitq_stop; + + + + return 0; + + + +splitq_stop: +++======= ++ if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, ++ idpf_txq_has_room(tx_q, descs_needed), ++ 1, 1)) 
++ return 0; ++ +++>>>>>>> 7292af042bcf (idpf: fix a race in txq wakeup) + u64_stats_update_begin(&tx_q->stats_sync); + - u64_stats_inc(&tx_q->q_stats.q_busy); + + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + + netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx); + + return -EBUSY; + } +@@@ -2047,11 -2234,9 +2068,14 @@@ void idpf_tx_buf_hw_update(struct idpf_ + { + struct netdev_queue *nq; + + - nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + tx_q->next_to_use = val; + +++<<<<<<< HEAD + + idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); + + +++======= +++>>>>>>> 7292af042bcf (idpf: fix a race in txq wakeup) + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, +diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index 5c3d34d3de8a..464b98b59418 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@ -356,17 +356,18 @@ static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + { + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; ++ int csum, tso, needed; + unsigned int count; + __be16 protocol; +- int csum, tso; + + count = idpf_tx_desc_count_required(tx_q, skb); + if (unlikely(!count)) + return idpf_tx_drop_skb(tx_q, skb); + +- if (idpf_tx_maybe_stop_common(tx_q, +- count + IDPF_TX_DESCS_PER_CACHE_LINE + +- IDPF_TX_DESCS_FOR_CTX)) { ++ needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX; ++ if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, ++ IDPF_DESC_UNUSED(tx_q), ++ needed, needed)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_BUSY; +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_txrx.c From b31692eefd581a167883890d4df66542cf9e412e Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:05 -0400 Subject: [PATCH 47/53] idpf: avoid mailbox timeout delays during reset jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Emil Tantilov commit 9dc63d8ff182150d7d7b318ab9389702a2c0a292 Mailbox operations are not possible while the driver is in reset. Operations that require MBX exchange with the control plane will result in long delays if executed while a reset is in progress: ethtool -L combined 8& echo 1 > /sys/class/net//device/reset idpf 0000:83:00.0: HW reset detected idpf 0000:83:00.0: Device HW Reset initiated idpf 0000:83:00.0: Transaction timed-out (op:504 cookie:be00 vc_op:504 salt:be timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:508 cookie:bf00 vc_op:508 salt:bf timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:512 cookie:c000 vc_op:512 salt:c0 timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:510 cookie:c100 vc_op:510 salt:c1 timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:509 cookie:c200 vc_op:509 salt:c2 timeout:60000ms) idpf 0000:83:00.0: Transaction timed-out (op:509 cookie:c300 vc_op:509 salt:c3 timeout:60000ms) idpf 0000:83:00.0: Transaction timed-out (op:505 cookie:c400 vc_op:505 salt:c4 timeout:60000ms) idpf 0000:83:00.0: Failed to configure queues for vport 0, -62 Disable mailbox communication in case of a reset, unless it's done during a driver load, where the virtchnl operations are needed to configure the device. 
Fixes: 8077c727561aa ("idpf: add controlq init and reset checks") Co-developed-by: Joshua Hay Signed-off-by: Joshua Hay Signed-off-by: Emil Tantilov Reviewed-by: Ahmed Zaki Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 9dc63d8ff182150d7d7b318ab9389702a2c0a292) Signed-off-by: Jonathan Maple --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 18 +++++++++++++----- .../net/ethernet/intel/idpf/idpf_virtchnl.c | 2 +- .../net/ethernet/intel/idpf/idpf_virtchnl.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index c32685cfbcdb4..aea4da7e54a84 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1817,11 +1817,19 @@ void idpf_vc_event_task(struct work_struct *work) if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) return; - if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) || - test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { - set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); - idpf_init_hard_reset(adapter); - } + if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags)) + goto func_reset; + + if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) + goto drv_load; + + return; + +func_reset: + idpf_vc_xn_shutdown(adapter->vcxn_mngr); +drv_load: + set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + idpf_init_hard_reset(adapter); } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 2e0c41883ff59..b97e46e494c43 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -385,7 +385,7 @@ static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr) * All waiting threads will be woken-up and their transaction aborted. Further * operations on that object will fail. */ -static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) +void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) { int i; diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h index 83da5d8da56bf..23271cf0a2160 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -66,5 +66,6 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport); int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs); int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get); int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get); +void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr); #endif /* _IDPF_VIRTCHNL_H_ */ From b93e9c7220b68d0e918e00d2f5b135dfc5a972d9 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:05 -0400 Subject: [PATCH 48/53] idpf: check error for register_netdev() on init jira LE-3467 cve CVE-2025-22116 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Emil Tantilov commit 680811c67906191b237bbafe7dabbbad64649b39 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/680811c6.failed Current init logic ignores the error code from register_netdev(), which will cause WARN_ON() on attempt to unregister it, if there was one, and there is no info for the user that the creation of the netdev failed. 
WARNING: CPU: 89 PID: 6902 at net/core/dev.c:11512 unregister_netdevice_many_notify+0x211/0x1a10 ... [ 3707.563641] unregister_netdev+0x1c/0x30 [ 3707.563656] idpf_vport_dealloc+0x5cf/0xce0 [idpf] [ 3707.563684] idpf_deinit_task+0xef/0x160 [idpf] [ 3707.563712] idpf_vc_core_deinit+0x84/0x320 [idpf] [ 3707.563739] idpf_remove+0xbf/0x780 [idpf] [ 3707.563769] pci_device_remove+0xab/0x1e0 [ 3707.563786] device_release_driver_internal+0x371/0x530 [ 3707.563803] driver_detach+0xbf/0x180 [ 3707.563816] bus_remove_driver+0x11b/0x2a0 [ 3707.563829] pci_unregister_driver+0x2a/0x250 Introduce an error check and log the vport number and error code. On removal make sure to check VPORT_REG_NETDEV flag prior to calling unregister and free on the netdev. Add local variables for idx, vport_config and netdev for readability. Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Suggested-by: Tony Nguyen Signed-off-by: Emil Tantilov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen (cherry picked from commit 680811c67906191b237bbafe7dabbbad64649b39) Signed-off-by: Jonathan Maple # Conflicts: # drivers/net/ethernet/intel/idpf/idpf_lib.c --- .../680811c6.failed | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/680811c6.failed diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/680811c6.failed b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/680811c6.failed new file mode 100644 index 0000000000000..22d35c1b96ed3 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/680811c6.failed @@ -0,0 +1,76 @@ +idpf: check error for register_netdev() on init + +jira LE-3467 +cve CVE-2025-22116 +Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 +commit-author Emil Tantilov +commit 680811c67906191b237bbafe7dabbbad64649b39 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/680811c6.failed + +Current init logic ignores the error code from register_netdev(), +which will cause WARN_ON() on attempt to unregister it, if there was one, +and there is no info for the user that the creation of the netdev failed. + +WARNING: CPU: 89 PID: 6902 at net/core/dev.c:11512 unregister_netdevice_many_notify+0x211/0x1a10 +... +[ 3707.563641] unregister_netdev+0x1c/0x30 +[ 3707.563656] idpf_vport_dealloc+0x5cf/0xce0 [idpf] +[ 3707.563684] idpf_deinit_task+0xef/0x160 [idpf] +[ 3707.563712] idpf_vc_core_deinit+0x84/0x320 [idpf] +[ 3707.563739] idpf_remove+0xbf/0x780 [idpf] +[ 3707.563769] pci_device_remove+0xab/0x1e0 +[ 3707.563786] device_release_driver_internal+0x371/0x530 +[ 3707.563803] driver_detach+0xbf/0x180 +[ 3707.563816] bus_remove_driver+0x11b/0x2a0 +[ 3707.563829] pci_unregister_driver+0x2a/0x250 + +Introduce an error check and log the vport number and error code. +On removal make sure to check VPORT_REG_NETDEV flag prior to calling +unregister and free on the netdev. + +Add local variables for idx, vport_config and netdev for readability. 
+ +Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") + Suggested-by: Tony Nguyen + Signed-off-by: Emil Tantilov + Reviewed-by: Simon Horman + Tested-by: Samuel Salin + Signed-off-by: Tony Nguyen +(cherry picked from commit 680811c67906191b237bbafe7dabbbad64649b39) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/idpf/idpf_lib.c +diff --cc drivers/net/ethernet/intel/idpf/idpf_lib.c +index aea4da7e54a8,a055a47449f1..000000000000 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@@ -928,12 -927,19 +928,24 @@@ static int idpf_stop(struct net_device + static void idpf_decfg_netdev(struct idpf_vport *vport) + { + struct idpf_adapter *adapter = vport->adapter; ++ u16 idx = vport->idx; + +++<<<<<<< HEAD + + unregister_netdev(vport->netdev); + + free_netdev(vport->netdev); +++======= ++ kfree(vport->rx_ptype_lkup); ++ vport->rx_ptype_lkup = NULL; ++ ++ if (test_and_clear_bit(IDPF_VPORT_REG_NETDEV, ++ adapter->vport_config[idx]->flags)) { ++ unregister_netdev(vport->netdev); ++ free_netdev(vport->netdev); ++ } +++>>>>>>> 680811c67906 (idpf: check error for register_netdev() on init) + vport->netdev = NULL; + +- adapter->netdevs[vport->idx] = NULL; ++ adapter->netdevs[idx] = NULL; + } + + /** +* Unmerged path drivers/net/ethernet/intel/idpf/idpf_lib.c From 6510a3de91eb02c80245f9d3cee74738d6053fc3 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:06 -0400 Subject: [PATCH 49/53] net: treat possible_net_t net pointer as an RCU one and add read_pnet_rcu() jira LE-3467 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Jiri Pirko commit 2034d90ae41ae93e30d492ebcf1f06f97a9cfba6 Make the net pointer stored in possible_net_t structure annotated as an RCU pointer. Change the access helpers to treat it as such. Introduce read_pnet_rcu() helper to allow caller to dereference the net pointer under RCU read lock. Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller (cherry picked from commit 2034d90ae41ae93e30d492ebcf1f06f97a9cfba6) Signed-off-by: Jonathan Maple --- include/net/net_namespace.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 54648a4484b65..abcd6fe82170c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -342,21 +342,30 @@ static inline int check_net(const struct net *net) typedef struct { #ifdef CONFIG_NET_NS - struct net *net; + struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS - pnet->net = net; + rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS - return pnet->net; + return rcu_dereference_protected(pnet->net, true); +#else + return &init_net; +#endif +} + +static inline struct net *read_pnet_rcu(possible_net_t *pnet) +{ +#ifdef CONFIG_NET_NS + return rcu_dereference(pnet->net); #else return &init_net; #endif From c92854d6d2e3d5c3a7612a9aae3a6b50151e7473 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:06 -0400 Subject: [PATCH 50/53] net: add dev_net_rcu() helper jira LE-3467 cve CVE-2025-21765 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Eric Dumazet commit 482ad2a4ace2740ca0ff1cbc8f3c7f862f3ab507 dev->nd_net can change, readers should either use rcu_read_lock() or RTNL. We currently use a generic helper, dev_net() with no debugging support. We probably have many hidden bugs. Add dev_net_rcu() helper for callers using rcu_read_lock() protection. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-2-edumazet@google.com Signed-off-by: Jakub Kicinski (cherry picked from commit 482ad2a4ace2740ca0ff1cbc8f3c7f862f3ab507) Signed-off-by: Jonathan Maple --- include/linux/netdevice.h | 6 ++++++ include/net/net_namespace.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c4fdbef3098f0..cef072e7cfbbc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2468,6 +2468,12 @@ struct net *dev_net(const struct net_device *dev) return read_pnet(&dev->nd_net); } +static inline +struct net *dev_net_rcu(const struct net_device *dev) +{ + return read_pnet_rcu(&dev->nd_net); +} + static inline void dev_net_set(struct net_device *dev, struct net *net) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index abcd6fe82170c..c410b14582952 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -362,7 +362,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } -static inline struct net *read_pnet_rcu(possible_net_t *pnet) +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); From 59ff24e84dbb012b0a84503ab4f63f60690d2b43 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:06 -0400 Subject: [PATCH 51/53] ipv6: use RCU protection in ip6_default_advmss() jira LE-3467 cve CVE-2025-21765 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Eric Dumazet commit 3c8ffcd248da34fc41e52a46e51505900115fc2a ip6_default_advmss() needs rcu protection to make sure the net structure it reads does not disappear. 
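The conversion pattern is the same for the dev_net() users touched in this series: take the RCU read lock, resolve the namespace through the dev_net_rcu() helper introduced above, and finish every use of it before unlocking. A condensed sketch, using the ipv6 min-advmss sysctl from this patch as the per-netns value being read:

    #include <linux/netdevice.h>
    #include <linux/rcupdate.h>
    #include <net/net_namespace.h>

    /* dev->nd_net is only guaranteed stable while the RCU read lock is
     * held, so the net pointer must not be used after rcu_read_unlock().
     */
    static unsigned int example_min_advmss(const struct net_device *dev,
                                           unsigned int mtu)
    {
            struct net *net;

            rcu_read_lock();
            net = dev_net_rcu(dev);
            if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
                    mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
            rcu_read_unlock();

            return mtu;
    }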
Fixes: 5578689a4e3c ("[NETNS][IPV6] route6 - make route6 per namespace") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-11-edumazet@google.com Signed-off-by: Jakub Kicinski (cherry picked from commit 3c8ffcd248da34fc41e52a46e51505900115fc2a) Signed-off-by: Jonathan Maple --- net/ipv6/route.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2995fe4ca54ef..9415dd1aea672 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2763,13 +2763,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; unsigned int mtu = dst_mtu(dst); - struct net *net = dev_net(dev); + struct net *net; mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + rcu_read_lock(); + + net = dev_net_rcu(dev); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) mtu = net->ipv6.sysctl.ip6_rt_min_advmss; + rcu_read_unlock(); + /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. From 77874c7fe949cc2451ff3d6861afc0c7e28c12bb Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:07 -0400 Subject: [PATCH 52/53] ndisc: use RCU protection in ndisc_alloc_skb() jira LE-3467 cve CVE-2025-21764 Rebuild_History Non-Buildable kernel-4.18.0-553.58.1.el8_10 commit-author Eric Dumazet commit 628e6d18930bbd21f2d4562228afe27694f66da9 ndisc_alloc_skb() can be called without RTNL or RCU being held. Add RCU protection to avoid possible UAF. Fixes: de09334b9326 ("ndisc: Introduce ndisc_alloc_skb() helper.") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-3-edumazet@google.com Signed-off-by: Jakub Kicinski (cherry picked from commit 628e6d18930bbd21f2d4562228afe27694f66da9) Signed-off-by: Jonathan Maple --- net/ipv6/ndisc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3532e0a037d67..492255d3bc48a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -400,15 +400,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); - if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + if (!skb) return NULL; - } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -419,7 +415,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ - skb_set_owner_w(skb, sk); + rcu_read_lock(); + skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk); + rcu_read_unlock(); return skb; } From 2e416d1677153d818b7930066693f5b696873dc5 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 27 Jun 2025 18:57:19 -0400 Subject: [PATCH 53/53] Rebuild rocky8_10 with kernel-4.18.0-553.58.1.el8_10 Rebuild_History BUILDABLE Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% Number of commits in upstream range v4.18~1..kernel-mainline: 553283 Number of commits in rpm: 59 Number of commits matched with upstream: 52 (88.14%) Number of commits in upstream but not in rpm: 553231 Number of commits NOT found in upstream: 7 (11.86%) Rebuilding Kernel on Branch 
rocky8_10_rebuild_kernel-4.18.0-553.58.1.el8_10 for kernel-4.18.0-553.58.1.el8_10 Clean Cherry Picks: 32 (61.54%) Empty Cherry Picks: 20 (38.46%) _______________________________ Full Details Located here: ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/rebuild.details.txt Includes: * git commit header above * Empty Commits with upstream SHA * RPM ChangeLog Entries that could not be matched Individual Empty Commit failures contained in the same containing directory. The git message for empty commits will have the path for the failed commit. File names are the first 8 characters of the upstream SHA --- Makefile.rhelver | 2 +- arch/s390/include/asm/pci.h | 3 +- arch/s390/pci/pci.c | 89 +- arch/s390/pci/pci_bus.h | 7 +- arch/s390/pci/pci_debug.c | 10 +- arch/s390/pci/pci_event.c | 33 +- arch/s390/pci/pci_sysfs.c | 71 +- .../rebuild.details.txt | 43 + configs/kernel-4.18.0-aarch64-debug.config | 1 + configs/kernel-4.18.0-aarch64.config | 1 + configs/kernel-4.18.0-ppc64le-debug.config | 1 + configs/kernel-4.18.0-ppc64le.config | 1 + configs/kernel-4.18.0-x86_64-debug.config | 1 + configs/kernel-4.18.0-x86_64.config | 1 + drivers/net/ethernet/intel/Kconfig | 12 +- drivers/net/ethernet/intel/idpf/Kconfig | 27 + drivers/net/ethernet/intel/idpf/Makefile | 3 +- drivers/net/ethernet/intel/idpf/idpf.h | 6 +- .../net/ethernet/intel/idpf/idpf_ethtool.c | 128 +- drivers/net/ethernet/intel/idpf/idpf_lib.c | 116 +- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 182 ++- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 1034 ++++++++++------- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 455 +++++--- .../net/ethernet/intel/idpf/idpf_virtchnl.c | 88 +- drivers/pci/hotplug/s390_pci_hpc.c | 66 +- 25 files changed, 1376 insertions(+), 1005 deletions(-) create mode 100644 ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/rebuild.details.txt create mode 100644 drivers/net/ethernet/intel/idpf/Kconfig diff --git a/Makefile.rhelver b/Makefile.rhelver index 01e8c44f1a6a2..3059c19f48aef 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 10 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. 
-RHEL_RELEASE = 553.56.1 +RHEL_RELEASE = 553.58.1 # # ZSTREAM diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6652630be19e6..cd4aa64781b74 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -123,6 +123,7 @@ struct zpci_dev { struct kref kref; struct hotplug_slot hotplug_slot; + struct mutex state_lock; /* protect state changes */ enum zpci_state state; u32 fid; /* function ID, used by sclp */ u32 fh; /* function handle, used by insn's */ @@ -146,7 +147,6 @@ struct zpci_dev { u8 reserved : 1; unsigned int devfn; /* DEVFN part of the RID*/ - struct mutex lock; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ @@ -182,6 +182,7 @@ struct zpci_dev { u64 dma_mask; /* DMA address space mask */ /* Function measurement block */ + struct mutex fmb_lock; struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ u16 fmb_length; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 661c4e000c5bf..8f31b4af7dc0e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -42,6 +42,7 @@ /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); +static DEFINE_MUTEX(zpci_add_remove_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); @@ -67,6 +68,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb); struct airq_iv *zpci_aif_sbv; EXPORT_SYMBOL_GPL(zpci_aif_sbv); +void zpci_zdev_put(struct zpci_dev *zdev) +{ + if (!zdev) + return; + mutex_lock(&zpci_add_remove_lock); + kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); +} + struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; @@ -808,12 +818,12 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); * equivalent to its state during boot when first probing a driver. * Consequently after reset the PCI function requires re-initialization via the * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors() - * and enabling the function via e.g.pci_enablde_device_flags().The caller + * and enabling the function via e.g. pci_enable_device_flags(). The caller * must guard against concurrent reset attempts. * * In most cases this function should not be called directly but through * pci_reset_function() or pci_reset_bus() which handle the save/restore and - * locking. + * locking - asserted by lockdep. 
* * Return: 0 on success and an error value otherwise */ @@ -822,6 +832,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) u8 status; int rc; + lockdep_assert_held(&zdev->state_lock); zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); if (zdev_enabled(zdev)) { /* Disables device access, DMAs and IRQs (reset state) */ @@ -885,7 +896,8 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) goto error; zdev->state = state; - mutex_init(&zdev->lock); + mutex_init(&zdev->state_lock); + mutex_init(&zdev->fmb_lock); mutex_init(&zdev->kzdev_lock); return zdev; @@ -911,6 +923,7 @@ int zpci_add_device(struct zpci_dev *zdev) { int rc; + mutex_lock(&zpci_add_remove_lock); zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); rc = zpci_init_iommu(zdev); if (rc) @@ -924,12 +937,14 @@ int zpci_add_device(struct zpci_dev *zdev) spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); return 0; error_destroy_iommu: zpci_destroy_iommu(zdev); error: zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc); + mutex_unlock(&zpci_add_remove_lock); return rc; } @@ -946,23 +961,20 @@ bool zpci_is_device_configured(struct zpci_dev *zdev) * @zdev: the zpci_dev that was reserved * * Handle the case that a given zPCI function was reserved by another system. - * After a call to this function the zpci_dev can not be found via - * get_zdev_by_fid() anymore but may still be accessible via existing - * references though it will not be functional anymore. */ void zpci_device_reserved(struct zpci_dev *zdev) { - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); - /* - * Remove device from zpci_list as it is going away. This also - * makes sure we ignore subsequent zPCI events for this device. - */ - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); + lockdep_assert_held(&zdev->state_lock); + /* We may declare the device reserved multiple times */ + if (zdev->state == ZPCI_FN_STATE_RESERVED) + return; zdev->state = ZPCI_FN_STATE_RESERVED; zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + /* + * The underlying device is gone. Allow the zdev to be freed + * as soon as all other references are gone by accounting for + * the removal as a dropped reference. + */ zpci_zdev_put(zdev); } @@ -997,6 +1009,10 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) { int rc; + lockdep_assert_held(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) + return 0; + if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); @@ -1023,39 +1039,24 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); - int ret; - if (zdev->zbus->bus) - zpci_bus_remove_device(zdev, false); + lockdep_assert_held(&zpci_add_remove_lock); + WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); + /* + * We already hold zpci_list_lock thanks to kref_put_lock(). + * This makes sure no new reference can be taken from the list. 
+ */ + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); - if (zdev->dma_table) - zpci_dma_exit_device(zdev); - if (zdev_enabled(zdev)) - zpci_disable_device(zdev); + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); - switch (zdev->state) { - case ZPCI_FN_STATE_CONFIGURED: - ret = sclp_pci_deconfigure(zdev->fid); - zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); - fallthrough; - case ZPCI_FN_STATE_STANDBY: - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - zpci_dbg(3, "rsv fid:%x\n", zdev->fid); - fallthrough; - case ZPCI_FN_STATE_RESERVED: - if (zdev->has_resources) - zpci_cleanup_bus_resources(zdev); - zpci_bus_device_unregister(zdev); - zpci_destroy_iommu(zdev); - /* fallthrough */ - default: - break; - } + if (zdev->has_resources) + zpci_cleanup_bus_resources(zdev); + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree(zdev); } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index af9f0ac79a1b1..3febb3b297c0c 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -17,11 +17,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); -static inline void zpci_zdev_put(struct zpci_dev *zdev) -{ - if (zdev) - kref_put(&zdev->kref, zpci_release_device); -} + +void zpci_zdev_put(struct zpci_dev *zdev); static inline void zpci_zdev_get(struct zpci_dev *zdev) { diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 2dec8136cc160..ddb539044a690 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -85,9 +85,9 @@ static int pci_perf_show(struct seq_file *m, void *v) if (!zdev) return 0; - mutex_lock(&zdev->lock); + mutex_lock(&zdev->fmb_lock); if (!zdev->fmb) { - mutex_unlock(&zdev->lock); + mutex_unlock(&zdev->fmb_lock); seq_puts(m, "FMB statistics disabled\n"); return 0; } @@ -124,7 +124,7 @@ static int pci_perf_show(struct seq_file *m, void *v) } pci_sw_counter_show(m); - mutex_unlock(&zdev->lock); + mutex_unlock(&zdev->fmb_lock); return 0; } @@ -142,7 +142,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, if (rc) return rc; - mutex_lock(&zdev->lock); + mutex_lock(&zdev->fmb_lock); switch (val) { case 0: rc = zpci_fmb_disable_device(zdev); @@ -151,7 +151,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, rc = zpci_fmb_enable_device(zdev); break; } - mutex_unlock(&zdev->lock); + mutex_unlock(&zdev->fmb_lock); return rc ? 
rc : count; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index dcebe41181954..21f34b2269882 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -260,6 +260,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) zpci_err_hex(ccdf, sizeof(*ccdf)); if (zdev) { + mutex_lock(&zdev->state_lock); zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); @@ -288,6 +289,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) } pci_dev_put(pdev); no_pdev: + if (zdev) + mutex_unlock(&zdev->state_lock); zpci_zdev_put(zdev); } @@ -314,6 +317,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) zdev->state = ZPCI_FN_STATE_STANDBY; } +static void zpci_event_reappear(struct zpci_dev *zdev) +{ + lockdep_assert_held(&zdev->state_lock); + /* + * The zdev is in the reserved state. This means that it was presumed to + * go away but there are still undropped references. Now, the platform + * announced its availability again. Bring back the lingering zdev + * to standby. This is safe because we hold a temporary reference + * now so that it won't go away. Account for the re-appearance of the + * underlying device by incrementing the reference count. + */ + zdev->state = ZPCI_FN_STATE_STANDBY; + zpci_zdev_get(zdev); + zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh); +} + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); @@ -322,6 +341,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); + + if (existing_zdev) + mutex_lock(&zdev->state_lock); + switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { @@ -333,8 +356,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); /* the configuration request may be stale */ - if (zdev->state != ZPCI_FN_STATE_STANDBY) + else if (zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; } @@ -350,6 +375,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); zpci_update_fh(zdev, ccdf->fh); } break; @@ -390,8 +417,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } - if (existing_zdev) + if (existing_zdev) { + mutex_unlock(&zdev->state_lock); zpci_zdev_put(zdev); + } } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index cae280e5c047d..f712aa5bfe75b 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -49,6 +49,36 @@ static ssize_t mio_enabled_show(struct device *dev, } static DEVICE_ATTR_RO(mio_enabled); +static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) +{ + int ret; + + pci_stop_and_remove_bus_device(pdev); + if (zdev_enabled(zdev)) { + ret = zpci_disable_device(zdev); + /* + * Due to a z/VM vs LPAR inconsistency in the error + * state the FH may indicate an enabled device but + * disable says the device is already disabled don't + * treat it as an error here. 
+ */ + if (ret == -EINVAL) + ret = 0; + if (ret) + return ret; + } + + ret = zpci_enable_device(zdev); + if (ret) + return ret; + + ret = zpci_dma_init_device(zdev); + if (ret) { + zpci_disable_device(zdev); + } + return ret; +} + static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -69,6 +99,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, */ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); + + /* Device needs to be configured and state must not change */ + mutex_lock(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) + goto out; + /* device_remove_file() serializes concurrent calls ignoring all but * the first */ @@ -81,39 +117,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, */ pci_lock_rescan_remove(); if (pci_dev_is_added(pdev)) { - pci_stop_and_remove_bus_device(pdev); - if (zdev->dma_table) { - ret = zpci_dma_exit_device(zdev); - if (ret) - goto out; - } - - if (zdev_enabled(zdev)) { - ret = zpci_disable_device(zdev); - /* - * Due to a z/VM vs LPAR inconsistency in the error - * state the FH may indicate an enabled device but - * disable says the device is already disabled don't - * treat it as an error here. - */ - if (ret == -EINVAL) - ret = 0; - if (ret) - goto out; - } - - ret = zpci_enable_device(zdev); - if (ret) - goto out; - ret = zpci_dma_init_device(zdev); - if (ret) { - zpci_disable_device(zdev); - goto out; - } - pci_rescan_bus(zdev->zbus->bus); + _do_recover(pdev, zdev); } -out: pci_unlock_rescan_remove(); + +out: + mutex_unlock(&zdev->state_lock); if (kn) sysfs_unbreak_active_protection(kn); return ret ? ret : count; diff --git a/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/rebuild.details.txt b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/rebuild.details.txt new file mode 100644 index 0000000000000..ae72c440373d1 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.58.1.el8_10/rebuild.details.txt @@ -0,0 +1,43 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v4.18~1..kernel-mainline: 553283 +Number of commits in rpm: 59 +Number of commits matched with upstream: 52 (88.14%) +Number of commits in upstream but not in rpm: 553231 +Number of commits NOT found in upstream: 7 (11.86%) + +Rebuilding Kernel on Branch rocky8_10_rebuild_kernel-4.18.0-553.58.1.el8_10 for kernel-4.18.0-553.58.1.el8_10 +Clean Cherry Picks: 32 (61.54%) +Empty Cherry Picks: 20 (38.46%) +_______________________________ + +__EMPTY COMMITS__________________________ +0d48566d4b58946c8e1b0baac0347616060a81c9 s390/pci: rename lock member in struct zpci_dev +bcb5d6c769039c8358a2359e7c3ea5d97ce93108 s390/pci: introduce lock to synchronize state of zpci_dev's +6ee600bfbe0f818ffb7748d99e9b0c89d0d9f02a s390/pci: remove hotplug slot when releasing the device +c4a585e952ca403a370586d3f16e8331a7564901 s390/pci: Fix potential double remove of hotplug slot +05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1 s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs +d76f9633296785343d45f85199f4138cb724b6d2 s390/pci: Remove redundant bus removal and disable from zpci_release_device() +47c397844869ad0e6738afb5879c7492f4691122 s390/pci: Prevent self deletion in disable_slot() +4b1815a52d7eb03b3e0e6742c6728bc16a4b2d1d s390/pci: Allow re-add of a reserved but not yet removed device +774a1fa880bc949d88b5ddec9494a13be733dfa8 s390/pci: Serialize 
device addition and removal +6e9b01909a811555ff3326cf80a5847169c57806 net: remove gfp_mask from napi_alloc_skb() +4309363f19598999b25a1e55fccf688daa4cc220 idpf: remove legacy Page Pool Ethtool stats +e4891e4687c8dd136d80d6c1b857a02931ed6fc8 idpf: split &idpf_queue into 4 strictly-typed queue structures +bf9bf7042a38ebd2485592467772db50605bd4a2 idpf: avoid bloating &idpf_q_vector with big %NR_CPUS +14f662b43bf8c765114f73d184af2702b2280436 idpf: merge singleq and splitq &net_device_ops +f771314d6b75181de7079c3c7d666293e4ed2b22 idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ +3cc88e8405b8d55e0ff035e31971aadd6baee2b6 idpf: fix memleak in vport interrupt configuration +e4b398dd82f5d5867bc5f442c43abc8fba30ed2c idpf: fix netdev Tx queue stop/wake +407e0efdf8baf1672876d5948b75049860a93e59 idpf: fix idpf_vport_splitq_napi_poll() +7292af042bcf22e2c18b96ed250f78498a5b28ab idpf: fix a race in txq wakeup +680811c67906191b237bbafe7dabbbad64649b39 idpf: check error for register_netdev() on init + +__CHANGES NOT IN UPSTREAM________________ +Adding prod certs and changed cert date to 20210620 +Adding Rocky secure boot certs +Fixing vmlinuz removal +Fixing UEFI CA path +Porting to 8.10, debranding and Rocky branding +Fixing pesign_key_name values +redhat/configs: set CONFIG_IDPF_SINGLEQ as disabled diff --git a/configs/kernel-4.18.0-aarch64-debug.config b/configs/kernel-4.18.0-aarch64-debug.config index df6fcef60dd4c..44e707b1232ed 100644 --- a/configs/kernel-4.18.0-aarch64-debug.config +++ b/configs/kernel-4.18.0-aarch64-debug.config @@ -2331,6 +2331,7 @@ CONFIG_ICE_SWITCHDEV=y CONFIG_FM10K=m CONFIG_IGC=m CONFIG_IDPF=m +# CONFIG_IDPF_SINGLEQ is not set CONFIG_NET_VENDOR_MICROSOFT=y # CONFIG_NET_VENDOR_EXAR is not set # CONFIG_JME is not set diff --git a/configs/kernel-4.18.0-aarch64.config b/configs/kernel-4.18.0-aarch64.config index 36a3c741ac3b0..83e68560aaf49 100644 --- a/configs/kernel-4.18.0-aarch64.config +++ b/configs/kernel-4.18.0-aarch64.config @@ -2349,6 +2349,7 @@ CONFIG_ICE_SWITCHDEV=y CONFIG_FM10K=m CONFIG_IGC=m CONFIG_IDPF=m +# CONFIG_IDPF_SINGLEQ is not set CONFIG_NET_VENDOR_MICROSOFT=y # CONFIG_NET_VENDOR_EXAR is not set # CONFIG_JME is not set diff --git a/configs/kernel-4.18.0-ppc64le-debug.config b/configs/kernel-4.18.0-ppc64le-debug.config index 6e66abe6b9d5c..f9e5eb16d2008 100644 --- a/configs/kernel-4.18.0-ppc64le-debug.config +++ b/configs/kernel-4.18.0-ppc64le-debug.config @@ -2292,6 +2292,7 @@ CONFIG_ICE_SWITCHDEV=y CONFIG_FM10K=m CONFIG_IGC=m CONFIG_IDPF=m +# CONFIG_IDPF_SINGLEQ is not set CONFIG_NET_VENDOR_MICROSOFT=y # CONFIG_NET_VENDOR_EXAR is not set # CONFIG_JME is not set diff --git a/configs/kernel-4.18.0-ppc64le.config b/configs/kernel-4.18.0-ppc64le.config index 766aef68c21f5..93525b1e2bdeb 100644 --- a/configs/kernel-4.18.0-ppc64le.config +++ b/configs/kernel-4.18.0-ppc64le.config @@ -2293,6 +2293,7 @@ CONFIG_ICE_SWITCHDEV=y CONFIG_FM10K=m CONFIG_IGC=m CONFIG_IDPF=m +# CONFIG_IDPF_SINGLEQ is not set CONFIG_NET_VENDOR_MICROSOFT=y # CONFIG_NET_VENDOR_EXAR is not set # CONFIG_JME is not set diff --git a/configs/kernel-4.18.0-x86_64-debug.config b/configs/kernel-4.18.0-x86_64-debug.config index 7d362af952c3a..f8fefedd8fd1b 100644 --- a/configs/kernel-4.18.0-x86_64-debug.config +++ b/configs/kernel-4.18.0-x86_64-debug.config @@ -2561,6 +2561,7 @@ CONFIG_ICE_HWTS=y CONFIG_FM10K=m CONFIG_IGC=m CONFIG_IDPF=m +# CONFIG_IDPF_SINGLEQ is not set CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_MICROSOFT_MANA=m # CONFIG_NET_VENDOR_EXAR is not set diff --git 
a/configs/kernel-4.18.0-x86_64.config b/configs/kernel-4.18.0-x86_64.config index f0aa97c1558ce..4e3de67981430 100644 --- a/configs/kernel-4.18.0-x86_64.config +++ b/configs/kernel-4.18.0-x86_64.config @@ -2559,6 +2559,7 @@ CONFIG_ICE_HWTS=y CONFIG_FM10K=m CONFIG_IGC=m CONFIG_IDPF=m +# CONFIG_IDPF_SINGLEQ is not set CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_MICROSOFT_MANA=m # CONFIG_NET_VENDOR_EXAR is not set diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index ea88ce9ff84f8..a543badfe7de1 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -354,17 +354,7 @@ config IGC To compile this driver as a module, choose M here. The module will be called igc. -config IDPF - tristate "Intel(R) Infrastructure Data Path Function Support" - depends on PCI_MSI - select DIMLIB - select PAGE_POOL - select PAGE_POOL_STATS - help - This driver supports Intel(R) Infrastructure Data Path Function - devices. - To compile this driver as a module, choose M here. The module - will be called idpf. +source "drivers/net/ethernet/intel/idpf/Kconfig" endif # NET_VENDOR_INTEL diff --git a/drivers/net/ethernet/intel/idpf/Kconfig b/drivers/net/ethernet/intel/idpf/Kconfig new file mode 100644 index 0000000000000..9082c16edb7e7 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/Kconfig @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2024 Intel Corporation + +config IDPF + tristate "Intel(R) Infrastructure Data Path Function Support" + depends on PCI_MSI + select DIMLIB + select PAGE_POOL + select PAGE_POOL_STATS + help + This driver supports Intel(R) Infrastructure Data Path Function + devices. + + To compile this driver as a module, choose M here. The module + will be called idpf. + +if IDPF + +config IDPF_SINGLEQ + bool "idpf singleq support" + help + This option enables support for legacy single Rx/Tx queues w/no + completion and fill queues. Only enable if you have hardware which + wants to work in this mode as it increases the driver size and adds + runtme checks on hotpath. 
+ +endif # IDPF diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile index 6844ead2f3acf..2ce01a0b58981 100644 --- a/drivers/net/ethernet/intel/idpf/Makefile +++ b/drivers/net/ethernet/intel/idpf/Makefile @@ -12,7 +12,8 @@ idpf-y := \ idpf_ethtool.o \ idpf_lib.o \ idpf_main.o \ - idpf_singleq_txrx.o \ idpf_txrx.o \ idpf_virtchnl.o \ idpf_vf_dev.o + +idpf-$(CONFIG_IDPF_SINGLEQ) += idpf_singleq_txrx.o diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 8f793b642fac5..645829a5720ab 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -17,7 +17,6 @@ struct idpf_vport_max_q; #include #include #include -#include #include "virtchnl2.h" #include "idpf_txrx.h" @@ -304,7 +303,7 @@ struct idpf_vport { u16 num_txq_grp; struct idpf_txq_group *txq_grps; u32 txq_model; - struct idpf_queue **txqs; + struct idpf_tx_queue **txqs; bool crc_enable; u16 num_rxq; @@ -602,7 +601,8 @@ struct idpf_adapter { */ static inline int idpf_is_queue_model_split(u16 q_model) { - return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; + return !IS_ENABLED(CONFIG_IDPF_SINGLEQ) || + q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; } #define idpf_is_cap_ena(adapter, field, flag) \ diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index bbb3e263638e6..15b4d246c589f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -438,22 +438,24 @@ struct idpf_stats { .stat_offset = offsetof(_type, _stat) \ } -/* Helper macro for defining some statistics related to queues */ -#define IDPF_QUEUE_STAT(_name, _stat) \ - IDPF_STAT(struct idpf_queue, _name, _stat) +/* Helper macros for defining some statistics related to queues */ +#define IDPF_RX_QUEUE_STAT(_name, _stat) \ + IDPF_STAT(struct idpf_rx_queue, _name, _stat) +#define IDPF_TX_QUEUE_STAT(_name, _stat) \ + IDPF_STAT(struct idpf_tx_queue, _name, _stat) /* Stats associated with a Tx queue */ static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = { - IDPF_QUEUE_STAT("pkts", q_stats.tx.packets), - IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes), - IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts), + IDPF_TX_QUEUE_STAT("pkts", q_stats.packets), + IDPF_TX_QUEUE_STAT("bytes", q_stats.bytes), + IDPF_TX_QUEUE_STAT("lso_pkts", q_stats.lso_pkts), }; /* Stats associated with an Rx queue */ static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = { - IDPF_QUEUE_STAT("pkts", q_stats.rx.packets), - IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes), - IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts), + IDPF_RX_QUEUE_STAT("pkts", q_stats.packets), + IDPF_RX_QUEUE_STAT("bytes", q_stats.bytes), + IDPF_RX_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rsc_pkts), }; #define IDPF_TX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_tx_queue_stats) @@ -597,7 +599,6 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset) struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_config *vport_config; u16 max_txq, max_rxq; - unsigned int size; if (sset != ETH_SS_STATS) return -EINVAL; @@ -616,10 +617,8 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset) max_txq = vport_config->max_q.max_txq; max_rxq = vport_config->max_q.max_rxq; - size = IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + + return IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + (IDPF_RX_QUEUE_STATS_LEN * max_rxq); - - return size; } /** @@ -631,7 
+630,7 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset) * Copies the stat data defined by the pointer and stat structure pair into * the memory supplied as data. If the pointer is null, data will be zero'd. */ -static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, +static void idpf_add_one_ethtool_stat(u64 *data, const void *pstat, const struct idpf_stats *stat) { char *p; @@ -669,6 +668,7 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, * idpf_add_queue_stats - copy queue statistics into supplied buffer * @data: ethtool stats buffer * @q: the queue to copy + * @type: type of the queue * * Queue statistics must be copied while protected by u64_stats_fetch_begin, * so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats @@ -679,19 +679,23 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, * * This function expects to be called while under rcu_read_lock(). */ -static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q) +static void idpf_add_queue_stats(u64 **data, const void *q, + enum virtchnl2_queue_type type) { + const struct u64_stats_sync *stats_sync; const struct idpf_stats *stats; unsigned int start; unsigned int size; unsigned int i; - if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { + if (type == VIRTCHNL2_QUEUE_TYPE_RX) { size = IDPF_RX_QUEUE_STATS_LEN; stats = idpf_gstrings_rx_queue_stats; + stats_sync = &((const struct idpf_rx_queue *)q)->stats_sync; } else { size = IDPF_TX_QUEUE_STATS_LEN; stats = idpf_gstrings_tx_queue_stats; + stats_sync = &((const struct idpf_tx_queue *)q)->stats_sync; } /* To avoid invalid statistics values, ensure that we keep retrying @@ -699,10 +703,10 @@ static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q) * u64_stats_fetch_retry. 
*/ do { - start = u64_stats_fetch_begin(&q->stats_sync); + start = u64_stats_fetch_begin(stats_sync); for (i = 0; i < size; i++) idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]); - } while (u64_stats_fetch_retry(&q->stats_sync, start)); + } while (u64_stats_fetch_retry(stats_sync, start)); /* Once we successfully copy the stats in, update the data pointer */ *data += size; @@ -791,7 +795,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) for (j = 0; j < num_rxq; j++) { u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs; struct idpf_rx_queue_stats *stats; - struct idpf_queue *rxq; + struct idpf_rx_queue *rxq; unsigned int start; if (idpf_is_queue_model_split(vport->rxq_model)) @@ -805,7 +809,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) do { start = u64_stats_fetch_begin(&rxq->stats_sync); - stats = &rxq->q_stats.rx; + stats = &rxq->q_stats; hw_csum_err = u64_stats_read(&stats->hw_csum_err); hsplit = u64_stats_read(&stats->hsplit_pkts); hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf); @@ -826,7 +830,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) for (j = 0; j < txq_grp->num_txq; j++) { u64 linearize, qbusy, skb_drops, dma_map_errs; - struct idpf_queue *txq = txq_grp->txqs[j]; + struct idpf_tx_queue *txq = txq_grp->txqs[j]; struct idpf_tx_queue_stats *stats; unsigned int start; @@ -836,7 +840,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) do { start = u64_stats_fetch_begin(&txq->stats_sync); - stats = &txq->q_stats.tx; + stats = &txq->q_stats; linearize = u64_stats_read(&stats->linearize); qbusy = u64_stats_read(&stats->q_busy); skb_drops = u64_stats_read(&stats->skb_drops); @@ -893,12 +897,12 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, qtype = VIRTCHNL2_QUEUE_TYPE_TX; for (j = 0; j < txq_grp->num_txq; j++, total++) { - struct idpf_queue *txq = txq_grp->txqs[j]; + struct idpf_tx_queue *txq = txq_grp->txqs[j]; if (!txq) idpf_add_empty_queue_stats(&data, qtype); else - idpf_add_queue_stats(&data, txq); + idpf_add_queue_stats(&data, txq, qtype); } } @@ -926,7 +930,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, num_rxq = rxq_grp->singleq.num_rxq; for (j = 0; j < num_rxq; j++, total++) { - struct idpf_queue *rxq; + struct idpf_rx_queue *rxq; if (is_splitq) rxq = &rxq_grp->splitq.rxq_sets[j]->rxq; @@ -935,7 +939,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, if (!rxq) idpf_add_empty_queue_stats(&data, qtype); else - idpf_add_queue_stats(&data, rxq); + idpf_add_queue_stats(&data, rxq, qtype); } } @@ -948,60 +952,64 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, } /** - * idpf_find_rxq - find rxq from q index + * idpf_find_rxq_vec - find rxq vector from q index * @vport: virtual port associated to queue * @q_num: q index used to find queue * - * returns pointer to rx queue + * returns pointer to rx vector */ -static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num) +static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, + int q_num) { int q_grp, q_idx; if (!idpf_is_queue_model_split(vport->rxq_model)) - return vport->rxq_grps->singleq.rxqs[q_num]; + return vport->rxq_grps->singleq.rxqs[q_num]->q_vector; q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; - return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq; + return vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq.q_vector; } /** - * idpf_find_txq - find txq from q index + * idpf_find_txq_vec 
- find txq vector from q index * @vport: virtual port associated to queue * @q_num: q index used to find queue * - * returns pointer to tx queue + * returns pointer to tx vector */ -static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num) +static struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, + int q_num) { int q_grp; if (!idpf_is_queue_model_split(vport->txq_model)) - return vport->txqs[q_num]; + return vport->txqs[q_num]->q_vector; q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP; - return vport->txq_grps[q_grp].complq; + return vport->txq_grps[q_grp].complq->q_vector; } /** * __idpf_get_q_coalesce - get ITR values for specific queue * @ec: ethtool structure to fill with driver's coalesce settings - * @q: quuee of Rx or Tx + * @q_vector: queue vector corresponding to this queue + * @type: queue type */ static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec, - struct idpf_queue *q) + const struct idpf_q_vector *q_vector, + enum virtchnl2_queue_type type) { - if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { + if (type == VIRTCHNL2_QUEUE_TYPE_RX) { ec->use_adaptive_rx_coalesce = - IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode); - ec->rx_coalesce_usecs = q->q_vector->rx_itr_value; + IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode); + ec->rx_coalesce_usecs = q_vector->rx_itr_value; } else { ec->use_adaptive_tx_coalesce = - IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode); - ec->tx_coalesce_usecs = q->q_vector->tx_itr_value; + IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode); + ec->tx_coalesce_usecs = q_vector->tx_itr_value; } } @@ -1017,8 +1025,8 @@ static int idpf_get_q_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, u32 q_num) { - struct idpf_netdev_priv *np = netdev_priv(netdev); - struct idpf_vport *vport; + const struct idpf_netdev_priv *np = netdev_priv(netdev); + const struct idpf_vport *vport; int err = 0; idpf_vport_ctrl_lock(netdev); @@ -1033,10 +1041,12 @@ static int idpf_get_q_coalesce(struct net_device *netdev, } if (q_num < vport->num_rxq) - __idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num)); + __idpf_get_q_coalesce(ec, idpf_find_rxq_vec(vport, q_num), + VIRTCHNL2_QUEUE_TYPE_RX); if (q_num < vport->num_txq) - __idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num)); + __idpf_get_q_coalesce(ec, idpf_find_txq_vec(vport, q_num), + VIRTCHNL2_QUEUE_TYPE_TX); unlock_mutex: idpf_vport_ctrl_unlock(netdev); @@ -1080,16 +1090,15 @@ static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num, /** * __idpf_set_q_coalesce - set ITR values for specific queue * @ec: ethtool structure from user to update ITR settings - * @q: queue for which itr values has to be set + * @qv: queue vector for which itr values has to be set * @is_rxq: is queue type rx * * Returns 0 on success, negative otherwise. 
*/ -static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, - struct idpf_queue *q, bool is_rxq) +static int __idpf_set_q_coalesce(const struct ethtool_coalesce *ec, + struct idpf_q_vector *qv, bool is_rxq) { u32 use_adaptive_coalesce, coalesce_usecs; - struct idpf_q_vector *qv = q->q_vector; bool is_dim_ena = false; u16 itr_val; @@ -1105,7 +1114,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, itr_val = qv->tx_itr_value; } if (coalesce_usecs != itr_val && use_adaptive_coalesce) { - netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n"); + netdev_err(qv->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n"); return -EINVAL; } @@ -1114,7 +1123,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, return 0; if (coalesce_usecs > IDPF_ITR_MAX) { - netdev_err(q->vport->netdev, + netdev_err(qv->vport->netdev, "Invalid value, %d-usecs range is 0-%d\n", coalesce_usecs, IDPF_ITR_MAX); @@ -1123,7 +1132,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, if (coalesce_usecs % 2) { coalesce_usecs--; - netdev_info(q->vport->netdev, + netdev_info(qv->vport->netdev, "HW only supports even ITR values, ITR rounded to %d\n", coalesce_usecs); } @@ -1162,15 +1171,16 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, * * Return 0 on success, and negative on failure */ -static int idpf_set_q_coalesce(struct idpf_vport *vport, - struct ethtool_coalesce *ec, +static int idpf_set_q_coalesce(const struct idpf_vport *vport, + const struct ethtool_coalesce *ec, int q_num, bool is_rxq) { - struct idpf_queue *q; + struct idpf_q_vector *qv; - q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num); + qv = is_rxq ? idpf_find_rxq_vec(vport, q_num) : + idpf_find_txq_vec(vport, q_num); - if (q && __idpf_set_q_coalesce(ec, q, is_rxq)) + if (qv && __idpf_set_q_coalesce(ec, qv, is_rxq)) return -EINVAL; return 0; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index aea4da7e54a84..74cce7bb9a568 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -4,8 +4,7 @@ #include "idpf.h" #include "idpf_virtchnl.h" -static const struct net_device_ops idpf_netdev_ops_splitq; -static const struct net_device_ops idpf_netdev_ops_singleq; +static const struct net_device_ops idpf_netdev_ops; /** * idpf_init_vector_stack - Fill the MSIX vector stack with vector index @@ -69,7 +68,7 @@ static void idpf_deinit_vector_stack(struct idpf_adapter *adapter) static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter) { clear_bit(IDPF_MB_INTR_MODE, adapter->flags); - free_irq(adapter->msix_entries[0].vector, adapter); + kfree(free_irq(adapter->msix_entries[0].vector, adapter)); queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); } @@ -124,15 +123,14 @@ static void idpf_mb_irq_enable(struct idpf_adapter *adapter) */ static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter) { - struct idpf_q_vector *mb_vector = &adapter->mb_vector; int irq_num, mb_vidx = 0, err; + char *name; irq_num = adapter->msix_entries[mb_vidx].vector; - mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", - dev_driver_string(&adapter->pdev->dev), - "Mailbox", mb_vidx); - err = request_irq(irq_num, adapter->irq_mb_handler, 0, - mb_vector->name, adapter); + name = kasprintf(GFP_KERNEL, "%s-%s-%d", + dev_driver_string(&adapter->pdev->dev), + "Mailbox", mb_vidx); + err = request_irq(irq_num, adapter->irq_mb_handler, 0, name, adapter); if (err) { 
dev_err(&adapter->pdev->dev, "IRQ request for mailbox failed, error: %d\n", err); @@ -769,10 +767,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) } /* assign netdev_ops */ - if (idpf_is_queue_model_split(vport->txq_model)) - netdev->netdev_ops = &idpf_netdev_ops_splitq; - else - netdev->netdev_ops = &idpf_netdev_ops_singleq; + netdev->netdev_ops = &idpf_netdev_ops; /* setup watchdog timeout value to be 5 second */ netdev->watchdog_timeo = 5 * HZ; @@ -928,12 +923,16 @@ static int idpf_stop(struct net_device *netdev) static void idpf_decfg_netdev(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + u16 idx = vport->idx; - unregister_netdev(vport->netdev); - free_netdev(vport->netdev); + if (test_and_clear_bit(IDPF_VPORT_REG_NETDEV, + adapter->vport_config[idx]->flags)) { + unregister_netdev(vport->netdev); + free_netdev(vport->netdev); + } vport->netdev = NULL; - adapter->netdevs[vport->idx] = NULL; + adapter->netdevs[idx] = NULL; } /** @@ -1304,14 +1303,14 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) if (idpf_is_queue_model_split(vport->rxq_model)) { for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { - struct idpf_queue *q = + const struct idpf_buf_queue *q = &grp->splitq.bufq_sets[j].bufq; writel(q->next_to_alloc, q->tail); } } else { for (j = 0; j < grp->singleq.num_rxq; j++) { - struct idpf_queue *q = + const struct idpf_rx_queue *q = grp->singleq.rxqs[j]; writel(q->next_to_alloc, q->tail); @@ -1537,13 +1536,22 @@ void idpf_init_task(struct work_struct *work) } for (index = 0; index < adapter->max_vports; index++) { - if (adapter->netdevs[index] && - !test_bit(IDPF_VPORT_REG_NETDEV, - adapter->vport_config[index]->flags)) { - register_netdev(adapter->netdevs[index]); - set_bit(IDPF_VPORT_REG_NETDEV, - adapter->vport_config[index]->flags); + struct net_device *netdev = adapter->netdevs[index]; + struct idpf_vport_config *vport_config; + + vport_config = adapter->vport_config[index]; + + if (!netdev || + test_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags)) + continue; + + err = register_netdev(netdev); + if (err) { + dev_err(&pdev->dev, "failed to register netdev for vport %d: %pe\n", + index, ERR_PTR(err)); + continue; } + set_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags); } /* As all the required vports are created, clear the reset flag @@ -1847,7 +1855,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, enum idpf_vport_state current_state = np->state; struct idpf_adapter *adapter = vport->adapter; struct idpf_vport *new_vport; - int err, i; + int err; /* If the system is low on memory, we can end up in bad state if we * free all the memory for queue resources and try to allocate them @@ -1918,46 +1926,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, */ memcpy(vport, new_vport, offsetof(struct idpf_vport, link_up)); - /* Since idpf_vport_queues_alloc was called with new_port, the queue - * back pointers are currently pointing to the local new_vport. 
Reset - * the backpointers to the original vport here - */ - for (i = 0; i < vport->num_txq_grp; i++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - int j; - - tx_qgrp->vport = vport; - for (j = 0; j < tx_qgrp->num_txq; j++) - tx_qgrp->txqs[j]->vport = vport; - - if (idpf_is_queue_model_split(vport->txq_model)) - tx_qgrp->complq->vport = vport; - } - - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - struct idpf_queue *q; - u16 num_rxq; - int j; - - rx_qgrp->vport = vport; - for (j = 0; j < vport->num_bufqs_per_qgrp; j++) - rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport; - - if (idpf_is_queue_model_split(vport->rxq_model)) - num_rxq = rx_qgrp->splitq.num_rxq_sets; - else - num_rxq = rx_qgrp->singleq.num_rxq; - - for (j = 0; j < num_rxq; j++) { - if (idpf_is_queue_model_split(vport->rxq_model)) - q = &rx_qgrp->splitq.rxq_sets[j]->rxq; - else - q = rx_qgrp->singleq.rxqs[j]; - q->vport = vport; - } - } - if (reset_cause == IDPF_SR_Q_CHANGE) idpf_vport_alloc_vec_indexes(vport); @@ -2393,24 +2361,10 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) mem->pa = 0; } -static const struct net_device_ops idpf_netdev_ops_splitq = { - .ndo_open = idpf_open, - .ndo_stop = idpf_stop, - .ndo_start_xmit = idpf_tx_splitq_start, - .ndo_features_check = idpf_features_check, - .ndo_set_rx_mode = idpf_set_rx_mode, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = idpf_set_mac, - .ndo_change_mtu = idpf_change_mtu, - .ndo_get_stats64 = idpf_get_stats64, - .ndo_set_features = idpf_set_features, - .ndo_tx_timeout = idpf_tx_timeout, -}; - -static const struct net_device_ops idpf_netdev_ops_singleq = { +static const struct net_device_ops idpf_netdev_ops = { .ndo_open = idpf_open, .ndo_stop = idpf_stop, - .ndo_start_xmit = idpf_tx_singleq_start, + .ndo_start_xmit = idpf_tx_start, .ndo_features_check = idpf_features_check, .ndo_set_rx_mode = idpf_set_rx_mode, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 5c3d34d3de8ac..3cf493a51b8d0 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -186,7 +186,7 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb, * and gets a physical address for each memory location and programs * it and the length into the transmit base mode descriptor. 
*/ -static void idpf_tx_singleq_map(struct idpf_queue *tx_q, +static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, struct idpf_tx_buf *first, struct idpf_tx_offload_params *offloads) { @@ -210,7 +210,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); /* write each descriptor with CRC bit */ - if (tx_q->vport->crc_enable) + if (idpf_queue_has(CRC_EN, tx_q)) td_cmd |= IDPF_TX_DESC_CMD_ICRC; for (frag = &skb_shinfo(skb)->frags[0];; frag++) { @@ -285,7 +285,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; - nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); netdev_tx_sent_queue(nq, first->bytecount); idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); @@ -299,7 +299,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, * ring entry to reflect that this index is a context descriptor */ static struct idpf_base_tx_ctx_desc * -idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) +idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq) { struct idpf_base_tx_ctx_desc *ctx_desc; int ntu = txq->next_to_use; @@ -320,7 +320,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) * @txq: queue to send buffer on * @offload: offload parameter structure **/ -static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, +static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq, struct idpf_tx_offload_params *offload) { struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq); @@ -333,7 +333,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss); u64_stats_update_begin(&txq->stats_sync); - u64_stats_inc(&txq->q_stats.tx.lso_pkts); + u64_stats_inc(&txq->q_stats.lso_pkts); u64_stats_update_end(&txq->stats_sync); } @@ -351,24 +351,29 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, * * Returns NETDEV_TX_OK if sent, else an error code */ -static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, - struct idpf_queue *tx_q) +netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_tx_queue *tx_q) { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; + int csum, tso, needed; unsigned int count; __be16 protocol; - int csum, tso; count = idpf_tx_desc_count_required(tx_q, skb); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); - if (idpf_tx_maybe_stop_common(tx_q, - count + IDPF_TX_DESCS_PER_CACHE_LINE + - IDPF_TX_DESCS_FOR_CTX)) { + needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX; + if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + IDPF_DESC_UNUSED(tx_q), + needed, needed)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + return NETDEV_TX_BUSY; } @@ -408,33 +413,6 @@ static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, return idpf_tx_drop_skb(tx_q, skb); } -/** - * idpf_tx_singleq_start - Selects the right Tx queue to send buffer - * @skb: send buffer - * @netdev: network interface device structure - * - * Returns NETDEV_TX_OK if sent, else an error code - */ -netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, - struct net_device *netdev) -{ - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_queue 
*tx_q; - - tx_q = vport->txqs[skb_get_queue_mapping(skb)]; - - /* hardware can't handle really short frames, hardware padding works - * beyond this point - */ - if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) { - idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); - - return NETDEV_TX_OK; - } - - return idpf_tx_singleq_frame(skb, tx_q); -} - /** * idpf_tx_singleq_clean - Reclaim resources from queue * @tx_q: Tx queue to clean @@ -442,16 +420,15 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, * @cleaned: returns number of packets cleaned * */ -static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, +static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget, int *cleaned) { - unsigned int budget = tx_q->vport->compln_clean_budget; unsigned int total_bytes = 0, total_pkts = 0; struct idpf_base_tx_desc *tx_desc; + u32 budget = tx_q->clean_budget; s16 ntc = tx_q->next_to_clean; struct idpf_netdev_priv *np; struct idpf_tx_buf *tx_buf; - struct idpf_vport *vport; struct netdev_queue *nq; bool dont_wake; @@ -550,16 +527,15 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, *cleaned += total_pkts; u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts); - u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes); + u64_stats_add(&tx_q->q_stats.packets, total_pkts); + u64_stats_add(&tx_q->q_stats.bytes, total_bytes); u64_stats_update_end(&tx_q->stats_sync); - vport = tx_q->vport; - np = netdev_priv(vport->netdev); - nq = netdev_get_tx_queue(vport->netdev, tx_q->idx); + np = netdev_priv(tx_q->netdev); + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); dont_wake = np->state != __IDPF_VPORT_UP || - !netif_carrier_ok(vport->netdev); + !netif_carrier_ok(tx_q->netdev); __netif_txq_completed_wake(nq, total_pkts, total_bytes, IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, dont_wake); @@ -584,7 +560,7 @@ static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, budget_per_q = num_txq ? 
max(budget / num_txq, 1) : 0; for (i = 0; i < num_txq; i++) { - struct idpf_queue *q; + struct idpf_tx_queue *q; q = q_vec->tx[i]; clean_complete &= idpf_tx_singleq_clean(q, budget_per_q, @@ -614,14 +590,9 @@ static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc, /** * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers - * @rxq: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress - * @ntc: next to clean */ -static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, - union virtchnl2_rx_desc *rx_desc, - struct sk_buff *skb, u16 ntc) +static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc) { /* if we are the last buffer then there is nothing else to do */ if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ))) @@ -639,7 +610,7 @@ static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, * * skb->protocol must be set before this function is called */ -static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, +static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, struct idpf_rx_csum_decoded *csum_bits, u16 ptype) { @@ -647,14 +618,14 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, bool ipv4, ipv6; /* check if Rx checksum is enabled */ - if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM))) + if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM))) return; /* check if HW has decoded the packet and checksum */ if (unlikely(!(csum_bits->l3l4p))) return; - decoded = rxq->vport->rx_ptype_lkup[ptype]; + decoded = rxq->rx_ptype_lkup[ptype]; if (unlikely(!(decoded.known && decoded.outer_ip))) return; @@ -707,7 +678,7 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, checksum_fail: u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); + u64_stats_inc(&rxq->q_stats.hw_csum_err); u64_stats_update_end(&rxq->stats_sync); } @@ -721,9 +692,9 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte * descriptor writeback format. **/ -static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, +static void idpf_rx_singleq_base_csum(struct idpf_rx_queue *rx_q, struct sk_buff *skb, - union virtchnl2_rx_desc *rx_desc, + const union virtchnl2_rx_desc *rx_desc, u16 ptype) { struct idpf_rx_csum_decoded csum_bits; @@ -761,9 +732,9 @@ static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible * descriptor writeback format. **/ -static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, +static void idpf_rx_singleq_flex_csum(struct idpf_rx_queue *rx_q, struct sk_buff *skb, - union virtchnl2_rx_desc *rx_desc, + const union virtchnl2_rx_desc *rx_desc, u16 ptype) { struct idpf_rx_csum_decoded csum_bits; @@ -801,14 +772,14 @@ static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte * descriptor writeback format. 
**/ -static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, +static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q, struct sk_buff *skb, - union virtchnl2_rx_desc *rx_desc, + const union virtchnl2_rx_desc *rx_desc, struct idpf_rx_ptype_decoded *decoded) { u64 mask, qw1; - if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) + if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH))) return; mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; @@ -831,12 +802,12 @@ static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible * descriptor writeback format. **/ -static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, +static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, struct sk_buff *skb, - union virtchnl2_rx_desc *rx_desc, + const union virtchnl2_rx_desc *rx_desc, struct idpf_rx_ptype_decoded *decoded) { - if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) + if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH))) return; if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, @@ -857,16 +828,16 @@ static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, * order to populate the hash, checksum, VLAN, protocol, and * other fields within the skb. */ -static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, - struct sk_buff *skb, - union virtchnl2_rx_desc *rx_desc, - u16 ptype) +static void +idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, + const union virtchnl2_rx_desc *rx_desc, + u16 ptype) { - struct idpf_rx_ptype_decoded decoded = - rx_q->vport->rx_ptype_lkup[ptype]; + struct idpf_rx_ptype_decoded decoded = rx_q->rx_ptype_lkup[ptype]; /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_q->vport->netdev); + skb->protocol = eth_type_trans(skb, rx_q->netdev); /* Check if we're using base mode descriptor IDs */ if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { @@ -878,6 +849,22 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, } } +/** + * idpf_rx_buf_hw_update - Store the new tail and head values + * @rxq: queue to bump + * @val: new head index + */ +static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val) +{ + rxq->next_to_use = val; + + if (unlikely(!rxq->tail)) + return; + + /* writel has an implicit memory barrier */ + writel(val, rxq->tail); +} + /** * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers * @rx_q: queue for which the hw buffers are allocated @@ -885,7 +872,7 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, * * Returns false if all allocations were successful, true if any fail */ -bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, u16 cleaned_count) { struct virtchnl2_singleq_rx_buf_desc *desc; @@ -896,7 +883,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, return false; desc = &rx_q->single_buf[nta]; - buf = &rx_q->rx_buf.buf[nta]; + buf = &rx_q->rx_buf[nta]; do { dma_addr_t addr; @@ -916,7 +903,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, nta++; if (unlikely(nta == rx_q->desc_count)) { desc = &rx_q->single_buf[0]; - buf = rx_q->rx_buf.buf; + buf = rx_q->rx_buf; nta = 0; } @@ -933,7 +920,6 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, /** * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor - * 
@rx_q: Rx descriptor queue * @rx_desc: the descriptor to process * @fields: storage for extracted values * @@ -943,9 +929,9 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte * descriptor writeback format. */ -static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, - union virtchnl2_rx_desc *rx_desc, - struct idpf_rx_extracted *fields) +static void +idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) { u64 qword; @@ -957,7 +943,6 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, /** * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor - * @rx_q: Rx descriptor queue * @rx_desc: the descriptor to process * @fields: storage for extracted values * @@ -967,9 +952,9 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible * descriptor writeback format. */ -static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, - union virtchnl2_rx_desc *rx_desc, - struct idpf_rx_extracted *fields) +static void +idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) { fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); @@ -984,14 +969,15 @@ static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, * @fields: storage for extracted values * */ -static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, - union virtchnl2_rx_desc *rx_desc, - struct idpf_rx_extracted *fields) +static void +idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, + const union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) { if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) - idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields); + idpf_rx_singleq_extract_base_fields(rx_desc, fields); else - idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields); + idpf_rx_singleq_extract_flex_fields(rx_desc, fields); } /** @@ -1001,7 +987,7 @@ static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, * * Returns true if there's any budget left (e.g. 
the clean is finished) */ -static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) +static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0; struct sk_buff *skb = rx_q->skb; @@ -1036,7 +1022,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); - rx_buf = &rx_q->rx_buf.buf[ntc]; + rx_buf = &rx_q->rx_buf[ntc]; if (!fields.size) { idpf_rx_put_page(rx_buf); goto skip_data; @@ -1058,7 +1044,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) cleaned_count++; /* skip if it is non EOP desc */ - if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc)) + if (idpf_rx_singleq_is_non_eop(rx_desc)) continue; #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ @@ -1099,8 +1085,8 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); u64_stats_update_begin(&rx_q->stats_sync); - u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts); - u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes); + u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts); + u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes); u64_stats_update_end(&rx_q->stats_sync); /* guarantee a trip back through this routine if there was a failure */ @@ -1127,7 +1113,7 @@ static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, */ budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0; for (i = 0; i < num_rxq; i++) { - struct idpf_queue *rxq = q_vec->rx[i]; + struct idpf_rx_queue *rxq = q_vec->rx[i]; int pkts_cleaned_per_q; pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 7501a74f8dd92..e163e54d1c31e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4,6 +4,9 @@ #include "idpf.h" #include "idpf_virtchnl.h" +static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count); + /** * idpf_buf_lifo_push - push a buffer pointer onto stack * @stack: pointer to stack struct @@ -60,7 +63,8 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) * @tx_q: the queue that owns the buffer * @tx_buf: the buffer to free */ -static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) +static void idpf_tx_buf_rel(struct idpf_tx_queue *tx_q, + struct idpf_tx_buf *tx_buf) { if (tx_buf->skb) { if (dma_unmap_len(tx_buf, len)) @@ -86,8 +90,9 @@ static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) * idpf_tx_buf_rel_all - Free any empty Tx buffers * @txq: queue to be cleaned */ -static void idpf_tx_buf_rel_all(struct idpf_queue *txq) +static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) { + struct idpf_buf_lifo *buf_stack; u16 i; /* Buffers already cleared, nothing to do */ @@ -101,38 +106,57 @@ static void idpf_tx_buf_rel_all(struct idpf_queue *txq) kfree(txq->tx_buf); txq->tx_buf = NULL; - if (!txq->buf_stack.bufs) + if (!idpf_queue_has(FLOW_SCH_EN, txq)) + return; + + buf_stack = &txq->stash->buf_stack; + if (!buf_stack->bufs) return; - for (i = 0; i < txq->buf_stack.size; i++) - kfree(txq->buf_stack.bufs[i]); + for (i = 0; i < buf_stack->size; i++) + kfree(buf_stack->bufs[i]); - kfree(txq->buf_stack.bufs); - txq->buf_stack.bufs = NULL; + kfree(buf_stack->bufs); + buf_stack->bufs = 
NULL; } /** * idpf_tx_desc_rel - Free Tx resources per queue * @txq: Tx descriptor ring for a specific queue - * @bufq: buffer q or completion q * * Free all transmit software resources */ -static void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq) +static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) { - if (bufq) - idpf_tx_buf_rel_all(txq); + idpf_tx_buf_rel_all(txq); if (!txq->desc_ring) return; dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); txq->desc_ring = NULL; - txq->next_to_alloc = 0; txq->next_to_use = 0; txq->next_to_clean = 0; } +/** + * idpf_compl_desc_rel - Free completion resources per queue + * @complq: completion queue + * + * Free all completion software resources. + */ +static void idpf_compl_desc_rel(struct idpf_compl_queue *complq) +{ + if (!complq->comp) + return; + + dma_free_coherent(complq->netdev->dev.parent, complq->size, + complq->comp, complq->dma); + complq->comp = NULL; + complq->next_to_use = 0; + complq->next_to_clean = 0; +} + /** * idpf_tx_desc_rel_all - Free Tx Resources for All Queues * @vport: virtual port structure @@ -150,10 +174,10 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; for (j = 0; j < txq_grp->num_txq; j++) - idpf_tx_desc_rel(txq_grp->txqs[j], true); + idpf_tx_desc_rel(txq_grp->txqs[j]); if (idpf_is_queue_model_split(vport->txq_model)) - idpf_tx_desc_rel(txq_grp->complq, false); + idpf_compl_desc_rel(txq_grp->complq); } } @@ -163,8 +187,9 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) * * Returns 0 on success, negative on failure */ -static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) +static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) { + struct idpf_buf_lifo *buf_stack; int buf_size; int i; @@ -180,22 +205,26 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) for (i = 0; i < tx_q->desc_count; i++) tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) + return 0; + + buf_stack = &tx_q->stash->buf_stack; + /* Initialize tx buf stack for out-of-order completions if * flow scheduling offload is enabled */ - tx_q->buf_stack.bufs = - kcalloc(tx_q->desc_count, sizeof(struct idpf_tx_stash *), - GFP_KERNEL); - if (!tx_q->buf_stack.bufs) + buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs), + GFP_KERNEL); + if (!buf_stack->bufs) return -ENOMEM; - tx_q->buf_stack.size = tx_q->desc_count; - tx_q->buf_stack.top = tx_q->desc_count; + buf_stack->size = tx_q->desc_count; + buf_stack->top = tx_q->desc_count; for (i = 0; i < tx_q->desc_count; i++) { - tx_q->buf_stack.bufs[i] = kzalloc(sizeof(*tx_q->buf_stack.bufs[i]), - GFP_KERNEL); - if (!tx_q->buf_stack.bufs[i]) + buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]), + GFP_KERNEL); + if (!buf_stack->bufs[i]) return -ENOMEM; } @@ -204,28 +233,22 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) /** * idpf_tx_desc_alloc - Allocate the Tx descriptors + * @vport: vport to allocate resources for * @tx_q: the tx ring to set up - * @bufq: buffer or completion queue * * Returns 0 on success, negative on failure */ -static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) +static int idpf_tx_desc_alloc(const struct idpf_vport *vport, + struct idpf_tx_queue *tx_q) { struct device *dev = tx_q->dev; - u32 desc_sz; int err; - if (bufq) { - err = idpf_tx_buf_alloc_all(tx_q); - if (err) - goto err_alloc; - - desc_sz = sizeof(struct idpf_base_tx_desc); - } else { - desc_sz = sizeof(struct 
idpf_splitq_tx_compl_desc); - } + err = idpf_tx_buf_alloc_all(tx_q); + if (err) + goto err_alloc; - tx_q->size = tx_q->desc_count * desc_sz; + tx_q->size = tx_q->desc_count * sizeof(*tx_q->base_tx); /* Allocate descriptors also round up to nearest 4K */ tx_q->size = ALIGN(tx_q->size, 4096); @@ -238,19 +261,43 @@ static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) goto err_alloc; } - tx_q->next_to_alloc = 0; tx_q->next_to_use = 0; tx_q->next_to_clean = 0; - set_bit(__IDPF_Q_GEN_CHK, tx_q->flags); + idpf_queue_set(GEN_CHK, tx_q); return 0; err_alloc: - idpf_tx_desc_rel(tx_q, bufq); + idpf_tx_desc_rel(tx_q); return err; } +/** + * idpf_compl_desc_alloc - allocate completion descriptors + * @vport: vport to allocate resources for + * @complq: completion queue to set up + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_compl_desc_alloc(const struct idpf_vport *vport, + struct idpf_compl_queue *complq) +{ + complq->size = array_size(complq->desc_count, sizeof(*complq->comp)); + + complq->comp = dma_alloc_coherent(complq->netdev->dev.parent, + complq->size, &complq->dma, + GFP_KERNEL); + if (!complq->comp) + return -ENOMEM; + + complq->next_to_use = 0; + complq->next_to_clean = 0; + idpf_queue_set(GEN_CHK, complq); + + return 0; +} + /** * idpf_tx_desc_alloc_all - allocate all queues Tx resources * @vport: virtual port private structure @@ -259,7 +306,6 @@ static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) */ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) { - struct device *dev = &vport->adapter->pdev->dev; int err = 0; int i, j; @@ -268,13 +314,14 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) */ for (i = 0; i < vport->num_txq_grp; i++) { for (j = 0; j < vport->txq_grps[i].num_txq; j++) { - struct idpf_queue *txq = vport->txq_grps[i].txqs[j]; + struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j]; u8 gen_bits = 0; u16 bufidx_mask; - err = idpf_tx_desc_alloc(txq, true); + err = idpf_tx_desc_alloc(vport, txq); if (err) { - dev_err(dev, "Allocation for Tx Queue %u failed\n", + pci_err(vport->adapter->pdev, + "Allocation for Tx Queue %u failed\n", i); goto err_out; } @@ -312,9 +359,10 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) continue; /* Setup completion queues */ - err = idpf_tx_desc_alloc(vport->txq_grps[i].complq, false); + err = idpf_compl_desc_alloc(vport, vport->txq_grps[i].complq); if (err) { - dev_err(dev, "Allocation for Tx Completion Queue %u failed\n", + pci_err(vport->adapter->pdev, + "Allocation for Tx Completion Queue %u failed\n", i); goto err_out; } @@ -329,15 +377,14 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) /** * idpf_rx_page_rel - Release an rx buffer page - * @rxq: the queue that owns the buffer * @rx_buf: the buffer to free */ -static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf) +static void idpf_rx_page_rel(struct idpf_rx_buf *rx_buf) { if (unlikely(!rx_buf->page)) return; - page_pool_put_full_page(rxq->pp, rx_buf->page, false); + page_pool_put_full_page(rx_buf->pp, rx_buf->page, false); rx_buf->page = NULL; rx_buf->page_offset = 0; @@ -345,54 +392,76 @@ static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf) /** * idpf_rx_hdr_buf_rel_all - Release header buffer memory - * @rxq: queue to use + * @bufq: queue to use + * @dev: device to free DMA memory */ -static void idpf_rx_hdr_buf_rel_all(struct idpf_queue *rxq) +static void idpf_rx_hdr_buf_rel_all(struct idpf_buf_queue *bufq, + struct device *dev) { - 
struct idpf_adapter *adapter = rxq->vport->adapter; - - dma_free_coherent(&adapter->pdev->dev, - rxq->desc_count * IDPF_HDR_BUF_SIZE, - rxq->rx_buf.hdr_buf_va, - rxq->rx_buf.hdr_buf_pa); - rxq->rx_buf.hdr_buf_va = NULL; + dma_free_coherent(dev, bufq->desc_count * IDPF_HDR_BUF_SIZE, + bufq->rx_buf.hdr_buf_va, bufq->rx_buf.hdr_buf_pa); + bufq->rx_buf.hdr_buf_va = NULL; } /** - * idpf_rx_buf_rel_all - Free all Rx buffer resources for a queue - * @rxq: queue to be cleaned + * idpf_rx_buf_rel_bufq - Free all Rx buffer resources for a buffer queue + * @bufq: queue to be cleaned + * @dev: device to free DMA memory */ -static void idpf_rx_buf_rel_all(struct idpf_queue *rxq) +static void idpf_rx_buf_rel_bufq(struct idpf_buf_queue *bufq, + struct device *dev) { - u16 i; + u32 i; /* queue already cleared, nothing to do */ - if (!rxq->rx_buf.buf) + if (!bufq->rx_buf.buf) return; /* Free all the bufs allocated and given to hw on Rx queue */ - for (i = 0; i < rxq->desc_count; i++) - idpf_rx_page_rel(rxq, &rxq->rx_buf.buf[i]); + for (i = 0; i < bufq->desc_count; i++) + idpf_rx_page_rel(&bufq->rx_buf.buf[i]); + + if (idpf_queue_has(HSPLIT_EN, bufq)) + idpf_rx_hdr_buf_rel_all(bufq, dev); + + page_pool_destroy(bufq->pp); + bufq->pp = NULL; + + kfree(bufq->rx_buf.buf); + bufq->rx_buf.buf = NULL; +} - if (rxq->rx_hsplit_en) - idpf_rx_hdr_buf_rel_all(rxq); +/** + * idpf_rx_buf_rel_all - Free all Rx buffer resources for a receive queue + * @rxq: queue to be cleaned + */ +static void idpf_rx_buf_rel_all(struct idpf_rx_queue *rxq) +{ + u32 i; + + if (!rxq->rx_buf) + return; + + for (i = 0; i < rxq->desc_count; i++) + idpf_rx_page_rel(&rxq->rx_buf[i]); page_pool_destroy(rxq->pp); rxq->pp = NULL; - kfree(rxq->rx_buf.buf); - rxq->rx_buf.buf = NULL; + kfree(rxq->rx_buf); + rxq->rx_buf = NULL; } /** * idpf_rx_desc_rel - Free a specific Rx q resources * @rxq: queue to clean the resources from - * @bufq: buffer q or completion q - * @q_model: single or split q model + * @dev: device to free DMA memory + * @model: single or split queue model * * Free a specific rx queue resources */ -static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) +static void idpf_rx_desc_rel(struct idpf_rx_queue *rxq, struct device *dev, + u32 model) { if (!rxq) return; @@ -402,7 +471,7 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) rxq->skb = NULL; } - if (bufq || !idpf_is_queue_model_split(q_model)) + if (!idpf_is_queue_model_split(model)) idpf_rx_buf_rel_all(rxq); rxq->next_to_alloc = 0; @@ -411,10 +480,34 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) if (!rxq->desc_ring) return; - dmam_free_coherent(rxq->dev, rxq->size, rxq->desc_ring, rxq->dma); + dmam_free_coherent(dev, rxq->size, rxq->desc_ring, rxq->dma); rxq->desc_ring = NULL; } +/** + * idpf_rx_desc_rel_bufq - free buffer queue resources + * @bufq: buffer queue to clean the resources from + * @dev: device to free DMA memory + */ +static void idpf_rx_desc_rel_bufq(struct idpf_buf_queue *bufq, + struct device *dev) +{ + if (!bufq) + return; + + idpf_rx_buf_rel_bufq(bufq, dev); + + bufq->next_to_alloc = 0; + bufq->next_to_clean = 0; + bufq->next_to_use = 0; + + if (!bufq->split_buf) + return; + + dma_free_coherent(dev, bufq->size, bufq->split_buf, bufq->dma); + bufq->split_buf = NULL; +} + /** * idpf_rx_desc_rel_all - Free Rx Resources for All Queues * @vport: virtual port structure @@ -423,6 +516,7 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) */ static void 
idpf_rx_desc_rel_all(struct idpf_vport *vport) { + struct device *dev = &vport->adapter->pdev->dev; struct idpf_rxq_group *rx_qgrp; u16 num_rxq; int i, j; @@ -435,15 +529,15 @@ static void idpf_rx_desc_rel_all(struct idpf_vport *vport) if (!idpf_is_queue_model_split(vport->rxq_model)) { for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j], - false, vport->rxq_model); + idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j], dev, + VIRTCHNL2_QUEUE_MODEL_SINGLE); continue; } num_rxq = rx_qgrp->splitq.num_rxq_sets; for (j = 0; j < num_rxq; j++) idpf_rx_desc_rel(&rx_qgrp->splitq.rxq_sets[j]->rxq, - false, vport->rxq_model); + dev, VIRTCHNL2_QUEUE_MODEL_SPLIT); if (!rx_qgrp->splitq.bufq_sets) continue; @@ -452,44 +546,40 @@ static void idpf_rx_desc_rel_all(struct idpf_vport *vport) struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[j]; - idpf_rx_desc_rel(&bufq_set->bufq, true, - vport->rxq_model); + idpf_rx_desc_rel_bufq(&bufq_set->bufq, dev); } } } /** * idpf_rx_buf_hw_update - Store the new tail and head values - * @rxq: queue to bump + * @bufq: queue to bump * @val: new head index */ -void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val) +static void idpf_rx_buf_hw_update(struct idpf_buf_queue *bufq, u32 val) { - rxq->next_to_use = val; + bufq->next_to_use = val; - if (unlikely(!rxq->tail)) + if (unlikely(!bufq->tail)) return; /* writel has an implicit memory barrier */ - writel(val, rxq->tail); + writel(val, bufq->tail); } /** * idpf_rx_hdr_buf_alloc_all - Allocate memory for header buffers - * @rxq: ring to use + * @bufq: ring to use * * Returns 0 on success, negative on failure. */ -static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq) +static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) { - struct idpf_adapter *adapter = rxq->vport->adapter; - - rxq->rx_buf.hdr_buf_va = - dma_alloc_coherent(&adapter->pdev->dev, - IDPF_HDR_BUF_SIZE * rxq->desc_count, - &rxq->rx_buf.hdr_buf_pa, - GFP_KERNEL); - if (!rxq->rx_buf.hdr_buf_va) + bufq->rx_buf.hdr_buf_va = + dma_alloc_coherent(bufq->q_vector->vport->netdev->dev.parent, + IDPF_HDR_BUF_SIZE * bufq->desc_count, + &bufq->rx_buf.hdr_buf_pa, GFP_KERNEL); + if (!bufq->rx_buf.hdr_buf_va) return -ENOMEM; return 0; @@ -502,19 +592,20 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq) */ static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) { - u16 nta = refillq->next_to_alloc; + u32 nta = refillq->next_to_use; /* store the buffer ID and the SW maintained GEN bit to the refillq */ refillq->ring[nta] = FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | FIELD_PREP(IDPF_RX_BI_GEN_M, - test_bit(__IDPF_Q_GEN_CHK, refillq->flags)); + idpf_queue_has(GEN_CHK, refillq)); if (unlikely(++nta == refillq->desc_count)) { nta = 0; - change_bit(__IDPF_Q_GEN_CHK, refillq->flags); + idpf_queue_change(GEN_CHK, refillq); } - refillq->next_to_alloc = nta; + + refillq->next_to_use = nta; } /** @@ -524,7 +615,7 @@ static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) * * Returns false if buffer could not be allocated, true otherwise. 
*/ -static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) +static bool idpf_rx_post_buf_desc(struct idpf_buf_queue *bufq, u16 buf_id) { struct virtchnl2_splitq_rx_buf_desc *splitq_rx_desc = NULL; u16 nta = bufq->next_to_alloc; @@ -534,11 +625,10 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) splitq_rx_desc = &bufq->split_buf[nta]; buf = &bufq->rx_buf.buf[buf_id]; - if (bufq->rx_hsplit_en) { + if (idpf_queue_has(HSPLIT_EN, bufq)) splitq_rx_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa + (u32)buf_id * IDPF_HDR_BUF_SIZE); - } addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size); if (unlikely(addr == DMA_MAPPING_ERROR)) @@ -562,7 +652,8 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) * * Returns true if @working_set bufs were posted successfully, false otherwise. */ -static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set) +static bool idpf_rx_post_init_bufs(struct idpf_buf_queue *bufq, + u16 working_set) { int i; @@ -571,45 +662,88 @@ static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set) return false; } - idpf_rx_buf_hw_update(bufq, - bufq->next_to_alloc & ~(bufq->rx_buf_stride - 1)); + idpf_rx_buf_hw_update(bufq, ALIGN_DOWN(bufq->next_to_alloc, + IDPF_RX_BUF_STRIDE)); return true; } /** * idpf_rx_create_page_pool - Create a page pool - * @rxbufq: RX queue to create page pool for + * @napi: NAPI of the associated queue vector + * @count: queue descriptor count * * Returns &page_pool on success, casted -errno on failure */ -static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq) +static struct page_pool *idpf_rx_create_page_pool(struct napi_struct *napi, + u32 count) { struct page_pool_params pp = { .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = 0, - .pool_size = rxbufq->desc_count, + .pool_size = count, .nid = NUMA_NO_NODE, - .dev = rxbufq->vport->netdev->dev.parent, + .dev = napi->dev->dev.parent, .max_len = PAGE_SIZE, .dma_dir = DMA_FROM_DEVICE, .offset = 0, }; - WARN_ON_ONCE(rxbufq->rx_buf_size != IDPF_RX_BUF_4096); - return page_pool_create(&pp); } +/** + * idpf_rx_buf_alloc_singleq - Allocate memory for all buffer resources + * @rxq: queue for which the buffers are allocated + * + * Return: 0 on success, -ENOMEM on failure. + */ +static int idpf_rx_buf_alloc_singleq(struct idpf_rx_queue *rxq) +{ + rxq->rx_buf = kcalloc(rxq->desc_count, sizeof(*rxq->rx_buf), + GFP_KERNEL); + if (!rxq->rx_buf) + return -ENOMEM; + + if (idpf_rx_singleq_buf_hw_alloc_all(rxq, rxq->desc_count - 1)) + goto err; + + return 0; + +err: + idpf_rx_buf_rel_all(rxq); + + return -ENOMEM; +} + +/** + * idpf_rx_bufs_init_singleq - Initialize page pool and allocate Rx bufs + * @rxq: buffer queue to create page pool for + * + * Return: 0 on success, -errno on failure. 
+ */ +static int idpf_rx_bufs_init_singleq(struct idpf_rx_queue *rxq) +{ + struct page_pool *pool; + + pool = idpf_rx_create_page_pool(&rxq->q_vector->napi, rxq->desc_count); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + rxq->pp = pool; + + return idpf_rx_buf_alloc_singleq(rxq); +} + /** * idpf_rx_buf_alloc_all - Allocate memory for all buffer resources - * @rxbufq: queue for which the buffers are allocated; equivalent to - * rxq when operating in singleq mode + * @rxbufq: queue for which the buffers are allocated * * Returns 0 on success, negative on failure */ -static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq) +static int idpf_rx_buf_alloc_all(struct idpf_buf_queue *rxbufq) { + struct device *dev = rxbufq->q_vector->vport->netdev->dev.parent; int err = 0; /* Allocate book keeping buffers */ @@ -620,48 +754,41 @@ static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq) goto rx_buf_alloc_all_out; } - if (rxbufq->rx_hsplit_en) { + if (idpf_queue_has(HSPLIT_EN, rxbufq)) { err = idpf_rx_hdr_buf_alloc_all(rxbufq); if (err) goto rx_buf_alloc_all_out; } /* Allocate buffers to be given to HW. */ - if (idpf_is_queue_model_split(rxbufq->vport->rxq_model)) { - int working_set = IDPF_RX_BUFQ_WORKING_SET(rxbufq); - - if (!idpf_rx_post_init_bufs(rxbufq, working_set)) - err = -ENOMEM; - } else { - if (idpf_rx_singleq_buf_hw_alloc_all(rxbufq, - rxbufq->desc_count - 1)) - err = -ENOMEM; - } + if (!idpf_rx_post_init_bufs(rxbufq, IDPF_RX_BUFQ_WORKING_SET(rxbufq))) + err = -ENOMEM; rx_buf_alloc_all_out: if (err) - idpf_rx_buf_rel_all(rxbufq); + idpf_rx_buf_rel_bufq(rxbufq, dev); return err; } /** * idpf_rx_bufs_init - Initialize page pool, allocate rx bufs, and post to HW - * @rxbufq: RX queue to create page pool for + * @bufq: buffer queue to create page pool for * * Returns 0 on success, negative on failure */ -static int idpf_rx_bufs_init(struct idpf_queue *rxbufq) +static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq) { struct page_pool *pool; - pool = idpf_rx_create_page_pool(rxbufq); + pool = idpf_rx_create_page_pool(&bufq->q_vector->napi, + bufq->desc_count); if (IS_ERR(pool)) return PTR_ERR(pool); - rxbufq->pp = pool; + bufq->pp = pool; - return idpf_rx_buf_alloc_all(rxbufq); + return idpf_rx_buf_alloc_all(bufq); } /** @@ -673,7 +800,6 @@ static int idpf_rx_bufs_init(struct idpf_queue *rxbufq) int idpf_rx_bufs_init_all(struct idpf_vport *vport) { struct idpf_rxq_group *rx_qgrp; - struct idpf_queue *q; int i, j, err; for (i = 0; i < vport->num_rxq_grp; i++) { @@ -684,8 +810,10 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) int num_rxq = rx_qgrp->singleq.num_rxq; for (j = 0; j < num_rxq; j++) { + struct idpf_rx_queue *q; + q = rx_qgrp->singleq.rxqs[j]; - err = idpf_rx_bufs_init(q); + err = idpf_rx_bufs_init_singleq(q); if (err) return err; } @@ -695,6 +823,8 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) /* Otherwise, allocate bufs for the buffer queues */ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_buf_queue *q; + q = &rx_qgrp->splitq.bufq_sets[j].bufq; err = idpf_rx_bufs_init(q); if (err) @@ -707,22 +837,17 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) /** * idpf_rx_desc_alloc - Allocate queue Rx resources + * @vport: vport to allocate resources for * @rxq: Rx queue for which the resources are setup - * @bufq: buffer or completion queue - * @q_model: single or split queue model * * Returns 0 on success, negative on failure */ -static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) +static int 
idpf_rx_desc_alloc(const struct idpf_vport *vport, + struct idpf_rx_queue *rxq) { - struct device *dev = rxq->dev; + struct device *dev = &vport->adapter->pdev->dev; - if (bufq) - rxq->size = rxq->desc_count * - sizeof(struct virtchnl2_splitq_rx_buf_desc); - else - rxq->size = rxq->desc_count * - sizeof(union virtchnl2_rx_desc); + rxq->size = rxq->desc_count * sizeof(union virtchnl2_rx_desc); /* Allocate descriptors and also round up to nearest 4K */ rxq->size = ALIGN(rxq->size, 4096); @@ -737,7 +862,35 @@ static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) rxq->next_to_alloc = 0; rxq->next_to_clean = 0; rxq->next_to_use = 0; - set_bit(__IDPF_Q_GEN_CHK, rxq->flags); + idpf_queue_set(GEN_CHK, rxq); + + return 0; +} + +/** + * idpf_bufq_desc_alloc - Allocate buffer queue descriptor ring + * @vport: vport to allocate resources for + * @bufq: buffer queue for which the resources are set up + * + * Return: 0 on success, -ENOMEM on failure. + */ +static int idpf_bufq_desc_alloc(const struct idpf_vport *vport, + struct idpf_buf_queue *bufq) +{ + struct device *dev = &vport->adapter->pdev->dev; + + bufq->size = array_size(bufq->desc_count, sizeof(*bufq->split_buf)); + + bufq->split_buf = dma_alloc_coherent(dev, bufq->size, &bufq->dma, + GFP_KERNEL); + if (!bufq->split_buf) + return -ENOMEM; + + bufq->next_to_alloc = 0; + bufq->next_to_clean = 0; + bufq->next_to_use = 0; + + idpf_queue_set(GEN_CHK, bufq); return 0; } @@ -750,9 +903,7 @@ static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) */ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) { - struct device *dev = &vport->adapter->pdev->dev; struct idpf_rxq_group *rx_qgrp; - struct idpf_queue *q; int i, j, err; u16 num_rxq; @@ -764,13 +915,17 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) num_rxq = rx_qgrp->singleq.num_rxq; for (j = 0; j < num_rxq; j++) { + struct idpf_rx_queue *q; + if (idpf_is_queue_model_split(vport->rxq_model)) q = &rx_qgrp->splitq.rxq_sets[j]->rxq; else q = rx_qgrp->singleq.rxqs[j]; - err = idpf_rx_desc_alloc(q, false, vport->rxq_model); + + err = idpf_rx_desc_alloc(vport, q); if (err) { - dev_err(dev, "Memory allocation for Rx Queue %u failed\n", + pci_err(vport->adapter->pdev, + "Memory allocation for Rx Queue %u failed\n", i); goto err_out; } @@ -780,10 +935,14 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) continue; for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_buf_queue *q; + q = &rx_qgrp->splitq.bufq_sets[j].bufq; - err = idpf_rx_desc_alloc(q, true, vport->rxq_model); + + err = idpf_bufq_desc_alloc(vport, q); if (err) { - dev_err(dev, "Memory allocation for Rx Buffer Queue %u failed\n", + pci_err(vport->adapter->pdev, + "Memory allocation for Rx Buffer Queue %u failed\n", i); goto err_out; } @@ -804,11 +963,16 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) */ static void idpf_txq_group_rel(struct idpf_vport *vport) { + bool split, flow_sch_en; int i, j; if (!vport->txq_grps) return; + split = idpf_is_queue_model_split(vport->txq_model); + flow_sch_en = !idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_SPLITQ_QSCHED); + for (i = 0; i < vport->num_txq_grp; i++) { struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; @@ -816,8 +980,15 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) kfree(txq_grp->txqs[j]); txq_grp->txqs[j] = NULL; } + + if (!split) + continue; + kfree(txq_grp->complq); txq_grp->complq = NULL; + + if (flow_sch_en) + kfree(txq_grp->stashes); } 
kfree(vport->txq_grps); vport->txq_grps = NULL; @@ -921,7 +1092,7 @@ static int idpf_vport_init_fast_path_txqs(struct idpf_vport *vport) { int i, j, k = 0; - vport->txqs = kcalloc(vport->num_txq, sizeof(struct idpf_queue *), + vport->txqs = kcalloc(vport->num_txq, sizeof(*vport->txqs), GFP_KERNEL); if (!vport->txqs) @@ -1139,9 +1310,10 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, * @q: rx queue for which descids are set * */ -static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q) +static void idpf_rxq_set_descids(const struct idpf_vport *vport, + struct idpf_rx_queue *q) { - if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + if (idpf_is_queue_model_split(vport->rxq_model)) { q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; } else { if (vport->base_rxd) @@ -1160,20 +1332,22 @@ static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q) */ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) { - bool flow_sch_en; - int err, i; + bool split, flow_sch_en; + int i; vport->txq_grps = kcalloc(vport->num_txq_grp, sizeof(*vport->txq_grps), GFP_KERNEL); if (!vport->txq_grps) return -ENOMEM; + split = idpf_is_queue_model_split(vport->txq_model); flow_sch_en = !idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_SPLITQ_QSCHED); for (i = 0; i < vport->num_txq_grp; i++) { struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; struct idpf_adapter *adapter = vport->adapter; + struct idpf_txq_stash *stashes; int j; tx_qgrp->vport = vport; @@ -1182,45 +1356,62 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) for (j = 0; j < tx_qgrp->num_txq; j++) { tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]), GFP_KERNEL); - if (!tx_qgrp->txqs[j]) { - err = -ENOMEM; + if (!tx_qgrp->txqs[j]) goto err_alloc; - } + } + + if (split && flow_sch_en) { + stashes = kcalloc(num_txq, sizeof(*stashes), + GFP_KERNEL); + if (!stashes) + goto err_alloc; + + tx_qgrp->stashes = stashes; } for (j = 0; j < tx_qgrp->num_txq; j++) { - struct idpf_queue *q = tx_qgrp->txqs[j]; + struct idpf_tx_queue *q = tx_qgrp->txqs[j]; q->dev = &adapter->pdev->dev; q->desc_count = vport->txq_desc_count; q->tx_max_bufs = idpf_get_max_tx_bufs(adapter); q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter); - q->vport = vport; + q->netdev = vport->netdev; q->txq_grp = tx_qgrp; - hash_init(q->sched_buf_hash); - if (flow_sch_en) - set_bit(__IDPF_Q_FLOW_SCH_EN, q->flags); + if (!split) { + q->clean_budget = vport->compln_clean_budget; + idpf_queue_assign(CRC_EN, q, + vport->crc_enable); + } + + if (!flow_sch_en) + continue; + + if (split) { + q->stash = &stashes[j]; + hash_init(q->stash->sched_buf_hash); + } + + idpf_queue_set(FLOW_SCH_EN, q); } - if (!idpf_is_queue_model_split(vport->txq_model)) + if (!split) continue; tx_qgrp->complq = kcalloc(IDPF_COMPLQ_PER_GROUP, sizeof(*tx_qgrp->complq), GFP_KERNEL); - if (!tx_qgrp->complq) { - err = -ENOMEM; + if (!tx_qgrp->complq) goto err_alloc; - } - tx_qgrp->complq->dev = &adapter->pdev->dev; tx_qgrp->complq->desc_count = vport->complq_desc_count; - tx_qgrp->complq->vport = vport; tx_qgrp->complq->txq_grp = tx_qgrp; + tx_qgrp->complq->netdev = vport->netdev; + tx_qgrp->complq->clean_budget = vport->compln_clean_budget; if (flow_sch_en) - __set_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags); + idpf_queue_set(FLOW_SCH_EN, tx_qgrp->complq); } return 0; @@ -1228,7 +1419,7 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) err_alloc: idpf_txq_group_rel(vport); - return 
err; + return -ENOMEM; } /** @@ -1240,8 +1431,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) */ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) { - struct idpf_adapter *adapter = vport->adapter; - struct idpf_queue *q; int i, k, err = 0; bool hs; @@ -1294,21 +1483,15 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[j]; int swq_size = sizeof(struct idpf_sw_queue); + struct idpf_buf_queue *q; q = &rx_qgrp->splitq.bufq_sets[j].bufq; - q->dev = &adapter->pdev->dev; q->desc_count = vport->bufq_desc_count[j]; - q->vport = vport; - q->rxq_grp = rx_qgrp; - q->idx = j; q->rx_buf_size = vport->bufq_size[j]; q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; - q->rx_buf_stride = IDPF_RX_BUF_STRIDE; - if (hs) { - q->rx_hsplit_en = true; - q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; - } + idpf_queue_assign(HSPLIT_EN, q, hs); + q->rx_hbuf_size = hs ? IDPF_HDR_BUF_SIZE : 0; bufq_set->num_refillqs = num_rxq; bufq_set->refillqs = kcalloc(num_rxq, swq_size, @@ -1321,13 +1504,12 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) struct idpf_sw_queue *refillq = &bufq_set->refillqs[k]; - refillq->dev = &vport->adapter->pdev->dev; refillq->desc_count = vport->bufq_desc_count[j]; - set_bit(__IDPF_Q_GEN_CHK, refillq->flags); - set_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); + idpf_queue_set(GEN_CHK, refillq); + idpf_queue_set(RFL_GEN_CHK, refillq); refillq->ring = kcalloc(refillq->desc_count, - sizeof(u16), + sizeof(*refillq->ring), GFP_KERNEL); if (!refillq->ring) { err = -ENOMEM; @@ -1338,27 +1520,27 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) skip_splitq_rx_init: for (j = 0; j < num_rxq; j++) { + struct idpf_rx_queue *q; + if (!idpf_is_queue_model_split(vport->rxq_model)) { q = rx_qgrp->singleq.rxqs[j]; goto setup_rxq; } q = &rx_qgrp->splitq.rxq_sets[j]->rxq; - rx_qgrp->splitq.rxq_sets[j]->refillq0 = + rx_qgrp->splitq.rxq_sets[j]->refillq[0] = &rx_qgrp->splitq.bufq_sets[0].refillqs[j]; if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) - rx_qgrp->splitq.rxq_sets[j]->refillq1 = + rx_qgrp->splitq.rxq_sets[j]->refillq[1] = &rx_qgrp->splitq.bufq_sets[1].refillqs[j]; - if (hs) { - q->rx_hsplit_en = true; - q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; - } + idpf_queue_assign(HSPLIT_EN, q, hs); + q->rx_hbuf_size = hs ? IDPF_HDR_BUF_SIZE : 0; setup_rxq: - q->dev = &adapter->pdev->dev; q->desc_count = vport->rxq_desc_count; - q->vport = vport; - q->rxq_grp = rx_qgrp; + q->rx_ptype_lkup = vport->rx_ptype_lkup; + q->netdev = vport->netdev; + q->bufq_sets = rx_qgrp->splitq.bufq_sets; q->idx = (i * num_rxq) + j; /* In splitq mode, RXQ buffer size should be * set to that of the first buffer queue @@ -1447,12 +1629,13 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport) * idpf_tx_handle_sw_marker - Handle queue marker packet * @tx_q: tx queue to handle software marker */ -static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) +static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q) { - struct idpf_vport *vport = tx_q->vport; + struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev); + struct idpf_vport *vport = priv->vport; int i; - clear_bit(__IDPF_Q_SW_MARKER, tx_q->flags); + idpf_queue_clear(SW_MARKER, tx_q); /* Hardware must write marker packets to all queues associated with * completion queues. 
So check if all queues received marker packets */ @@ -1460,7 +1643,7 @@ static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) /* If we're still waiting on any other TXQ marker completions, * just return now since we cannot wake up the marker_wq yet. */ - if (test_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags)) + if (idpf_queue_has(SW_MARKER, vport->txqs[i])) return; /* Drain complete */ @@ -1476,7 +1659,7 @@ static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) * @cleaned: pointer to stats struct to track cleaned packets/bytes * @napi_budget: Used to determine if we are in netpoll */ -static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q, +static void idpf_tx_splitq_clean_hdr(struct idpf_tx_queue *tx_q, struct idpf_tx_buf *tx_buf, struct idpf_cleaned_stats *cleaned, int napi_budget) @@ -1507,7 +1690,8 @@ static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q, * @cleaned: pointer to stats struct to track cleaned packets/bytes * @budget: Used to determine if we are in netpoll */ -static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, +static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, + u16 compl_tag, struct idpf_cleaned_stats *cleaned, int budget) { @@ -1515,7 +1699,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, struct hlist_node *tmp_buf; /* Buffer completion */ - hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf, + hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, hlist, compl_tag) { if (unlikely(stash->buf.compl_tag != (int)compl_tag)) continue; @@ -1532,7 +1716,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, } /* Push shadow buf back onto stack */ - idpf_buf_lifo_push(&txq->buf_stack, stash); + idpf_buf_lifo_push(&txq->stash->buf_stack, stash); hash_del(&stash->hlist); } @@ -1544,7 +1728,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, * @txq: Tx queue to clean * @tx_buf: buffer to store */ -static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, +static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, struct idpf_tx_buf *tx_buf) { struct idpf_tx_stash *stash; @@ -1553,10 +1737,10 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, !dma_unmap_len(tx_buf, len))) return 0; - stash = idpf_buf_lifo_pop(&txq->buf_stack); + stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); if (unlikely(!stash)) { net_err_ratelimited("%s: No out-of-order TX buffers left!\n", - txq->vport->netdev->name); + netdev_name(txq->netdev)); return -ENOMEM; } @@ -1570,7 +1754,8 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, stash->buf.compl_tag = tx_buf->compl_tag; /* Add buffer to buf_hash table to be freed later */ - hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag); + hash_add(txq->stash->sched_buf_hash, &stash->hlist, + stash->buf.compl_tag); memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); @@ -1609,7 +1794,7 @@ do { \ * and the buffers will be cleaned separately. The stats are not updated from * this function when using flow-based scheduling. */ -static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, +static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, int napi_budget, struct idpf_cleaned_stats *cleaned, bool descs_only) @@ -1705,7 +1890,7 @@ do { \ * stashed. Returns the byte/segment count for the cleaned packet associated * this completion tag. 
*/ -static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, +static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, struct idpf_cleaned_stats *cleaned, int budget) { @@ -1774,14 +1959,14 @@ static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, * * Returns bytes/packets cleaned */ -static void idpf_tx_handle_rs_completion(struct idpf_queue *txq, +static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, struct idpf_splitq_tx_compl_desc *desc, struct idpf_cleaned_stats *cleaned, int budget) { u16 compl_tag; - if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) { + if (!idpf_queue_has(FLOW_SCH_EN, txq)) { u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); return idpf_tx_splitq_clean(txq, head, budget, cleaned, false); @@ -1804,24 +1989,23 @@ static void idpf_tx_handle_rs_completion(struct idpf_queue *txq, * * Returns true if there's any budget left (e.g. the clean is finished) */ -static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, +static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, int *cleaned) { struct idpf_splitq_tx_compl_desc *tx_desc; - struct idpf_vport *vport = complq->vport; s16 ntc = complq->next_to_clean; struct idpf_netdev_priv *np; unsigned int complq_budget; bool complq_ok = true; int i; - complq_budget = vport->compln_clean_budget; + complq_budget = complq->clean_budget; tx_desc = &complq->comp[ntc]; ntc -= complq->desc_count; do { struct idpf_cleaned_stats cleaned_stats = { }; - struct idpf_queue *tx_q; + struct idpf_tx_queue *tx_q; int rel_tx_qid; u16 hw_head; u8 ctype; /* completion type */ @@ -1830,7 +2014,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, /* if the descriptor isn't done, no work yet to do */ gen = le16_get_bits(tx_desc->qid_comptype_gen, IDPF_TXD_COMPLQ_GEN_M); - if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen) + if (idpf_queue_has(GEN_CHK, complq) != gen) break; /* Find necessary info of TX queue to clean buffers */ @@ -1838,8 +2022,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, IDPF_TXD_COMPLQ_QID_M); if (rel_tx_qid >= complq->txq_grp->num_txq || !complq->txq_grp->txqs[rel_tx_qid]) { - dev_err(&complq->vport->adapter->pdev->dev, - "TxQ not found\n"); + netdev_err(complq->netdev, "TxQ not found\n"); goto fetch_next_desc; } tx_q = complq->txq_grp->txqs[rel_tx_qid]; @@ -1862,15 +2045,14 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, idpf_tx_handle_sw_marker(tx_q); break; default: - dev_err(&tx_q->vport->adapter->pdev->dev, - "Unknown TX completion type: %d\n", - ctype); + netdev_err(tx_q->netdev, + "Unknown TX completion type: %d\n", ctype); goto fetch_next_desc; } u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_add(&tx_q->q_stats.tx.packets, cleaned_stats.packets); - u64_stats_add(&tx_q->q_stats.tx.bytes, cleaned_stats.bytes); + u64_stats_add(&tx_q->q_stats.packets, cleaned_stats.packets); + u64_stats_add(&tx_q->q_stats.bytes, cleaned_stats.bytes); tx_q->cleaned_pkts += cleaned_stats.packets; tx_q->cleaned_bytes += cleaned_stats.bytes; complq->num_completions++; @@ -1882,7 +2064,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, if (unlikely(!ntc)) { ntc -= complq->desc_count; tx_desc = &complq->comp[0]; - change_bit(__IDPF_Q_GEN_CHK, complq->flags); + idpf_queue_change(GEN_CHK, complq); } prefetch(tx_desc); @@ -1898,9 +2080,9 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, 
IDPF_TX_COMPLQ_OVERFLOW_THRESH(complq))) complq_ok = false; - np = netdev_priv(complq->vport->netdev); + np = netdev_priv(complq->netdev); for (i = 0; i < complq->txq_grp->num_txq; ++i) { - struct idpf_queue *tx_q = complq->txq_grp->txqs[i]; + struct idpf_tx_queue *tx_q = complq->txq_grp->txqs[i]; struct netdev_queue *nq; bool dont_wake; @@ -1911,11 +2093,11 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, *cleaned += tx_q->cleaned_pkts; /* Update BQL */ - nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || np->state != __IDPF_VPORT_UP || - !netif_carrier_ok(tx_q->vport->netdev); + !netif_carrier_ok(tx_q->netdev); /* Check if the TXQ needs to and can be restarted */ __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, @@ -1971,27 +2153,17 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } -/** - * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions - * @tx_q: the queue to be checked - * @size: number of descriptors we want to assure is available - * - * Returns 0 if stop is not needed +/* Global conditions to tell whether the txq (and related resources) + * has room to allow the use of "size" descriptors. */ -int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) +static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) { - struct netdev_queue *nq; - - if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) + if (IDPF_DESC_UNUSED(tx_q) < size || + IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > + IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || + IDPF_TX_BUF_RSV_LOW(tx_q)) return 0; - - u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.tx.q_busy); - u64_stats_update_end(&tx_q->stats_sync); - - nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); - - return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); + return 1; } /** @@ -2001,33 +2173,17 @@ int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) * * Returns 0 if stop is not needed */ -static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, +static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, unsigned int descs_needed) { - if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) - goto splitq_stop; - - /* If there are too many outstanding completions expected on the - * completion queue, stop the TX queue to give the device some time to - * catch up - */ - if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > - IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq))) - goto splitq_stop; - - /* Also check for available book keeping buffers; if we are low, stop - * the queue to wait for more completions - */ - if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q))) - goto splitq_stop; - - return 0; + if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + idpf_txq_has_room(tx_q, descs_needed), + 1, 1)) + return 0; -splitq_stop: u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.tx.q_busy); + u64_stats_inc(&tx_q->q_stats.q_busy); u64_stats_update_end(&tx_q->stats_sync); - netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx); return -EBUSY; } @@ -2042,16 +2198,14 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, * to do a register write to update our queue status. 
We know this can only * mean tail here as HW should be owning head for TX. */ -void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, +void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more) { struct netdev_queue *nq; - nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); tx_q->next_to_use = val; - idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -2071,7 +2225,7 @@ void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, * * Returns number of data descriptors needed for this skb. */ -unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, +unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, struct sk_buff *skb) { const struct skb_shared_info *shinfo; @@ -2104,7 +2258,7 @@ unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, count = idpf_size_to_txd_count(skb->len); u64_stats_update_begin(&txq->stats_sync); - u64_stats_inc(&txq->q_stats.tx.linearize); + u64_stats_inc(&txq->q_stats.linearize); u64_stats_update_end(&txq->stats_sync); } @@ -2118,11 +2272,11 @@ unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, * @first: original first buffer info buffer for packet * @idx: starting point on ring to unwind */ -void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, +void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, struct idpf_tx_buf *first, u16 idx) { u64_stats_update_begin(&txq->stats_sync); - u64_stats_inc(&txq->q_stats.tx.dma_map_errs); + u64_stats_inc(&txq->q_stats.dma_map_errs); u64_stats_update_end(&txq->stats_sync); /* clear dma mappings for failed tx_buf map */ @@ -2161,7 +2315,7 @@ void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, * @txq: the tx ring to wrap * @ntu: ring index to bump */ -static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu) +static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) { ntu++; @@ -2183,7 +2337,7 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu) * and gets a physical address for each memory location and programs * it and the length into the transmit flex descriptor. */ -static void idpf_tx_splitq_map(struct idpf_queue *tx_q, +static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, struct idpf_tx_splitq_params *params, struct idpf_tx_buf *first) { @@ -2350,7 +2504,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, tx_q->txq_grp->num_completions_pending++; /* record bytecount for BQL */ - nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); netdev_tx_sent_queue(nq, first->bytecount); idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); @@ -2527,8 +2681,8 @@ static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO * header, 1 for segment payload, and then 7 for the fragments. 
*/ -bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count) +static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count) { if (likely(count < max_bufs)) return false; @@ -2546,7 +2700,7 @@ bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, * ring entry to reflect that this index is a context descriptor */ static struct idpf_flex_tx_ctx_desc * -idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) +idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq) { struct idpf_flex_tx_ctx_desc *desc; int i = txq->next_to_use; @@ -2566,10 +2720,10 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) * @tx_q: queue to send buffer on * @skb: pointer to skb */ -netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb) +netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb) { u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.tx.skb_drops); + u64_stats_inc(&tx_q->q_stats.skb_drops); u64_stats_update_end(&tx_q->stats_sync); idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); @@ -2587,7 +2741,7 @@ netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb) * Returns NETDEV_TX_OK if sent, else an error code */ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, - struct idpf_queue *tx_q) + struct idpf_tx_queue *tx_q) { struct idpf_tx_splitq_params tx_params = { }; struct idpf_tx_buf *first; @@ -2627,7 +2781,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, ctx_desc->tso.qw0.hdr_len = tx_params.offload.tso_hdr_len; u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.tx.lso_pkts); + u64_stats_inc(&tx_q->q_stats.lso_pkts); u64_stats_update_end(&tx_q->stats_sync); } @@ -2644,7 +2798,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); } - if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_q->flags)) { + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; /* Set the RE bit to catch any packets that may have not been @@ -2674,17 +2828,16 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, } /** - * idpf_tx_splitq_start - Selects the right Tx queue to send buffer + * idpf_tx_start - Selects the right Tx queue to send buffer * @skb: send buffer * @netdev: network interface device structure * * Returns NETDEV_TX_OK if sent, else an error code */ -netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, - struct net_device *netdev) +netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev) { struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_queue *tx_q; + struct idpf_tx_queue *tx_q; if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) { dev_kfree_skb_any(skb); @@ -2703,7 +2856,10 @@ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, return NETDEV_TX_OK; } - return idpf_tx_splitq_frame(skb, tx_q); + if (idpf_is_queue_model_split(vport->txq_model)) + return idpf_tx_splitq_frame(skb, tx_q); + else + return idpf_tx_singleq_frame(skb, tx_q); } /** @@ -2737,13 +2893,14 @@ enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *deco * @rx_desc: Receive descriptor * @decoded: Decoded Rx packet type related fields */ -static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb, - struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, - struct idpf_rx_ptype_decoded *decoded) +static void 
+idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_ptype_decoded *decoded) { u32 hash; - if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXHASH))) + if (unlikely(!(rxq->netdev->features & NETIF_F_RXHASH))) return; hash = le16_to_cpu(rx_desc->hash1) | @@ -2762,14 +2919,14 @@ static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb, * * skb->protocol must be set before this function is called */ -static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, +static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, struct idpf_rx_csum_decoded *csum_bits, struct idpf_rx_ptype_decoded *decoded) { bool ipv4, ipv6; /* check if Rx checksum is enabled */ - if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXCSUM))) + if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM))) return; /* check if HW has decoded the packet and checksum */ @@ -2816,7 +2973,7 @@ static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, checksum_fail: u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); + u64_stats_inc(&rxq->q_stats.hw_csum_err); u64_stats_update_end(&rxq->stats_sync); } @@ -2826,8 +2983,9 @@ static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, * @csum: structure to extract checksum fields * **/ -static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, - struct idpf_rx_csum_decoded *csum) +static void +idpf_rx_splitq_extract_csum_bits(const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_csum_decoded *csum) { u8 qword0, qword1; @@ -2862,8 +3020,8 @@ static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_n * Populate the skb fields with the total number of RSC segments, RSC payload * length and packet type. */ -static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, - struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, +static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, struct idpf_rx_ptype_decoded *decoded) { u16 rsc_segments, rsc_seg_len; @@ -2915,7 +3073,7 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, tcp_gro_complete(skb); u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.rx.rsc_pkts); + u64_stats_inc(&rxq->q_stats.rsc_pkts); u64_stats_update_end(&rxq->stats_sync); return 0; @@ -2931,9 +3089,9 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, * order to populate the hash, checksum, protocol, and * other fields within the skb. */ -static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, - struct sk_buff *skb, - struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +static int +idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) { struct idpf_rx_csum_decoded csum_bits = { }; struct idpf_rx_ptype_decoded decoded; @@ -2941,19 +3099,13 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0, VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M); - - skb->protocol = eth_type_trans(skb, rxq->vport->netdev); - - decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; - /* If we don't know the ptype we can't do anything else with it. Just - * pass it up the stack as-is. 
- */ - if (!decoded.known) - return 0; + decoded = rxq->rx_ptype_lkup[rx_ptype]; /* process RSS/hash */ idpf_rx_hash(rxq, skb, rx_desc, &decoded); + skb->protocol = eth_type_trans(skb, rxq->netdev); + if (le16_get_bits(rx_desc->hdrlen_flags, VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) return idpf_rx_rsc(rxq, skb, rx_desc, &decoded); @@ -2993,7 +3145,7 @@ void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, * data from the current receive descriptor, taking care to set up the * skb correctly. */ -struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, +struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq, struct idpf_rx_buf *rx_buf, unsigned int size) { @@ -3006,8 +3158,7 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, /* prefetch first cache line of first page */ net_prefetch(va); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE, - GFP_ATOMIC); + skb = napi_alloc_skb(rxq->napi, IDPF_RX_HDR_SIZE); if (unlikely(!skb)) { idpf_rx_put_page(rx_buf); @@ -3054,14 +3205,14 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, * the current receive descriptor, taking care to set up the skb correctly. * This specifically uses a header buffer to start building the skb. */ -static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq, - const void *va, - unsigned int size) +static struct sk_buff * +idpf_rx_hdr_construct_skb(const struct idpf_rx_queue *rxq, const void *va, + unsigned int size) { struct sk_buff *skb; /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC); + skb = napi_alloc_skb(rxq->napi, size); if (unlikely(!skb)) return NULL; @@ -3116,10 +3267,10 @@ static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_de * * Returns amount of work completed */ -static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) +static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) { int total_rx_bytes = 0, total_rx_pkts = 0; - struct idpf_queue *rx_bufq = NULL; + struct idpf_buf_queue *rx_bufq = NULL; struct sk_buff *skb = rxq->skb; u16 ntc = rxq->next_to_clean; @@ -3149,7 +3300,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M); - if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id) + if (idpf_queue_has(GEN_CHK, rxq) != gen_id) break; rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M, @@ -3157,7 +3308,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) { IDPF_RX_BUMP_NTC(rxq, ntc); u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.rx.bad_descs); + u64_stats_inc(&rxq->q_stats.bad_descs); u64_stats_update_end(&rxq->stats_sync); continue; } @@ -3175,7 +3326,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) * data/payload buffer. 
*/ u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.rx.hsplit_buf_ovf); + u64_stats_inc(&rxq->q_stats.hsplit_buf_ovf); u64_stats_update_end(&rxq->stats_sync); goto bypass_hsplit; } @@ -3188,13 +3339,10 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M); rxq_set = container_of(rxq, struct idpf_rxq_set, rxq); - if (!bufq_id) - refillq = rxq_set->refillq0; - else - refillq = rxq_set->refillq1; + refillq = rxq_set->refillq[bufq_id]; /* retrieve buffer from the rxq */ - rx_bufq = &rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq; + rx_bufq = &rxq->bufq_sets[bufq_id].bufq; buf_id = le16_to_cpu(rx_desc->buf_id); @@ -3206,7 +3354,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) skb = idpf_rx_hdr_construct_skb(rxq, va, hdr_len); u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.rx.hsplit_pkts); + u64_stats_inc(&rxq->q_stats.hsplit_pkts); u64_stats_update_end(&rxq->stats_sync); } @@ -3251,7 +3399,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) } /* send completed skb up the stack */ - napi_gro_receive(&rxq->q_vector->napi, skb); + napi_gro_receive(rxq->napi, skb); skb = NULL; /* update budget accounting */ @@ -3262,8 +3410,8 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) rxq->skb = skb; u64_stats_update_begin(&rxq->stats_sync); - u64_stats_add(&rxq->q_stats.rx.packets, total_rx_pkts); - u64_stats_add(&rxq->q_stats.rx.bytes, total_rx_bytes); + u64_stats_add(&rxq->q_stats.packets, total_rx_pkts); + u64_stats_add(&rxq->q_stats.bytes, total_rx_bytes); u64_stats_update_end(&rxq->stats_sync); /* guarantee a trip back through this routine if there was a failure */ @@ -3273,19 +3421,16 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) /** * idpf_rx_update_bufq_desc - Update buffer queue descriptor * @bufq: Pointer to the buffer queue - * @refill_desc: SW Refill queue descriptor containing buffer ID + * @buf_id: buffer ID * @buf_desc: Buffer queue descriptor * * Return 0 on success and negative on failure. 
*/ -static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, +static int idpf_rx_update_bufq_desc(struct idpf_buf_queue *bufq, u32 buf_id, struct virtchnl2_splitq_rx_buf_desc *buf_desc) { struct idpf_rx_buf *buf; dma_addr_t addr; - u16 buf_id; - - buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); buf = &bufq->rx_buf.buf[buf_id]; @@ -3296,7 +3441,7 @@ static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, buf_desc->pkt_addr = cpu_to_le64(addr); buf_desc->qword0.buf_id = cpu_to_le16(buf_id); - if (!bufq->rx_hsplit_en) + if (!idpf_queue_has(HSPLIT_EN, bufq)) return 0; buf_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa + @@ -3312,33 +3457,32 @@ static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, * * This function takes care of the buffer refill management */ -static void idpf_rx_clean_refillq(struct idpf_queue *bufq, +static void idpf_rx_clean_refillq(struct idpf_buf_queue *bufq, struct idpf_sw_queue *refillq) { struct virtchnl2_splitq_rx_buf_desc *buf_desc; u16 bufq_nta = bufq->next_to_alloc; u16 ntc = refillq->next_to_clean; int cleaned = 0; - u16 gen; buf_desc = &bufq->split_buf[bufq_nta]; /* make sure we stop at ring wrap in the unlikely case ring is full */ while (likely(cleaned < refillq->desc_count)) { - u16 refill_desc = refillq->ring[ntc]; + u32 buf_id, refill_desc = refillq->ring[ntc]; bool failure; - gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc); - if (test_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags) != gen) + if (idpf_queue_has(RFL_GEN_CHK, refillq) != + !!(refill_desc & IDPF_RX_BI_GEN_M)) break; - failure = idpf_rx_update_bufq_desc(bufq, refill_desc, - buf_desc); + buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); + failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc); if (failure) break; if (unlikely(++ntc == refillq->desc_count)) { - change_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); + idpf_queue_change(RFL_GEN_CHK, refillq); ntc = 0; } @@ -3377,7 +3521,7 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq, * this vector. Returns true if clean is complete within budget, false * otherwise. 
*/ -static void idpf_rx_clean_refillq_all(struct idpf_queue *bufq) +static void idpf_rx_clean_refillq_all(struct idpf_buf_queue *bufq) { struct idpf_bufq_set *bufq_set; int i; @@ -3437,15 +3581,21 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + u32 v_idx; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + kfree(q_vector->complq); + q_vector->complq = NULL; kfree(q_vector->bufq); q_vector->bufq = NULL; kfree(q_vector->tx); q_vector->tx = NULL; kfree(q_vector->rx); q_vector->rx = NULL; + + free_cpumask_var(q_vector->affinity_mask); } kfree(vport->q_vectors); @@ -3474,7 +3624,7 @@ static void idpf_vport_intr_rel_irq(struct idpf_vport *vport) /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(irq_num, NULL); - free_irq(irq_num, q_vector); + kfree(free_irq(irq_num, q_vector)); } } @@ -3568,13 +3718,13 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector) goto check_rx_itr; for (i = 0, packets = 0, bytes = 0; i < q_vector->num_txq; i++) { - struct idpf_queue *txq = q_vector->tx[i]; + struct idpf_tx_queue *txq = q_vector->tx[i]; unsigned int start; do { start = u64_stats_fetch_begin(&txq->stats_sync); - packets += u64_stats_read(&txq->q_stats.tx.packets); - bytes += u64_stats_read(&txq->q_stats.tx.bytes); + packets += u64_stats_read(&txq->q_stats.packets); + bytes += u64_stats_read(&txq->q_stats.bytes); } while (u64_stats_fetch_retry(&txq->stats_sync, start)); } @@ -3587,13 +3737,13 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector) return; for (i = 0, packets = 0, bytes = 0; i < q_vector->num_rxq; i++) { - struct idpf_queue *rxq = q_vector->rx[i]; + struct idpf_rx_queue *rxq = q_vector->rx[i]; unsigned int start; do { start = u64_stats_fetch_begin(&rxq->stats_sync); - packets += u64_stats_read(&rxq->q_stats.rx.packets); - bytes += u64_stats_read(&rxq->q_stats.rx.bytes); + packets += u64_stats_read(&rxq->q_stats.packets); + bytes += u64_stats_read(&rxq->q_stats.bytes); } while (u64_stats_fetch_retry(&rxq->stats_sync, start)); } @@ -3625,16 +3775,19 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /** * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport * @vport: main vport structure - * @basename: name for the vector */ -static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +static int idpf_vport_intr_req_irq(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + const char *drv_name, *if_name, *vec_name; int vector, err, irq_num, vidx; - const char *vec_name; + + drv_name = dev_driver_string(&adapter->pdev->dev); + if_name = netdev_name(vport->netdev); for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; + char *name; vidx = vport->q_vector_idxs[vector]; irq_num = adapter->msix_entries[vidx].vector; @@ -3648,18 +3801,18 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue; - q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", - basename, vec_name, vidx); + name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, + vec_name, vidx); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, - q_vector->name, q_vector); + name, q_vector); if (err) { netdev_err(vport->netdev, "Request_irq failed, error: %d\n", err); goto free_q_irqs; } /* assign the 
mask for this irq */ - irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); + irq_set_affinity_hint(irq_num, q_vector->affinity_mask); } return 0; @@ -3668,7 +3821,7 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) while (--vector >= 0) { vidx = vport->q_vector_idxs[vector]; irq_num = adapter->msix_entries[vidx].vector; - free_irq(irq_num, &vport->q_vectors[vector]); + kfree(free_irq(irq_num, &vport->q_vectors[vector])); } return err; @@ -3835,16 +3988,17 @@ static void idpf_vport_intr_napi_ena_all(struct idpf_vport *vport) static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, int *cleaned) { - u16 num_txq = q_vec->num_txq; + u16 num_complq = q_vec->num_complq; bool clean_complete = true; int i, budget_per_q; - if (unlikely(!num_txq)) + if (unlikely(!num_complq)) return true; - budget_per_q = DIV_ROUND_UP(budget, num_txq); - for (i = 0; i < num_txq; i++) - clean_complete &= idpf_tx_clean_complq(q_vec->tx[i], + budget_per_q = DIV_ROUND_UP(budget, num_complq); + + for (i = 0; i < num_complq; i++) + clean_complete &= idpf_tx_clean_complq(q_vec->complq[i], budget_per_q, cleaned); return clean_complete; @@ -3871,7 +4025,7 @@ static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, */ budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0; for (i = 0; i < num_rxq; i++) { - struct idpf_queue *rxq = q_vec->rx[i]; + struct idpf_rx_queue *rxq = q_vec->rx[i]; int pkts_cleaned_per_q; pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q); @@ -3916,6 +4070,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that. 
+ */ + if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, + q_vector->tx[0]))) + return budget; + work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might @@ -3926,15 +4088,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) else idpf_vport_intr_set_wb_on_itr(q_vector); - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that - */ - if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE, - q_vector->tx[0]->flags))) - return budget; - else - return work_done; + return work_done; } /** @@ -3945,27 +4099,29 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) */ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) { + bool split = idpf_is_queue_model_split(vport->rxq_model); u16 num_txq_grp = vport->num_txq_grp; - int i, j, qv_idx, bufq_vidx = 0; struct idpf_rxq_group *rx_qgrp; struct idpf_txq_group *tx_qgrp; - struct idpf_queue *q, *bufq; - u16 q_index; + u32 i, qv_idx, q_index; for (i = 0, qv_idx = 0; i < vport->num_rxq_grp; i++) { u16 num_rxq; + u32 j; + + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; rx_qgrp = &vport->rxq_grps[i]; - if (idpf_is_queue_model_split(vport->rxq_model)) + if (split) num_rxq = rx_qgrp->splitq.num_rxq_sets; else num_rxq = rx_qgrp->singleq.num_rxq; for (j = 0; j < num_rxq; j++) { - if (qv_idx >= vport->num_q_vectors) - qv_idx = 0; + struct idpf_rx_queue *q; - if (idpf_is_queue_model_split(vport->rxq_model)) + if (split) q = &rx_qgrp->splitq.rxq_sets[j]->rxq; else q = rx_qgrp->singleq.rxqs[j]; @@ -3973,52 +4129,54 @@ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) q_index = q->q_vector->num_rxq; q->q_vector->rx[q_index] = q; q->q_vector->num_rxq++; - qv_idx++; + + if (split) + q->napi = &q->q_vector->napi; } - if (idpf_is_queue_model_split(vport->rxq_model)) { + if (split) { for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_buf_queue *bufq; + bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; - bufq->q_vector = &vport->q_vectors[bufq_vidx]; + bufq->q_vector = &vport->q_vectors[qv_idx]; q_index = bufq->q_vector->num_bufq; bufq->q_vector->bufq[q_index] = bufq; bufq->q_vector->num_bufq++; } - if (++bufq_vidx >= vport->num_q_vectors) - bufq_vidx = 0; } + + qv_idx++; } + split = idpf_is_queue_model_split(vport->txq_model); + for (i = 0, qv_idx = 0; i < num_txq_grp; i++) { u16 num_txq; + u32 j; + + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; tx_qgrp = &vport->txq_grps[i]; num_txq = tx_qgrp->num_txq; - if (idpf_is_queue_model_split(vport->txq_model)) { - if (qv_idx >= vport->num_q_vectors) - qv_idx = 0; + for (j = 0; j < num_txq; j++) { + struct idpf_tx_queue *q; - q = tx_qgrp->complq; + q = tx_qgrp->txqs[j]; q->q_vector = &vport->q_vectors[qv_idx]; - q_index = q->q_vector->num_txq; - q->q_vector->tx[q_index] = q; - q->q_vector->num_txq++; - qv_idx++; - } else { - for (j = 0; j < num_txq; j++) { - if (qv_idx >= vport->num_q_vectors) - qv_idx = 0; + q->q_vector->tx[q->q_vector->num_txq++] = q; + } - q = tx_qgrp->txqs[j]; - q->q_vector = &vport->q_vectors[qv_idx]; - q_index = q->q_vector->num_txq; - q->q_vector->tx[q_index] = q; - q->q_vector->num_txq++; + if (split) { + struct idpf_compl_queue *q = tx_qgrp->complq; - qv_idx++; - } + q->q_vector = &vport->q_vectors[qv_idx]; + q->q_vector->complq[q->q_vector->num_complq++] = q; } + + qv_idx++; } } @@ -4079,7 +4237,7 @@ static void 
idpf_vport_intr_napi_add_all(struct idpf_vport *vport) /* only set affinity_mask if the CPU is online */ if (cpu_online(v_idx)) - cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + cpumask_set_cpu(v_idx, q_vector->affinity_mask); } } @@ -4094,18 +4252,22 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) { u16 txqs_per_vector, rxqs_per_vector, bufqs_per_vector; struct idpf_q_vector *q_vector; - int v_idx, err; + u32 complqs_per_vector, v_idx; vport->q_vectors = kcalloc(vport->num_q_vectors, sizeof(struct idpf_q_vector), GFP_KERNEL); if (!vport->q_vectors) return -ENOMEM; - txqs_per_vector = DIV_ROUND_UP(vport->num_txq, vport->num_q_vectors); - rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq, vport->num_q_vectors); + txqs_per_vector = DIV_ROUND_UP(vport->num_txq_grp, + vport->num_q_vectors); + rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq_grp, + vport->num_q_vectors); bufqs_per_vector = vport->num_bufqs_per_qgrp * DIV_ROUND_UP(vport->num_rxq_grp, vport->num_q_vectors); + complqs_per_vector = DIV_ROUND_UP(vport->num_txq_grp, + vport->num_q_vectors); for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { q_vector = &vport->q_vectors[v_idx]; @@ -4119,32 +4281,33 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC; q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0; - q_vector->tx = kcalloc(txqs_per_vector, - sizeof(struct idpf_queue *), + if (!zalloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL)) + goto error; + + q_vector->tx = kcalloc(txqs_per_vector, sizeof(*q_vector->tx), GFP_KERNEL); - if (!q_vector->tx) { - err = -ENOMEM; + if (!q_vector->tx) goto error; - } - q_vector->rx = kcalloc(rxqs_per_vector, - sizeof(struct idpf_queue *), + q_vector->rx = kcalloc(rxqs_per_vector, sizeof(*q_vector->rx), GFP_KERNEL); - if (!q_vector->rx) { - err = -ENOMEM; + if (!q_vector->rx) goto error; - } if (!idpf_is_queue_model_split(vport->rxq_model)) continue; q_vector->bufq = kcalloc(bufqs_per_vector, - sizeof(struct idpf_queue *), + sizeof(*q_vector->bufq), GFP_KERNEL); - if (!q_vector->bufq) { - err = -ENOMEM; + if (!q_vector->bufq) + goto error; + + q_vector->complq = kcalloc(complqs_per_vector, + sizeof(*q_vector->complq), + GFP_KERNEL); + if (!q_vector->complq) goto error; - } } return 0; @@ -4152,7 +4315,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) error: idpf_vport_intr_rel(vport); - return err; + return -ENOMEM; } /** @@ -4163,7 +4326,6 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) */ int idpf_vport_intr_init(struct idpf_vport *vport) { - char *int_name; int err; err = idpf_vport_intr_init_vec_idx(vport); @@ -4177,11 +4339,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - int_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_driver_string(&vport->adapter->pdev->dev), - vport->netdev->name); - - err = idpf_vport_intr_req_irq(vport, int_name); + err = idpf_vport_intr_req_irq(vport); if (err) goto unroll_vectors_alloc; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 1669bf01ba1db..f119f240d21cd 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -4,6 +4,8 @@ #ifndef _IDPF_TXRX_H_ #define _IDPF_TXRX_H_ +#include + #include #include #include @@ -84,7 +86,7 @@ do { \ if (unlikely(++(ntc) == (rxq)->desc_count)) { \ ntc = 0; \ - change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags); \ + idpf_queue_change(GEN_CHK, rxq); \ } \ } while (0) @@ -111,10 +113,9 @@ do { \ */ #define 
IDPF_TX_SPLITQ_RE_MIN_GAP 64 -#define IDPF_RX_BI_BUFID_S 0 -#define IDPF_RX_BI_BUFID_M GENMASK(14, 0) -#define IDPF_RX_BI_GEN_S 15 -#define IDPF_RX_BI_GEN_M BIT(IDPF_RX_BI_GEN_S) +#define IDPF_RX_BI_GEN_M BIT(16) +#define IDPF_RX_BI_BUFID_M GENMASK(15, 0) + #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M @@ -122,7 +123,7 @@ do { \ ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) -#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->buf_stack.top) +#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top) #define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ (txq)->desc_count >> 2) @@ -434,23 +435,37 @@ struct idpf_rx_ptype_decoded { * to 1 and knows that reading a gen bit of 1 in any * descriptor on the initial pass of the ring indicates a * writeback. It also flips on every ring wrap. - * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit - * and RFLGQ_GEN is the SW bit. + * @__IDPF_Q_RFL_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW + * bit and Q_RFL_GEN is the SW bit. * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions * @__IDPF_Q_POLL_MODE: Enable poll mode + * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode + * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq) * @__IDPF_Q_FLAGS_NBITS: Must be last */ enum idpf_queue_flags_t { __IDPF_Q_GEN_CHK, - __IDPF_RFLQ_GEN_CHK, + __IDPF_Q_RFL_GEN_CHK, __IDPF_Q_FLOW_SCH_EN, __IDPF_Q_SW_MARKER, __IDPF_Q_POLL_MODE, + __IDPF_Q_CRC_EN, + __IDPF_Q_HSPLIT_EN, __IDPF_Q_FLAGS_NBITS, }; +#define idpf_queue_set(f, q) __set_bit(__IDPF_Q_##f, (q)->flags) +#define idpf_queue_clear(f, q) __clear_bit(__IDPF_Q_##f, (q)->flags) +#define idpf_queue_change(f, q) __change_bit(__IDPF_Q_##f, (q)->flags) +#define idpf_queue_has(f, q) test_bit(__IDPF_Q_##f, (q)->flags) + +#define idpf_queue_has_clear(f, q) \ + __test_and_clear_bit(__IDPF_Q_##f, (q)->flags) +#define idpf_queue_assign(f, q, v) \ + __assign_bit(__IDPF_Q_##f, (q)->flags, v) + /** * struct idpf_vec_regs * @dyn_ctl_reg: Dynamic control interrupt register offset @@ -499,12 +514,13 @@ struct idpf_intr_reg { /** * struct idpf_q_vector * @vport: Vport back pointer - * @affinity_mask: CPU affinity mask * @napi: napi handler * @v_idx: Vector index * @intr_reg: See struct idpf_intr_reg * @num_txq: Number of TX queues + * @num_complq: number of completion queues * @tx: Array of TX queues to service + * @complq: array of completion queues * @tx_dim: Data for TX net_dim algorithm * @tx_itr_value: TX interrupt throttling rate * @tx_intr_mode: Dynamic ITR or not @@ -519,35 +535,38 @@ struct idpf_intr_reg { * @bufq: Array of buffer queues to service * @total_events: Number of interrupts processed * @wb_on_itr: whether WB on ITR is enabled - * @name: Queue vector name + * @affinity_mask: CPU affinity mask */ struct idpf_q_vector { struct idpf_vport *vport; - cpumask_t affinity_mask; struct napi_struct napi; u16 v_idx; struct idpf_intr_reg intr_reg; u16 num_txq; - struct idpf_queue **tx; + u16 num_complq; + struct idpf_tx_queue **tx; + struct idpf_compl_queue **complq; + struct dim tx_dim; u16 tx_itr_value; bool tx_intr_mode; u32 tx_itr_idx; u16 num_rxq; - struct idpf_queue **rx; + struct idpf_rx_queue **rx; struct dim rx_dim; u16 rx_itr_value; bool rx_intr_mode; u32 rx_itr_idx; u16 num_bufq; - struct idpf_queue **bufq; + struct idpf_buf_queue 
**bufq; u16 total_events; bool wb_on_itr; - char *name; + + cpumask_var_t affinity_mask; }; struct idpf_rx_queue_stats { @@ -575,11 +594,6 @@ struct idpf_cleaned_stats { u32 bytes; }; -union idpf_queue_stats { - struct idpf_rx_queue_stats rx; - struct idpf_tx_queue_stats tx; -}; - #define IDPF_ITR_DYNAMIC 1 #define IDPF_ITR_MAX 0x1FE0 #define IDPF_ITR_20K 0x0032 @@ -597,39 +611,114 @@ union idpf_queue_stats { #define IDPF_DIM_DEFAULT_PROFILE_IX 1 /** - * struct idpf_queue - * @dev: Device back pointer for DMA mapping - * @vport: Back pointer to associated vport - * @txq_grp: See struct idpf_txq_group - * @rxq_grp: See struct idpf_rxq_group - * @idx: For buffer queue, it is used as group id, either 0 or 1. On clean, - * buffer queue uses this index to determine which group of refill queues - * to clean. - * For TX queue, it is used as index to map between TX queue group and - * hot path TX pointers stored in vport. Used in both singleq/splitq. - * For RX queue, it is used to index to total RX queue across groups and + * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode + * @buf_stack: Stack of empty buffers to store buffer info for out of order + * buffer completions. See struct idpf_buf_lifo + * @sched_buf_hash: Hash table to store buffers + */ +struct idpf_txq_stash { + struct idpf_buf_lifo buf_stack; + DECLARE_HASHTABLE(sched_buf_hash, 12); +} ____cacheline_aligned; + +/** + * struct idpf_rx_queue - software structure representing a receive queue + * @rx: universal receive descriptor array + * @single_buf: buffer descriptor array in singleq + * @desc_ring: virtual descriptor ring address + * @bufq_sets: Pointer to the array of buffer queues in splitq mode + * @napi: NAPI instance corresponding to this queue (splitq) + * @rx_buf: See struct idpf_rx_buf + * @pp: Page pool pointer in singleq mode + * @netdev: &net_device corresponding to this queue + * @tail: Tail offset. Used for both queue models single and split. + * @flags: See enum idpf_queue_flags_t + * @idx: For RX queue, it is used to index to total RX queue across groups and * used for skb reporting. - * @tail: Tail offset. Used for both queue models single and split. In splitq - * model relevant only for TX queue and RX queue. - * @tx_buf: See struct idpf_tx_buf - * @rx_buf: Struct with RX buffer related members - * @rx_buf.buf: See struct idpf_rx_buf - * @rx_buf.hdr_buf_pa: DMA handle - * @rx_buf.hdr_buf_va: Virtual address - * @pp: Page pool pointer + * @desc_count: Number of descriptors + * @next_to_use: Next descriptor to use + * @next_to_clean: Next descriptor to clean + * @next_to_alloc: RX buffer to allocate at + * @rxdids: Supported RX descriptor ids + * @rx_ptype_lkup: LUT of Rx ptypes * @skb: Pointer to the skb - * @q_type: Queue type (TX, RX, TX completion, RX buffer) + * @stats_sync: See struct u64_stats_sync + * @q_stats: See union idpf_rx_queue_stats * @q_id: Queue id - * @desc_count: Number of descriptors - * @next_to_use: Next descriptor to use. Relevant in both split & single txq - * and bufq. - * @next_to_clean: Next descriptor to clean. In split queue model, only - * relevant to TX completion queue and RX queue. - * @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model - * only relevant to RX queue. 
+ * @size: Length of descriptor ring in bytes + * @dma: Physical address of ring + * @q_vector: Backreference to associated vector + * @rx_buffer_low_watermark: RX buffer low watermark + * @rx_hbuf_size: Header buffer size + * @rx_buf_size: Buffer size + * @rx_max_pkt_size: RX max packet size + */ +struct idpf_rx_queue { + union { + union virtchnl2_rx_desc *rx; + struct virtchnl2_singleq_rx_buf_desc *single_buf; + + void *desc_ring; + }; + union { + struct { + struct idpf_bufq_set *bufq_sets; + struct napi_struct *napi; + }; + struct { + struct idpf_rx_buf *rx_buf; + struct page_pool *pp; + }; + }; + struct net_device *netdev; + void __iomem *tail; + + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + u16 idx; + u16 desc_count; + u16 next_to_use; + u16 next_to_clean; + u16 next_to_alloc; + + u32 rxdids; + + const struct idpf_rx_ptype_decoded *rx_ptype_lkup; + struct sk_buff *skb; + + struct u64_stats_sync stats_sync; + struct idpf_rx_queue_stats q_stats; + + /* Slowpath */ + u32 q_id; + u32 size; + dma_addr_t dma; + + struct idpf_q_vector *q_vector; + + u16 rx_buffer_low_watermark; + u16 rx_hbuf_size; + u16 rx_buf_size; + u16 rx_max_pkt_size; +} ____cacheline_aligned; + +/** + * struct idpf_tx_queue - software structure representing a transmit queue + * @base_tx: base Tx descriptor array + * @base_ctx: base Tx context descriptor array + * @flex_tx: flex Tx descriptor array + * @flex_ctx: flex Tx context descriptor array + * @desc_ring: virtual descriptor ring address + * @tx_buf: See struct idpf_tx_buf + * @txq_grp: See struct idpf_txq_group + * @dev: Device back pointer for DMA mapping + * @tail: Tail offset. Used for both queue models single and split * @flags: See enum idpf_queue_flags_t - * @q_stats: See union idpf_queue_stats - * @stats_sync: See struct u64_stats_sync + * @idx: For TX queue, it is used as index to map between TX queue group and + * hot path TX pointers stored in vport. Used in both singleq/splitq. + * @desc_count: Number of descriptors + * @next_to_use: Next descriptor to use + * @next_to_clean: Next descriptor to clean + * @netdev: &net_device corresponding to this queue * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on * the TX completion queue, it can be for any TXQ associated * with that completion queue. This means we can clean up to @@ -638,34 +727,10 @@ union idpf_queue_stats { * that single call to clean the completion queue. By doing so, * we can update BQL with aggregate cleaned stats for each TXQ * only once at the end of the cleaning routine. 
+ * @clean_budget: singleq only, queue cleaning budget * @cleaned_pkts: Number of packets cleaned for the above said case - * @rx_hsplit_en: RX headsplit enable - * @rx_hbuf_size: Header buffer size - * @rx_buf_size: Buffer size - * @rx_max_pkt_size: RX max packet size - * @rx_buf_stride: RX buffer stride - * @rx_buffer_low_watermark: RX buffer low watermark - * @rxdids: Supported RX descriptor ids - * @q_vector: Backreference to associated vector - * @size: Length of descriptor ring in bytes - * @dma: Physical address of ring - * @rx: universal receive descriptor array - * @single_buf: Rx buffer descriptor array in singleq - * @split_buf: Rx buffer descriptor array in splitq - * @base_tx: basic Tx descriptor array - * @base_ctx: basic Tx context descriptor array - * @flex_tx: flex Tx descriptor array - * @flex_ctx: flex Tx context descriptor array - * @comp: completion descriptor array - * @desc_ring: virtual descriptor ring address * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @tx_min_pkt_len: Min supported packet length - * @num_completions: Only relevant for TX completion queue. It tracks the - * number of completions received to compare against the - * number of completions pending, as accumulated by the - * TX queues. - * @buf_stack: Stack of empty buffers to store buffer info for out of order - * buffer completions. See struct idpf_buf_lifo. * @compl_tag_bufid_m: Completion tag buffer id mask * @compl_tag_gen_s: Completion tag generation bit * The format of the completion tag will change based on the TXQ @@ -689,120 +754,188 @@ union idpf_queue_stats { * This gives us 8*8160 = 65280 possible unique values. * @compl_tag_cur_gen: Used to keep track of current completion tag generation * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset - * @sched_buf_hash: Hash table to stores buffers + * @stash: Tx buffer stash for Flow-based scheduling mode + * @stats_sync: See struct u64_stats_sync + * @q_stats: See union idpf_tx_queue_stats + * @q_id: Queue id + * @size: Length of descriptor ring in bytes + * @dma: Physical address of ring + * @q_vector: Backreference to associated vector */ -struct idpf_queue { - struct device *dev; - struct idpf_vport *vport; +struct idpf_tx_queue { union { - struct idpf_txq_group *txq_grp; - struct idpf_rxq_group *rxq_grp; + struct idpf_base_tx_desc *base_tx; + struct idpf_base_tx_ctx_desc *base_ctx; + union idpf_tx_flex_desc *flex_tx; + struct idpf_flex_tx_ctx_desc *flex_ctx; + + void *desc_ring; }; - u16 idx; + struct idpf_tx_buf *tx_buf; + struct idpf_txq_group *txq_grp; + struct device *dev; void __iomem *tail; - union { - struct idpf_tx_buf *tx_buf; - struct { - struct idpf_rx_buf *buf; - dma_addr_t hdr_buf_pa; - void *hdr_buf_va; - } rx_buf; - }; - struct page_pool *pp; - struct sk_buff *skb; - u16 q_type; - u32 q_id; - u16 desc_count; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + u16 idx; + u16 desc_count; u16 next_to_use; u16 next_to_clean; - u16 next_to_alloc; - DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); - union idpf_queue_stats q_stats; - struct u64_stats_sync stats_sync; + struct net_device *netdev; - u32 cleaned_bytes; + union { + u32 cleaned_bytes; + u32 clean_budget; + }; u16 cleaned_pkts; - bool rx_hsplit_en; - u16 rx_hbuf_size; - u16 rx_buf_size; - u16 rx_max_pkt_size; - u16 rx_buf_stride; - u8 rx_buffer_low_watermark; - u64 rxdids; - struct idpf_q_vector *q_vector; - unsigned int size; + u16 tx_max_bufs; + u16 tx_min_pkt_len; + + u16 compl_tag_bufid_m; + u16 compl_tag_gen_s; + + u16 
compl_tag_cur_gen; + u16 compl_tag_gen_max; + + struct idpf_txq_stash *stash; + + struct u64_stats_sync stats_sync; + struct idpf_tx_queue_stats q_stats; + + /* Slowpath */ + u32 q_id; + u32 size; dma_addr_t dma; - union { - union virtchnl2_rx_desc *rx; - struct virtchnl2_singleq_rx_buf_desc *single_buf; - struct virtchnl2_splitq_rx_buf_desc *split_buf; + struct idpf_q_vector *q_vector; +} ____cacheline_aligned; - struct idpf_base_tx_desc *base_tx; - struct idpf_base_tx_ctx_desc *base_ctx; - union idpf_tx_flex_desc *flex_tx; - struct idpf_flex_tx_ctx_desc *flex_ctx; +/** + * struct idpf_buf_queue - software structure representing a buffer queue + * @split_buf: buffer descriptor array + * @rx_buf: Struct with RX buffer related members + * @rx_buf.buf: See struct idpf_rx_buf + * @rx_buf.hdr_buf_pa: DMA handle + * @rx_buf.hdr_buf_va: Virtual address + * @pp: Page pool pointer + * @tail: Tail offset + * @flags: See enum idpf_queue_flags_t + * @desc_count: Number of descriptors + * @next_to_use: Next descriptor to use + * @next_to_clean: Next descriptor to clean + * @next_to_alloc: RX buffer to allocate at + * @q_id: Queue id + * @size: Length of descriptor ring in bytes + * @dma: Physical address of ring + * @q_vector: Backreference to associated vector + * @rx_buffer_low_watermark: RX buffer low watermark + * @rx_hbuf_size: Header buffer size + * @rx_buf_size: Buffer size + */ +struct idpf_buf_queue { + struct virtchnl2_splitq_rx_buf_desc *split_buf; + struct { + struct idpf_rx_buf *buf; + dma_addr_t hdr_buf_pa; + void *hdr_buf_va; + } rx_buf; + struct page_pool *pp; + void __iomem *tail; - struct idpf_splitq_tx_compl_desc *comp; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + u16 desc_count; + u16 next_to_use; + u16 next_to_clean; + u16 next_to_alloc; - void *desc_ring; - }; + /* Slowpath */ + u32 q_id; + u32 size; + dma_addr_t dma; - u16 tx_max_bufs; - u8 tx_min_pkt_len; + struct idpf_q_vector *q_vector; - u32 num_completions; + u16 rx_buffer_low_watermark; + u16 rx_hbuf_size; + u16 rx_buf_size; +} ____cacheline_aligned; - struct idpf_buf_lifo buf_stack; +/** + * struct idpf_compl_queue - software structure representing a completion queue + * @comp: completion descriptor array + * @txq_grp: See struct idpf_txq_group + * @flags: See enum idpf_queue_flags_t + * @desc_count: Number of descriptors + * @next_to_use: Next descriptor to use. Relevant in both split & single txq + * and bufq. + * @next_to_clean: Next descriptor to clean + * @netdev: &net_device corresponding to this queue + * @clean_budget: queue cleaning budget + * @num_completions: Only relevant for TX completion queue. It tracks the + * number of completions received to compare against the + * number of completions pending, as accumulated by the + * TX queues. 
+ * @q_id: Queue id + * @size: Length of descriptor ring in bytes + * @dma: Physical address of ring + * @q_vector: Backreference to associated vector + */ +struct idpf_compl_queue { + struct idpf_splitq_tx_compl_desc *comp; + struct idpf_txq_group *txq_grp; - u16 compl_tag_bufid_m; - u16 compl_tag_gen_s; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + u16 desc_count; + u16 next_to_use; + u16 next_to_clean; - u16 compl_tag_cur_gen; - u16 compl_tag_gen_max; + struct net_device *netdev; + u32 clean_budget; + u32 num_completions; - DECLARE_HASHTABLE(sched_buf_hash, 12); -} ____cacheline_internodealigned_in_smp; + /* Slowpath */ + u32 q_id; + u32 size; + dma_addr_t dma; + + struct idpf_q_vector *q_vector; +} ____cacheline_aligned; /** * struct idpf_sw_queue - * @next_to_clean: Next descriptor to clean - * @next_to_alloc: Buffer to allocate at - * @flags: See enum idpf_queue_flags_t * @ring: Pointer to the ring + * @flags: See enum idpf_queue_flags_t * @desc_count: Descriptor count - * @dev: Device back pointer for DMA mapping + * @next_to_use: Buffer to allocate at + * @next_to_clean: Next descriptor to clean * * Software queues are used in splitq mode to manage buffers between rxq * producer and the bufq consumer. These are required in order to maintain a * lockless buffer management system and are strictly software only constructs. */ struct idpf_sw_queue { - u16 next_to_clean; - u16 next_to_alloc; + u32 *ring; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); - u16 *ring; u16 desc_count; - struct device *dev; -} ____cacheline_internodealigned_in_smp; + u16 next_to_use; + u16 next_to_clean; +} ____cacheline_aligned; /** * struct idpf_rxq_set * @rxq: RX queue - * @refillq0: Pointer to refill queue 0 - * @refillq1: Pointer to refill queue 1 + * @refillq: pointers to refill queues * * Splitq only. idpf_rxq_set associates an rxq with at an array of refillqs. * Each rxq needs a refillq to return used buffers back to the respective bufq. * Bufqs then clean these refillqs for buffers to give to hardware. */ struct idpf_rxq_set { - struct idpf_queue rxq; - struct idpf_sw_queue *refillq0; - struct idpf_sw_queue *refillq1; + struct idpf_rx_queue rxq; + struct idpf_sw_queue *refillq[IDPF_MAX_BUFQS_PER_RXQ_GRP]; }; /** @@ -821,7 +954,7 @@ struct idpf_rxq_set { * managed by at most two bufqs (depending on performance configuration). 
*/ struct idpf_bufq_set { - struct idpf_queue bufq; + struct idpf_buf_queue bufq; int num_refillqs; struct idpf_sw_queue *refillqs; }; @@ -847,7 +980,7 @@ struct idpf_rxq_group { union { struct { u16 num_rxq; - struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q]; + struct idpf_rx_queue *rxqs[IDPF_LARGE_MAX_Q]; } singleq; struct { u16 num_rxq_sets; @@ -862,6 +995,7 @@ struct idpf_rxq_group { * @vport: Vport back pointer * @num_txq: Number of TX queues associated * @txqs: Array of TX queue pointers + * @stashes: array of OOO stashes for the queues * @complq: Associated completion queue pointer, split queue only * @num_completions_pending: Total number of completions pending for the * completion queue, acculumated for all TX queues @@ -875,9 +1009,10 @@ struct idpf_txq_group { struct idpf_vport *vport; u16 num_txq; - struct idpf_queue *txqs[IDPF_LARGE_MAX_Q]; + struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q]; + struct idpf_txq_stash *stashes; - struct idpf_queue *complq; + struct idpf_compl_queue *complq; u32 num_completions_pending; }; @@ -1031,28 +1166,22 @@ void idpf_deinit_rss(struct idpf_vport *vport); int idpf_rx_bufs_init_all(struct idpf_vport *vport); void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, unsigned int size); -struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, +struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq, struct idpf_rx_buf *rx_buf, unsigned int size); -bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf); -void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val); -void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, +void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more); unsigned int idpf_size_to_txd_count(unsigned int size); -netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb); -void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, +netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); +void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, struct idpf_tx_buf *first, u16 ring_idx); -unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, +unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, struct sk_buff *skb); -bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count); -int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); -netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, - struct net_device *netdev); -netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, - struct net_device *netdev); -bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq, +netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_tx_queue *tx_q); +netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev); +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index b97e46e494c43..723e385a85d84 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -765,7 +765,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport) int i; for (i = 0; i < vport->num_txq; i++) - set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags); + idpf_queue_set(SW_MARKER, 
vport->txqs[i]); event = wait_event_timeout(vport->sw_marker_wq, test_and_clear_bit(IDPF_VPORT_SW_MARKER, @@ -773,7 +773,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport) msecs_to_jiffies(500)); for (i = 0; i < vport->num_txq; i++) - clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); + idpf_queue_clear(POLL_MODE, vport->txqs[i]); if (event) return 0; @@ -1107,7 +1107,6 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, int num_regs, u32 q_type) { struct idpf_adapter *adapter = vport->adapter; - struct idpf_queue *q; int i, j, k = 0; switch (q_type) { @@ -1126,6 +1125,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, u16 num_rxq = rx_qgrp->singleq.num_rxq; for (j = 0; j < num_rxq && k < num_regs; j++, k++) { + struct idpf_rx_queue *q; + q = rx_qgrp->singleq.rxqs[j]; q->tail = idpf_get_reg_addr(adapter, reg_vals[k]); @@ -1138,6 +1139,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, u8 num_bufqs = vport->num_bufqs_per_qgrp; for (j = 0; j < num_bufqs && k < num_regs; j++, k++) { + struct idpf_buf_queue *q; + q = &rx_qgrp->splitq.bufq_sets[j].bufq; q->tail = idpf_get_reg_addr(adapter, reg_vals[k]); @@ -1268,12 +1271,12 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter, vport_msg->vport_type = cpu_to_le16(VIRTCHNL2_VPORT_TYPE_DEFAULT); vport_msg->vport_index = cpu_to_le16(idx); - if (adapter->req_tx_splitq) + if (adapter->req_tx_splitq || !IS_ENABLED(CONFIG_IDPF_SINGLEQ)) vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); else vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); - if (adapter->req_rx_splitq) + if (adapter->req_rx_splitq || !IS_ENABLED(CONFIG_IDPF_SINGLEQ)) vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); else vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); @@ -1331,10 +1334,17 @@ int idpf_check_supported_desc_ids(struct idpf_vport *vport) vport_msg = adapter->vport_params_recvd[vport->idx]; + if (!IS_ENABLED(CONFIG_IDPF_SINGLEQ) && + (vport_msg->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE || + vport_msg->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)) { + pci_err(adapter->pdev, "singleq mode requested, but not compiled-in\n"); + return -EOPNOTSUPP; + } + rx_desc_ids = le64_to_cpu(vport_msg->rx_desc_ids); tx_desc_ids = le64_to_cpu(vport_msg->tx_desc_ids); - if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + if (idpf_is_queue_model_split(vport->rxq_model)) { if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M)) { dev_info(&adapter->pdev->dev, "Minimum RX descriptor support not provided, using the default\n"); vport_msg->rx_desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); @@ -1344,7 +1354,7 @@ int idpf_check_supported_desc_ids(struct idpf_vport *vport) vport->base_rxd = true; } - if (vport->txq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT) + if (!idpf_is_queue_model_split(vport->txq_model)) return 0; if ((tx_desc_ids & MIN_SUPPORT_TXDID) != MIN_SUPPORT_TXDID) { @@ -1460,19 +1470,19 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) qi[k].model = cpu_to_le16(vport->txq_model); qi[k].type = - cpu_to_le32(tx_qgrp->txqs[j]->q_type); + cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); qi[k].ring_len = cpu_to_le16(tx_qgrp->txqs[j]->desc_count); qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->txqs[j]->dma); if (idpf_is_queue_model_split(vport->txq_model)) { - struct idpf_queue *q = tx_qgrp->txqs[j]; + struct idpf_tx_queue *q = tx_qgrp->txqs[j]; qi[k].tx_compl_queue_id = cpu_to_le16(tx_qgrp->complq->q_id); 
qi[k].relative_queue_id = cpu_to_le16(j); - if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags)) + if (idpf_queue_has(FLOW_SCH_EN, q)) qi[k].sched_mode = cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW); else @@ -1489,11 +1499,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id); qi[k].model = cpu_to_le16(vport->txq_model); - qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type); + qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count); qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma); - if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags)) + if (idpf_queue_has(FLOW_SCH_EN, tx_qgrp->complq)) sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; else sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; @@ -1578,17 +1588,18 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) goto setup_rxqs; for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { - struct idpf_queue *bufq = + struct idpf_buf_queue *bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; qi[k].queue_id = cpu_to_le32(bufq->q_id); qi[k].model = cpu_to_le16(vport->rxq_model); - qi[k].type = cpu_to_le32(bufq->q_type); + qi[k].type = + cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); qi[k].ring_len = cpu_to_le16(bufq->desc_count); qi[k].dma_ring_addr = cpu_to_le64(bufq->dma); qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size); - qi[k].buffer_notif_stride = bufq->rx_buf_stride; + qi[k].buffer_notif_stride = IDPF_RX_BUF_STRIDE; qi[k].rx_buffer_low_watermark = cpu_to_le16(bufq->rx_buffer_low_watermark); if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) @@ -1602,7 +1613,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) num_rxq = rx_qgrp->singleq.num_rxq; for (j = 0; j < num_rxq; j++, k++) { - struct idpf_queue *rxq; + struct idpf_rx_queue *rxq; if (!idpf_is_queue_model_split(vport->rxq_model)) { rxq = rx_qgrp->singleq.rxqs[j]; @@ -1610,11 +1621,11 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) } rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; qi[k].rx_bufq1_id = - cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id); + cpu_to_le16(rxq->bufq_sets[0].bufq.q_id); if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { qi[k].bufq2_ena = IDPF_BUFQ2_ENA; qi[k].rx_bufq2_id = - cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id); + cpu_to_le16(rxq->bufq_sets[1].bufq.q_id); } qi[k].rx_buffer_low_watermark = cpu_to_le16(rxq->rx_buffer_low_watermark); @@ -1622,7 +1633,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); common_qi_fields: - if (rxq->rx_hsplit_en) { + if (idpf_queue_has(HSPLIT_EN, rxq)) { qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); qi[k].hdr_buffer_size = @@ -1630,7 +1641,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) } qi[k].queue_id = cpu_to_le32(rxq->q_id); qi[k].model = cpu_to_le16(vport->rxq_model); - qi[k].type = cpu_to_le32(rxq->q_type); + qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); qi[k].ring_len = cpu_to_le16(rxq->desc_count); qi[k].dma_ring_addr = cpu_to_le64(rxq->dma); qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size); @@ -1717,7 +1728,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); + 
qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); } @@ -1731,7 +1742,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) for (i = 0; i < vport->num_txq_grp; i++, k++) { struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type); + qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); } @@ -1752,12 +1763,12 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) qc[k].start_queue_id = cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id); qc[k].type = - cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type); + cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); } else { qc[k].start_queue_id = cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id); qc[k].type = - cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type); + cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); } qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); } @@ -1772,10 +1783,11 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { - struct idpf_queue *q; + const struct idpf_buf_queue *q; q = &rx_qgrp->splitq.bufq_sets[j].bufq; - qc[k].type = cpu_to_le32(q->q_type); + qc[k].type = + cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); qc[k].start_queue_id = cpu_to_le32(q->q_id); qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); } @@ -1860,7 +1872,8 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); + vqv[k].queue_type = + cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); if (idpf_is_queue_model_split(vport->txq_model)) { @@ -1890,14 +1903,15 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) num_rxq = rx_qgrp->singleq.num_rxq; for (j = 0; j < num_rxq; j++, k++) { - struct idpf_queue *rxq; + struct idpf_rx_queue *rxq; if (idpf_is_queue_model_split(vport->rxq_model)) rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; else rxq = rx_qgrp->singleq.rxqs[j]; - vqv[k].queue_type = cpu_to_le32(rxq->q_type); + vqv[k].queue_type = + cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); vqv[k].queue_id = cpu_to_le32(rxq->q_id); vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx); vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx); @@ -1986,7 +2000,7 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) * queues virtchnl message is sent */ for (i = 0; i < vport->num_txq; i++) - set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); + idpf_queue_set(POLL_MODE, vport->txqs[i]); /* schedule the napi to receive all the marker packets */ local_bh_disable(); @@ -3258,7 +3272,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, int num_qids, u32 q_type) { - struct idpf_queue *q; int i, j, k = 0; switch (q_type) { @@ -3266,11 +3279,8 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, for (i = 0; i < vport->num_txq_grp; i++) { struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) { + for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) tx_qgrp->txqs[j]->q_id = qids[k]; - tx_qgrp->txqs[j]->q_type = - 
VIRTCHNL2_QUEUE_TYPE_TX; - } } break; case VIRTCHNL2_QUEUE_TYPE_RX: @@ -3284,12 +3294,13 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, num_rxq = rx_qgrp->singleq.num_rxq; for (j = 0; j < num_rxq && k < num_qids; j++, k++) { + struct idpf_rx_queue *q; + if (idpf_is_queue_model_split(vport->rxq_model)) q = &rx_qgrp->splitq.rxq_sets[j]->rxq; else q = rx_qgrp->singleq.rxqs[j]; q->q_id = qids[k]; - q->q_type = VIRTCHNL2_QUEUE_TYPE_RX; } } break; @@ -3298,8 +3309,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; tx_qgrp->complq->q_id = qids[k]; - tx_qgrp->complq->q_type = - VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; } break; case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: @@ -3308,9 +3317,10 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, u8 num_bufqs = vport->num_bufqs_per_qgrp; for (j = 0; j < num_bufqs && k < num_qids; j++, k++) { + struct idpf_buf_queue *q; + q = &rx_qgrp->splitq.bufq_sets[j].bufq; q->q_id = qids[k]; - q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; } } break; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index a89b7de72dcfa..970a57c64d15e 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -26,58 +26,78 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) hotplug_slot); int rc; - if (zdev->state != ZPCI_FN_STATE_STANDBY) - return -EIO; + mutex_lock(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_STANDBY) { + rc = -EIO; + goto out; + } rc = sclp_pci_configure(zdev->fid); zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc); if (rc) - return rc; + goto out; zdev->state = ZPCI_FN_STATE_CONFIGURED; - return zpci_scan_configured_device(zdev, zdev->fh); + rc = zpci_scan_configured_device(zdev, zdev->fh); +out: + mutex_unlock(&zdev->state_lock); + return rc; } static int disable_slot(struct hotplug_slot *hotplug_slot) { struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); - struct pci_dev *pdev; + struct pci_dev *pdev = NULL; + int rc; - if (zdev->state != ZPCI_FN_STATE_CONFIGURED) - return -EIO; + mutex_lock(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) { + rc = -EIO; + goto out; + } pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); if (pdev && pci_num_vf(pdev)) { - pci_dev_put(pdev); - return -EBUSY; + rc = -EBUSY; + goto out; } - pci_dev_put(pdev); - return zpci_deconfigure_device(zdev); + rc = zpci_deconfigure_device(zdev); +out: + if (pdev) + pci_dev_put(pdev); + mutex_unlock(&zdev->state_lock); + return rc; } static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) { struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); + int rc = -EIO; - if (zdev->state != ZPCI_FN_STATE_CONFIGURED) - return -EIO; /* - * We can't take the zdev->lock as reset_slot may be called during - * probing and/or device removal which already happens under the - * zdev->lock. Instead the user should use the higher level - * pci_reset_function() or pci_bus_reset() which hold the PCI device - * lock preventing concurrent removal. If not using these functions - * holding the PCI device lock is required. + * If we can't get the zdev->state_lock the device state is + * currently undergoing a transition and we bail out - just + * the same as if the device's state is not configured at all. 
 	 */
+	if (!mutex_trylock(&zdev->state_lock))
+		return rc;
 
-	/* As long as the function is configured we can reset */
-	if (probe)
-		return 0;
+	/* We can reset only if the function is configured */
+	if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+		goto out;
+
+	if (probe) {
+		rc = 0;
+		goto out;
+	}
 
-	return zpci_hot_reset_device(zdev);
+	rc = zpci_hot_reset_device(zdev);
+out:
+	mutex_unlock(&zdev->state_lock);
+	return rc;
 }
 
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
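
The idpf hunks earlier in this series replace the open-coded per-queue flag bit operations with the idpf_queue_set()/idpf_queue_has()/idpf_queue_change() wrappers and repack each software refill-ring entry so that IDPF_RX_BI_BUFID_M covers bits 15:0 and IDPF_RX_BI_GEN_M is bit 16. The sketch below shows how a consumer of such a ring would use those definitions; it is illustrative only, idpf_rx_refillq_pop() is a hypothetical helper that is not part of the driver, and it assumes the struct idpf_sw_queue layout and the macros from the idpf_txrx.h hunks above are in scope.

/*
 * Illustration only: a hypothetical consumer of the SW refill ring,
 * assuming the idpf_txrx.h definitions from the hunks above
 * (struct idpf_sw_queue, IDPF_RX_BI_*, idpf_queue_*()).
 */
#include <linux/bitfield.h>

static bool idpf_rx_refillq_pop(struct idpf_sw_queue *refillq, u32 *buf_id)
{
	u32 refill_desc = refillq->ring[refillq->next_to_clean];

	/* A gen-bit mismatch means the producer has not written this slot */
	if (idpf_queue_has(RFL_GEN_CHK, refillq) !=
	    !!(refill_desc & IDPF_RX_BI_GEN_M))
		return false;

	/* Buffer ID occupies bits 15:0 of the ring entry */
	*buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc);

	/* On wrap, flip the expected gen value, as idpf_rx_clean_refillq() does */
	if (unlikely(++refillq->next_to_clean == refillq->desc_count)) {
		refillq->next_to_clean = 0;
		idpf_queue_change(RFL_GEN_CHK, refillq);
	}

	return true;
}

The gen-bit comparison mirrors idpf_rx_clean_refillq() above: an entry is valid only when its stored gen bit matches the queue's RFL_GEN_CHK flag, and the expected value flips each time the ring wraps.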