Compare commits

...

2696 Commits

Author SHA1 Message Date
Alice Mikityanska
ebe560ea5f l2tp: Drop large packets with UDP encap
syzbot reported a WARN on my patch series [1]. The actual issue is an
overflow of 16-bit UDP length field, and it exists in the upstream code.
My series added a debug WARN with an overflow check that exposed the
issue, that's why syzbot tripped on my patches, rather than on upstream
code.

syzbot's repro:

r0 = socket$pppl2tp(0x18, 0x1, 0x1)
r1 = socket$inet6_udp(0xa, 0x2, 0x0)
connect$inet6(r1, &(0x7f00000000c0)={0xa, 0x0, 0x0, @loopback, 0xfffffffc}, 0x1c)
connect$pppl2tp(r0, &(0x7f0000000240)=@pppol2tpin6={0x18, 0x1, {0x0, r1, 0x4, 0x0, 0x0, 0x0, {0xa, 0x4e22, 0xffff, @ipv4={'\x00', '\xff\xff', @empty}}}}, 0x32)
writev(r0, &(0x7f0000000080)=[{&(0x7f0000000000)="ee", 0x34000}], 0x1)

It basically sends an oversized (0x34000 bytes) PPPoL2TP packet with UDP
encapsulation, and l2tp_xmit_core doesn't check for overflows when it
assigns the UDP length field. The value gets trimmed to 16 bites.

Add an overflow check that drops oversized packets and avoids sending
packets with trimmed UDP length to the wire.

syzbot's stack trace (with my patch applied):

len >= 65536u
WARNING: ./include/linux/udp.h:38 at udp_set_len_short include/linux/udp.h:38 [inline], CPU#1: syz.0.17/5957
WARNING: ./include/linux/udp.h:38 at l2tp_xmit_core net/l2tp/l2tp_core.c:1293 [inline], CPU#1: syz.0.17/5957
WARNING: ./include/linux/udp.h:38 at l2tp_xmit_skb+0x1204/0x18d0 net/l2tp/l2tp_core.c:1327, CPU#1: syz.0.17/5957
Modules linked in:
CPU: 1 UID: 0 PID: 5957 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:udp_set_len_short include/linux/udp.h:38 [inline]
RIP: 0010:l2tp_xmit_core net/l2tp/l2tp_core.c:1293 [inline]
RIP: 0010:l2tp_xmit_skb+0x1204/0x18d0 net/l2tp/l2tp_core.c:1327
Code: 0f 0b 90 e9 21 f9 ff ff e8 e9 05 ec f6 90 0f 0b 90 e9 8d f9 ff ff e8 db 05 ec f6 90 0f 0b 90 e9 cc f9 ff ff e8 cd 05 ec f6 90 <0f> 0b 90 e9 de fa ff ff 44 89 f1 80 e1 07 80 c1 03 38 c1 0f 8c 4f
RSP: 0018:ffffc90003d67878 EFLAGS: 00010293
RAX: ffffffff8ad985e3 RBX: ffff8881a6400090 RCX: ffff8881697f0000
RDX: 0000000000000000 RSI: 0000000000034010 RDI: 000000000000ffff
RBP: dffffc0000000000 R08: 0000000000000003 R09: 0000000000000004
R10: dffffc0000000000 R11: fffff520007acf00 R12: ffff8881baf20900
R13: 0000000000034010 R14: ffff8881a640008e R15: ffff8881760f7000
FS:  000055557e81f500(0000) GS:ffff8882a9467000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000033000 CR3: 00000001612f4000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 pppol2tp_sendmsg+0x40a/0x5f0 net/l2tp/l2tp_ppp.c:302
 sock_sendmsg_nosec net/socket.c:727 [inline]
 __sock_sendmsg net/socket.c:742 [inline]
 sock_write_iter+0x503/0x550 net/socket.c:1195
 do_iter_readv_writev+0x619/0x8c0 fs/read_write.c:-1
 vfs_writev+0x33c/0x990 fs/read_write.c:1059
 do_writev+0x154/0x2e0 fs/read_write.c:1105
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0x14d/0xf80 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f636479c629
Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffffd4241c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 00007f6364a15fa0 RCX: 00007f636479c629
RDX: 0000000000000001 RSI: 0000200000000080 RDI: 0000000000000003
RBP: 00007f6364832b39 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f6364a15fac R14: 00007f6364a15fa0 R15: 00007f6364a15fa0
 </TASK>

[1]: https://lore.kernel.org/all/20260226201600.222044-1-alice.kernel@fastmail.im/

Fixes: 3557baabf2 ("[L2TP]: PPP over L2TP driver core")
Reported-by: syzbot+ci3edea60a44225dec@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/69a1dfba.050a0220.3a55be.0026.GAE@google.com/
Signed-off-by: Alice Mikityanska <alice@isovalent.com>
Link: https://patch.msgid.link/20260403174949.843941-1-alice.kernel@fastmail.im
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-09 10:19:05 +02:00
Alexander Koskovich
56007972c0 net: ipa: fix event ring index not programmed for IPA v5.0+
For IPA v5.0+, the event ring index field moved from CH_C_CNTXT_0 to
CH_C_CNTXT_1. The v5.0 register definition intended to define this
field in the CH_C_CNTXT_1 fmask array but used the old identifier of
ERINDEX instead of CH_ERINDEX.

Without a valid event ring, GSI channels could never signal transfer
completions. This caused gsi_channel_trans_quiesce() to block
forever in wait_for_completion().

At least for IPA v5.2 this resolves an issue seen where runtime
suspend, system suspend, and remoteproc stop all hanged forever. It
also meant the IPA data path was completely non functional.

Fixes: faf0678ec8 ("net: ipa: add IPA v5.0 GSI register definitions")
Signed-off-by: Alexander Koskovich <akoskovich@pm.me>
Signed-off-by: Luca Weiss <luca.weiss@fairphone.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260403-milos-ipa-v1-2-01e9e4e03d3e@fairphone.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-09 09:47:31 +02:00
Alexander Koskovich
9709b56d90 net: ipa: fix GENERIC_CMD register field masks for IPA v5.0+
Fix the field masks to match the hardware layout documented in
downstream GSI (GSI_V3_0_EE_n_GSI_EE_GENERIC_CMD_*).

Notably this fixes a WARN I was seeing when I tried to send "stop"
to the MPSS remoteproc while IPA was up.

Fixes: faf0678ec8 ("net: ipa: add IPA v5.0 GSI register definitions")
Signed-off-by: Alexander Koskovich <akoskovich@pm.me>
Signed-off-by: Luca Weiss <luca.weiss@fairphone.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260403-milos-ipa-v1-1-01e9e4e03d3e@fairphone.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-09 09:47:31 +02:00
Jakub Kicinski
2607c0907d Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2026-04-06 (idpf, ice, ixgbe, ixgbevf, igb, e1000)

Emil converts to use spinlock_t for virtchnl transactions to make
consistent use of the xn_bm_lock when accessing the free_xn_bm bitmap,
while also avoiding nested raw/bh spinlock issue on PREEMPT_RT kernels.
He also sets payload size before calling the async handler, to make sure
it doesn't error out prematurely due to invalid size check for idpf.

Kohei Enju changes WARN_ON for missing PTP control PF to a dev_info() on
ice as there are cases where this is expected and acceptable.

Petr Oros fixes conditions in which error paths failed to call
ice_ptp_port_phy_restart() breaking PTP functionality on ice.

Alex significantly reduces reporting of driver information, and time
under RTNL locl, on ixgbe e610 devices by reducing reads of flash info
only on events that could change it.

Michal Schmidt adds missing Hyper-V op on ixgbevf.

Alex Dvoretsky removes call to napi_synchronize() in igb_down() to
resolve a deadlock.

Agalakov Daniil adds error check on e1000 for failed EEPROM read.

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  e1000: check return value of e1000_read_eeprom
  igb: remove napi_synchronize() in igb_down()
  ixgbevf: add missing negotiate_features op to Hyper-V ops table
  ixgbe: stop re-reading flash on every get_drvinfo for e610
  ice: fix PTP timestamping broken by SyncE code on E825C
  ice: ptp: don't WARN when controlling PF is unavailable
  idpf: set the payload size before calling the async handler
  idpf: improve locking around idpf_vc_xn_push_free()
  idpf: fix PREEMPT_RT raw/bh spinlock nesting for async VC handling
====================

Link: https://patch.msgid.link/20260406213038.444732-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 20:05:10 -07:00
Raju Rangoju
30f3b767ae MAINTAINERS: Add Prashanth as additional maintainer for amd-xgbe driver
Add Prashanth as an additional maintainer for the amd-xgbe Ethernet
driver to help with ongoing development and maintenance.

Cc: Prashanth Kumar K R <PrashanthKumar.K.R@amd.com>
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20260406073816.3218387-1-Raju.Rangoju@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 19:43:13 -07:00
Li RongQing
0006c6f109 devlink: Fix incorrect skb socket family dumping
The devlink_fmsg_dump_skb function was incorrectly using the socket
type (sk->sk_type) instead of the socket family (sk->sk_family)
when filling the "family" field in the fast message dump.

This patch fixes this to properly display the socket family.

Fixes: 3dbfde7f6b ("devlink: add devlink_fmsg_dump_skb() function")
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Link: https://patch.msgid.link/20260407022730.2393-1-lirongqing@baidu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 19:34:38 -07:00
Jiexun Wang
39897df386 af_unix: read UNIX_DIAG_VFS data under unix_state_lock
Exact UNIX diag lookups hold a reference to the socket, but not to
u->path. Meanwhile, unix_release_sock() clears u->path under
unix_state_lock() and drops the path reference after unlocking.

Read the inode and device numbers for UNIX_DIAG_VFS while holding
unix_state_lock(), then emit the netlink attribute after dropping the
lock.

This keeps the VFS data stable while the reply is being built.

Fixes: 5f7b056946 ("unix_diag: Unix inode info NLA")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Ren Wei <enjou1224z@gmail.com>
Signed-off-by: Jiexun Wang <wangjiexun2025@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260407080015.1744197-1-n05ec@lzu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 19:33:52 -07:00
Matthieu Baerts (NGI0)
8e2760eaab Revert "mptcp: add needs_id for netlink appending addr"
This commit was originally adding the ability to add MPTCP endpoints
with ID 0 by accident. The in-kernel PM, handling MPTCP endpoints at the
net namespace level, is not supposed to handle endpoints with such ID,
because this ID 0 is reserved to the initial subflow, as mentioned in
the MPTCPv1 protocol [1], a per-connection setting.

Note that 'ip mptcp endpoint add id 0' stops early with an error, but
other tools might still request the in-kernel PM to create MPTCP
endpoints with this restricted ID 0.

In other words, it was wrong to call the mptcp_pm_has_addr_attr_id
helper to check whether the address ID attribute is set: if it was set
to 0, a new MPTCP endpoint would be created with ID 0, which is not
expected, and might cause various issues later.

Fixes: 584f389426 ("mptcp: add needs_id for netlink appending addr")
Cc: stable@vger.kernel.org
Link: https://datatracker.ietf.org/doc/html/rfc8684#section-3.2-9 [1]
Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260407-net-mptcp-revert-pm-needs-id-v2-1-7a25cbc324f8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 19:31:16 -07:00
Jiayuan Chen
9b55b25390 mptcp: fix slab-use-after-free in __inet_lookup_established
The ehash table lookups are lockless and rely on
SLAB_TYPESAFE_BY_RCU to guarantee socket memory stability
during RCU read-side critical sections. Both tcp_prot and
tcpv6_prot have their slab caches created with this flag
via proto_register().

However, MPTCP's mptcp_subflow_init() copies tcpv6_prot into
tcpv6_prot_override during inet_init() (fs_initcall, level 5),
before inet6_init() (module_init/device_initcall, level 6) has
called proto_register(&tcpv6_prot). At that point,
tcpv6_prot.slab is still NULL, so tcpv6_prot_override.slab
remains NULL permanently.

This causes MPTCP v6 subflow child sockets to be allocated via
kmalloc (falling into kmalloc-4k) instead of the TCPv6 slab
cache. The kmalloc-4k cache lacks SLAB_TYPESAFE_BY_RCU, so
when these sockets are freed without SOCK_RCU_FREE (which is
cleared for child sockets by design), the memory can be
immediately reused. Concurrent ehash lookups under
rcu_read_lock can then access freed memory, triggering a
slab-use-after-free in __inet_lookup_established.

Fix this by splitting the IPv6-specific initialization out of
mptcp_subflow_init() into a new mptcp_subflow_v6_init(), called
from mptcp_proto_v6_init() before protocol registration. This
ensures tcpv6_prot_override.slab correctly inherits the
SLAB_TYPESAFE_BY_RCU slab cache.

Fixes: b19bc2945b ("mptcp: implement delegated actions")
Cc: stable@vger.kernel.org
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260406031512.189159-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 19:30:08 -07:00
Fabio Baltieri
5a37d22879 net: txgbe: leave space for null terminators on property_entry
Lists of struct property_entry are supposed to be terminated with an
empty property, this driver currently seems to be allocating exactly the
amount of entry used.

Change the struct definition to leave an extra element for all
property_entry.

Fixes: c3e382ad6d ("net: txgbe: Add software nodes to support phylink")
Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Tested-by: Jiawen Wu <jiawenwu@trustnetic.com>
Link: https://patch.msgid.link/20260405222013.5347-1-fabio.baltieri@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 19:11:37 -07:00
Justin Iurman
b30b1675aa net: ioam6: fix OOB and missing lock
When trace->type.bit6 is set:

    if (trace->type.bit6) {
        ...
        queue = skb_get_tx_queue(dev, skb);
        qdisc = rcu_dereference(queue->qdisc);

This code can lead to an out-of-bounds access of the dev->_tx[] array
when is_input is true. In such a case, the packet is on the RX path and
skb->queue_mapping contains the RX queue index of the ingress device. If
the ingress device has more RX queues than the egress device (dev) has
TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.
Add a check to avoid this situation since skb_get_tx_queue() does not
clamp the index. This issue has also revealed that per queue visibility
cannot be accurate and will be replaced later as a new feature.

While at it, add missing lock around qdisc_qstats_qlen_backlog(). The
function __ioam6_fill_trace_data() is called from both softirq and
process contexts, hence the use of spin_lock_bh() here.

Fixes: b63c5478e9 ("ipv6: ioam: Support for Queue depth data field")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20260403214418.2233266-2-kuba@kernel.org/
Signed-off-by: Justin Iurman <justin.iurman@gmail.com>
Link: https://patch.msgid.link/20260404134137.24553-1-justin.iurman@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 19:08:56 -07:00
Jakub Kicinski
d65b175cfa Merge tag 'wireless-2026-04-08' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:

====================
A few last-minute fixes:
 - rfkill: prevent boundless event list
 - rt2x00: fix USB resource management
 - brcmfmac: validate firmware IDs
 - brcmsmac: fix DMA free size

* tag 'wireless-2026-04-08' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
  net: rfkill: prevent unlimited numbers of rfkill events from being created
  wifi: rt2x00usb: fix devres lifetime
  wifi: brcmfmac: validate bsscfg indices in IF events
  wifi: brcmsmac: Fix dma_free_coherent() size
====================

Link: https://patch.msgid.link/20260408081802.111623-3-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:56:17 -07:00
Jakub Kicinski
84ac9a922d Merge tag 'ipsec-2026-04-08' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
Steffen Klassert says:

====================
pull request (net): ipsec 2026-04-08

1) Clear trailing padding in build_polexpire() to prevent
   leaking unititialized memory. From Yasuaki Torimaru.

2) Fix aevent size calculation when XFRMA_IF_ID is used.
   From Keenan Dong.

3) Wait for RCU readers during policy netns exit before
   freeing the policy hash tables.

4) Fix dome too eaerly dropped references on the netdev
   when uding transport mode. From Qi Tang.

5) Fix refcount leak in xfrm_migrate_policy_find().
   From Kotlyarov Mihail.

6) Fix two fix info leaks in build_report() and
   in build_mapping(). From Greg Kroah-Hartman.

7) Zero aligned sockaddr tail in PF_KEY exports.
   From Zhengchuan Liang.

* tag 'ipsec-2026-04-08' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec:
  net: af_key: zero aligned sockaddr tail in PF_KEY exports
  xfrm_user: fix info leak in build_report()
  xfrm_user: fix info leak in build_mapping()
  xfrm: fix refcount leak in xfrm_migrate_policy_find
  xfrm: hold dev ref until after transport_finish NF_HOOK
  xfrm: Wait for RCU readers during policy netns exit
  xfrm: account XFRMA_IF_ID in aevent size calculation
  xfrm: clear trailing padding in build_polexpire()
====================

Link: https://patch.msgid.link/20260408095925.253681-1-steffen.klassert@secunet.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:54:32 -07:00
Jakub Kicinski
d614e0186b Merge tag 'batadv-net-pullrequest-20260408' of https://git.open-mesh.org/linux-merge
Simon Wunderlich says:

====================
Here are two batman-adv bugfixes:

 - reject oversized global TT response buffers, by Ruide Cao

 - hold claim backbone gateways by reference, by Haoze Xie

* tag 'batadv-net-pullrequest-20260408' of https://git.open-mesh.org/linux-merge:
  batman-adv: hold claim backbone gateways by reference
  batman-adv: reject oversized global TT response buffers
====================

Link: https://patch.msgid.link/20260408110255.976389-1-sw@simonwunderlich.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:50:27 -07:00
Jakub Kicinski
1ee3b19a26 Merge tag 'nf-26-04-08' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Florian Westphal says:

====================
netfilter updates for net

I only included crash fixes, as we're closer to a release, rest will
be handled via -next.

1) Fix a NULL pointer dereference in ip_vs_add_service error path, from
   Weiming Shi, bug added in 6.2 development cycle.

2) Don't leak kernel data bytes from allocator to userspace: nfnetlink_log
   needs to init the trailing NLMSG_DONE terminator. From Xiang Mei.

3) xt_multiport match lacks range validation, bogus userspace request will
   cause out-of-bounds read. From Ren Wei.

4) ip6t_eui64 match must reject packets with invalid mac header before
   calling eth_hdr. Make existing check unconditional.  From Zhengchuan
   Liang.

5) nft_ct timeout policies are free'd via kfree() while they may still
   be reachable by other cpus that process a conntrack object that
   uses such a timeout policy.  Existing reaping of entries is not
   sufficient because it doesn't wait for a grace period.  Use kfree_rcu().
   From Tuan Do.

6/7) Make nfnetlink_queue hash table per queue.  As-is we can hit a page
   fault in case underlying page of removed element was free'd.  Per-queue
   hash prevents parallel lookups.  This comes with a test case that
   demonstrates the bug, from Fernando Fernandez Mancera.

* tag 'nf-26-04-08' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  selftests: nft_queue.sh: add a parallel stress test
  netfilter: nfnetlink_queue: make hash table per queue
  netfilter: nft_ct: fix use-after-free in timeout object destroy
  netfilter: ip6t_eui64: reject invalid MAC header for all packets
  netfilter: xt_multiport: validate range encoding in checkentry
  netfilter: nfnetlink_log: initialize nfgenmsg in NLMSG_DONE terminator
  ipvs: fix NULL deref in ip_vs_add_service error path
====================

Link: https://patch.msgid.link/20260408163512.30537-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:48:44 -07:00
Jakub Kicinski
cade36eed7 Merge branch 'rxrpc-miscellaneous-fixes'
David Howells says:

====================
rxrpc: Miscellaneous fixes

Here are some fixes for rxrpc:

 (1) Fix key quota calculation.

 (2) Fix a memory leak.

 (3) Fix rxrpc_new_client_call_for_sendmsg() to substitute NULL for an
     empty key.

     Might want to remove this substitution entirely or handle it in
     rxrpc_init_client_call_security() instead.

 (4) Fix deletion of call->link to be RCU safe.

 (5) Fix missing bounds checks when parsing RxGK tickets.

 (6) Fix use of wrong skbuff to get challenge serial number.  Also actually
     substitute the newer response skbuff and release the older one.

 (7) Fix unexpected RACK timer warning to report old mode.

 (8) Fix call key refcount leak.

 (9) Fix the interaction of jumbograms with Tx window space, setting the
     request-ack flag when the window space is getting low, typically
     because each jumbogram take a big bite out of the window and fewer UDP
     packets get traded.

(10) Don't call rxrpc_put_call() with a NULL pointer.

(11) Reject undecryptable rxkad response tickets by checking result of
     decryption.

(12) Fix buffer bounds calculation in the RESPONSE authenticator parser.

(13) Fix oversized response length check.

(14) Fix refcount leak on multiple setting of server keyring.

(15) Fix checks made by RXRPC_SECURITY_KEY and RXRPC_SECURITY_KEYRING (both
     should be allowed).

(16) Fix lack of result checking on calls to crypto_skcipher_en/decrypt().

(17) Fix token_len limit check in rxgk_verify_response().

(18) Fix rxgk context leak in rxgk_verify_response().

(19) Fix read beyond end of buffer in rxgk_do_verify_authenticator().

(20) Fix parsing of RESPONSE packet on a connection that has already been set
     from a prior response.

(21) Fix size of buffers used for rendering addresses into for procfiles.
====================

Link: https://patch.msgid.link/20260408121252.2249051-1-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:45:32 -07:00
Pengpeng Hou
a44ce6aa2e rxrpc: proc: size address buffers for %pISpc output
The AF_RXRPC procfs helpers format local and remote socket addresses into
fixed 50-byte stack buffers with "%pISpc".

That is too small for the longest current-tree IPv6-with-port form the
formatter can produce. In lib/vsprintf.c, the compressed IPv6 path uses a
dotted-quad tail not only for v4mapped addresses, but also for ISATAP
addresses via ipv6_addr_is_isatap().

As a result, a case such as

  [ffff:ffff:ffff:ffff:0:5efe:255.255.255.255]:65535

is possible with the current formatter. That is 50 visible characters, so
51 bytes including the trailing NUL, which does not fit in the existing
char[50] buffers used by net/rxrpc/proc.c.

Size the buffers from the formatter's maximum textual form and switch the
call sites to scnprintf().

Changes since v1:
- correct the changelog to cite the actual maximum current-tree case
  explicitly
- frame the proof around the ISATAP formatting path instead of the earlier
  mapped-v4 example

Fixes: 75b54cb57c ("rxrpc: Add IPv6 support")
Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Anderson Nascimento <anderson@allelesecurity.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-22-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:45:32 -07:00
Wang Jie
c43ffdcfdb rxrpc: only handle RESPONSE during service challenge
Only process RESPONSE packets while the service connection is still in
RXRPC_CONN_SERVICE_CHALLENGING. Check that state under state_lock before
running response verification and security initialization, then use a local
secured flag to decide whether to queue the secured-connection work after
the state transition. This keeps duplicate or late RESPONSE packets from
re-running the setup path and removes the unlocked post-transition state
test.

Fixes: 17926a7932 ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Jie Wang <jiewang2024@lzu.edu.cn>
Signed-off-by: Yang Yang <n05ec@lzu.edu.cn>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-21-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:45:05 -07:00
David Howells
f564af387c rxrpc: Fix buffer overread in rxgk_do_verify_authenticator()
Fix rxgk_do_verify_authenticator() to check the buffer size before checking
the nonce.

Fixes: 9d1d2b5934 ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)")
Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-20-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:34 -07:00
David Howells
7e1876caa8 rxrpc: Fix leak of rxgk context in rxgk_verify_response()
Fix rxgk_verify_response() to clean up the rxgk context it creates.

Fixes: 9d1d2b5934 ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)")
Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-19-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:34 -07:00
David Howells
699e52180f rxrpc: Fix integer overflow in rxgk_verify_response()
In rxgk_verify_response(), there's a potential integer overflow due to
rounding up token_len before checking it, thereby allowing the length check to
be bypassed.

Fix this by checking the unrounded value against len too (len is limited as
the response must fit in a single UDP packet).

Fixes: 9d1d2b5934 ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)")
Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-18-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:34 -07:00
David Howells
f93af41b9f rxrpc: Fix missing error checks for rxkad encryption/decryption failure
Add error checking for failure of crypto_skcipher_en/decrypt() to various
rxkad function as the crypto functions can fail with ENOMEM at least.

Fixes: 17926a7932 ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-17-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:34 -07:00
David Howells
2afd86ccbb rxrpc: Fix key/keyring checks in setsockopt(RXRPC_SECURITY_KEY/KEYRING)
An AF_RXRPC socket can be both client and server at the same time.  When
sending new calls (ie. it's acting as a client), it uses rx->key to set the
security, and when accepting incoming calls (ie. it's acting as a server),
it uses rx->securities.

setsockopt(RXRPC_SECURITY_KEY) sets rx->key to point to an rxrpc-type key
and setsockopt(RXRPC_SECURITY_KEYRING) sets rx->securities to point to a
keyring of rxrpc_s-type keys.

Now, it should be possible to use both rx->key and rx->securities on the
same socket - but for userspace AF_RXRPC sockets rxrpc_setsockopt()
prevents that.

Fix this by:

 (1) Remove the incorrect check rxrpc_setsockopt(RXRPC_SECURITY_KEYRING)
     makes on rx->key.

 (2) Move the check that rxrpc_setsockopt(RXRPC_SECURITY_KEY) makes on
     rx->key down into rxrpc_request_key().

 (3) Remove rxrpc_request_key()'s check on rx->securities.

This (in combination with a previous patch) pushes the checks down into the
functions that set those pointers and removes the cross-checks that prevent
both key and keyring being set.

Fixes: 17926a7932 ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Anderson Nascimento <anderson@allelesecurity.com>
cc: Luxiao Xu <rakukuip@gmail.com>
cc: Yuan Tan <yuantan098@gmail.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-16-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:34 -07:00
Luxiao Xu
f125846ee7 rxrpc: fix reference count leak in rxrpc_server_keyring()
This patch fixes a reference count leak in rxrpc_server_keyring()
by checking if rx->securities is already set.

Fixes: 17926a7932 ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Ren Wei <enjou1224z@gmail.com>
Signed-off-by: Luxiao Xu <rakukuip@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-15-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:33 -07:00
Keenan Dong
a2567217ad rxrpc: fix oversized RESPONSE authenticator length check
rxgk_verify_response() decodes auth_len from the packet and is supposed
to verify that it fits in the remaining bytes. The existing check is
inverted, so oversized RESPONSE authenticators are accepted and passed
to rxgk_decrypt_skb(), which can later reach skb_to_sgvec() with an
impossible length and hit BUG_ON(len).

Decoded from the original latest-net reproduction logs with
scripts/decode_stacktrace.sh:

RIP: __skb_to_sgvec()
  [net/core/skbuff.c:5285 (discriminator 1)]
Call Trace:
 skb_to_sgvec() [net/core/skbuff.c:5305]
 rxgk_decrypt_skb() [net/rxrpc/rxgk_common.h:81]
 rxgk_verify_response() [net/rxrpc/rxgk.c:1268]
 rxrpc_process_connection()
   [net/rxrpc/conn_event.c:266 net/rxrpc/conn_event.c:364
    net/rxrpc/conn_event.c:386]
 process_one_work() [kernel/workqueue.c:3281]
 worker_thread()
   [kernel/workqueue.c:3353 kernel/workqueue.c:3440]
 kthread() [kernel/kthread.c:436]
 ret_from_fork() [arch/x86/kernel/process.c:164]

Reject authenticator lengths that exceed the remaining packet payload.

Fixes: 9d1d2b5934 ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)")
Signed-off-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: Willy Tarreau <w@1wt.eu>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-14-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:33 -07:00
Keenan Dong
3e31380078 rxrpc: fix RESPONSE authenticator parser OOB read
rxgk_verify_authenticator() copies auth_len bytes into a temporary
buffer and then passes p + auth_len as the parser limit to
rxgk_do_verify_authenticator(). Since p is a __be32 *, that inflates the
parser end pointer by a factor of four and lets malformed RESPONSE
authenticators read past the kmalloc() buffer.

Decoded from the original latest-net reproduction logs with
scripts/decode_stacktrace.sh:

BUG: KASAN: slab-out-of-bounds in rxgk_verify_response()
Call Trace:
 dump_stack_lvl() [lib/dump_stack.c:123]
 print_report() [mm/kasan/report.c:379 mm/kasan/report.c:482]
 kasan_report() [mm/kasan/report.c:597]
 rxgk_verify_response()
   [net/rxrpc/rxgk.c:1103 net/rxrpc/rxgk.c:1167
    net/rxrpc/rxgk.c:1274]
 rxrpc_process_connection()
   [net/rxrpc/conn_event.c:266 net/rxrpc/conn_event.c:364
    net/rxrpc/conn_event.c:386]
 process_one_work() [kernel/workqueue.c:3281]
 worker_thread()
   [kernel/workqueue.c:3353 kernel/workqueue.c:3440]
 kthread() [kernel/kthread.c:436]
 ret_from_fork() [arch/x86/kernel/process.c:164]

Allocated by task 54:
 rxgk_verify_response()
   [include/linux/slab.h:954 net/rxrpc/rxgk.c:1155
    net/rxrpc/rxgk.c:1274]
 rxrpc_process_connection()
   [net/rxrpc/conn_event.c:266 net/rxrpc/conn_event.c:364
    net/rxrpc/conn_event.c:386]

Convert the byte count to __be32 units before constructing the parser
limit.

Fixes: 9d1d2b5934 ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)")
Signed-off-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: Willy Tarreau <w@1wt.eu>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-13-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:33 -07:00
Yuqi Xu
fe4447cd95 rxrpc: reject undecryptable rxkad response tickets
rxkad_decrypt_ticket() decrypts the RXKAD response ticket and then
parses the buffer as plaintext without checking whether
crypto_skcipher_decrypt() succeeded.

A malformed RESPONSE can therefore use a non-block-aligned ticket
length, make the decrypt operation fail, and still drive the ticket
parser with attacker-controlled bytes.

Check the decrypt result and abort the connection with RXKADBADTICKET
when ticket decryption fails.

Fixes: 17926a7932 ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Ren Wei <enjou1224z@gmail.com>
Signed-off-by: Yuqi Xu <xuyuqiabc@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-12-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:33 -07:00
Douya Le
6331f1b24a rxrpc: Only put the call ref if one was acquired
rxrpc_input_packet_on_conn() can process a to-client packet after the
current client call on the channel has already been torn down.  In that
case chan->call is NULL, rxrpc_try_get_call() returns NULL and there is
no reference to drop.

The client-side implicit-end error path does not account for that and
unconditionally calls rxrpc_put_call().  This turns a protocol error
path into a kernel crash instead of rejecting the packet.

Only drop the call reference if one was actually acquired.  Keep the
existing protocol error handling unchanged.

Fixes: 5e6ef4f101 ("rxrpc: Make the I/O thread take over the call and local processor work")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Signed-off-by: Douya Le <ldy3087146292@gmail.com>
Co-developed-by: Yuan Tan <tanyuan98@gmail.com>
Signed-off-by: Yuan Tan <tanyuan98@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Ao Zhou <n05ec@lzu.edu.cn>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-11-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:33 -07:00
Marc Dionne
0cd3e3f3f2 rxrpc: Fix to request an ack if window is limited
Peers may only send immediate acks for every 2 UDP packets received.
When sending a jumbogram, it is important to check that there is
sufficient window space to send another same sized jumbogram following
the current one, and request an ack if there isn't.  Failure to do so may
cause the call to stall waiting for an ack until the resend timer fires.

Where jumbograms are in use this causes a very significant drop in
performance.

Fixes: fe24a54943 ("rxrpc: Send jumbo DATA packets")
Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-10-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:33 -07:00
Anderson Nascimento
d666540d21 rxrpc: Fix key reference count leak from call->key
When creating a client call in rxrpc_alloc_client_call(), the code obtains
a reference to the key.  This is never cleaned up and gets leaked when the
call is destroyed.

Fix this by freeing call->key in rxrpc_destroy_call().

Before the patch, it shows the key reference counter elevated:

$ cat /proc/keys | grep afs@54321
1bffe9cd I--Q--i 8053480 4169w 3b010000  1000  1000 rxrpc     afs@54321: ka
$

After the patch, the invalidated key is removed when the code exits:

$ cat /proc/keys | grep afs@54321
$

Fixes: f3441d4125 ("rxrpc: Copy client call parameters into rxrpc_call earlier")
Signed-off-by: Anderson Nascimento <anderson@allelesecurity.com>
Co-developed-by: David Howells <dhowells@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-9-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:32 -07:00
Alok Tiwari
65b3ffe097 rxrpc: Fix rack timer warning to report unexpected mode
rxrpc_rack_timer_expired() clears call->rack_timer_mode to OFF before
the switch. The default case warning therefore always prints OFF and
doesn't identify the unexpected timer mode.

Log the saved mode value instead so the warning reports the actual
unexpected rack timer mode.

Fixes: 7c48266593 ("rxrpc: Implement RACK/TLP to deal with transmission stalls [RFC8985]")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-8-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:32 -07:00
Alok Tiwari
b33f5741bb rxrpc: Fix use of wrong skb when comparing queued RESP challenge serial
In rxrpc_post_response(), the code should be comparing the challenge serial
number from the cached response before deciding to switch to a newer
response, but looks at the newer packet private data instead, rendering the
comparison always false.

Fix this by switching to look at the older packet.

Fix further[1] to substitute the new packet in place of the old one if
newer and also to release whichever we don't use.

Fixes: 5800b1cf3f ("rxrpc: Allow CHALLENGEs to the passed to the app for a RESPONSE")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com [1]
Link: https://patch.msgid.link/20260408121252.2249051-7-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:32 -07:00
Oleh Konko
d179a868dd rxrpc: Fix RxGK token loading to check bounds
rxrpc_preparse_xdr_yfs_rxgk() reads the raw key length and ticket length
from the XDR token as u32 values and passes each through round_up(x, 4)
before using the rounded value for validation and allocation.  When the raw
length is >= 0xfffffffd, round_up() wraps to 0, so the bounds check and
kzalloc both use 0 while the subsequent memcpy still copies the original
~4 GiB value, producing a heap buffer overflow reachable from an
unprivileged add_key() call.

Fix this by:

 (1) Rejecting raw key lengths above AFSTOKEN_GK_KEY_MAX and raw ticket
     lengths above AFSTOKEN_GK_TOKEN_MAX before rounding, consistent with
     the caps that the RxKAD path already enforces via AFSTOKEN_RK_TIX_MAX.

 (2) Sizing the flexible-array allocation from the validated raw key
     length via struct_size_t() instead of the rounded value.

 (3) Caching the raw lengths so that the later field assignments and
     memcpy calls do not re-read from the token, eliminating a class of
     TOCTOU re-parse.

The control path (valid token with lengths within bounds) is unaffected.

Fixes: 0ca100ff4d ("rxrpc: Add YFS RxGK (GSSAPI) security class")
Signed-off-by: Oleh Konko <security@1seal.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-6-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:32 -07:00
David Howells
146d4ab94c rxrpc: Fix call removal to use RCU safe deletion
Fix rxrpc call removal from the rxnet->calls list to use list_del_rcu()
rather than list_del_init() to prevent stuffing up reading
/proc/net/rxrpc/calls from potentially getting into an infinite loop.

This, however, means that list_empty() no longer works on an entry that's
been deleted from the list, making it harder to detect prior deletion.  Fix
this by:

Firstly, make rxrpc_destroy_all_calls() only dump the first ten calls that
are unexpectedly still on the list.  Limiting the number of steps means
there's no need to call cond_resched() or to remove calls from the list
here, thereby eliminating the need for rxrpc_put_call() to check for that.

rxrpc_put_call() can then be fixed to unconditionally delete the call from
the list as it is the only place that the deletion occurs.

Fixes: 2baec2c3f8 ("rxrpc: Support network namespacing")
Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Linus Torvalds <torvalds@linux-foundation.org>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-5-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:32 -07:00
David Howells
6a59d84b4f rxrpc: Fix anonymous key handling
In rxrpc_new_client_call_for_sendmsg(), a key with no payload is meant to
be substituted for a NULL key pointer, but the variable this is done with
is subsequently not used.

Fix this by using "key" rather than "rx->key" when filling in the
connection parameters.

Note that this only affects direct use of AF_RXRPC; the kAFS filesystem
doesn't use sendmsg() directly and so bypasses the issue.  Further,
AF_RXRPC passes a NULL key in if no key is set, so using an anonymous key
in that manner works.  Since this hasn't been noticed to this point, it
might be better just to remove the "key" variable and the code that sets it
- and, arguably, rxrpc_init_client_call_security() would be a better place
to handle it.

Fixes: 19ffa01c9c ("rxrpc: Use structs to hold connection params and protocol info")
Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-4-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:31 -07:00
David Howells
b555912b9b rxrpc: Fix key parsing memleak
In rxrpc_preparse_xdr_yfs_rxgk(), the memory attached to token->rxgk can be
leaked in a few error paths after it's allocated.

Fix this by freeing it in the "reject_token:" case.

Fixes: 0ca100ff4d ("rxrpc: Add YFS RxGK (GSSAPI) security class")
Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-3-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:31 -07:00
David Howells
bdbfead6d3 rxrpc: Fix key quota calculation for multitoken keys
In the rxrpc key preparsing, every token extracted sets the proposed quota
value, but for multitoken keys, this will overwrite the previous proposed
quota, losing it.

Fix this by adding to the proposed quota instead.

Fixes: 8a7a3eb4dd ("KEYS: RxRPC: Use key preparsing")
Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-2-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:44:31 -07:00
Felix Gu
c09ea768bd net: mdio: realtek-rtl9300: use scoped device_for_each_child_node loop
Switch to device_for_each_child_node_scoped() to auto-release fwnode
references on early exit.

Fixes: 24e31e4747 ("net: mdio: Add RTL9300 MDIO driver")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20260405-rtl9300-v1-1-08e4499cf944@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-08 18:42:08 -07:00
Fernando Fernandez Mancera
dde1a6084c selftests: nft_queue.sh: add a parallel stress test
Introduce a new stress test to check for race conditions in the
nfnetlink_queue subsystem, where an entry is freed while another CPU is
concurrently walking the global rhashtable.

To trigger this, `nf_queue.c` is extended with two new flags:
  * -O (out-of-order): Buffers packet IDs and flushes them in reverse.
  * -b (bogus verdicts): Floods the kernel with non-existent packet IDs.

The bogus verdict loop forces the kernel's lookup function to perform
full rhashtable bucket traversals (-ENOENT). Combined with reverse-order
flushing and heavy parallel UDP/ping flooding across 8 queues, this puts
the nfnetlink_queue code under pressure.

Joint work with Florian Westphal.

Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-04-08 13:34:51 +02:00
Florian Westphal
936206e3f6 netfilter: nfnetlink_queue: make hash table per queue
Sharing a global hash table among all queues is tempting, but
it can cause crash:

BUG: KASAN: slab-use-after-free in nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
[..]
 nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
 nfnetlink_rcv_msg+0x46a/0x930
 kmem_cache_alloc_node_noprof+0x11e/0x450

struct nf_queue_entry is freed via kfree, but parallel cpu can still
encounter such an nf_queue_entry when walking the list.

Alternative fix is to free the nf_queue_entry via kfree_rcu() instead,
but as we have to alloc/free for each skb this will cause more mem
pressure.

Cc: Scott Mitchell <scott.k.mitch1@gmail.com>
Fixes: e19079adcd ("netfilter: nfnetlink_queue: optimize verdict lookup with hash table")
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-04-08 13:34:51 +02:00
Tuan Do
f8dca15a1b netfilter: nft_ct: fix use-after-free in timeout object destroy
nft_ct_timeout_obj_destroy() frees the timeout object with kfree()
immediately after nf_ct_untimeout(), without waiting for an RCU grace
period. Concurrent packet processing on other CPUs may still hold
RCU-protected references to the timeout object obtained via
rcu_dereference() in nf_ct_timeout_data().

Add an rcu_head to struct nf_ct_timeout and use kfree_rcu() to defer
freeing until after an RCU grace period, matching the approach already
used in nfnetlink_cttimeout.c.

KASAN report:
 BUG: KASAN: slab-use-after-free in nf_conntrack_tcp_packet+0x1381/0x29d0
 Read of size 4 at addr ffff8881035fe19c by task exploit/80

 Call Trace:
  nf_conntrack_tcp_packet+0x1381/0x29d0
  nf_conntrack_in+0x612/0x8b0
  nf_hook_slow+0x70/0x100
  __ip_local_out+0x1b2/0x210
  tcp_sendmsg_locked+0x722/0x1580
  __sys_sendto+0x2d8/0x320

 Allocated by task 75:
  nft_ct_timeout_obj_init+0xf6/0x290
  nft_obj_init+0x107/0x1b0
  nf_tables_newobj+0x680/0x9c0
  nfnetlink_rcv_batch+0xc29/0xe00

 Freed by task 26:
  nft_obj_destroy+0x3f/0xa0
  nf_tables_trans_destroy_work+0x51c/0x5c0
  process_one_work+0x2c4/0x5a0

Fixes: 7e0b2b57f0 ("netfilter: nft_ct: add ct timeout support")
Cc: stable@vger.kernel.org
Signed-off-by: Tuan Do <tuan@calif.io>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-04-08 13:34:16 +02:00
Zhengchuan Liang
fdce0b3590 netfilter: ip6t_eui64: reject invalid MAC header for all packets
`eui64_mt6()` derives a modified EUI-64 from the Ethernet source address
and compares it with the low 64 bits of the IPv6 source address.

The existing guard only rejects an invalid MAC header when
`par->fragoff != 0`. For packets with `par->fragoff == 0`, `eui64_mt6()`
can still reach `eth_hdr(skb)` even when the MAC header is not valid.

Fix this by removing the `par->fragoff != 0` condition so that packets
with an invalid MAC header are rejected before accessing `eth_hdr(skb)`.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Ren Wei <enjou1224z@gmail.com>
Signed-off-by: Zhengchuan Liang <zcliangcn@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-04-08 13:33:56 +02:00
ff64c5bfef netfilter: xt_multiport: validate range encoding in checkentry
ports_match_v1() treats any non-zero pflags entry as the start of a
port range and unconditionally consumes the next ports[] element as
the range end.

The checkentry path currently validates protocol, flags and count, but
it does not validate the range encoding itself. As a result, malformed
rules can mark the last slot as a range start or place two range starts
back to back, leaving ports_match_v1() to step past the last valid
ports[] element while interpreting the rule.

Reject malformed multiport v1 rules in checkentry by validating that
each range start has a following element and that the following element
is not itself marked as another range start.

Fixes: a89ecb6a2e ("[NETFILTER]: x_tables: unify IPv4/IPv6 multiport match")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Yuhang Zheng <z1652074432@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-04-08 13:33:38 +02:00
Xiang Mei
1f3083aec8 netfilter: nfnetlink_log: initialize nfgenmsg in NLMSG_DONE terminator
When batching multiple NFLOG messages (inst->qlen > 1), __nfulnl_send()
appends an NLMSG_DONE terminator with sizeof(struct nfgenmsg) payload via
nlmsg_put(), but never initializes the nfgenmsg bytes. The nlmsg_put()
helper only zeroes alignment padding after the payload, not the payload
itself, so four bytes of stale kernel heap data are leaked to userspace
in the NLMSG_DONE message body.

Use nfnl_msg_put() to build the NLMSG_DONE terminator, which initializes
the nfgenmsg payload via nfnl_fill_hdr(), consistent with how
__build_packet_message() already constructs NFULNL_MSG_PACKET headers.

Fixes: 29c5d4afba ("[NETFILTER]: nfnetlink_log: fix sending of multipart messages")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-04-08 13:33:36 +02:00
Weiming Shi
9a91797e61 ipvs: fix NULL deref in ip_vs_add_service error path
When ip_vs_bind_scheduler() succeeds in ip_vs_add_service(), the local
variable sched is set to NULL.  If ip_vs_start_estimator() subsequently
fails, the out_err cleanup calls ip_vs_unbind_scheduler(svc, sched)
with sched == NULL.  ip_vs_unbind_scheduler() passes the cur_sched NULL
check (because svc->scheduler was set by the successful bind) but then
dereferences the NULL sched parameter at sched->done_service, causing a
kernel panic at offset 0x30 from NULL.

 Oops: general protection fault, [..] [#1] PREEMPT SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037]
 RIP: 0010:ip_vs_unbind_scheduler (net/netfilter/ipvs/ip_vs_sched.c:69)
 Call Trace:
  <TASK>
  ip_vs_add_service.isra.0 (net/netfilter/ipvs/ip_vs_ctl.c:1500)
  do_ip_vs_set_ctl (net/netfilter/ipvs/ip_vs_ctl.c:2809)
  nf_setsockopt (net/netfilter/nf_sockopt.c:102)
  [..]

Fix by simply not clearing the local sched variable after a successful
bind.  ip_vs_unbind_scheduler() already detects whether a scheduler is
installed via svc->scheduler, and keeping sched non-NULL ensures the
error path passes the correct pointer to both ip_vs_unbind_scheduler()
and ip_vs_scheduler_put().

While the bug is older, the problem popups in more recent kernels (6.2),
when the new error path is taken after the ip_vs_start_estimator() call.

Fixes: 705dd34440 ("ipvs: use kthreads for stats estimation")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Acked-by: Simon Horman <horms@kernel.org>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-04-08 13:33:28 +02:00
Jakub Kicinski
f821664dde Merge branch 'seg6-fix-dst_cache-sharing-in-seg6-lwtunnel'
Andrea Mayer says:

====================
seg6: fix dst_cache sharing in seg6 lwtunnel

The seg6 lwtunnel encap uses a single per-route dst_cache shared
between seg6_input_core() and seg6_output_core(). These two paths
can perform the post-encap SID lookup in different routing contexts
(e.g., ip rules matching on the ingress interface, or VRF table
separation). Whichever path runs first populates the cache, and the
other reuses it blindly, bypassing its own lookup.

Patch 1 fixes this by splitting the cache into cache_input and
cache_output. Patch 2 adds a selftest that validates the isolation.
====================

Link: https://patch.msgid.link/20260404004405.4057-1-andrea.mayer@uniroma2.it
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-07 20:21:00 -07:00
Andrea Mayer
32dfd742f0 selftests: seg6: add test for dst_cache isolation in seg6 lwtunnel
Add a selftest that verifies the dst_cache in seg6 lwtunnel is not
shared between the input (forwarding) and output (locally generated)
paths.

The test creates three namespaces (ns_src, ns_router, ns_dst)
connected in a line. An SRv6 encap route on ns_router encapsulates
traffic destined to cafe::1 with SID fc00::100. The SID is
reachable only for forwarded traffic (from ns_src) via an ip rule
matching the ingress interface (iif veth-r0 lookup 100), and
blackholed in the main table.

The test verifies that:

  1. A packet generated locally on ns_router does not reach
     ns_dst with an empty cache, since the SID is blackholed;
  2. A forwarded packet from ns_src populates the input cache
     from table 100 and reaches ns_dst;
  3. A packet generated locally on ns_router still does not
     reach ns_dst after the input cache is populated,
     confirming the output path does not reuse the input
     cache entry.

Both the forwarded and local packets are pinned to the same CPU
with taskset, since dst_cache is per-cpu.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Justin Iurman <justin.iurman@gmail.com>
Link: https://patch.msgid.link/20260404004405.4057-3-andrea.mayer@uniroma2.it
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-07 20:20:56 -07:00
Andrea Mayer
c3812651b5 seg6: separate dst_cache for input and output paths in seg6 lwtunnel
The seg6 lwtunnel uses a single dst_cache per encap route, shared
between seg6_input_core() and seg6_output_core(). These two paths
can perform the post-encap SID lookup in different routing contexts
(e.g., ip rules matching on the ingress interface, or VRF table
separation). Whichever path runs first populates the cache, and the
other reuses it blindly, bypassing its own lookup.

Fix this by splitting the cache into cache_input and cache_output,
so each path maintains its own cached dst independently.

Fixes: 6c8702c60b ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
Cc: stable@vger.kernel.org
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Justin Iurman <justin.iurman@gmail.com>
Link: https://patch.msgid.link/20260404004405.4057-2-andrea.mayer@uniroma2.it
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-07 20:20:56 -07:00
Daniel Golle
efaa71faf2 selftests: net: bridge_vlan_mcast: wait for h1 before querier check
The querier-interval test adds h1 (currently a slave of the VRF created
by simple_if_init) to a temporary bridge br1 acting as an outside IGMP
querier. The kernel VRF driver (drivers/net/vrf.c) calls cycle_netdev()
on every slave add and remove, toggling the interface admin-down then up.
Phylink takes the PHY down during the admin-down half of that cycle.
Since h1 and swp1 are cable-connected, swp1 also loses its link may need
several seconds to re-negotiate.

Use setup_wait_dev $h1 0 which waits for h1 to return to UP state, so the
test can rely on the link being back up at this point.

Fixes: 4d8610ee8b ("selftests: net: bridge: add vlan mcast_querier_interval tests")
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
Link: https://patch.msgid.link/c830f130860fd2efae08bfb9e5b25fd028e58ce5.1775424423.git.daniel@makrotopia.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-07 20:16:16 -07:00
Jakub Kicinski
944b3b734c net: avoid nul-deref trying to bind mp to incapable device
Sashiko points out that we use qops in __net_mp_open_rxq()
but never validate they are null. This was introduced when
check was moved from netdev_rx_queue_restart().

Look at ops directly instead of the locking config.
qops imply netdev_need_ops_lock(). We used netdev_need_ops_lock()
initially to signify that the real_num_rx_queues check below
is safe without rtnl_lock, but I'm not sure if this is actually
clear to most people, anyway.

Fixes: da7772a2b4 ("net: move mp->rx_page_size validation to __net_mp_open_rxq()")
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Mina Almasry <almasrymina@google.com>
Link: https://patch.msgid.link/20260404001938.2425670-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-07 18:57:56 -07:00
Johan Alvarado
f2777d5cb5 net: stmmac: dwmac-motorcomm: fix eFUSE MAC address read failure
This patch fixes an issue where reading the MAC address from the eFUSE
fails due to a race condition.

The root cause was identified by comparing the driver's behavior with a
custom U-Boot port. In U-Boot, the MAC address was read successfully
every time because the driver was loaded later in the boot process, giving
the hardware ample time to initialize. In Linux, reading the eFUSE
immediately returns all zeros, resulting in a fallback to a random MAC address.

Hardware cold-boot testing revealed that the eFUSE controller requires a
short settling time to load its internal data. Adding a 2000-5000us
delay after the reset ensures the hardware is fully ready, allowing the
native MAC address to be read consistently.

Fixes: 02ff155ea2 ("net: stmmac: Add glue driver for Motorcomm YT6801 ethernet controller")
Reported-by: Georg Gottleuber <ggo@tuxedocomputers.com>
Closes: https://lore.kernel.org/24cfefff-1233-4745-8c47-812b502d5d19@tuxedocomputers.com
Signed-off-by: Johan Alvarado <contact@c127.dev>
Reviewed-by: Yao Zi <me@ziyao.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/fc5992a4-9532-49c3-8ec1-c2f8c5b84ca1@smtp-relay.sendinblue.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-07 18:21:00 -07:00
John Pavlick
95aca8602e net: sfp: add quirks for Hisense and HSGQ GPON ONT SFP modules
Several GPON ONT SFP sticks based on Realtek RTL960x report
1000BASE-LX at 1300MBd in their EEPROM but can operate at 2500base-X.
On hosts capable of 2500base-X (e.g. Banana Pi R3 / MT7986), the
kernel negotiates only 1G because it trusts the incorrect EEPROM data.

Add quirks for:
- Hisense-Leox LXT-010S-H
- Hisense ZNID-GPON-2311NA
- HSGQ HSGQ-XPON-Stick

Each quirk advertises 2500base-X and ignores TX_FAULT during the
module's ~40s Linux boot time.

Tested on Banana Pi R3 (MT7986) with OpenWrt 25.12.1, confirmed
2.5Gbps link and full throughput with flow offloading.

Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Suggested-by: Marcin Nita <marcin.nita@leolabs.pl>
Signed-off-by: John Pavlick <jspavlick@posteo.net>
Link: https://patch.msgid.link/20260406132321.72563-1-jspavlick@posteo.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-07 18:13:51 -07:00
Muhammad Alifa Ramdhan
a9b8b18364 net/tls: fix use-after-free in -EBUSY error path of tls_do_encryption
The -EBUSY handling in tls_do_encryption(), introduced by commit
8590541473 ("net: tls: handle backlogging of crypto requests"), has
a use-after-free due to double cleanup of encrypt_pending and the
scatterlist entry.

When crypto_aead_encrypt() returns -EBUSY, the request is enqueued to
the cryptd backlog and the async callback tls_encrypt_done() will be
invoked upon completion. That callback unconditionally restores the
scatterlist entry (sge->offset, sge->length) and decrements
ctx->encrypt_pending. However, if tls_encrypt_async_wait() returns an
error, the synchronous error path in tls_do_encryption() performs the
same cleanup again, double-decrementing encrypt_pending and
double-restoring the scatterlist.

The double-decrement corrupts the encrypt_pending sentinel (initialized
to 1), making tls_encrypt_async_wait() permanently skip the wait for
pending async callbacks. A subsequent sendmsg can then free the
tls_rec via bpf_exec_tx_verdict() while a cryptd callback is still
pending, resulting in a use-after-free when the callback fires on the
freed record.

Fix this by skipping the synchronous cleanup when the -EBUSY async
wait returns an error, since the callback has already handled
encrypt_pending and sge restoration.

Fixes: 8590541473 ("net: tls: handle backlogging of crypto requests")
Cc: stable@vger.kernel.org
Signed-off-by: Muhammad Alifa Ramdhan <ramdhan@starlabs.sg>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/20260403013617.2838875-1-ramdhan@starlabs.sg
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-07 14:53:42 +02:00
Greg Kroah-Hartman
ea245d78de net: rfkill: prevent unlimited numbers of rfkill events from being created
Userspace can create an unlimited number of rfkill events if the system
is so configured, while not consuming them from the rfkill file
descriptor, causing a potential out of memory situation.  Prevent this
from bounding the number of pending rfkill events at a "large" number
(i.e. 1000) to prevent abuses like this.

Cc: Johannes Berg <johannes@sipsolutions.net>
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026033013-disfigure-scroll-e25e@gregkh
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-04-07 12:35:04 +02:00
Johan Hovold
25369b2222 wifi: rt2x00usb: fix devres lifetime
USB drivers bind to USB interfaces and any device managed resources
should have their lifetime tied to the interface rather than parent USB
device. This avoids issues like memory leaks when drivers are unbound
without their devices being physically disconnected (e.g. on probe
deferral or configuration changes).

Fix the USB anchor lifetime so that it is released on driver unbind.

Fixes: 8b4c000931 ("rt2x00usb: Use usb anchor to manage URB")
Cc: stable@vger.kernel.org	# 4.7
Cc: Vishal Thanki <vishalthanki@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Stanislaw Gruszka <stf_xl@wp.pl>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260327113219.1313748-1-johan@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-04-07 12:34:52 +02:00
Pengpeng Hou
304950a467 wifi: brcmfmac: validate bsscfg indices in IF events
brcmf_fweh_handle_if_event() validates the firmware-provided interface
index before it touches drvr->iflist[], but it still uses the raw
bsscfgidx field as an array index without a matching range check.

Reject IF events whose bsscfg index does not fit in drvr->iflist[]
before indexing the interface array.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Link: https://patch.msgid.link/20260323074551.93530-1-pengpeng@iscas.ac.cn
[add missing wifi prefix]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-04-07 12:34:20 +02:00
Thomas Fourier
12cd763275 wifi: brcmsmac: Fix dma_free_coherent() size
dma_alloc_consistent() may change the size to align it. The new size is
saved in alloced.

Change the free size to match the allocation size.

Fixes: 5b435de0d7 ("net: wireless: add brcm80211 drivers")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Fourier <fourier.thomas@gmail.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Link: https://patch.msgid.link/20260218130741.46566-3-fourier.thomas@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-04-07 12:34:03 +02:00
Zhengchuan Liang
426c355742 net: af_key: zero aligned sockaddr tail in PF_KEY exports
PF_KEY export paths use `pfkey_sockaddr_size()` when reserving sockaddr
payload space, so IPv6 addresses occupy 32 bytes on the wire. However,
`pfkey_sockaddr_fill()` initializes only the first 28 bytes of
`struct sockaddr_in6`, leaving the final 4 aligned bytes uninitialized.

Not every PF_KEY message is affected. The state and policy dump builders
already zero the whole message buffer before filling the sockaddr
payloads. Keep the fix to the export paths that still append aligned
sockaddr payloads with plain `skb_put()`:

  - `SADB_ACQUIRE`
  - `SADB_X_NAT_T_NEW_MAPPING`
  - `SADB_X_MIGRATE`

Fix those paths by clearing only the aligned sockaddr tail after
`pfkey_sockaddr_fill()`.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Fixes: 08de61beab ("[PFKEYV2]: Extension for dynamic update of endpoint address(es)")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Xiao Liu <lx24@stu.ynu.edu.cn>
Signed-off-by: Zhengchuan Liang <zcliangcn@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-04-07 11:08:24 +02:00
Greg Kroah-Hartman
d10119968d xfrm_user: fix info leak in build_report()
struct xfrm_user_report is a __u8 proto field followed by a struct
xfrm_selector which means there is three "empty" bytes of padding, but
the padding is never zeroed before copying to userspace.  Fix that up by
zeroing the structure before setting individual member variables.

Cc: stable <stable@kernel.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Assisted-by: gregkh_clanker_t1000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-04-07 10:36:38 +02:00
Greg Kroah-Hartman
1beb76b205 xfrm_user: fix info leak in build_mapping()
struct xfrm_usersa_id has a one-byte padding hole after the proto
field, which ends up never getting set to zero before copying out to
userspace.  Fix that up by zeroing out the whole structure before
setting individual variables.

Fixes: 3a2dfbe8ac ("xfrm: Notify changes in UDP encapsulation via netlink")
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Assisted-by: gregkh_clanker_t1000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-04-07 10:36:37 +02:00
Kotlyarov Mihail
83317cce60 xfrm: fix refcount leak in xfrm_migrate_policy_find
syzkaller reported a memory leak in xfrm_policy_alloc:

  BUG: memory leak
  unreferenced object 0xffff888114d79000 (size 1024):
    comm "syz.1.17", pid 931
    ...
    xfrm_policy_alloc+0xb3/0x4b0 net/xfrm/xfrm_policy.c:432

The root cause is a double call to xfrm_pol_hold_rcu() in
xfrm_migrate_policy_find(). The lookup function already returns
a policy with held reference, making the second call redundant.

Remove the redundant xfrm_pol_hold_rcu() call to fix the refcount
imbalance and prevent the memory leak.

Found by Linux Verification Center (linuxtesting.org) with Syzkaller.

Fixes: 563d5ca93e ("xfrm: switch migrate to xfrm_policy_lookup_bytype")
Signed-off-by: Kotlyarov Mihail <mihailkotlyarow@gmail.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-04-07 10:24:23 +02:00
Qi Tang
1c428b0384 xfrm: hold dev ref until after transport_finish NF_HOOK
After async crypto completes, xfrm_input_resume() calls dev_put()
immediately on re-entry before the skb reaches transport_finish.
The skb->dev pointer is then used inside NF_HOOK and its okfn,
which can race with device teardown.

Remove the dev_put from the async resumption entry and instead
drop the reference after the NF_HOOK call in transport_finish,
using a saved device pointer since NF_HOOK may consume the skb.
This covers NF_DROP, NF_QUEUE and NF_STOLEN paths that skip
the okfn.

For non-transport exits (decaps, gro, drop) and secondary
async return points, release the reference inline when
async is set.

Suggested-by: Florian Westphal <fw@strlen.de>
Fixes: acf568ee85 ("xfrm: Reinject transport-mode packets through tasklet")
Cc: stable@vger.kernel.org
Signed-off-by: Qi Tang <tpluszz77@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-04-07 10:12:40 +02:00
Steffen Klassert
069daad4f2 xfrm: Wait for RCU readers during policy netns exit
xfrm_policy_fini() frees the policy_bydst hash tables after flushing the
policy work items and deleting all policies, but it does not wait for
concurrent RCU readers to leave their read-side critical sections first.

The policy_bydst tables are published via rcu_assign_pointer() and are
looked up through rcu_dereference_check(), so netns teardown must also
wait for an RCU grace period before freeing the table memory.

Fix this by adding synchronize_rcu() before freeing the policy hash tables.

Fixes: e1e551bc56 ("xfrm: policy: prepare policy_bydst hash for rcu lookups")
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
2026-04-07 10:12:40 +02:00
Michael Guralnik
a9d4f4f6e6 net/mlx5: Update the list of the PCI supported devices
Add the upcoming ConnectX-10 NVLink-C2C device ID to the table of
supported PCI device IDs.

Cc: stable@vger.kernel.org
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260403091756.139583-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 19:17:42 -07:00
Jiayuan Chen
0f42e3f4fe net: skb: fix cross-cache free of KFENCE-allocated skb head
SKB_SMALL_HEAD_CACHE_SIZE is intentionally set to a non-power-of-2
value (e.g. 704 on x86_64) to avoid collisions with generic kmalloc
bucket sizes. This ensures that skb_kfree_head() can reliably use
skb_end_offset to distinguish skb heads allocated from
skb_small_head_cache vs. generic kmalloc caches.

However, when KFENCE is enabled, kfence_ksize() returns the exact
requested allocation size instead of the slab bucket size. If a caller
(e.g. bpf_test_init) allocates skb head data via kzalloc() and the
requested size happens to equal SKB_SMALL_HEAD_CACHE_SIZE, then
slab_build_skb() -> ksize() returns that exact value. After subtracting
skb_shared_info overhead, skb_end_offset ends up matching
SKB_SMALL_HEAD_HEADROOM, causing skb_kfree_head() to incorrectly free
the object to skb_small_head_cache instead of back to the original
kmalloc cache, resulting in a slab cross-cache free:

  kmem_cache_free(skbuff_small_head): Wrong slab cache. Expected
  skbuff_small_head but got kmalloc-1k

Fix this by always calling kfree(head) in skb_kfree_head(). This keeps
the free path generic and avoids allocator-specific misclassification
for KFENCE objects.

Fixes: bf9f1baa27 ("net: add dedicated kmem_cache for typical/small skb->head")
Reported-by: Antonius <antonius@bluedragonsec.com>
Closes: https://lore.kernel.org/netdev/CAK8a0jxC5L5N7hq-DT2_NhUyjBxrPocoiDazzsBk4TGgT1r4-A@mail.gmail.com/
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260403014517.142550-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:46:53 -07:00
Stefano Garzarella
24ad7ff668 vsock/test: fix send_buf()/recv_buf() EINTR handling
When send() or recv() returns -1 with errno == EINTR, the code skips
the break but still adds the return value to nwritten/nread, making it
decrease by 1. This leads to wrong buffer offsets and wrong bytes count.

Fix it by explicitly continuing the loop on EINTR, so the return value
is only added when it is positive.

Fixes: a8ed71a27e ("vsock/test: add recv_buf() utility function")
Fixes: 12329bd51f ("vsock/test: add send_buf() utility function")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Luigi Leonardi <leonardi@redhat.com>
Link: https://patch.msgid.link/20260403093251.30662-1-sgarzare@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:46:03 -07:00
Jakub Kicinski
270c0637b9 Merge branch 'xsk-tailroom-reservation-and-mtu-validation'
Maciej Fijalkowski says:

====================
xsk: tailroom reservation and MTU validation

here we fix a long-standing issue regarding multi-buffer scenario in ZC
mode - we have not been providing space at the end of the buffer where
multi-buffer XDP works on skb_shared_info. This has been brought to our
attention via [0].

Unaligned mode does not get any specific treatment, it is user's
responsibility to properly handle XSK addresses in queues.

With adjustments included here in this set against xskxceiver I have
been able to pass the full test suite on ice.

[0]: https://community.intel.com/t5/Ethernet-Products/X710-XDP-Packet-Corruption-Issue-DRV-MODE-Zero-Copy-Multi-Buffer/m-p/1724208
====================

Link: https://patch.msgid.link/20260402154958.562179-1-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:54 -07:00
Maciej Fijalkowski
62838e363e selftests: bpf: adjust rx_dropped xskxceiver's test to respect tailroom
Since we have changed how big user defined headroom in umem can be,
change the logic in testapp_stats_rx_dropped() so we pass updated
headroom validation in xdp_umem_reg() and still drop half of frames.

Test works on non-mbuf setup so __xsk_pool_get_rx_frame_size() that is
called on xsk_rcv_check() will not account skb_shared_info size. Taking
the tailroom size into account in test being fixed is needed as
xdp_umem_reg() defaults to respect it.

Reviewed-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-9-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:52 -07:00
Maciej Fijalkowski
16546954e1 selftests: bpf: have a separate variable for drop test
Currently two different XDP programs share a static variable for
different purposes (picking where to redirect on shared umem test &
whether to drop a packet). This can be a problem when running full test
suite - idx can be written by shared umem test and this value can cause
a false behavior within XDP drop half test.

Introduce a dedicated variable for drop half test so that these two
don't step on each other toes. There is no real need for using
__sync_fetch_and_add here as XSK tests are executed on single CPU.

Reviewed-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-8-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:52 -07:00
Maciej Fijalkowski
3197c51ce2 selftests: bpf: fix pkt grow tests
Skip tail adjust tests in xskxceiver for SKB mode as it is not very
friendly for it. multi-buffer case does not work as xdp_rxq_info that is
registered for generic XDP does not report ::frag_size. The non-mbuf
path copies packet via skb_pp_cow_data() which only accounts for
headroom, leaving us with no tailroom and causing underlying XDP prog to
drop packets therefore.

For multi-buffer test on other modes, change the amount of bytes we use
for growth, assume worst-case scenario and take care of headroom and
tailroom.

Reviewed-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-7-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:51 -07:00
Maciej Fijalkowski
c5866a6be4 selftests: bpf: introduce a common routine for reading procfs
Parametrize current way of getting MAX_SKB_FRAGS value from {sys,proc}fs
so that it can be re-used to get cache line size of system's CPU. All
that just to mimic and compute size of kernel's struct skb_shared_info
which for xsk and test suite interpret as tailroom.

Introduce two variables to ifobject struct that will carry count of skb
frags and tailroom size. Do the reading and computing once, at the
beginning of test suite execution in xskxceiver, but for test_progs such
way is not possible as in this environment each test setups and torns
down ifobject structs.

Reviewed-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-6-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:51 -07:00
Maciej Fijalkowski
36ee60b569 xsk: validate MTU against usable frame size on bind
AF_XDP bind currently accepts zero-copy pool configurations without
verifying that the device MTU fits into the usable frame space provided
by the UMEM chunk.

This becomes a problem since we started to respect tailroom which is
subtracted from chunk_size (among with headroom). 2k chunk size might
not provide enough space for standard 1500 MTU, so let us catch such
settings at bind time. Furthermore, validate whether underlying HW will
be able to satisfy configured MTU wrt XSK's frame size multiplied by
supported Rx buffer chain length (that is exposed via
net_device::xdp_zc_max_segs).

Fixes: 24ea50127e ("xsk: support mbuf on ZC RX")
Reviewed-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-5-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:51 -07:00
Maciej Fijalkowski
93e84fe45b xsk: fix XDP_UMEM_SG_FLAG issues
Currently xp_assign_dev_shared() is missing XDP_USE_SG being propagated
to flags so set it in order to preserve mtu check that is supposed to be
done only when no multi-buffer setup is in picture.

Also, this flag has the same value as XDP_UMEM_TX_SW_CSUM so we could
get unexpected SG setups for software Tx checksums. Since csum flag is
UAPI, modify value of XDP_UMEM_SG_FLAG.

Fixes: d609f3d228 ("xsk: add multi-buffer support for sockets sharing umem")
Reviewed-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-4-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:51 -07:00
Maciej Fijalkowski
1ee1605138 xsk: respect tailroom for ZC setups
Multi-buffer XDP stores information about frags in skb_shared_info that
sits at the tailroom of a packet. The storage space is reserved via
xdp_data_hard_end():

	((xdp)->data_hard_start + (xdp)->frame_sz -	\
	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

and then we refer to it via macro below:

static inline struct skb_shared_info *
xdp_get_shared_info_from_buff(const struct xdp_buff *xdp)
{
        return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}

Currently we do not respect this tailroom space in multi-buffer AF_XDP
ZC scenario. To address this, introduce xsk_pool_get_tailroom() and use
it within xsk_pool_get_rx_frame_size() which is used in ZC drivers to
configure length of HW Rx buffer.

Typically drivers on Rx Hw buffers side work on 128 byte alignment so
let us align the value returned by xsk_pool_get_rx_frame_size() in order
to avoid addressing this on driver's side. This addresses the fact that
idpf uses mentioned function *before* pool->dev being set so we were at
risk that after subtracting tailroom we would not provide 128-byte
aligned value to HW.

Since xsk_pool_get_rx_frame_size() is actively used in xsk_rcv_check()
and __xsk_rcv(), add a variant of this routine that will not include 128
byte alignment and therefore old behavior is preserved.

Reviewed-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Fixes: 24ea50127e ("xsk: support mbuf on ZC RX")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-3-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:51 -07:00
Maciej Fijalkowski
a315e022a7 xsk: tighten UMEM headroom validation to account for tailroom and min frame
The current headroom validation in xdp_umem_reg() could leave us with
insufficient space dedicated to even receive minimum-sized ethernet
frame. Furthermore if multi-buffer would come to play then
skb_shared_info stored at the end of XSK frame would be corrupted.

HW typically works with 128-aligned sizes so let us provide this value
as bare minimum.

Multi-buffer setting is known later in the configuration process so
besides accounting for 128 bytes, let us also take care of tailroom space
upfront.

Reviewed-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Fixes: 99e3a236dd ("xsk: Add missing check on user supplied headroom size")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-2-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-06 18:43:51 -07:00
Agalakov Daniil
d3baa34a47 e1000: check return value of e1000_read_eeprom
[Why]
e1000_set_eeprom() performs a read-modify-write operation when the write
range is not word-aligned. This requires reading the first and last words
of the range from the EEPROM to preserve the unmodified bytes.

However, the code does not check the return value of e1000_read_eeprom().
If the read fails, the operation continues using uninitialized data from
eeprom_buff. This results in corrupted data being written back to the
EEPROM for the boundary words.

Add the missing error checks and abort the operation if reading fails.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Co-developed-by: Iskhakov Daniil <dish@amicon.ru>
Signed-off-by: Iskhakov Daniil <dish@amicon.ru>
Signed-off-by: Agalakov Daniil <ade@amicon.ru>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 14:13:38 -07:00
Alex Dvoretsky
b1e0672403 igb: remove napi_synchronize() in igb_down()
When an AF_XDP zero-copy application terminates abruptly (e.g., kill -9),
the XSK buffer pool is destroyed but NAPI polling continues.
igb_clean_rx_irq_zc() repeatedly returns the full budget, preventing
napi_complete_done() from clearing NAPI_STATE_SCHED.

igb_down() calls napi_synchronize() before napi_disable() for each queue
vector. napi_synchronize() spins waiting for NAPI_STATE_SCHED to clear,
which never happens. igb_down() blocks indefinitely, the TX watchdog
fires, and the TX queue remains permanently stalled.

napi_disable() already handles this correctly: it sets NAPI_STATE_DISABLE.
After a full-budget poll, __napi_poll() checks napi_disable_pending(). If
set, it forces completion and clears NAPI_STATE_SCHED, breaking the loop
that napi_synchronize() cannot.

napi_synchronize() was added in commit 41f149a285 ("igb: Fix possible
panic caused by Rx traffic arrival while interface is down").
napi_disable() provides stronger guarantees: it prevents further
scheduling and waits for any active poll to exit.
Other Intel drivers (ixgbe, ice, i40e) use napi_disable() without a
preceding napi_synchronize() in their down paths.

Remove redundant napi_synchronize() call and reorder napi_disable()
before igb_set_queue_napi() so the queue-to-NAPI mapping is only
cleared after polling has fully stopped.

Fixes: 2c6196013f ("igb: Add AF_XDP zero-copy Rx support")
Cc: stable@vger.kernel.org
Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Alex Dvoretsky <advoretsky@gmail.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Patryk Holda <patryk.holda@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 14:13:19 -07:00
Michal Schmidt
4821d563cd ixgbevf: add missing negotiate_features op to Hyper-V ops table
Commit a7075f501b ("ixgbevf: fix mailbox API compatibility by
negotiating supported features") added the .negotiate_features callback
to ixgbe_mac_operations and populated it in ixgbevf_mac_ops, but forgot
to add it to ixgbevf_hv_mac_ops. This leaves the function pointer NULL
on Hyper-V VMs.

During probe, ixgbevf_negotiate_api() calls ixgbevf_set_features(),
which unconditionally dereferences hw->mac.ops.negotiate_features().
On Hyper-V this results in a NULL pointer dereference:

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  [...]
  Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine [...]
  Workqueue: events work_for_cpu_fn
  RIP: 0010:0x0
  [...]
  Call Trace:
   ixgbevf_negotiate_api+0x66/0x160 [ixgbevf]
   ixgbevf_sw_init+0xe4/0x1f0 [ixgbevf]
   ixgbevf_probe+0x20f/0x4a0 [ixgbevf]
   local_pci_probe+0x50/0xa0
   work_for_cpu_fn+0x1a/0x30
   [...]

Add ixgbevf_hv_negotiate_features_vf() that returns -EOPNOTSUPP and
wire it into ixgbevf_hv_mac_ops. The caller already handles -EOPNOTSUPP
gracefully.

Fixes: a7075f501b ("ixgbevf: fix mailbox API compatibility by negotiating supported features")
Reported-by: Xiaoqiang Xiong <xxiong@redhat.com>
Closes: https://issues.redhat.com/browse/RHEL-155455
Assisted-by: Claude:claude-4.6-opus-high Cursor
Tested-by: Xiaoqiang Xiong <xxiong@redhat.com>
Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 13:39:23 -07:00
Aleksandr Loktionov
d8ae40dc20 ixgbe: stop re-reading flash on every get_drvinfo for e610
ixgbe_get_drvinfo() calls ixgbe_refresh_fw_version() on every ethtool
query for e610 adapters.  That ends up in ixgbe_discover_flash_size(),
which bisects the full 16 MB NVM space issuing one ACI command per
step (~20 ms each, ~24 steps total = ~500 ms).

Profiling on an idle E610-XAT2 system with telegraf scraping ethtool
stats every 10 seconds:

  kretprobe:ixgbe_get_drvinfo took 527603 us
  kretprobe:ixgbe_get_drvinfo took 523978 us
  kretprobe:ixgbe_get_drvinfo took 552975 us
  kretprobe:ice_get_drvinfo   took       3 us
  kretprobe:igb_get_drvinfo   took       2 us
  kretprobe:i40e_get_drvinfo  took       5 us

The half-second stall happens under the RTNL lock, causing visible
latency on ip-link and friends.

The FW version can only change after an EMPR reset.  All flash data is
already populated at probe time and the cached adapter->eeprom_id is
what get_drvinfo should be returning.  The only place that needs to
trigger a re-read is ixgbe_devlink_reload_empr_finish(), right after
the EMPR completes and new firmware is running.  Additionally, refresh
the FW version in ixgbe_reinit_locked() so that any PF that undergoes a
reinit after an EMPR (e.g. triggered by another PF's devlink reload)
also picks up the new version in adapter->eeprom_id.

ixgbe_devlink_info_get() keeps its refresh call for explicit
"devlink dev info" queries, which is fine given those are user-initiated.

Fixes: c9e563cae1 ("ixgbe: add support for devlink reload")
Co-developed-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 13:39:23 -07:00
Petr Oros
bf6dbadb72 ice: fix PTP timestamping broken by SyncE code on E825C
The E825C SyncE support added in commit ad1df4f2d5 ("ice: dpll:
Support E825-C SyncE and dynamic pin discovery") introduced a SyncE
reconfiguration block in ice_ptp_link_change() that prevents
ice_ptp_port_phy_restart() from being called in several error paths.
Without the PHY restart, PTP timestamps stop working after any link
change event.

There are three ways the PHY restart gets blocked:

1. When DPLL initialization fails (e.g. missing ACPI firmware node
   properties), ICE_FLAG_DPLL is not set and the function returns early
   before reaching the PHY restart.

2. When ice_tspll_bypass_mux_active_e825c() fails to read the CGU
   register, WARN_ON_ONCE fires and the function returns early.

3. When ice_tspll_cfg_synce_ethdiv_e825c() fails to configure the
   clock divider for an active pin, same early return.

SyncE and PTP are independent features. SyncE reconfiguration failures
must not prevent the PTP PHY restart that is essential for timestamp
recovery after link changes.

Fix by making the entire SyncE block conditional on ICE_FLAG_DPLL
without an early return, and replacing the WARN_ON_ONCE + return error
handling inside the loop with dev_err_once + break. The function always
proceeds to ice_ptp_port_phy_restart() regardless of SyncE errors.

Fixes: ad1df4f2d5 ("ice: dpll: Support E825-C SyncE and dynamic pin discovery")
Signed-off-by: Petr Oros <poros@redhat.com>
Reviewed-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Sunitha Mekala <sunithax.d.mekala@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 13:39:23 -07:00
Kohei Enju
bb3f21edc7 ice: ptp: don't WARN when controlling PF is unavailable
In VFIO passthrough setups, it is possible to pass through only a PF
which doesn't own the source timer. In that case the PTP controlling PF
(adapter->ctrl_pf) is never initialized in the VM, so ice_get_ctrl_ptp()
returns NULL and triggers WARN_ON() in ice_ptp_setup_pf().

Since this is an expected behavior in that configuration, replace
WARN_ON() with an informational message and return -EOPNOTSUPP.

Fixes: e800654e85 ("ice: Use ice_adapter for PTP shared data instead of auxdev")
Signed-off-by: Kohei Enju <kohei@enjuk.jp>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 13:39:23 -07:00
Emil Tantilov
8e2a2420e2 idpf: set the payload size before calling the async handler
Set the payload size before forwarding the reply to the async handler.
Without this, xn->reply_sz will be 0 and idpf_mac_filter_async_handler()
will never get past the size check.

Fixes: 34c21fa894 ("idpf: implement virtchnl transaction manager")
Cc: stable@vger.kernel.org
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Li Li <boolli@google.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 13:39:15 -07:00
Emil Tantilov
d086fae650 idpf: improve locking around idpf_vc_xn_push_free()
Protect the set_bit() operation for the free_xn bitmask in
idpf_vc_xn_push_free(), to make the locking consistent with rest of the
code and avoid potential races in that logic.

Fixes: 34c21fa894 ("idpf: implement virtchnl transaction manager")
Cc: stable@vger.kernel.org
Reported-by: Ray Zhang <sgzhang@google.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 13:39:08 -07:00
Emil Tantilov
5914781182 idpf: fix PREEMPT_RT raw/bh spinlock nesting for async VC handling
Switch from using the completion's raw spinlock to a local lock in the
idpf_vc_xn struct. The conversion is safe because complete/_all() are
called outside the lock and there is no reason to share the completion
lock in the current logic. This avoids invalid wait context reported by
the kernel due to the async handler taking BH spinlock:

[  805.726977] =============================
[  805.726991] [ BUG: Invalid wait context ]
[  805.727006] 7.0.0-rc2-net-devq-031026+ #28 Tainted: G S         OE
[  805.727026] -----------------------------
[  805.727038] kworker/u261:0/572 is trying to lock:
[  805.727051] ff190da6a8dbb6a0 (&vport_config->mac_filter_list_lock){+...}-{3:3}, at: idpf_mac_filter_async_handler+0xe9/0x260 [idpf]
[  805.727099] other info that might help us debug this:
[  805.727111] context-{5:5}
[  805.727119] 3 locks held by kworker/u261:0/572:
[  805.727132]  #0: ff190da6db3e6148 ((wq_completion)idpf-0000:83:00.0-mbx){+.+.}-{0:0}, at: process_one_work+0x4b5/0x730
[  805.727163]  #1: ff3c6f0a6131fe50 ((work_completion)(&(&adapter->mbx_task)->work)){+.+.}-{0:0}, at: process_one_work+0x1e5/0x730
[  805.727191]  #2: ff190da765190020 (&x->wait#34){+.+.}-{2:2}, at: idpf_recv_mb_msg+0xc8/0x710 [idpf]
[  805.727218] stack backtrace:
...
[  805.727238] Workqueue: idpf-0000:83:00.0-mbx idpf_mbx_task [idpf]
[  805.727247] Call Trace:
[  805.727249]  <TASK>
[  805.727251]  dump_stack_lvl+0x77/0xb0
[  805.727259]  __lock_acquire+0xb3b/0x2290
[  805.727268]  ? __irq_work_queue_local+0x59/0x130
[  805.727275]  lock_acquire+0xc6/0x2f0
[  805.727277]  ? idpf_mac_filter_async_handler+0xe9/0x260 [idpf]
[  805.727284]  ? _printk+0x5b/0x80
[  805.727290]  _raw_spin_lock_bh+0x38/0x50
[  805.727298]  ? idpf_mac_filter_async_handler+0xe9/0x260 [idpf]
[  805.727303]  idpf_mac_filter_async_handler+0xe9/0x260 [idpf]
[  805.727310]  idpf_recv_mb_msg+0x1c8/0x710 [idpf]
[  805.727317]  process_one_work+0x226/0x730
[  805.727322]  worker_thread+0x19e/0x340
[  805.727325]  ? __pfx_worker_thread+0x10/0x10
[  805.727328]  kthread+0xf4/0x130
[  805.727333]  ? __pfx_kthread+0x10/0x10
[  805.727336]  ret_from_fork+0x32c/0x410
[  805.727345]  ? __pfx_kthread+0x10/0x10
[  805.727347]  ret_from_fork_asm+0x1a/0x30
[  805.727354]  </TASK>

Fixes: 34c21fa894 ("idpf: implement virtchnl transaction manager")
Cc: stable@vger.kernel.org
Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reported-by: Ray Zhang <sgzhang@google.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-04-06 13:38:48 -07:00
Haoze Xie
82d8701b2c batman-adv: hold claim backbone gateways by reference
batadv_bla_add_claim() can replace claim->backbone_gw and drop the old
gateway's last reference while readers still follow the pointer.

The netlink claim dump path dereferences claim->backbone_gw->orig and
takes claim->backbone_gw->crc_lock without pinning the underlying
backbone gateway. batadv_bla_check_claim() still has the same naked
pointer access pattern.

Reuse batadv_bla_claim_get_backbone_gw() in both readers so they operate
on a stable gateway reference until the read-side work is complete.
This keeps the dump and claim-check paths aligned with the lifetime
rules introduced for the other BLA claim readers.

Fixes: 23721387c4 ("batman-adv: add basic bridge loop avoidance code")
Fixes: 04f3f5bf18 ("batman-adv: add B.A.T.M.A.N. Dump BLA claims via netlink")
Cc: stable@vger.kernel.org
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Haoze Xie <royenheart@gmail.com>
Signed-off-by: Ao Zhou <n05ec@lzu.edu.cn>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
2026-04-06 15:42:29 +02:00
Jakub Kicinski
1caa871bb0 Merge branch 'net-stmmac-fix-tegra234-mgbe-clock'
Jon Hunter says:

====================
net: stmmac: Fix Tegra234 MGBE clock

The name of the PTP ref clock for the Tegra234 MGBE ethernet controller
does not match the generic name in the stmmac platform driver. Despite
this basic ethernet is functional on the Tegra234 platforms that use
this driver and as far as I know, we have not tested PTP support with
this driver. Hence, the risk of breaking any functionality is low.

The previous attempt to fix this in the stmmac platform driver, by
supporting the Tegra234 PTP clock name, was rejected [0]. The preference
from the netdev maintainers is to fix this in the DT binding for
Tegra234.

This series fixes this by correcting the device-tree binding to align
with the generic name for the PTP clock. I understand that this is
breaking the ABI for this device, which we should never do, but this
is a last resort for getting this fixed. I am open to any better ideas
to fix this. Please note that we still maintain backward compatibility
in the driver to allow older device-trees to work, but we don't
advertise this via the binding, because I did not see any value in doing
so.
====================

Link: https://patch.msgid.link/20260401102941.17466-1-jonathanh@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 16:02:31 -07:00
Jon Hunter
fb22b1fc5b dt-bindings: net: Fix Tegra234 MGBE PTP clock
The PTP clock for the Tegra234 MGBE device is incorrectly named
'ptp-ref' and should be 'ptp_ref'. This is causing the following
warning to be observed on Tegra234 platforms that use this device:

 ERR KERN tegra-mgbe 6800000.ethernet eth0: Invalid PTP clock rate
 WARNING KERN tegra-mgbe 6800000.ethernet eth0: PTP init failed

Although this constitutes an ABI breakage in the binding for this
device, PTP support has clearly never worked and so fix this now
so we can correct the device-tree for this device. Note that the
MGBE driver still supports the legacy 'ptp-ref' clock name and so
older/existing device-trees will still work, but given that this
is not the correct name, there is no point to advertise this in the
binding.

Fixes: 189c2e5c76 ("dt-bindings: net: Add Tegra234 MGBE")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260401102941.17466-3-jonathanh@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 16:02:30 -07:00
Jon Hunter
1345e9f4e3 net: stmmac: Fix PTP ref clock for Tegra234
Since commit 030ce919e1 ("net: stmmac: make sure that ptp_rate is not
0 before configuring timestamping") was added the following error is
observed on Tegra234:

 ERR KERN tegra-mgbe 6800000.ethernet eth0: Invalid PTP clock rate
 WARNING KERN tegra-mgbe 6800000.ethernet eth0: PTP init failed

It turns out that the Tegra234 device-tree binding defines the PTP ref
clock name as 'ptp-ref' and not 'ptp_ref' and the above commit now
exposes this and that the PTP clock is not configured correctly.

In order to update device-tree to use the correct 'ptp_ref' name, update
the Tegra MGBE driver to use 'ptp_ref' by default and fallback to using
'ptp-ref' if this clock name is present.

Fixes: d8ca113724 ("net: stmmac: tegra: Add MGBE support")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260401102941.17466-2-jonathanh@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 16:02:21 -07:00
Pengpeng Hou
5c14a19d5b nfc: s3fwrn5: allocate rx skb before consuming bytes
s3fwrn82_uart_read() reports the number of accepted bytes to the serdev
core. The current code consumes bytes into recv_skb and may already
deliver a complete frame before allocating a fresh receive buffer.

If that alloc_skb() fails, the callback returns 0 even though it has
already consumed bytes, and it leaves recv_skb as NULL for the next
receive callback. That breaks the receive_buf() accounting contract and
can also lead to a NULL dereference on the next skb_put_u8().

Allocate the receive skb lazily before consuming the next byte instead.
If allocation fails, return the number of bytes already accepted.

Fixes: 3f52c2cb7e ("nfc: s3fwrn5: Support a UART interface")
Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Link: https://patch.msgid.link/20260402042148.65236-1-pengpeng@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:57:46 -07:00
Chris J Arges
77facb3522 net: increase IP_TUNNEL_RECURSION_LIMIT to 5
In configurations with multiple tunnel layers and MPLS lwtunnel routing, a
single tunnel hop can increment the counter beyond this limit. This causes
packets to be dropped with the "Dead loop on virtual device" message even
when a routing loop doesn't exist.

Increase IP_TUNNEL_RECURSION_LIMIT from 4 to 5 to handle this use-case.

Fixes: 6f1a9140ec ("net: add xmit recursion limit to tunnel xmit functions")
Link: https://lore.kernel.org/netdev/88deb91b-ef1b-403c-8eeb-0f971f27e34f@redhat.com/
Signed-off-by: Chris J Arges <carges@cloudflare.com>
Link: https://patch.msgid.link/20260402222401.3408368-1-carges@cloudflare.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:52:10 -07:00
Yiqi Sun
fde29fd934 ipv4: icmp: fix null-ptr-deref in icmp_build_probe()
ipv6_stub->ipv6_dev_find() may return ERR_PTR(-EAFNOSUPPORT) when the
IPv6 stack is not active (CONFIG_IPV6=m and not loaded), and passing
this error pointer to dev_hold() will cause a kernel crash with
null-ptr-deref.

Instead, silently discard the request. RFC 8335 does not appear to
define a specific response for the case where an IPv6 interface
identifier is syntactically valid but the implementation cannot perform
the lookup at runtime, and silently dropping the request may safer than
misreporting "No Such Interface".

Fixes: d329ea5bd8 ("icmp: add response to RFC 8335 PROBE messages")
Signed-off-by: Yiqi Sun <sunyiqixm@gmail.com>
Link: https://patch.msgid.link/20260402070419.2291578-1-sunyiqixm@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:46:17 -07:00
Fernando Fernandez Mancera
14cf0cd353 ipv4: nexthop: allocate skb dynamically in rtm_get_nexthop()
When querying a nexthop object via RTM_GETNEXTHOP, the kernel currently
allocates a fixed-size skb using NLMSG_GOODSIZE. While sufficient for
single nexthops and small Equal-Cost Multi-Path groups, this fixed
allocation fails for large nexthop groups like 512 nexthops.

This results in the following warning splat:

 WARNING: net/ipv4/nexthop.c:3395 at rtm_get_nexthop+0x176/0x1c0, CPU#20: rep/4608
 [...]
 RIP: 0010:rtm_get_nexthop (net/ipv4/nexthop.c:3395)
 [...]
 Call Trace:
  <TASK>
  rtnetlink_rcv_msg (net/core/rtnetlink.c:6989)
  netlink_rcv_skb (net/netlink/af_netlink.c:2550)
  netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
  netlink_sendmsg (net/netlink/af_netlink.c:1894)
  ____sys_sendmsg (net/socket.c:721 net/socket.c:736 net/socket.c:2585)
  ___sys_sendmsg (net/socket.c:2641)
  __sys_sendmsg (net/socket.c:2671)
  do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
  </TASK>

Fix this by allocating the size dynamically using nh_nlmsg_size() and
using nlmsg_new(), this is consistent with nexthop_notify() behavior. In
addition, adjust nh_nlmsg_size_grp() so it calculates the size needed
based on flags passed. While at it, also add the size of NHA_FDB for
nexthop group size calculation as it was missing too.

This cannot be reproduced via iproute2 as the group size is currently
limited and the command fails as follows:

addattr_l ERROR: message exceeded bound of 1048

Fixes: 430a049190 ("nexthop: Add support for nexthop groups")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Closes: https://lore.kernel.org/netdev/CAL_bE8Li2h4KO+AQFXW4S6Yb_u5X4oSKnkywW+LPFjuErhqELA@mail.gmail.com/
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20260402072613.25262-2-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:34:27 -07:00
Fernando Fernandez Mancera
06aaf04ca8 ipv4: nexthop: avoid duplicate NHA_HW_STATS_ENABLE on nexthop group dump
Currently NHA_HW_STATS_ENABLE is included twice everytime a dump of
nexthop group is performed with NHA_OP_FLAG_DUMP_STATS. As all the stats
querying were moved to nla_put_nh_group_stats(), leave only that
instance of the attribute querying.

Fixes: 5072ae00ae ("net: nexthop: Expose nexthop group HW stats to user space")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20260402072613.25262-1-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:34:27 -07:00
Pengpeng Hou
b76254c55d net: qualcomm: qca_uart: report the consumed byte on RX skb allocation failure
qca_tty_receive() consumes each input byte before checking whether a
completed frame needs a fresh receive skb. When the current byte completes
a frame, the driver delivers that frame and then allocates a new skb for
the next one.

If that allocation fails, the current code returns i even though data[i]
has already been consumed and may already have completed the delivered
frame. Since serdev interprets the return value as the number of accepted
bytes, this under-reports progress by one byte and can replay the final
byte of the completed frame into a fresh parser state on the next call.

Return i + 1 in that failure path so the accepted-byte count matches the
actual receive-state progress.

Fixes: dfc768fbe6 ("net: qualcomm: add QCA7000 UART driver")
Cc: stable@vger.kernel.org
Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Reviewed-by: Stefan Wahren <wahrenst@gmx.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260402071207.4036-1-pengpeng@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:32:56 -07:00
Oleh Konko
48a5fe3877 tipc: fix bc_ackers underflow on duplicate GRP_ACK_MSG
The GRP_ACK_MSG handler in tipc_group_proto_rcv() currently decrements
bc_ackers on every inbound group ACK, even when the same member has
already acknowledged the current broadcast round.

Because bc_ackers is a u16, a duplicate ACK received after the last
legitimate ACK wraps the counter to 65535. Once wrapped,
tipc_group_bc_cong() keeps reporting congestion and later group
broadcasts on the affected socket stay blocked until the group is
recreated.

Fix this by ignoring duplicate or stale ACKs before touching bc_acked or
bc_ackers. This makes repeated GRP_ACK_MSG handling idempotent and
prevents the underflow path.

Fixes: 2f487712b8 ("tipc: guarantee that group broadcast doesn't bypass group unicast")
Cc: stable@vger.kernel.org
Signed-off-by: Oleh Konko <security@1seal.org>
Reviewed-by: Tung Nguyen <tung.quang.nguyen@est.tech>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/41a4833f368641218e444fdcff822039.security@1seal.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:31:17 -07:00
Nikolaos Gkarlis
7b735ef812 rtnetlink: add missing netlink_ns_capable() check for peer netns
rtnl_newlink() lacks a CAP_NET_ADMIN capability check on the peer
network namespace when creating paired devices (veth, vxcan,
netkit). This allows an unprivileged user with a user namespace
to create interfaces in arbitrary network namespaces, including
init_net.

Add a netlink_ns_capable() check for CAP_NET_ADMIN in the peer
namespace before allowing device creation to proceed.

Fixes: 81adee47df ("net: Support specifying the network namespace upon device creation.")
Signed-off-by: Nikolaos Gkarlis <nickgarlis@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260402181432.4126920-1-nickgarlis@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 15:07:18 -07:00
Zijing Yin
1979645e18 bridge: guard local VLAN-0 FDB helpers against NULL vlan group
When CONFIG_BRIDGE_VLAN_FILTERING is not set, br_vlan_group() and
nbp_vlan_group() return NULL (br_private.h stub definitions). The
BR_BOOLOPT_FDB_LOCAL_VLAN_0 toggle code is compiled unconditionally and
reaches br_fdb_delete_locals_per_vlan_port() and
br_fdb_insert_locals_per_vlan_port(), where the NULL vlan group pointer
is dereferenced via list_for_each_entry(v, &vg->vlan_list, vlist).

The observed crash is in the delete path, triggered when creating a
bridge with IFLA_BR_MULTI_BOOLOPT containing BR_BOOLOPT_FDB_LOCAL_VLAN_0
via RTM_NEWLINK. The insert helper has the same bug pattern.

  Oops: general protection fault, probably for non-canonical address 0xdffffc0000000056: 0000 [#1] KASAN NOPTI
  KASAN: null-ptr-deref in range [0x00000000000002b0-0x00000000000002b7]
  RIP: 0010:br_fdb_delete_locals_per_vlan+0x2b9/0x310
  Call Trace:
   br_fdb_toggle_local_vlan_0+0x452/0x4c0
   br_toggle_fdb_local_vlan_0+0x31/0x80 net/bridge/br.c:276
   br_boolopt_toggle net/bridge/br.c:313
   br_boolopt_multi_toggle net/bridge/br.c:364
   br_changelink net/bridge/br_netlink.c:1542
   br_dev_newlink net/bridge/br_netlink.c:1575

Add NULL checks for the vlan group pointer in both helpers, returning
early when there are no VLANs to iterate. This matches the existing
pattern used by other bridge FDB functions such as br_fdb_add() and
br_fdb_delete().

Fixes: 21446c06b4 ("net: bridge: Introduce UAPI for BR_BOOLOPT_FDB_LOCAL_VLAN_0")
Signed-off-by: Zijing Yin <yzjaurora@gmail.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402140153.3925663-1-yzjaurora@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 14:45:51 -07:00
Eric Dumazet
4e65a8b8da ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
We need to check __in6_dev_get() for possible NULL value, as
suggested by Yiming Qian.

Also add skb_dst_dev_rcu() instead of skb_dst_dev(),
and two missing READ_ONCE().

Note that @dev can't be NULL.

Fixes: 9ee11f0fff ("ipv6: ioam: Data plane support for Pre-allocated Trace")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Justin Iurman <justin.iurman@gmail.com>
Link: https://patch.msgid.link/20260402101732.1188059-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 14:44:43 -07:00
Lorenzo Bianconi
285fa6b1e0 net: airoha: Fix memory leak in airoha_qdma_rx_process()
If an error occurs on the subsequents buffers belonging to the
non-linear part of the skb (e.g. due to an error in the payload length
reported by the NIC or if we consumed all the available fragments for
the skb), the page_pool fragment will not be linked to the skb so it will
not return to the pool in the airoha_qdma_rx_process() error path. Fix the
memory leak partially reverting commit 'd6d2b0e1538d ("net: airoha: Fix
page recycling in airoha_qdma_rx_process()")' and always running
page_pool_put_full_page routine in the airoha_qdma_rx_process() error
path.

Fixes: d6d2b0e153 ("net: airoha: Fix page recycling in airoha_qdma_rx_process()")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260402-airoha_qdma_rx_process-mem-leak-fix-v1-1-b5706f402d3c@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 14:42:51 -07:00
Eric Dumazet
b120e4432f net: lapbether: handle NETDEV_PRE_TYPE_CHANGE
lapbeth_data_transmit() expects the underlying device type
to be ARPHRD_ETHER.

Returning NOTIFY_BAD from lapbeth_device_event() makes sure
bonding driver can not break this expectation.

Fixes: 872254dd6b ("net/bonding: Enable bonding to enslave non ARPHRD_ETHER")
Reported-by: syzbot+d8c285748fa7292580a9@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/69cd22a1.050a0220.70c3a.0002.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Martin Schiller <ms@dev.tdt.de>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260402103519.1201565-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 14:40:36 -07:00
Arnd Bergmann
e16a0d3677 net: fec: make FIXED_PHY dependency unconditional
When CONFIG_FIXED_PHY is in a loadable module, the fec driver cannot be
built-in any more:

x86_64-linux-ld: vmlinux.o: in function `fec_enet_mii_probe':
fec_main.c:(.text+0xc4f367): undefined reference to `fixed_phy_unregister'
x86_64-linux-ld: vmlinux.o: in function `fec_enet_close':
fec_main.c:(.text+0xc59591): undefined reference to `fixed_phy_unregister'
x86_64-linux-ld: vmlinux.o: in function `fec_enet_mii_probe.cold':

Select the fixed phy support on all targets to make this build
correctly, not just on coldfire.

Notat that Essentially the stub helpers in include/linux/phy_fixed.h
cannot be used correctly because of this build time dependency,
and we could just remove them to hit the build failure more often
when a driver uses them without the 'select FIXED_PHY'.

Fixes: dc86b621e1 ("net: fec: register a fixed phy using fixed_phy_register_100fd if needed")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260402141048.2713445-1-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 14:39:37 -07:00
Ruide Cao
c842743d07 net: sched: act_csum: validate nested VLAN headers
tcf_csum_act() walks nested VLAN headers directly from skb->data when an
skb still carries in-payload VLAN tags. The current code reads
vlan->h_vlan_encapsulated_proto and then pulls VLAN_HLEN bytes without
first ensuring that the full VLAN header is present in the linear area.

If only part of an inner VLAN header is linearized, accessing
h_vlan_encapsulated_proto reads past the linear area, and the following
skb_pull(VLAN_HLEN) may violate skb invariants.

Fix this by requiring pskb_may_pull(skb, VLAN_HLEN) before accessing and
pulling each nested VLAN header. If the header still is not fully
available, drop the packet through the existing error path.

Fixes: 2ecba2d1e4 ("net: sched: act_csum: Fix csum calc for tagged packets")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Ren Wei <enjou1224z@gmail.com>
Signed-off-by: Ruide Cao <caoruide123@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/22df2fcb49f410203eafa5d97963dd36089f4ecf.1774892775.git.caoruide123@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-03 14:34:56 -07:00
Tyllis Xu
51f4e090b9 net: stmmac: fix integer underflow in chain mode
The jumbo_frm() chain-mode implementation unconditionally computes

    len = nopaged_len - bmax;

where nopaged_len = skb_headlen(skb) (linear bytes only) and bmax is
BUF_SIZE_8KiB or BUF_SIZE_2KiB.  However, the caller stmmac_xmit()
decides to invoke jumbo_frm() based on skb->len (total length including
page fragments):

    is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc);

When a packet has a small linear portion (nopaged_len <= bmax) but a
large total length due to page fragments (skb->len > bmax), the
subtraction wraps as an unsigned integer, producing a huge len value
(~0xFFFFxxxx).  This causes the while (len != 0) loop to execute
hundreds of thousands of iterations, passing skb->data + bmax * i
pointers far beyond the skb buffer to dma_map_single().  On IOMMU-less
SoCs (the typical deployment for stmmac), this maps arbitrary kernel
memory to the DMA engine, constituting a kernel memory disclosure and
potential memory corruption from hardware.

Fix this by introducing a buf_len local variable clamped to
min(nopaged_len, bmax).  Computing len = nopaged_len - buf_len is then
always safe: it is zero when the linear portion fits within a single
descriptor, causing the while (len != 0) loop to be skipped naturally,
and the fragment loop in stmmac_xmit() handles page fragments afterward.

Fixes: 286a837217 ("stmmac: add CHAINED descriptor mode support (V4)")
Cc: stable@vger.kernel.org
Signed-off-by: Tyllis Xu <LivelyCarpet87@gmail.com>
Link: https://patch.msgid.link/20260401044708.1386919-1-LivelyCarpet87@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 18:28:10 -07:00
David Carlier
6dede39676 net: altera-tse: fix skb leak on DMA mapping error in tse_start_xmit()
When dma_map_single() fails in tse_start_xmit(), the function returns
NETDEV_TX_OK without freeing the skb. Since NETDEV_TX_OK tells the
stack the packet was consumed, the skb is never freed, leaking memory
on every DMA mapping failure.

Add dev_kfree_skb_any() before returning to properly free the skb.

Fixes: bbd2190ce9 ("Altera TSE: Add main and header file for Altera Ethernet Driver")
Cc: stable@vger.kernel.org
Signed-off-by: David Carlier <devnexen@gmail.com>
Link: https://patch.msgid.link/20260401211218.279185-1-devnexen@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 18:25:23 -07:00
Allison Henderson
e9c9f084cd MAINTAINERS: Update email for Allison Henderson
Switch active email address to kernel.org alias

Signed-off-by: Allison Henderson <achender@kernel.org>
Link: https://patch.msgid.link/20260402005833.38376-1-achender@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 18:06:03 -07:00
Qingfang Deng
2fd68c7ea2 MAINTAINERS: orphan PPP over Ethernet driver
We haven't seen activities from Michal Ostrowski for quite a long time.
The last commit from him is fb64bb560e ("PPPoE: Fix flush/close
races."), which was in 2009. Email to mostrows@earthlink.net also
bounces.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Link: https://patch.msgid.link/20260401022842.15082-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 18:03:47 -07:00
Linus Torvalds
f8f5627a8a Merge tag 'net-7.0-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
 "With fixes from wireless, bluetooth and netfilter included we're back
  to each PR carrying 30%+ more fixes than in previous era.

  The good news is that so far none of the "extra" fixes are themselves
  causing real regressions. Not sure how much comfort that is.

  Current release - fix to a fix:

   - netdevsim: fix build if SKB_EXTENSIONS=n

   - eth: stmmac: skip VLAN restore when VLAN hash ops are missing

  Previous releases - regressions:

   - wifi: iwlwifi: mvm: don't send a 6E related command when
     not supported

  Previous releases - always broken:

   - some info leak fixes

   - add missing clearing of skb->cb[] on ICMP paths from tunnels

   - ipv6:
      - flowlabel: defer exclusive option free until RCU teardown
      - avoid overflows in ip6_datagram_send_ctl()

   - mpls: add seqcount to protect platform_labels from OOB access

   - bridge: improve safety of parsing ND options

   - bluetooth: fix leaks, overflows and races in hci_sync

   - netfilter: add more input validation, some to address bugs directly
     some to prevent exploits from cooking up broken configurations

   - wifi:
      - ath: avoid poor performance due to stopping the wrong
        aggregation session
      - virt_wifi: remove SET_NETDEV_DEV to avoid use-after-free

   - eth:
      - fec: fix the PTP periodic output sysfs interface
      - enetc: safely reinitialize TX BD ring when it has unsent frames"

* tag 'net-7.0-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (95 commits)
  eth: fbnic: Increase FBNIC_QUEUE_SIZE_MIN to 64
  ipv6: avoid overflows in ip6_datagram_send_ctl()
  net: hsr: fix VLAN add unwind on slave errors
  net: hsr: serialize seq_blocks merge across nodes
  vsock: initialize child_ns_mode_locked in vsock_net_init()
  selftests/tc-testing: add tests for cls_fw and cls_flow on shared blocks
  net/sched: cls_flow: fix NULL pointer dereference on shared blocks
  net/sched: cls_fw: fix NULL pointer dereference on shared blocks
  net/x25: Fix overflow when accumulating packets
  net/x25: Fix potential double free of skb
  bnxt_en: Restore default stat ctxs for ULP when resource is available
  bnxt_en: Don't assume XDP is never enabled in bnxt_init_dflt_ring_mode()
  bnxt_en: Refactor some basic ring setup and adjustment logic
  net/mlx5: Fix switchdev mode rollback in case of failure
  net/mlx5: Avoid "No data available" when FW version queries fail
  net/mlx5: lag: Check for LAG device before creating debugfs
  net: macb: properly unregister fixed rate clocks
  net: macb: fix clk handling on PCI glue driver removal
  virtio_net: clamp rss_max_key_size to NETDEV_RSS_KEY_LEN
  net/sched: sch_netem: fix out-of-bounds access in packet corruption
  ...
2026-04-02 09:57:06 -07:00
Linus Torvalds
4c2c526b5a Merge tag 'iommu-fixes-v7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux
Pull iommu fixes from Joerg Roedel:

 - IOMMU-PT related compile breakage in for AMD driver

 - IOTLB flushing behavior when unmapped region is larger than requested
   due to page-sizes

 - Fix IOTLB flush behavior with empty gathers

* tag 'iommu-fixes-v7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux:
  iommupt/amdv1: mark amdv1pt_install_leaf_entry as __always_inline
  iommupt: Fix short gather if the unmap goes into a large mapping
  iommu: Do not call drivers for empty gathers
2026-04-02 09:53:16 -07:00
Linus Torvalds
2ec9074b28 Merge tag 'sound-7.0-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
Pull sound fixes from Takashi Iwai:
 "People have been so busy for hunting and we're still getting more
  changes than wished for, but it doesn't look too scary; almost all
  changes are device-specific small fixes.

  I guess it's rather a casual bump, and no more Easter eggs are left
  for 7.0 (hopefully)...

   - Fixes for the recent regression on ctxfi driver

   - Fix missing INIT_LIST_HEAD() for ASoC card_aux_list

   - Usual HD- and USB-audio, and ASoC AMD quirk updates

   - ASoC fixes for AMD and Intel"

* tag 'sound-7.0-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound: (24 commits)
  ASoC: amd: ps: Fix missing leading zeros in subsystem_device SSID log
  ALSA: usb-audio: Exclude Scarlett 2i2 1st Gen (8016) from SKIP_IFACE_SETUP
  ALSA: hda/realtek: add quirk for Acer Swift SFG14-73
  ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 14IMH9
  ASoC: Intel: boards: fix unmet dependency on PINCTRL
  ASoC: Intel: ehl_rt5660: Use the correct rtd->dev device in hw_params
  ALSA: ctxfi: Don't enumerate SPDIF1 at DAIO initialization
  ALSA: hda/realtek: Add quirk for Lenovo Yoga Slim 7 14AKP10
  ALSA: hda/realtek: add quirk for HP Laptop 15-fc0xxx
  ASoC: ep93xx: Fix unchecked clk_prepare_enable() and add rollback on failure
  ASoC: soc-core: call missing INIT_LIST_HEAD() for card_aux_list
  ALSA: hda/realtek: Add quirk for Samsung Book2 Pro 360 (NP950QED)
  ASoC: amd: yc: Add DMI entry for HP Laptop 15-fc0xxx
  ASoC: amd: yc: Add DMI quirk for ASUS Vivobook Pro 16X OLED M7601RM
  ALSA: hda/realtek: Add quirk for ASUS ROG Strix SCAR 15
  ALSA: usb-audio: Exclude Scarlett Solo 1st Gen from SKIP_IFACE_SETUP
  ALSA: caiaq: fix stack out-of-bounds read in init_card
  ALSA: ctxfi: Check the error for index mapping
  ALSA: ctxfi: Fix missing SPDIFI1 index handling
  ALSA: hda/realtek: add quirk for HP Victus 15-fb0xxx
  ...
2026-04-02 09:41:21 -07:00
Linus Torvalds
2064d7784e Merge tag 'auxdisplay-v7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-auxdisplay
Pull auxdisplay fixes from Andy Shevchenko:

 - Fix NULL dereference in linedisp_release()

 - Fix ht16k33 DT bindings to avoid warnings

 - Handle errors in I²C transfers in lcd2s driver

* tag 'auxdisplay-v7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-auxdisplay:
  auxdisplay: line-display: fix NULL dereference in linedisp_release
  auxdisplay: lcd2s: add error handling for i2c transfers
  dt-bindings: auxdisplay: ht16k33: Use unevaluatedProperties to fix common property warning
2026-04-02 09:34:22 -07:00
Dimitri Daskalakis
ec7067e661 eth: fbnic: Increase FBNIC_QUEUE_SIZE_MIN to 64
On systems with 64K pages, RX queues will be wedged if users set the
descriptor count to the current minimum (16). Fbnic fragments large
pages into 4K chunks, and scales down the ring size accordingly. With
64K pages and 16 descriptors, the ring size mask is 0 and will never
be filled.

32 descriptors is another special case that wedges the RX rings.
Internally, the rings track pages for the head/tail pointers, not page
fragments. So with 32 descriptors, there's only 1 usable page as one
ring slot is kept empty to disambiguate between an empty/full ring.
As a result, the head pointer never advances and the HW stalls after
consuming 16 page fragments.

Fixes: 0cb4c0a137 ("eth: fbnic: Implement Rx queue alloc/start/stop/free")
Signed-off-by: Dimitri Daskalakis <daskald@meta.com>
Link: https://patch.msgid.link/20260401162848.2335350-1-dimitri.daskalakis1@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 08:38:34 -07:00
Ruide Cao
3a359bf5c6 batman-adv: reject oversized global TT response buffers
batadv_tt_prepare_tvlv_global_data() builds the allocation length for a
global TT response in 16-bit temporaries. When a remote originator
advertises a large enough global TT, the TT payload length plus the VLAN
header offset can exceed 65535 and wrap before kmalloc().

The full-table response path still uses the original TT payload length when
it fills tt_change, so the wrapped allocation is too small and
batadv_tt_prepare_tvlv_global_data() writes past the end of the heap object
before the later packet-size check runs.

Fix this by rejecting TT responses whose TVLV value length cannot fit in
the 16-bit TVLV payload length field.

Fixes: 7ea7b4a142 ("batman-adv: make the TT CRC logic VLAN specific")
Cc: stable@vger.kernel.org
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Ren Wei <enjou1224z@gmail.com>
Signed-off-by: Ruide Cao <caoruide123@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
2026-04-02 17:32:55 +02:00
Eric Dumazet
4e45337556 ipv6: avoid overflows in ip6_datagram_send_ctl()
Yiming Qian reported :
<quote>
 I believe I found a locally triggerable kernel bug in the IPv6 sendmsg
 ancillary-data path that can panic the kernel via `skb_under_panic()`
 (local DoS).

 The core issue is a mismatch between:

 - a 16-bit length accumulator (`struct ipv6_txoptions::opt_flen`, type
 `__u16`) and
 - a pointer to the *last* provided destination-options header (`opt->dst1opt`)

 when multiple `IPV6_DSTOPTS` control messages (cmsgs) are provided.

 - `include/net/ipv6.h`:
   - `struct ipv6_txoptions::opt_flen` is `__u16` (wrap possible).
 (lines 291-307, especially 298)
 - `net/ipv6/datagram.c:ip6_datagram_send_ctl()`:
   - Accepts repeated `IPV6_DSTOPTS` and accumulates into `opt_flen`
 without rejecting duplicates. (lines 909-933)
 - `net/ipv6/ip6_output.c:__ip6_append_data()`:
   - Uses `opt->opt_flen + opt->opt_nflen` to compute header
 sizes/headroom decisions. (lines 1448-1466, especially 1463-1465)
 - `net/ipv6/ip6_output.c:__ip6_make_skb()`:
   - Calls `ipv6_push_frag_opts()` if `opt->opt_flen` is non-zero.
 (lines 1930-1934)
 - `net/ipv6/exthdrs.c:ipv6_push_frag_opts()` / `ipv6_push_exthdr()`:
   - Push size comes from `ipv6_optlen(opt->dst1opt)` (based on the
 pointed-to header). (lines 1179-1185 and 1206-1211)

 1. `opt_flen` is a 16-bit accumulator:

 - `include/net/ipv6.h:298` defines `__u16 opt_flen; /* after fragment hdr */`.

 2. `ip6_datagram_send_ctl()` accepts *repeated* `IPV6_DSTOPTS` cmsgs
 and increments `opt_flen` each time:

 - In `net/ipv6/datagram.c:909-933`, for `IPV6_DSTOPTS`:
   - It computes `len = ((hdr->hdrlen + 1) << 3);`
   - It checks `CAP_NET_RAW` using `ns_capable(net->user_ns,
 CAP_NET_RAW)`. (line 922)
   - Then it does:
     - `opt->opt_flen += len;` (line 927)
     - `opt->dst1opt = hdr;` (line 928)

 There is no duplicate rejection here (unlike the legacy
 `IPV6_2292DSTOPTS` path which rejects duplicates at
 `net/ipv6/datagram.c:901-904`).

 If enough large `IPV6_DSTOPTS` cmsgs are provided, `opt_flen` wraps
 while `dst1opt` still points to a large (2048-byte)
 destination-options header.

 In the attached PoC (`poc.c`):

 - 32 cmsgs with `hdrlen=255` => `len = (255+1)*8 = 2048`
 - 1 cmsg with `hdrlen=0` => `len = 8`
 - Total increment: `32*2048 + 8 = 65544`, so `(__u16)opt_flen == 8`
 - The last cmsg is 2048 bytes, so `dst1opt` points to a 2048-byte header.

 3. The transmit path sizes headers using the wrapped `opt_flen`:

- In `net/ipv6/ip6_output.c:1463-1465`:
  - `headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen +
 opt->opt_nflen : 0) + ...;`

 With wrapped `opt_flen`, `headersize`/headroom decisions underestimate
 what will be pushed later.

 4. When building the final skb, the actual push length comes from
 `dst1opt` and is not limited by wrapped `opt_flen`:

 - In `net/ipv6/ip6_output.c:1930-1934`:
   - `if (opt->opt_flen) proto = ipv6_push_frag_opts(skb, opt, proto);`
 - In `net/ipv6/exthdrs.c:1206-1211`, `ipv6_push_frag_opts()` pushes
 `dst1opt` via `ipv6_push_exthdr()`.
 - In `net/ipv6/exthdrs.c:1179-1184`, `ipv6_push_exthdr()` does:
   - `skb_push(skb, ipv6_optlen(opt));`
   - `memcpy(h, opt, ipv6_optlen(opt));`

 With insufficient headroom, `skb_push()` underflows and triggers
 `skb_under_panic()` -> `BUG()`:

 - `net/core/skbuff.c:2669-2675` (`skb_push()` calls `skb_under_panic()`)
 - `net/core/skbuff.c:207-214` (`skb_panic()` ends in `BUG()`)

 - The `IPV6_DSTOPTS` cmsg path requires `CAP_NET_RAW` in the target
 netns user namespace (`ns_capable(net->user_ns, CAP_NET_RAW)`).
 - Root (or any task with `CAP_NET_RAW`) can trigger this without user
 namespaces.
 - An unprivileged `uid=1000` user can trigger this if unprivileged
 user namespaces are enabled and it can create a userns+netns to obtain
 namespaced `CAP_NET_RAW` (the attached PoC does this).

 - Local denial of service: kernel BUG/panic (system crash).
 - Reproducible with a small userspace PoC.
</quote>

This patch does not reject duplicated options, as this might break
some user applications.

Instead, it makes sure to adjust opt_flen and opt_nflen to correctly
reflect the size of the current option headers, preventing the overflows
and the potential for panics.

This applies to IPV6_DSTOPTS, IPV6_HOPOPTS, and IPV6_RTHDR.

Specifically:

When a new IPV6_DSTOPTS is processed, the length of the old opt->dst1opt
is subtracted from opt->opt_flen before adding the new length.

When a new IPV6_HOPOPTS is processed, the length of the old opt->dst0opt
is subtracted from opt->opt_nflen.

When a new Routing Header (IPV6_RTHDR or IPV6_2292RTHDR) is processed,
the length of the old opt->srcrt is subtracted from opt->opt_nflen.

In the special case within IPV6_2292RTHDR handling where dst1opt is moved
to dst0opt, the length of the old opt->dst0opt is subtracted from
opt->opt_nflen before the new one is added.

Fixes: 333fad5364 ("[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Closes: https://lore.kernel.org/netdev/CAL_bE8JNzawgr5OX5m+3jnQDHry2XxhQT5=jThW1zDPtUikRYA@mail.gmail.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260401154721.3740056-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 08:25:22 -07:00
Jakub Kicinski
be193568be Merge branch 'net-hsr-fixes-for-prp-duplication-and-vlan-unwind'
Luka Gejak says:

====================
net: hsr: fixes for PRP duplication and VLAN unwind

This series addresses two logic bugs in the HSR/PRP implementation
identified during a protocol audit. These are targeted for the 'net'
tree as they fix potential memory corruption and state inconsistency.

The primary change resolves a race condition in the node merging path by
implementing address-based lock ordering. This ensures that concurrent
mutations of sequence blocks do not lead to state corruption or
deadlocks.

An additional fix corrects asymmetric VLAN error unwinding by
implementing a centralized unwind path on slave errors.
====================

Link: https://patch.msgid.link/20260401092243.52121-1-luka.gejak@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 08:23:55 -07:00
Luka Gejak
2e3514e63b net: hsr: fix VLAN add unwind on slave errors
When vlan_vid_add() fails for a secondary slave, the error path calls
vlan_vid_del() on the failing port instead of the peer slave that had
already succeeded. This results in asymmetric VLAN state across the HSR
pair.

Fix this by switching to a centralized unwind path that removes the VID
from any slave device that was already programmed.

Fixes: 1a8a63a530 ("net: hsr: Add VLAN CTAG filter support")
Signed-off-by: Luka Gejak <luka.gejak@linux.dev>
Link: https://patch.msgid.link/20260401092243.52121-3-luka.gejak@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 08:23:49 -07:00
Luka Gejak
f5df2990c3 net: hsr: serialize seq_blocks merge across nodes
During node merging, hsr_handle_sup_frame() walks node_curr->seq_blocks
to update node_real without holding node_curr->seq_out_lock. This
allows concurrent mutations from duplicate registration paths, risking
inconsistent state or XArray/bitmap corruption.

Fix this by locking both nodes' seq_out_lock during the merge.
To prevent ABBA deadlocks, locks are acquired in order of memory
address.

Reviewed-by: Felix Maurer <fmaurer@redhat.com>
Fixes: 415e636751 ("hsr: Implement more robust duplicate discard for PRP")
Signed-off-by: Luka Gejak <luka.gejak@linux.dev>
Link: https://patch.msgid.link/20260401092243.52121-2-luka.gejak@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 08:23:49 -07:00
Stefano Garzarella
b18c833888 vsock: initialize child_ns_mode_locked in vsock_net_init()
The `child_ns_mode_locked` field lives in `struct net`, which persists
across vsock module reloads. When the module is unloaded and reloaded,
`vsock_net_init()` resets `mode` and `child_ns_mode` back to their
default values, but does not reset `child_ns_mode_locked`.

The stale lock from the previous module load causes subsequent writes
to `child_ns_mode` to silently fail: `vsock_net_set_child_mode()` sees
the old lock, skips updating the actual value, and returns success
when the requested mode matches the stale lock. The sysctl handler
reports no error, but `child_ns_mode` remains unchanged.

Steps to reproduce:
    $ modprobe vsock
    $ echo local > /proc/sys/net/vsock/child_ns_mode
    $ cat /proc/sys/net/vsock/child_ns_mode
    local
    $ modprobe -r vsock
    $ modprobe vsock
    $ echo local > /proc/sys/net/vsock/child_ns_mode
    $ cat /proc/sys/net/vsock/child_ns_mode
    global    <--- expected "local"

Fix this by initializing `child_ns_mode_locked` to 0 (unlocked) in
`vsock_net_init()`, so the write-once mechanism works correctly after
module reload.

Fixes: 102eab95f0 ("vsock: lock down child_ns_mode as write-once")
Reported-by: Jin Liu <jinl@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Bobby Eshleman <bobbyeshleman@meta.com>
Link: https://patch.msgid.link/20260401092153.28462-1-sgarzare@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-02 08:18:56 -07:00
Xiang Mei
70f73562d2 selftests/tc-testing: add tests for cls_fw and cls_flow on shared blocks
Regression tests for the shared-block NULL derefs fixed in the previous
two patches:

  - fw: attempt to attach an empty fw filter to a shared block and
    verify the configuration is rejected with EINVAL.
  - flow: create a flow filter on a shared block without a baseclass
    and verify the configuration is rejected with EINVAL.

Signed-off-by: Xiang Mei <xmei5@asu.edu>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260331050217.504278-3-xmei5@asu.edu
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-02 15:08:42 +02:00
Xiang Mei
1a280dd4bd net/sched: cls_flow: fix NULL pointer dereference on shared blocks
flow_change() calls tcf_block_q() and dereferences q->handle to derive
a default baseclass.  Shared blocks leave block->q NULL, causing a NULL
deref when a flow filter without a fully qualified baseclass is created
on a shared block.

Check tcf_block_shared() before accessing block->q and return -EINVAL
for shared blocks.  This avoids the null-deref shown below:

=======================================================================
KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f]
RIP: 0010:flow_change (net/sched/cls_flow.c:508)
Call Trace:
 tc_new_tfilter (net/sched/cls_api.c:2432)
 rtnetlink_rcv_msg (net/core/rtnetlink.c:6980)
 [...]
=======================================================================

Fixes: 1abf272022 ("net: sched: tcindex, fw, flow: use tcf_block_q helper to get struct Qdisc")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260331050217.504278-2-xmei5@asu.edu
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-02 15:08:42 +02:00
Xiang Mei
faeea8bbf6 net/sched: cls_fw: fix NULL pointer dereference on shared blocks
The old-method path in fw_classify() calls tcf_block_q() and
dereferences q->handle.  Shared blocks leave block->q NULL, causing a
NULL deref when an empty cls_fw filter is attached to a shared block
and a packet with a nonzero major skb mark is classified.

Reject the configuration in fw_change() when the old method (no
TCA_OPTIONS) is used on a shared block, since fw_classify()'s
old-method path needs block->q which is NULL for shared blocks.

The fixed null-ptr-deref calling stack:
 KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f]
 RIP: 0010:fw_classify (net/sched/cls_fw.c:81)
 Call Trace:
  tcf_classify (./include/net/tc_wrapper.h:197 net/sched/cls_api.c:1764 net/sched/cls_api.c:1860)
  tc_run (net/core/dev.c:4401)
  __dev_queue_xmit (net/core/dev.c:4535 net/core/dev.c:4790)

Fixes: 1abf272022 ("net: sched: tcindex, fw, flow: use tcf_block_q helper to get struct Qdisc")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260331050217.504278-1-xmei5@asu.edu
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-02 15:08:41 +02:00
Paolo Abeni
a80a014f83 Merge branch 'net-x25-fix-overflow-and-double-free'
Martin Schiller says:

====================
net/x25: Fix overflow and double free

This patch set includes 2 fixes:

The first removes a potential double free of received skb
The second fixes an overflow when accumulating packets with the more-bit
set.

Signed-off-by: Martin Schiller <ms@dev.tdt.de>
====================

Link: https://patch.msgid.link/20260331-x25_fraglen-v4-0-3e69f18464b4@dev.tdt.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-02 13:36:10 +02:00
Martin Schiller
a1822cb524 net/x25: Fix overflow when accumulating packets
Add a check to ensure that `x25_sock.fraglen` does not overflow.

The `fraglen` also needs to be resetted when purging `fragment_queue` in
`x25_clear_queues()`.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Suggested-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Link: https://patch.msgid.link/20260331-x25_fraglen-v4-2-3e69f18464b4@dev.tdt.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-02 13:36:08 +02:00
Martin Schiller
d10a26aa4d net/x25: Fix potential double free of skb
When alloc_skb fails in x25_queue_rx_frame it calls kfree_skb(skb) at
line 48 and returns 1 (error).
This error propagates back through the call chain:

x25_queue_rx_frame returns 1
    |
    v
x25_state3_machine receives the return value 1 and takes the else
branch at line 278, setting queued=0 and returning 0
    |
    v
x25_process_rx_frame returns queued=0
    |
    v
x25_backlog_rcv at line 452 sees queued=0 and calls kfree_skb(skb)
again

This would free the same skb twice. Looking at x25_backlog_rcv:

net/x25/x25_in.c:x25_backlog_rcv() {
    ...
    queued = x25_process_rx_frame(sk, skb);
    ...
    if (!queued)
        kfree_skb(skb);
}

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Link: https://patch.msgid.link/20260331-x25_fraglen-v4-1-3e69f18464b4@dev.tdt.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-04-02 13:36:08 +02:00
Takashi Iwai
b477ab8893 Merge tag 'asoc-fix-v7.0-rc6' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v7.0

Another smallish batch of fixes and quirks, these days it's AMD that is
getting all the DMI entries added.  We've got one core fix for a missing
list initialisation with auxiliary devices, otherwise it's all fairly
small things.
2026-04-02 09:08:03 +02:00
Jakub Kicinski
9351edf65c Merge branch 'bnxt_en-bug-fixes'
Michael Chan says:

====================
bnxt_en: Bug fixes

The first patch is a refactor patch needed by the second patch to
fix XDP ring initialization during FW reset.  The third patch
fixes an issue related to stats context reservation for RoCE.
====================

Link: https://patch.msgid.link/20260331065138.948205-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:13:00 -07:00
Pavan Chebbi
071dbfa304 bnxt_en: Restore default stat ctxs for ULP when resource is available
During resource reservation, if the L2 driver does not have enough
MSIX vectors to provide to the RoCE driver, it sets the stat ctxs for
ULP also to 0 so that we don't have to reserve it unnecessarily.

However, subsequently the user may reduce L2 rings thereby freeing up
some resources that the L2 driver can now earmark for RoCE. In this
case, the driver should restore the default ULP stat ctxs to make
sure that all RoCE resources are ready for use.

The RoCE driver may fail to initialize in this scenario without this
fix.

Fixes: d630624ebd ("bnxt_en: Utilize ulp client resources if RoCE is not registered")
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260331065138.948205-4-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:12:56 -07:00
Michael Chan
e4bf81dcad bnxt_en: Don't assume XDP is never enabled in bnxt_init_dflt_ring_mode()
The original code made the assumption that when we set up the initial
default ring mode, we must be just loading the driver and XDP cannot
be enabled yet.  This is not true when the FW goes through a resource
or capability change.  Resource reservations will be cancelled and
reinitialized with XDP already enabled.  devlink reload with XDP enabled
will also have the same issue.  This scenario will cause the ring
arithmetic to be all wrong in the bnxt_init_dflt_ring_mode() path
causing failure:

bnxt_en 0000:a1:00.0 ens2f0np0: bnxt_setup_int_mode err: ffffffea
bnxt_en 0000:a1:00.0 ens2f0np0: bnxt_request_irq err: ffffffea
bnxt_en 0000:a1:00.0 ens2f0np0: nic open fail (rc: ffffffea)

Fix it by properly accounting for XDP in the bnxt_init_dflt_ring_mode()
path by using the refactored helper functions in the previous patch.

Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Fixes: ec5d31e3c1 ("bnxt_en: Handle firmware reset status during IF_UP.")
Fixes: 228ea8c187 ("bnxt_en: implement devlink dev reload driver_reinit")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260331065138.948205-3-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:12:56 -07:00
Michael Chan
ceee35e567 bnxt_en: Refactor some basic ring setup and adjustment logic
Refactor out the basic code that trims the default rings, sets up and
adjusts XDP TX rings and CP rings.  There is no change in behavior.
This is to prepare for the next bug fix patch.

Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260331065138.948205-2-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:12:56 -07:00
Jakub Kicinski
a59dc0f871 Merge branch 'mlx5-misc-fixes-2026-03-30'
Tariq Toukan says:

====================
mlx5 misc fixes 2026-03-30

This patchset provides misc bug fixes from the team to the mlx5
core driver.
====================

Link: https://patch.msgid.link/20260330194015.53585-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:10:46 -07:00
Saeed Mahameed
403186400a net/mlx5: Fix switchdev mode rollback in case of failure
If for some internal reason switchdev mode fails, we rollback to legacy
mode, before this patch, rollback will unregister the uplink netdev and
leave it unregistered causing the below kernel bug.

To fix this, we need to avoid netdev unregister by setting the proper
rollback flag 'MLX5_PRIV_FLAGS_SWITCH_LEGACY' to indicate legacy mode.

devlink (431) used greatest stack depth: 11048 bytes left
mlx5_core 0000:00:03.0: E-Switch: Disable: mode(LEGACY), nvfs(0), \
	necvfs(0), active vports(0)
mlx5_core 0000:00:03.0: E-Switch: Supported tc chains and prios offload
mlx5_core 0000:00:03.0: Loading uplink representor for vport 65535
mlx5_core 0000:00:03.0: mlx5_cmd_out_err:816:(pid 456): \
	QUERY_HCA_CAP(0x100) op_mod(0x0) failed, \
	status bad parameter(0x3), syndrome (0x3a3846), err(-22)
mlx5_core 0000:00:03.0 enp0s3np0 (unregistered): Unloading uplink \
	representor for vport 65535
 ------------[ cut here ]------------
kernel BUG at net/core/dev.c:12070!
Oops: invalid opcode: 0000 [#1] SMP NOPTI
CPU: 2 UID: 0 PID: 456 Comm: devlink Not tainted 6.16.0-rc3+ \
	#9 PREEMPT(voluntary)
RIP: 0010:unregister_netdevice_many_notify+0x123/0xae0
...
Call Trace:
[   90.923094]  unregister_netdevice_queue+0xad/0xf0
[   90.923323]  unregister_netdev+0x1c/0x40
[   90.923522]  mlx5e_vport_rep_unload+0x61/0xc6
[   90.923736]  esw_offloads_enable+0x8e6/0x920
[   90.923947]  mlx5_eswitch_enable_locked+0x349/0x430
[   90.924182]  ? is_mp_supported+0x57/0xb0
[   90.924376]  mlx5_devlink_eswitch_mode_set+0x167/0x350
[   90.924628]  devlink_nl_eswitch_set_doit+0x6f/0xf0
[   90.924862]  genl_family_rcv_msg_doit+0xe8/0x140
[   90.925088]  genl_rcv_msg+0x18b/0x290
[   90.925269]  ? __pfx_devlink_nl_pre_doit+0x10/0x10
[   90.925506]  ? __pfx_devlink_nl_eswitch_set_doit+0x10/0x10
[   90.925766]  ? __pfx_devlink_nl_post_doit+0x10/0x10
[   90.926001]  ? __pfx_genl_rcv_msg+0x10/0x10
[   90.926206]  netlink_rcv_skb+0x52/0x100
[   90.926393]  genl_rcv+0x28/0x40
[   90.926557]  netlink_unicast+0x27d/0x3d0
[   90.926749]  netlink_sendmsg+0x1f7/0x430
[   90.926942]  __sys_sendto+0x213/0x220
[   90.927127]  ? __sys_recvmsg+0x6a/0xd0
[   90.927312]  __x64_sys_sendto+0x24/0x30
[   90.927504]  do_syscall_64+0x50/0x1c0
[   90.927687]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   90.927929] RIP: 0033:0x7f7d0363e047

Fixes: 2a4f56fbcc ("net/mlx5e: Keep netdev when leave switchdev for devlink set legacy only")
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260330194015.53585-4-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:10:41 -07:00
Saeed Mahameed
10dc35f6a4 net/mlx5: Avoid "No data available" when FW version queries fail
Avoid printing the misleading "kernel answers: No data available" devlink
output when querying firmware or pending firmware version fails
(e.g. MLX5 fw state errors / flash failures).

FW can fail on loading the pending flash image and get its version due
to various reasons, examples:

mlxfw: Firmware flash failed: key not applicable, err (7)
mlx5_fw_image_pending: can't read pending fw version while fw state is 1

and the resulting:
$ devlink dev info
kernel answers: No data available

Instead, just report 0 or 0xfff.. versions in case of failure to indicate
a problem, and let other information be shown.

after the fix:
$ devlink dev info
pci/0000:00:06.0:
  driver mlx5_core
  serial_number xxx...
  board.serial_number MT2225300179
  versions:
      fixed:
        fw.psid MT_0000000436
      running:
        fw.version 22.41.0188
        fw 22.41.0188
      stored:
        fw.version 255.255.65535
        fw 255.255.65535

Fixes: 9c86b07e30 ("net/mlx5: Added fw version query command")
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260330194015.53585-3-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:10:41 -07:00
Shay Drory
bf16bca665 net/mlx5: lag: Check for LAG device before creating debugfs
__mlx5_lag_dev_add_mdev() may return 0 (success) even when an error
occurs that is handled gracefully. Consequently, the initialization
flow proceeds to call mlx5_ldev_add_debugfs() even when there is no
valid LAG context.

mlx5_ldev_add_debugfs() blindly created the debugfs directory and
attributes. This exposed interfaces (like the members file) that rely on
a valid ldev pointer, leading to potential NULL pointer dereferences if
accessed when ldev is NULL.

Add a check to verify that mlx5_lag_dev(dev) returns a valid pointer
before attempting to create the debugfs entries.

Fixes: 7f46a0b732 ("net/mlx5: Lag, add debugfs to query hardware lag state")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260330194015.53585-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 20:10:40 -07:00
Fedor Pchelkin
f0f367a4f4 net: macb: properly unregister fixed rate clocks
The additional resources allocated with clk_register_fixed_rate() need
to be released with clk_unregister_fixed_rate(), otherwise they are lost.

Fixes: 83a77e9ec4 ("net: macb: Added PCI wrapper for Platform Driver.")
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Link: https://patch.msgid.link/20260330184542.626619-2-pchelkin@ispras.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 19:57:20 -07:00
Fedor Pchelkin
ce8fe5287b net: macb: fix clk handling on PCI glue driver removal
platform_device_unregister() may still want to use the registered clks
during runtime resume callback.

Note that there is a commit d82d5303c4 ("net: macb: fix use after free
on rmmod") that addressed the similar problem of clk vs platform device
unregistration but just moved the bug to another place.

Save the pointers to clks into local variables for reuse after platform
device is unregistered.

BUG: KASAN: use-after-free in clk_prepare+0x5a/0x60
Read of size 8 at addr ffff888104f85e00 by task modprobe/597

CPU: 2 PID: 597 Comm: modprobe Not tainted 6.1.164+ #114
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x8d/0xba
 print_report+0x17f/0x496
 kasan_report+0xd9/0x180
 clk_prepare+0x5a/0x60
 macb_runtime_resume+0x13d/0x410 [macb]
 pm_generic_runtime_resume+0x97/0xd0
 __rpm_callback+0xc8/0x4d0
 rpm_callback+0xf6/0x230
 rpm_resume+0xeeb/0x1a70
 __pm_runtime_resume+0xb4/0x170
 bus_remove_device+0x2e3/0x4b0
 device_del+0x5b3/0xdc0
 platform_device_del+0x4e/0x280
 platform_device_unregister+0x11/0x50
 pci_device_remove+0xae/0x210
 device_remove+0xcb/0x180
 device_release_driver_internal+0x529/0x770
 driver_detach+0xd4/0x1a0
 bus_remove_driver+0x135/0x260
 driver_unregister+0x72/0xb0
 pci_unregister_driver+0x26/0x220
 __do_sys_delete_module+0x32e/0x550
 do_syscall_64+0x35/0x80
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8
 </TASK>

Allocated by task 519:
 kasan_save_stack+0x2c/0x50
 kasan_set_track+0x21/0x30
 __kasan_kmalloc+0x8e/0x90
 __clk_register+0x458/0x2890
 clk_hw_register+0x1a/0x60
 __clk_hw_register_fixed_rate+0x255/0x410
 clk_register_fixed_rate+0x3c/0xa0
 macb_probe+0x1d8/0x42e [macb_pci]
 local_pci_probe+0xd7/0x190
 pci_device_probe+0x252/0x600
 really_probe+0x255/0x7f0
 __driver_probe_device+0x1ee/0x330
 driver_probe_device+0x4c/0x1f0
 __driver_attach+0x1df/0x4e0
 bus_for_each_dev+0x15d/0x1f0
 bus_add_driver+0x486/0x5e0
 driver_register+0x23a/0x3d0
 do_one_initcall+0xfd/0x4d0
 do_init_module+0x18b/0x5a0
 load_module+0x5663/0x7950
 __do_sys_finit_module+0x101/0x180
 do_syscall_64+0x35/0x80
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8

Freed by task 597:
 kasan_save_stack+0x2c/0x50
 kasan_set_track+0x21/0x30
 kasan_save_free_info+0x2a/0x50
 __kasan_slab_free+0x106/0x180
 __kmem_cache_free+0xbc/0x320
 clk_unregister+0x6de/0x8d0
 macb_remove+0x73/0xc0 [macb_pci]
 pci_device_remove+0xae/0x210
 device_remove+0xcb/0x180
 device_release_driver_internal+0x529/0x770
 driver_detach+0xd4/0x1a0
 bus_remove_driver+0x135/0x260
 driver_unregister+0x72/0xb0
 pci_unregister_driver+0x26/0x220
 __do_sys_delete_module+0x32e/0x550
 do_syscall_64+0x35/0x80
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8

Fixes: d82d5303c4 ("net: macb: fix use after free on rmmod")
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Link: https://patch.msgid.link/20260330184542.626619-1-pchelkin@ispras.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 19:57:16 -07:00
Srujana Challa
b4e5f04c58 virtio_net: clamp rss_max_key_size to NETDEV_RSS_KEY_LEN
rss_max_key_size in the virtio spec is the maximum key size supported by
the device, not a mandatory size the driver must use. Also the value 40
is a spec minimum, not a spec maximum.

The current code rejects RSS and can fail probe when the device reports a
larger rss_max_key_size than the driver buffer limit. Instead, clamp the
effective key length to min(device rss_max_key_size, NETDEV_RSS_KEY_LEN)
and keep RSS enabled.

This keeps probe working on devices that advertise larger maximum key sizes
while respecting the netdev RSS key buffer size limit.

Fixes: 3f7d9c1964 ("virtio_net: Add hash_key_length check")
Cc: stable@vger.kernel.org
Signed-off-by: Srujana Challa <schalla@marvell.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://patch.msgid.link/20260326142344.1171317-1-schalla@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 19:47:44 -07:00
Yucheng Lu
d64cb81dcb net/sched: sch_netem: fix out-of-bounds access in packet corruption
In netem_enqueue(), the packet corruption logic uses
get_random_u32_below(skb_headlen(skb)) to select an index for
modifying skb->data. When an AF_PACKET TX_RING sends fully non-linear
packets over an IPIP tunnel, skb_headlen(skb) evaluates to 0.

Passing 0 to get_random_u32_below() takes the variable-ceil slow path
which returns an unconstrained 32-bit random integer. Using this
unconstrained value as an offset into skb->data results in an
out-of-bounds memory access.

Fix this by verifying skb_headlen(skb) is non-zero before attempting
to corrupt the linear data area. Fully non-linear packets will silently
bypass the corruption logic.

Fixes: c865e5d99e ("[PKT_SCHED] netem: packet corruption option")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Yuhang Zheng <z1652074432@gmail.com>
Signed-off-by: Yucheng Lu <kanolyc@gmail.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Link: https://patch.msgid.link/45435c0935df877853a81e6d06205ac738ec65fa.1774941614.git.kanolyc@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 19:24:20 -07:00
Jakub Kicinski
aba53ccf05 Merge tag 'nf-26-04-01' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net. Note that most
of the bugs fixed here are >5 years old.  The large PR is not due to an
increase in regressions.

1) Flowtable hardware offload support in IPv6 can lead to out-of-bounds
   when populating the rule action array when combined with double-tagged
   vlan. Bump the maximum number of actions from 16 to 24 and check that
   such limit is never reached, otherwise bail out.  This bugs stems from
   the original flowtable hardware offload support.

2) nfnetlink_log does not include the netlink header size of the trailing
   NLMSG_DONE message when calculating the skb size. From Florian Westphal.

3) Reject names in xt_cgroup and xt_rateest extensions which are not
   nul-terminated. Also from Florian.

4) Use nla_strcmp in ipset lookup by set name, since IPSET_ATTR_NAME and
   IPSET_ATTR_NAMEREF are of NLA_STRING type. From Florian Westphal.

5) When unregistering conntrack helpers, pass the helper that is going
   away so the expectation cleanup is done accordingly, otherwise UaF is
   possible when accessing expectation that refer to the helper that is
   gone. From Qi Tang.

6) Zero expectation NAT fields to address leaking kernel memory through
   the expectation netlink dump when unset. Also from Qi Tang.

7) Use the master conntrack helper when creating expectations via
   ctnetlink, ignore the suggested helper through CTA_EXPECT_HELP_NAME.
   This allows to address a possible read of kernel memory off the
   expectation object boundary.

8) Fix incorrect release of the hash bucket logic in ipset when the
   bucket is empty, leading to shrinking the hash bucket to size 0
   which deals to out-of-bound write in next element additions.
   From Yifan Wu.

9) Allow the use of x_tables extensions that explicitly declare
   NFPROTO_ARP support only. This is to avoid an incorrect hook number
   validation due to non-overlapping arp and inet hook number
   definitions.

10) Reject immediate NF_QUEUE verdict in nf_tables. The userspace
    nft tool always uses the nft_queue expression for queueing.
    This ensures this verdict cannot be used for the arp family,
    which does supported this.

* tag 'nf-26-04-01' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: reject immediate NF_QUEUE verdict
  netfilter: x_tables: restrict xt_check_match/xt_check_target extensions for NFPROTO_ARP
  netfilter: ipset: drop logically empty buckets in mtype_del
  netfilter: ctnetlink: ignore explicit helper on new expectations
  netfilter: ctnetlink: zero expect NAT fields when CTA_EXPECT_NAT absent
  netfilter: nf_conntrack_helper: pass helper to expect cleanup
  netfilter: ipset: use nla_strcmp for IPSET_ATTR_NAME attr
  netfilter: x_tables: ensure names are nul-terminated
  netfilter: nfnetlink_log: account for netlink header size
  netfilter: flowtable: strictly check for maximum number of actions
====================

Link: https://patch.msgid.link/20260401103646.1015423-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 19:19:36 -07:00
Jakub Kicinski
6d6be7070e Merge tag 'for-net-2026-04-01' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
Luiz Augusto von Dentz says:

====================
bluetooth pull request for net:

 - hci_sync: Fix UAF in le_read_features_complete
 - hci_sync: call destroy in hci_cmd_sync_run if immediate
 - hci_sync: hci_cmd_sync_queue_once() return -EEXIST if exists
 - hci_sync: fix leaks when hci_cmd_sync_queue_once fails
 - hci_sync: fix stack buffer overflow in hci_le_big_create_sync
 - hci_conn: fix potential UAF in set_cig_params_sync
 - hci_event: fix potential UAF in hci_le_remote_conn_param_req_evt
 - hci_event: move wake reason storage into validated event handlers
 - SMP: force responder MITM requirements before building the pairing response
 - SMP: derive legacy responder STK authentication from MITM state
 - MGMT: validate LTK enc_size on load
 - MGMT: validate mesh send advertising payload length
 - SCO: fix race conditions in sco_sock_connect()
 - hci_h4: Fix race during initialization

* tag 'for-net-2026-04-01' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth:
  Bluetooth: hci_sync: fix stack buffer overflow in hci_le_big_create_sync
  Bluetooth: SMP: derive legacy responder STK authentication from MITM state
  Bluetooth: SMP: force responder MITM requirements before building the pairing response
  Bluetooth: MGMT: validate mesh send advertising payload length
  Bluetooth: hci_event: fix potential UAF in hci_le_remote_conn_param_req_evt
  Bluetooth: hci_conn: fix potential UAF in set_cig_params_sync
  Bluetooth: MGMT: validate LTK enc_size on load
  Bluetooth: hci_h4: Fix race during initialization
  Bluetooth: hci_sync: Fix UAF in le_read_features_complete
  Bluetooth: hci_sync: fix leaks when hci_cmd_sync_queue_once fails
  Bluetooth: hci_sync: hci_cmd_sync_queue_once() return -EEXIST if exists
  Bluetooth: hci_event: move wake reason storage into validated event handlers
  Bluetooth: SCO: fix race conditions in sco_sock_connect()
  Bluetooth: hci_sync: call destroy in hci_cmd_sync_run if immediate
====================

Link: https://patch.msgid.link/20260401205834.2189162-1-luiz.dentz@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 19:12:23 -07:00
Weiming Shi
a54ecccfae rds: ib: reject FRMR registration before IB connection is established
rds_ib_get_mr() extracts the rds_ib_connection from conn->c_transport_data
and passes it to rds_ib_reg_frmr() for FRWR memory registration. On a
fresh outgoing connection, ic is allocated in rds_ib_conn_alloc() with
i_cm_id = NULL because the connection worker has not yet called
rds_ib_conn_path_connect() to create the rdma_cm_id. When sendmsg() with
RDS_CMSG_RDMA_MAP is called on such a connection, the sendmsg path parses
the control message before any connection establishment, allowing
rds_ib_post_reg_frmr() to dereference ic->i_cm_id->qp and crash the
kernel.

The existing guard in rds_ib_reg_frmr() only checks for !ic (added in
commit 9e630bcb77), which does not catch this case since ic is allocated
early and is always non-NULL once the connection object exists.

 KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017]
 RIP: 0010:rds_ib_post_reg_frmr+0x50e/0x920
 Call Trace:
  rds_ib_post_reg_frmr (net/rds/ib_frmr.c:167)
  rds_ib_map_frmr (net/rds/ib_frmr.c:252)
  rds_ib_reg_frmr (net/rds/ib_frmr.c:430)
  rds_ib_get_mr (net/rds/ib_rdma.c:615)
  __rds_rdma_map (net/rds/rdma.c:295)
  rds_cmsg_rdma_map (net/rds/rdma.c:860)
  rds_sendmsg (net/rds/send.c:1363)
  ____sys_sendmsg
  do_syscall_64

Add a check in rds_ib_get_mr() that verifies ic, i_cm_id, and qp are all
non-NULL before proceeding with FRMR registration, mirroring the guard
already present in rds_ib_post_inv(). Return -ENODEV when the connection
is not ready, which the existing error handling in rds_cmsg_send() converts
to -EAGAIN for userspace retry and triggers rds_conn_connect_if_down() to
start the connection worker.

Fixes: 1659185fb4 ("RDS: IB: Support Fastreg MR (FRMR) memory registration mode")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Reviewed-by: Allison Henderson <achender@kernel.org>
Link: https://patch.msgid.link/20260330163237.2752440-2-bestswngs@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 17:52:40 -07:00
Hangbin Liu
ffb5a4843c ipv6: fix data race in fib6_metric_set() using cmpxchg
fib6_metric_set() may be called concurrently from softirq context without
holding the FIB table lock. A typical path is:

  ndisc_router_discovery()
    spin_unlock_bh(&table->tb6_lock)        <- lock released
    fib6_metric_set(rt, RTAX_HOPLIMIT, ...) <- lockless call

When two CPUs process Router Advertisement packets for the same router
simultaneously, they can both arrive at fib6_metric_set() with the same
fib6_info pointer whose fib6_metrics still points to dst_default_metrics.

  if (f6i->fib6_metrics == &dst_default_metrics) {   /* both CPUs: true */
      struct dst_metrics *p = kzalloc_obj(*p, GFP_ATOMIC);
      refcount_set(&p->refcnt, 1);
      f6i->fib6_metrics = p;   /* CPU1 overwrites CPU0's p -> p0 leaked */
  }

The dst_metrics allocated by the losing CPU has refcnt=1 but no pointer
to it anywhere in memory, producing a kmemleak report:

  unreferenced object 0xff1100025aca1400 (size 96):
    comm "softirq", pid 0, jiffies 4299271239
    backtrace:
      kmalloc_trace+0x28a/0x380
      fib6_metric_set+0xcd/0x180
      ndisc_router_discovery+0x12dc/0x24b0
      icmpv6_rcv+0xc16/0x1360

Fix this by:
 - Set val for p->metrics before published via cmpxchg() so the metrics
   value is ready before the pointer becomes visible to other CPUs.
 - Replace the plain pointer store with cmpxchg() and free the allocation
   safely when competition failed.
 - Add READ_ONCE()/WRITE_ONCE() for metrics[] setting in the non-default
   metrics path to prevent compiler-based data races.

Fixes: d4ead6b34b ("net/ipv6: move metrics from dst to rt6_info")
Reported-by: Fei Liu <feliu@redhat.com>
Reviewed-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260331-b4-fib6_metric_set-kmemleak-v3-1-88d27f4d8825@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-01 17:44:35 -07:00
hkbinbin
bc39a09473 Bluetooth: hci_sync: fix stack buffer overflow in hci_le_big_create_sync
hci_le_big_create_sync() uses DEFINE_FLEX to allocate a
struct hci_cp_le_big_create_sync on the stack with room for 0x11 (17)
BIS entries.  However, conn->num_bis can hold up to HCI_MAX_ISO_BIS (31)
entries — validated against ISO_MAX_NUM_BIS (0x1f) in the caller
hci_conn_big_create_sync().  When conn->num_bis is between 18 and 31,
the memcpy that copies conn->bis into cp->bis writes up to 14 bytes
past the stack buffer, corrupting adjacent stack memory.

This is trivially reproducible: binding an ISO socket with
bc_num_bis = ISO_MAX_NUM_BIS (31) and calling listen() will
eventually trigger hci_le_big_create_sync() from the HCI command
sync worker, causing a KASAN-detectable stack-out-of-bounds write:

  BUG: KASAN: stack-out-of-bounds in hci_le_big_create_sync+0x256/0x3b0
  Write of size 31 at addr ffffc90000487b48 by task kworker/u9:0/71

Fix this by changing the DEFINE_FLEX count from the incorrect 0x11 to
HCI_MAX_ISO_BIS, which matches the maximum number of BIS entries that
conn->bis can actually carry.

Fixes: 42ecf19471 ("Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending")
Cc: stable@vger.kernel.org
Signed-off-by: hkbinbin <hkbinbinbin@gmail.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:48:28 -04:00
Oleh Konko
20756fec2f Bluetooth: SMP: derive legacy responder STK authentication from MITM state
The legacy responder path in smp_random() currently labels the stored
STK as authenticated whenever pending_sec_level is BT_SECURITY_HIGH.
That reflects what the local service requested, not what the pairing
flow actually achieved.

For Just Works/Confirm legacy pairing, SMP_FLAG_MITM_AUTH stays clear
and the resulting STK should remain unauthenticated even if the local
side requested HIGH security. Use the established MITM state when
storing the responder STK so the key metadata matches the pairing result.

This also keeps the legacy path aligned with the Secure Connections code,
which already treats JUST_WORKS/JUST_CFM as unauthenticated.

Fixes: fff3490f47 ("Bluetooth: Fix setting correct authentication information for SMP STK")
Cc: stable@vger.kernel.org
Signed-off-by: Oleh Konko <security@1seal.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:48:06 -04:00
Oleh Konko
d05111bfe3 Bluetooth: SMP: force responder MITM requirements before building the pairing response
smp_cmd_pairing_req() currently builds the pairing response from the
initiator auth_req before enforcing the local BT_SECURITY_HIGH
requirement. If the initiator omits SMP_AUTH_MITM, the response can
also omit it even though the local side still requires MITM.

tk_request() then sees an auth value without SMP_AUTH_MITM and may
select JUST_CFM, making method selection inconsistent with the pairing
policy the responder already enforces.

When the local side requires HIGH security, first verify that MITM can
be achieved from the IO capabilities and then force SMP_AUTH_MITM in the
response in both rsp.auth_req and auth. This keeps the responder auth bits
and later method selection aligned.

Fixes: 2b64d153a0 ("Bluetooth: Add MITM mechanism to LE-SMP")
Cc: stable@vger.kernel.org
Suggested-by: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
Signed-off-by: Oleh Konko <security@1seal.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:47:43 -04:00
Keenan Dong
bda93eec78 Bluetooth: MGMT: validate mesh send advertising payload length
mesh_send() currently bounds MGMT_OP_MESH_SEND by total command
length, but it never verifies that the bytes supplied for the
flexible adv_data[] array actually match the embedded adv_data_len
field. MGMT_MESH_SEND_SIZE only covers the fixed header, so a
truncated command can still pass the existing 20..50 byte range
check and later drive the async mesh send path past the end of the
queued command buffer.

Keep rejecting zero-length and oversized advertising payloads, but
validate adv_data_len explicitly and require the command length to
exactly match the flexible array size before queueing the request.

Fixes: b338d91703 ("Bluetooth: Implement support for Mesh")
Reported-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:47:19 -04:00
Pauli Virtanen
b255531b27 Bluetooth: hci_event: fix potential UAF in hci_le_remote_conn_param_req_evt
hci_conn lookup and field access must be covered by hdev lock in
hci_le_remote_conn_param_req_evt, otherwise it's possible it is freed
concurrently.

Extend the hci_dev_lock critical section to cover all conn usage.

Fixes: 95118dd4ed ("Bluetooth: hci_event: Use of a function table to handle LE subevents")
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:46:55 -04:00
Pauli Virtanen
a2639a7f0f Bluetooth: hci_conn: fix potential UAF in set_cig_params_sync
hci_conn lookup and field access must be covered by hdev lock in
set_cig_params_sync, otherwise it's possible it is freed concurrently.

Take hdev lock to prevent hci_conn from being deleted or modified
concurrently.  Just RCU lock is not suitable here, as we also want to
avoid "tearing" in the configuration.

Fixes: a091289218 ("Bluetooth: hci_conn: Fix hci_le_set_cig_params")
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:46:33 -04:00
Keenan Dong
b8dbe9648d Bluetooth: MGMT: validate LTK enc_size on load
Load Long Term Keys stores the user-provided enc_size and later uses
it to size fixed-size stack operations when replying to LE LTK
requests. An enc_size larger than the 16-byte key buffer can therefore
overflow the reply stack buffer.

Reject oversized enc_size values while validating the management LTK
record so invalid keys never reach the stored key state.

Fixes: 346af67b8d ("Bluetooth: Add MGMT handlers for dealing with SMP LTK's")
Reported-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:46:09 -04:00
Jonathan Rissanen
0ffac654e9 Bluetooth: hci_h4: Fix race during initialization
Commit 5df5dafc17 ("Bluetooth: hci_uart: Fix another race during
initialization") fixed a race for hci commands sent during initialization.
However, there is still a race that happens if an hci event from one of
these commands is received before HCI_UART_REGISTERED has been set at
the end of hci_uart_register_dev(). The event will be ignored which
causes the command to fail with a timeout in the log:

"Bluetooth: hci0: command 0x1003 tx timeout"

This is because the hci event receive path (hci_uart_tty_receive ->
h4_recv) requires HCI_UART_REGISTERED to be set in h4_recv(), while the
hci command transmit path (hci_uart_send_frame -> h4_enqueue) only
requires HCI_UART_PROTO_INIT to be set in hci_uart_send_frame().

The check for HCI_UART_REGISTERED was originally added in commit
c257820291 ("Bluetooth: Fix H4 crash from incoming UART packets")
to fix a crash caused by hu->hdev being null dereferenced. That can no
longer happen: once HCI_UART_PROTO_INIT is set in hci_uart_register_dev()
all pointers (hu, hu->priv and hu->hdev) are valid, and
hci_uart_tty_receive() already calls h4_recv() on HCI_UART_PROTO_INIT
or HCI_UART_PROTO_READY.

Remove the check for HCI_UART_REGISTERED in h4_recv() to fix the race
condition.

Fixes: 5df5dafc17 ("Bluetooth: hci_uart: Fix another race during initialization")
Signed-off-by: Jonathan Rissanen <jonathan.rissanen@axis.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:45:47 -04:00
Luiz Augusto von Dentz
035c25007c Bluetooth: hci_sync: Fix UAF in le_read_features_complete
This fixes the following backtrace caused by hci_conn being freed
before le_read_features_complete but after
hci_le_read_remote_features_sync so hci_conn_del -> hci_cmd_sync_dequeue
is not able to prevent it:

==================================================================
BUG: KASAN: slab-use-after-free in instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
BUG: KASAN: slab-use-after-free in atomic_dec_and_test include/linux/atomic/atomic-instrumented.h:1383 [inline]
BUG: KASAN: slab-use-after-free in hci_conn_drop include/net/bluetooth/hci_core.h:1688 [inline]
BUG: KASAN: slab-use-after-free in le_read_features_complete+0x5b/0x340 net/bluetooth/hci_sync.c:7344
Write of size 4 at addr ffff8880796b0010 by task kworker/u9:0/52

CPU: 0 UID: 0 PID: 52 Comm: kworker/u9:0 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025
Workqueue: hci0 hci_cmd_sync_work
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xcd/0x630 mm/kasan/report.c:482
 kasan_report+0xe0/0x110 mm/kasan/report.c:595
 check_region_inline mm/kasan/generic.c:194 [inline]
 kasan_check_range+0x100/0x1b0 mm/kasan/generic.c:200
 instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
 atomic_dec_and_test include/linux/atomic/atomic-instrumented.h:1383 [inline]
 hci_conn_drop include/net/bluetooth/hci_core.h:1688 [inline]
 le_read_features_complete+0x5b/0x340 net/bluetooth/hci_sync.c:7344
 hci_cmd_sync_work+0x1ff/0x430 net/bluetooth/hci_sync.c:334
 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257
 process_scheduled_works kernel/workqueue.c:3340 [inline]
 worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421
 kthread+0x3c5/0x780 kernel/kthread.c:463
 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
 </TASK>

Allocated by task 5932:
 kasan_save_stack+0x33/0x60 mm/kasan/common.c:56
 kasan_save_track+0x14/0x30 mm/kasan/common.c:77
 poison_kmalloc_redzone mm/kasan/common.c:400 [inline]
 __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:417
 kmalloc_noprof include/linux/slab.h:957 [inline]
 kzalloc_noprof include/linux/slab.h:1094 [inline]
 __hci_conn_add+0xf8/0x1c70 net/bluetooth/hci_conn.c:963
 hci_conn_add_unset+0x76/0x100 net/bluetooth/hci_conn.c:1084
 le_conn_complete_evt+0x639/0x1f20 net/bluetooth/hci_event.c:5714
 hci_le_enh_conn_complete_evt+0x23d/0x380 net/bluetooth/hci_event.c:5861
 hci_le_meta_evt+0x357/0x5e0 net/bluetooth/hci_event.c:7408
 hci_event_func net/bluetooth/hci_event.c:7716 [inline]
 hci_event_packet+0x685/0x11c0 net/bluetooth/hci_event.c:7773
 hci_rx_work+0x2c9/0xeb0 net/bluetooth/hci_core.c:4076
 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257
 process_scheduled_works kernel/workqueue.c:3340 [inline]
 worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421
 kthread+0x3c5/0x780 kernel/kthread.c:463
 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246

Freed by task 5932:
 kasan_save_stack+0x33/0x60 mm/kasan/common.c:56
 kasan_save_track+0x14/0x30 mm/kasan/common.c:77
 __kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:587
 kasan_save_free_info mm/kasan/kasan.h:406 [inline]
 poison_slab_object mm/kasan/common.c:252 [inline]
 __kasan_slab_free+0x5f/0x80 mm/kasan/common.c:284
 kasan_slab_free include/linux/kasan.h:234 [inline]
 slab_free_hook mm/slub.c:2540 [inline]
 slab_free mm/slub.c:6663 [inline]
 kfree+0x2f8/0x6e0 mm/slub.c:6871
 device_release+0xa4/0x240 drivers/base/core.c:2565
 kobject_cleanup lib/kobject.c:689 [inline]
 kobject_release lib/kobject.c:720 [inline]
 kref_put include/linux/kref.h:65 [inline]
 kobject_put+0x1e7/0x590 lib/kobject.c:737
 put_device drivers/base/core.c:3797 [inline]
 device_unregister+0x2f/0xc0 drivers/base/core.c:3920
 hci_conn_del_sysfs+0xb4/0x180 net/bluetooth/hci_sysfs.c:79
 hci_conn_cleanup net/bluetooth/hci_conn.c:173 [inline]
 hci_conn_del+0x657/0x1180 net/bluetooth/hci_conn.c:1234
 hci_disconn_complete_evt+0x410/0xa00 net/bluetooth/hci_event.c:3451
 hci_event_func net/bluetooth/hci_event.c:7719 [inline]
 hci_event_packet+0xa10/0x11c0 net/bluetooth/hci_event.c:7773
 hci_rx_work+0x2c9/0xeb0 net/bluetooth/hci_core.c:4076
 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257
 process_scheduled_works kernel/workqueue.c:3340 [inline]
 worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421
 kthread+0x3c5/0x780 kernel/kthread.c:463
 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246

The buggy address belongs to the object at ffff8880796b0000
 which belongs to the cache kmalloc-8k of size 8192
The buggy address is located 16 bytes inside of
 freed 8192-byte region [ffff8880796b0000, ffff8880796b2000)

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x796b0
head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
anon flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff)
page_type: f5(slab)
raw: 00fff00000000040 ffff88813ff27280 0000000000000000 0000000000000001
raw: 0000000000000000 0000000000020002 00000000f5000000 0000000000000000
head: 00fff00000000040 ffff88813ff27280 0000000000000000 0000000000000001
head: 0000000000000000 0000000000020002 00000000f5000000 0000000000000000
head: 00fff00000000003 ffffea0001e5ac01 00000000ffffffff 00000000ffffffff
head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd2040(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 5657, tgid 5657 (dhcpcd-run-hook), ts 79819636908, free_ts 79814310558
 set_page_owner include/linux/page_owner.h:32 [inline]
 post_alloc_hook+0x1af/0x220 mm/page_alloc.c:1845
 prep_new_page mm/page_alloc.c:1853 [inline]
 get_page_from_freelist+0xd0b/0x31a0 mm/page_alloc.c:3879
 __alloc_frozen_pages_noprof+0x25f/0x2440 mm/page_alloc.c:5183
 alloc_pages_mpol+0x1fb/0x550 mm/mempolicy.c:2416
 alloc_slab_page mm/slub.c:3075 [inline]
 allocate_slab mm/slub.c:3248 [inline]
 new_slab+0x2c3/0x430 mm/slub.c:3302
 ___slab_alloc+0xe18/0x1c90 mm/slub.c:4651
 __slab_alloc.constprop.0+0x63/0x110 mm/slub.c:4774
 __slab_alloc_node mm/slub.c:4850 [inline]
 slab_alloc_node mm/slub.c:5246 [inline]
 __kmalloc_cache_noprof+0x477/0x800 mm/slub.c:5766
 kmalloc_noprof include/linux/slab.h:957 [inline]
 kzalloc_noprof include/linux/slab.h:1094 [inline]
 tomoyo_print_bprm security/tomoyo/audit.c:26 [inline]
 tomoyo_init_log+0xc8a/0x2140 security/tomoyo/audit.c:264
 tomoyo_supervisor+0x302/0x13b0 security/tomoyo/common.c:2198
 tomoyo_audit_env_log security/tomoyo/environ.c:36 [inline]
 tomoyo_env_perm+0x191/0x200 security/tomoyo/environ.c:63
 tomoyo_environ security/tomoyo/domain.c:672 [inline]
 tomoyo_find_next_domain+0xec1/0x20b0 security/tomoyo/domain.c:888
 tomoyo_bprm_check_security security/tomoyo/tomoyo.c:102 [inline]
 tomoyo_bprm_check_security+0x12d/0x1d0 security/tomoyo/tomoyo.c:92
 security_bprm_check+0x1b9/0x1e0 security/security.c:794
 search_binary_handler fs/exec.c:1659 [inline]
 exec_binprm fs/exec.c:1701 [inline]
 bprm_execve fs/exec.c:1753 [inline]
 bprm_execve+0x81e/0x1620 fs/exec.c:1729
 do_execveat_common.isra.0+0x4a5/0x610 fs/exec.c:1859
page last free pid 5657 tgid 5657 stack trace:
 reset_page_owner include/linux/page_owner.h:25 [inline]
 free_pages_prepare mm/page_alloc.c:1394 [inline]
 __free_frozen_pages+0x7df/0x1160 mm/page_alloc.c:2901
 discard_slab mm/slub.c:3346 [inline]
 __put_partials+0x130/0x170 mm/slub.c:3886
 qlink_free mm/kasan/quarantine.c:163 [inline]
 qlist_free_all+0x4c/0xf0 mm/kasan/quarantine.c:179
 kasan_quarantine_reduce+0x195/0x1e0 mm/kasan/quarantine.c:286
 __kasan_slab_alloc+0x69/0x90 mm/kasan/common.c:352
 kasan_slab_alloc include/linux/kasan.h:252 [inline]
 slab_post_alloc_hook mm/slub.c:4948 [inline]
 slab_alloc_node mm/slub.c:5258 [inline]
 __kmalloc_cache_noprof+0x274/0x800 mm/slub.c:5766
 kmalloc_noprof include/linux/slab.h:957 [inline]
 tomoyo_print_header security/tomoyo/audit.c:156 [inline]
 tomoyo_init_log+0x197/0x2140 security/tomoyo/audit.c:255
 tomoyo_supervisor+0x302/0x13b0 security/tomoyo/common.c:2198
 tomoyo_audit_env_log security/tomoyo/environ.c:36 [inline]
 tomoyo_env_perm+0x191/0x200 security/tomoyo/environ.c:63
 tomoyo_environ security/tomoyo/domain.c:672 [inline]
 tomoyo_find_next_domain+0xec1/0x20b0 security/tomoyo/domain.c:888
 tomoyo_bprm_check_security security/tomoyo/tomoyo.c:102 [inline]
 tomoyo_bprm_check_security+0x12d/0x1d0 security/tomoyo/tomoyo.c:92
 security_bprm_check+0x1b9/0x1e0 security/security.c:794
 search_binary_handler fs/exec.c:1659 [inline]
 exec_binprm fs/exec.c:1701 [inline]
 bprm_execve fs/exec.c:1753 [inline]
 bprm_execve+0x81e/0x1620 fs/exec.c:1729
 do_execveat_common.isra.0+0x4a5/0x610 fs/exec.c:1859
 do_execve fs/exec.c:1933 [inline]
 __do_sys_execve fs/exec.c:2009 [inline]
 __se_sys_execve fs/exec.c:2004 [inline]
 __x64_sys_execve+0x8e/0xb0 fs/exec.c:2004
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xcd/0xf80 arch/x86/entry/syscall_64.c:94

Memory state around the buggy address:
 ffff8880796aff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff8880796aff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff8880796b0000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                         ^
 ffff8880796b0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff8880796b0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================

Fixes: a106e50be7 ("Bluetooth: HCI: Add support for LL Extended Feature Set")
Reported-by: syzbot+87badbb9094e008e0685@syzkaller.appspotmail.com
Tested-by: syzbot+87badbb9094e008e0685@syzkaller.appspotmail.com
Closes: https://syzbot.org/bug?extid=87badbb9094e008e0685
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Pauli Virtanen <pav@iki.fi>
2026-04-01 16:45:24 -04:00
Pauli Virtanen
aca377208e Bluetooth: hci_sync: fix leaks when hci_cmd_sync_queue_once fails
When hci_cmd_sync_queue_once() returns with error, the destroy callback
will not be called.

Fix leaking references / memory on these failures.

Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:45:00 -04:00
Pauli Virtanen
2969554bcf Bluetooth: hci_sync: hci_cmd_sync_queue_once() return -EEXIST if exists
hci_cmd_sync_queue_once() needs to indicate whether a queue item was
added, so caller can know if callbacks are called, so it can avoid
leaking resources.

Change the function to return -EEXIST if queue item already exists.

Modify all callsites to handle that.

Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:44:38 -04:00
Oleh Konko
2b2bf47cd7 Bluetooth: hci_event: move wake reason storage into validated event handlers
hci_store_wake_reason() is called from hci_event_packet() immediately
after stripping the HCI event header but before hci_event_func()
enforces the per-event minimum payload length from hci_ev_table.
This means a short HCI event frame can reach bacpy() before any bounds
check runs.

Rather than duplicating skb parsing and per-event length checks inside
hci_store_wake_reason(), move wake-address storage into the individual
event handlers after their existing event-length validation has
succeeded. Convert hci_store_wake_reason() into a small helper that only
stores an already-validated bdaddr while the caller holds hci_dev_lock().
Use the same helper after hci_event_func() with a NULL address to
preserve the existing unexpected-wake fallback semantics when no
validated event handler records a wake address.

Annotate the helper with __must_hold(&hdev->lock) and add
lockdep_assert_held(&hdev->lock) so future call paths keep the lock
contract explicit.

Call the helper from hci_conn_request_evt(), hci_conn_complete_evt(),
hci_sync_conn_complete_evt(), le_conn_complete_evt(),
hci_le_adv_report_evt(), hci_le_ext_adv_report_evt(),
hci_le_direct_adv_report_evt(), hci_le_pa_sync_established_evt(), and
hci_le_past_received_evt().

Fixes: 2f20216c1d ("Bluetooth: Emit controller suspend and resume events")
Cc: stable@vger.kernel.org
Signed-off-by: Oleh Konko <security@1seal.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:44:15 -04:00
Cen Zhang
8a5b0135d4 Bluetooth: SCO: fix race conditions in sco_sock_connect()
sco_sock_connect() checks sk_state and sk_type without holding
the socket lock. Two concurrent connect() syscalls on the same
socket can both pass the check and enter sco_connect(), leading
to use-after-free.

The buggy scenario involves three participants and was confirmed
with additional logging instrumentation:

  Thread A (connect):    HCI disconnect:      Thread B (connect):

  sco_sock_connect(sk)                        sco_sock_connect(sk)
  sk_state==BT_OPEN                           sk_state==BT_OPEN
  (pass, no lock)                             (pass, no lock)
  sco_connect(sk):                            sco_connect(sk):
    hci_dev_lock                                hci_dev_lock
    hci_connect_sco                               <- blocked
      -> hcon1
    sco_conn_add->conn1
    lock_sock(sk)
    sco_chan_add:
      conn1->sk = sk
      sk->conn = conn1
    sk_state=BT_CONNECT
    release_sock
    hci_dev_unlock
                           hci_dev_lock
                           sco_conn_del:
                             lock_sock(sk)
                             sco_chan_del:
                               sk->conn=NULL
                               conn1->sk=NULL
                               sk_state=
                                 BT_CLOSED
                               SOCK_ZAPPED
                             release_sock
                           hci_dev_unlock
                                                  (unblocked)
                                                  hci_connect_sco
                                                    -> hcon2
                                                  sco_conn_add
                                                    -> conn2
                                                  lock_sock(sk)
                                                  sco_chan_add:
                                                    sk->conn=conn2
                                                  sk_state=
                                                    BT_CONNECT
                                                  // zombie sk!
                                                  release_sock
                                                  hci_dev_unlock

Thread B revives a BT_CLOSED + SOCK_ZAPPED socket back to
BT_CONNECT. Subsequent cleanup triggers double sock_put() and
use-after-free. Meanwhile conn1 is leaked as it was orphaned
when sco_conn_del() cleared the association.

Fix this by:
- Moving lock_sock() before the sk_state/sk_type checks in
  sco_sock_connect() to serialize concurrent connect attempts
- Fixing the sk_type != SOCK_SEQPACKET check to actually
  return the error instead of just assigning it
- Adding a state re-check in sco_connect() after lock_sock()
  to catch state changes during the window between the locks
- Adding sco_pi(sk)->conn check in sco_chan_add() to prevent
  double-attach of a socket to multiple connections
- Adding hci_conn_drop() on sco_chan_add failure to prevent
  HCI connection leaks

Fixes: 9a8ec9e8eb ("Bluetooth: SCO: Fix possible circular locking dependency on sco_connect_cfm")
Signed-off-by: Cen Zhang <zzzccc427@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:43:53 -04:00
Pauli Virtanen
a834a0b66e Bluetooth: hci_sync: call destroy in hci_cmd_sync_run if immediate
hci_cmd_sync_run() may run the work immediately if called from existing
sync work (otherwise it queues a new sync work). In this case it fails
to call the destroy() function.

On immediate run, make it behave same way as if item was queued
successfully: call destroy, and return 0.

The only callsite is hci_abort_conn() via hci_cmd_sync_run_once(), and
this changes its return value. However, its return value is not used
except as the return value for hci_disconnect(), and nothing uses the
return value of hci_disconnect(). Hence there should be no behavior
change anywhere.

Fixes: c898f6d7b0 ("Bluetooth: hci_sync: Introduce hci_cmd_sync_run/hci_cmd_sync_run_once")
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-04-01 16:39:43 -04:00
Simon Trimmer
e74c38ef6f ASoC: amd: ps: Fix missing leading zeros in subsystem_device SSID log
Ensure that subsystem_device is printed with leading zeros when combined
with subsystem_vendor to form the SSID. Without this, devices with upper
bits unset may appear to have an incorrect SSID in the debug output.

Signed-off-by: Simon Trimmer <simont@opensource.cirrus.com>
Link: https://patch.msgid.link/20260331131916.145546-1-simont@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-04-01 13:08:47 +01:00
Pablo Neira Ayuso
da107398cb netfilter: nf_tables: reject immediate NF_QUEUE verdict
nft_queue is always used from userspace nftables to deliver the NF_QUEUE
verdict. Immediately emitting an NF_QUEUE verdict is never used by the
userspace nft tools, so reject immediate NF_QUEUE verdicts.

The arp family does not provide queue support, but such an immediate
verdict is still reachable. Globally reject NF_QUEUE immediate verdicts
to address this issue.

Fixes: f342de4e2f ("netfilter: nf_tables: reject QUEUE/DROP verdict parameters")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:30 +02:00
Pablo Neira Ayuso
3d5d488f11 netfilter: x_tables: restrict xt_check_match/xt_check_target extensions for NFPROTO_ARP
Weiming Shi says:

xt_match and xt_target structs registered with NFPROTO_UNSPEC can be
loaded by any protocol family through nft_compat. When such a
match/target sets .hooks to restrict which hooks it may run on, the
bitmask uses NF_INET_* constants. This is only correct for families
whose hook layout matches NF_INET_*: IPv4, IPv6, INET, and bridge
all share the same five hooks (PRE_ROUTING ... POST_ROUTING).

ARP only has three hooks (IN=0, OUT=1, FORWARD=2) with different
semantics. Because NF_ARP_OUT == 1 == NF_INET_LOCAL_IN, the .hooks
validation silently passes for the wrong reasons, allowing matches to
run on ARP chains where the hook assumptions (e.g. state->in being
set on input hooks) do not hold. This leads to NULL pointer
dereferences; xt_devgroup is one concrete example:

 Oops: general protection fault, probably for non-canonical address 0xdffffc0000000044: 0000 [#1] SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000220-0x0000000000000227]
 RIP: 0010:devgroup_mt+0xff/0x350
 Call Trace:
  <TASK>
  nft_match_eval (net/netfilter/nft_compat.c:407)
  nft_do_chain (net/netfilter/nf_tables_core.c:285)
  nft_do_chain_arp (net/netfilter/nft_chain_filter.c:61)
  nf_hook_slow (net/netfilter/core.c:623)
  arp_xmit (net/ipv4/arp.c:666)
  </TASK>
 Kernel panic - not syncing: Fatal exception in interrupt

Fix it by restricting arptables to NFPROTO_ARP extensions only.
Note that arptables-legacy only supports:

- arpt_CLASSIFY
- arpt_mangle
- arpt_MARK

that provide explicit NFPROTO_ARP match/target declarations.

Fixes: 9291747f11 ("netfilter: xtables: add device group match")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Yifan Wu
9862ef9ab0 netfilter: ipset: drop logically empty buckets in mtype_del
mtype_del() counts empty slots below n->pos in k, but it only drops the
bucket when both n->pos and k are zero. This misses buckets whose live
entries have all been removed while n->pos still points past deleted slots.

Treat a bucket as empty when all positions below n->pos are unused and
release it directly instead of shrinking it further.

Fixes: 8af1c6fbd9 ("netfilter: ipset: Fix forceadd evaluation path")
Cc: stable@vger.kernel.org
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <dstsmallbird@foxmail.com>
Signed-off-by: Yifan Wu <yifanwucs@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Reviewed-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Pablo Neira Ayuso
917b61fa20 netfilter: ctnetlink: ignore explicit helper on new expectations
Use the existing master conntrack helper, anything else is not really
supported and it just makes validation more complicated, so just ignore
what helper userspace suggests for this expectation.

This was uncovered when validating CTA_EXPECT_CLASS via different helper
provided by userspace than the existing master conntrack helper:

  BUG: KASAN: slab-out-of-bounds in nf_ct_expect_related_report+0x2479/0x27c0
  Read of size 4 at addr ffff8880043fe408 by task poc/102
  Call Trace:
   nf_ct_expect_related_report+0x2479/0x27c0
   ctnetlink_create_expect+0x22b/0x3b0
   ctnetlink_new_expect+0x4bd/0x5c0
   nfnetlink_rcv_msg+0x67a/0x950
   netlink_rcv_skb+0x120/0x350

Allowing to read kernel memory bytes off the expectation boundary.

CTA_EXPECT_HELP_NAME is still used to offer the helper name to userspace
via netlink dump.

Fixes: bd07793705 ("netfilter: nfnetlink_queue: allow to attach expectations to conntracks")
Reported-by: Qi Tang <tpluszz77@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Qi Tang
35177c6877 netfilter: ctnetlink: zero expect NAT fields when CTA_EXPECT_NAT absent
ctnetlink_alloc_expect() allocates expectations from a non-zeroing
slab cache via nf_ct_expect_alloc().  When CTA_EXPECT_NAT is not
present in the netlink message, saved_addr and saved_proto are
never initialized.  Stale data from a previous slab occupant can
then be dumped to userspace by ctnetlink_exp_dump_expect(), which
checks these fields to decide whether to emit CTA_EXPECT_NAT.

The safe sibling nf_ct_expect_init(), used by the packet path,
explicitly zeroes these fields.

Zero saved_addr, saved_proto and dir in the else branch, guarded
by IS_ENABLED(CONFIG_NF_NAT) since these fields only exist when
NAT is enabled.

Confirmed by priming the expect slab with NAT-bearing expectations,
freeing them, creating a new expectation without CTA_EXPECT_NAT,
and observing that the ctnetlink dump emits a spurious
CTA_EXPECT_NAT containing stale data from the prior allocation.

Fixes: 076a0ca026 ("netfilter: ctnetlink: add NAT support for expectations")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Qi Tang <tpluszz77@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Qi Tang
a242a9ae58 netfilter: nf_conntrack_helper: pass helper to expect cleanup
nf_conntrack_helper_unregister() calls nf_ct_expect_iterate_destroy()
to remove expectations belonging to the helper being unregistered.
However, it passes NULL instead of the helper pointer as the data
argument, so expect_iter_me() never matches any expectation and all
of them survive the cleanup.

After unregister returns, nfnl_cthelper_del() frees the helper
object immediately.  Subsequent expectation dumps or packet-driven
init_conntrack() calls then dereference the freed exp->helper,
causing a use-after-free.

Pass the actual helper pointer so expectations referencing it are
properly destroyed before the helper object is freed.

  BUG: KASAN: slab-use-after-free in string+0x38f/0x430
  Read of size 1 at addr ffff888003b14d20 by task poc/103
  Call Trace:
   string+0x38f/0x430
   vsnprintf+0x3cc/0x1170
   seq_printf+0x17a/0x240
   exp_seq_show+0x2e5/0x560
   seq_read_iter+0x419/0x1280
   proc_reg_read+0x1ac/0x270
   vfs_read+0x179/0x930
   ksys_read+0xef/0x1c0
  Freed by task 103:
  The buggy address is located 32 bytes inside of
   freed 192-byte region [ffff888003b14d00, ffff888003b14dc0)

Fixes: ac7b848390 ("netfilter: expect: add and use nf_ct_expect_iterate helpers")
Signed-off-by: Qi Tang <tpluszz77@gmail.com>
Reviewed-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Florian Westphal
b7e8590987 netfilter: ipset: use nla_strcmp for IPSET_ATTR_NAME attr
IPSET_ATTR_NAME and IPSET_ATTR_NAMEREF are of NLA_STRING type, they
cannot be treated like a c-string.

They either have to be switched to NLA_NUL_STRING, or the compare
operations need to use the nla functions.

Fixes: f830837f0e ("netfilter: ipset: list:set set type support")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Florian Westphal
a958a4f90d netfilter: x_tables: ensure names are nul-terminated
Reject names that lack a \0 character before feeding them
to functions that expect c-strings.

Fixes tag is the most recent commit that needs this change.

Fixes: c38c4597e4 ("netfilter: implement xt_cgroup cgroup2 path match")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Florian Westphal
6d52a4a052 netfilter: nfnetlink_log: account for netlink header size
This is a followup to an old bug fix: NLMSG_DONE needs to account
for the netlink header size, not just the attribute size.

This can result in a WARN splat + drop of the netlink message,
but other than this there are no ill effects.

Fixes: 9dfa1dfe4d ("netfilter: nf_log: account for size of NLMSG_DONE attribute")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:55:29 +02:00
Pablo Neira Ayuso
76522fcdbc netfilter: flowtable: strictly check for maximum number of actions
The maximum number of flowtable hardware offload actions in IPv6 is:

* ethernet mangling (4 payload actions, 2 for each ethernet address)
* SNAT (4 payload actions)
* DNAT (4 payload actions)
* Double VLAN (4 vlan actions, 2 for popping vlan, and 2 for pushing)
  for QinQ.
* Redirect (1 action)

Which makes 17, while the maximum is 16. But act_ct supports for tunnels
actions too. Note that payload action operates at 32-bit word level, so
mangling an IPv6 address takes 4 payload actions.

Update flow_action_entry_next() calls to check for the maximum number of
supported actions.

While at it, rise the maximum number of actions per flow from 16 to 24
so this works fine with IPv6 setups.

Fixes: c29f74e0df ("netfilter: nf_flow_table: hardware offload support")
Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-04-01 11:50:14 +02:00
Geoffrey D. Bennett
a0dafdbd10 ALSA: usb-audio: Exclude Scarlett 2i2 1st Gen (8016) from SKIP_IFACE_SETUP
Same issue as the other 1st Gen Scarletts: QUIRK_FLAG_SKIP_IFACE_SETUP
causes distorted audio on this revision of the Scarlett 2i2 1st Gen
(1235:8016).

Fixes: 38c322068a ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP")
Reported-by: lukas-reineke [https://github.com/geoffreybennett/linux-fcp/issues/54]
Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://patch.msgid.link/acytr8aEUba4VXmZ@m.b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-04-01 09:43:21 +02:00
Michal Piekos
48b3cd6926 net: stmmac: skip VLAN restore when VLAN hash ops are missing
stmmac_vlan_restore() unconditionally calls stmmac_vlan_update() when
NETIF_F_VLAN_FEATURES is set. On platforms where priv->hw->vlan (or
->update_vlan_hash) is not provided, stmmac_update_vlan_hash() returns
-EINVAL via stmmac_do_void_callback(), resulting in a spurious
"Failed to restore VLANs" error even when no VLAN filtering is in use.

Remove not needed comment.
Remove not used return value from stmmac_vlan_restore().

Tested on Orange Pi Zero 3.

Fixes: bd7ad51253 ("net: stmmac: Fix VLAN HW state restore")
Signed-off-by: Michal Piekos <michal.piekos@mmpsystems.pl>
Link: https://patch.msgid.link/20260328-vlan-restore-error-v4-1-f88624c530dc@mmpsystems.pl
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-31 19:45:26 -07:00
Yufan Chen
c0fd0fe745 net: ftgmac100: fix ring allocation unwind on open failure
ftgmac100_alloc_rings() allocates rx_skbs, tx_skbs, rxdes, txdes, and
rx_scratch in stages. On intermediate failures it returned -ENOMEM
directly, leaking resources allocated earlier in the function.

Rework the failure path to use staged local unwind labels and free
allocated resources in reverse order before returning -ENOMEM. This
matches common netdev allocation cleanup style.

Fixes: d72e01a043 ("ftgmac100: Use a scratch buffer for failed RX allocations")
Cc: stable@vger.kernel.org
Signed-off-by: Yufan Chen <yufan.chen@linux.dev>
Link: https://patch.msgid.link/20260328163257.60836-1-yufan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-31 19:38:36 -07:00
Li Xiasong
5dd8025a49 mptcp: fix soft lockup in mptcp_recvmsg()
syzbot reported a soft lockup in mptcp_recvmsg() [0].

When receiving data with MSG_PEEK | MSG_WAITALL flags, the skb is not
removed from the sk_receive_queue. This causes sk_wait_data() to always
find available data and never perform actual waiting, leading to a soft
lockup.

Fix this by adding a 'last' parameter to track the last peeked skb.
This allows sk_wait_data() to make informed waiting decisions and prevent
infinite loops when MSG_PEEK is used.

[0]:
watchdog: BUG: soft lockup - CPU#2 stuck for 156s! [server:1963]
Modules linked in:
CPU: 2 UID: 0 PID: 1963 Comm: server Not tainted 6.19.0-rc8 #61 PREEMPT(none)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
RIP: 0010:sk_wait_data+0x15/0x190
Code: 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 41 56 41 55 41 54 49 89 f4 55 48 89 d5 53 48 89 fb <48> 83 ec 30 65 48 8b 05 17 a4 6b 01 48 89 44 24 28 31 c0 65 48 8b
RSP: 0018:ffffc90000603ca0 EFLAGS: 00000246
RAX: 0000000000000000 RBX: ffff888102bf0800 RCX: 0000000000000001
RDX: 0000000000000000 RSI: ffffc90000603d18 RDI: ffff888102bf0800
RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000000101
R10: 0000000000000000 R11: 0000000000000075 R12: ffffc90000603d18
R13: ffff888102bf0800 R14: ffff888102bf0800 R15: 0000000000000000
FS:  00007f6e38b8c4c0(0000) GS:ffff8881b877e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055aa7bff1680 CR3: 0000000105cbe000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 mptcp_recvmsg+0x547/0x8c0 net/mptcp/protocol.c:2329
 inet_recvmsg+0x11f/0x130 net/ipv4/af_inet.c:891
 sock_recvmsg+0x94/0xc0 net/socket.c:1100
 __sys_recvfrom+0xb2/0x130 net/socket.c:2256
 __x64_sys_recvfrom+0x1f/0x30 net/socket.c:2267
 do_syscall_64+0x59/0x2d0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x76/0x7e arch/x86/entry/entry_64.S:131
RIP: 0033:0x7f6e386a4a1d
Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8d 05 f1 de 2c 00 41 89 ca 8b 00 85 c0 75 20 45 31 c9 45 31 c0 b8 2d 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 6b f3 c3 66 0f 1f 84 00 00 00 00 00 41 56 41
RSP: 002b:00007ffc3c4bb078 EFLAGS: 00000246 ORIG_RAX: 000000000000002d
RAX: ffffffffffffffda RBX: 000000000000861e RCX: 00007f6e386a4a1d
RDX: 00000000000003ff RSI: 00007ffc3c4bb150 RDI: 0000000000000004
RBP: 00007ffc3c4bb570 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000103 R11: 0000000000000246 R12: 00005605dbc00be0
R13: 00007ffc3c4bb650 R14: 0000000000000000 R15: 0000000000000000
 </TASK>

Fixes: 8e04ce45a8 ("mptcp: fix MSG_PEEK stream corruption")
Signed-off-by: Li Xiasong <lixiasong1@huawei.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260330120335.659027-1-lixiasong1@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-31 18:58:37 -07:00
Zhengchuan Liang
9ca562bb8e net: ipv6: flowlabel: defer exclusive option free until RCU teardown
`ip6fl_seq_show()` walks the global flowlabel hash under the seq-file
RCU read-side lock and prints `fl->opt->opt_nflen` when an option block
is present.

Exclusive flowlabels currently free `fl->opt` as soon as `fl->users`
drops to zero in `fl_release()`. However, the surrounding
`struct ip6_flowlabel` remains visible in the global hash table until
later garbage collection removes it and `fl_free_rcu()` finally tears it
down.

A concurrent `/proc/net/ip6_flowlabel` reader can therefore race that
early `kfree()` and dereference freed option state, triggering a crash
in `ip6fl_seq_show()`.

Fix this by keeping `fl->opt` alive until `fl_free_rcu()`. That matches
the lifetime already required for the enclosing flowlabel while readers
can still reach it under RCU.

Fixes: d3aedd5ebd ("ipv6 flowlabel: Convert hash list to RCU.")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Ren Wei <enjou1224z@gmail.com>
Signed-off-by: Zhengchuan Liang <zcliangcn@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/07351f0ec47bcee289576f39f9354f4a64add6e4.1774855883.git.zcliangcn@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-31 15:44:29 -07:00
Linus Torvalds
9147566d80 Merge tag 'sched_ext-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:

 - Fix SCX_KICK_WAIT deadlock where multiple CPUs waiting for each other
   in hardirq context form a cycle. Move the wait to a balance callback
   which can drop the rq lock and process IPIs.

 - Fix inconsistent NUMA node lookup in scx_select_cpu_dfl() where
   the waker_node used cpu_to_node() while prev_cpu used
   scx_cpu_node_if_enabled(), leading to undefined behavior when
   per-node idle tracking is disabled.

* tag 'sched_ext-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  selftests/sched_ext: Add cyclic SCX_KICK_WAIT stress test
  sched_ext: Fix SCX_KICK_WAIT deadlock by deferring wait to balance callback
  sched_ext: Fix inconsistent NUMA node lookup in scx_select_cpu_dfl()
2026-03-31 14:23:12 -07:00
Linus Torvalds
0958d657b4 Merge tag 'wq-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fix from Tejun Heo:

 - Fix false positive stall reports on weakly ordered architectures
   where the lockless worklist/timestamp check in the watchdog can
   observe stale values due to memory reordering.

   Recheck under pool->lock to confirm.

* tag 'wq-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Better describe stall check
  workqueue: Fix false positive stall reports
2026-03-31 14:20:39 -07:00
Linus Torvalds
53d85a2056 Merge tag 'cgroup-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:

 - Fix cgroup rmdir racing with dying tasks.

   Deferred task cgroup unlink introduced a window where cgroup.procs
   is empty but the cgroup is still populated, causing rmdir to fail
   with -EBUSY and selftest failures.

   Make rmdir wait for dying tasks to fully leave and fix selftests to
   not depend on synchronous populated updates.

 - Fix cpuset v1 task migration failure from empty cpusets under strict
   security policies.

   When CPU hotplug removes the last CPU from a v1 cpuset, tasks must be
   migrated to an ancestor without a security_task_setscheduler() check
   that would block the migration.

* tag 'cgroup-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: Skip security check for hotplug induced v1 task migration
  cgroup/cpuset: Simplify setsched decision check in task iteration loop of cpuset_can_attach()
  cgroup: Fix cgroup_drain_dying() testing the wrong condition
  selftests/cgroup: Don't require synchronous populated update on task exit
  cgroup: Wait for dying tasks to leave on rmdir
2026-03-31 13:59:51 -07:00
Waiman Long
089f3fcd69 cgroup/cpuset: Skip security check for hotplug induced v1 task migration
When a CPU hot removal causes a v1 cpuset to lose all its CPUs, the
cpuset hotplug handler will schedule a work function to migrate tasks
in that cpuset with no CPU to its ancestor to enable those tasks to
continue running.

If a strict security policy is in place, however, the task migration
may fail when security_task_setscheduler() call in cpuset_can_attach()
returns a -EACCES error. That will mean that those tasks will have
no CPU to run on. The system administrators will have to explicitly
intervene to either add CPUs to that cpuset or move the tasks elsewhere
if they are aware of it.

This problem was found by a reported test failure in the LTP's
cpuset_hotplug_test.sh. Fix this problem by treating this special case as
an exception to skip the setsched security check in cpuset_can_attach()
when a v1 cpuset with tasks have no CPU left.

With that patch applied, the cpuset_hotplug_test.sh test can be run
successfully without failure.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-31 09:14:13 -10:00
Waiman Long
bbe5ab8191 cgroup/cpuset: Simplify setsched decision check in task iteration loop of cpuset_can_attach()
Centralize the check required to run security_task_setscheduler() in
the task iteration loop of cpuset_can_attach() outside of the loop as
it has no dependency on the characteristics of the tasks themselves.

There is no functional change.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-31 09:14:13 -10:00
Linus Torvalds
dbf00d8d23 Merge tag 'fs_for_v7.0-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull udf fix from Jan Kara:
 "Fix for a race in UDF that can lead to memory corruption"

* tag 'fs_for_v7.0-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  udf: Fix race between file type conversion and writeback
  mpage: Provide variant of mpage_writepages() with own optional folio handler
2026-03-31 10:28:08 -07:00
Zhang Heng
dd9b99b822 ALSA: hda/realtek: add quirk for Acer Swift SFG14-73
fix mute/micmute LEDs and headset microphone for Acer Swift SFG14-73.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=220279
Cc: stable@vger.kernel.org
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260331094614.186063-1-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-31 16:21:15 +02:00
Alexander Savenko
217d5bc9f9 ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 14IMH9
The Lenovo Yoga Pro 7 14IMH9 (DMI: 83E2) shares PCI SSID 17aa:3847
with the Legion 7 16ACHG6, but has a different codec subsystem ID
(17aa:38cf). The existing SND_PCI_QUIRK for 17aa:3847 applies
ALC287_FIXUP_LEGION_16ACHG6, which attempts to initialize an external
I2C amplifier (CLSA0100) that is not present on the Yoga Pro 7 14IMH9.

As a result, pin 0x17 (bass speakers) is connected to DAC 0x06 which
has no volume control, making hardware volume adjustment completely
non-functional. Audio is either silent or at maximum volume regardless
of the slider position.

Add a HDA_CODEC_QUIRK entry using the codec subsystem ID (17aa:38cf)
to correctly identify the Yoga Pro 7 14IMH9 and apply
ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN, which redirects pin 0x17 to
DAC 0x02 and restores proper volume control. The existing Legion entry
is preserved unchanged.

This follows the same pattern used for 17aa:386e, where Legion Y9000X
and Yoga Pro 7 14ARP8 share a PCI SSID but are distinguished via
HDA_CODEC_QUIRK.

Link: https://github.com/nomad4tech/lenovo-yoga-pro-7-linux
Tested-by: Alexander Savenko <alex.sav4387@gmail.com>
Signed-off-by: Alexander Savenko <alex.sav4387@gmail.com>
Link: https://patch.msgid.link/20260331082929.44890-1-alex.sav4387@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-31 16:21:00 +02:00
Xiang Mei
fa6e249633 bridge: mrp: reject zero test interval to avoid OOM panic
br_mrp_start_test() and br_mrp_start_in_test() accept the user-supplied
interval value from netlink without validation. When interval is 0,
usecs_to_jiffies(0) yields 0, causing the delayed work
(br_mrp_test_work_expired / br_mrp_in_test_work_expired) to reschedule
itself with zero delay. This creates a tight loop on system_percpu_wq
that allocates and transmits MRP test frames at maximum rate, exhausting
all system memory and causing a kernel panic via OOM deadlock.

The same zero-interval issue applies to br_mrp_start_in_test_parse()
for interconnect test frames.

Use NLA_POLICY_MIN(NLA_U32, 1) in the nla_policy tables for both
IFLA_BRIDGE_MRP_START_TEST_INTERVAL and
IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL, so zero is rejected at the
netlink attribute parsing layer before the value ever reaches the
workqueue scheduling code. This is consistent with how other bridge
subsystems (br_fdb, br_mst) enforce range constraints on netlink
attributes.

Fixes: 20f6a05ef6 ("bridge: mrp: Rework the MRP netlink interface")
Fixes: 7ab1748e4c ("bridge: mrp: Extend MRP netlink interface for configuring MRP interconnect")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20260328063000.1845376-1-xmei5@asu.edu
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-31 16:11:24 +02:00
Julian Braha
e920c36f20 ASoC: Intel: boards: fix unmet dependency on PINCTRL
This reverts commit c073f07576 ("ASoC: Intel: sof_sdw: select PINCTRL_CS42L43 and SPI_CS42L43")

Currently, SND_SOC_INTEL_SOUNDWIRE_SOF_MACH selects PINCTRL_CS42L43
without also selecting or depending on PINCTRL, despite PINCTRL_CS42L43
depending on PINCTRL.

See the following Kbuild warning:

WARNING: unmet direct dependencies detected for PINCTRL_CS42L43
  Depends on [n]: PINCTRL [=n] && MFD_CS42L43 [=m]
  Selected by [m]:
  - SND_SOC_INTEL_SOUNDWIRE_SOF_MACH [=m] && SOUND [=y] && SND [=m] && SND_SOC [=m] && SND_SOC_INTEL_MACH [=y] && (SND_SOC_SOF_INTEL_COMMON [=m] || !SND_SOC_SOF_INTEL_COMMON [=m]) && SND_SOC_SOF_INTEL_SOUNDWIRE [=m] && I2C [=y] && SPI_MASTER [=y] && ACPI [=y] && (MFD_INTEL_LPSS [=n] || COMPILE_TEST [=y]) && (SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES [=n] || COMPILE_TEST [=y]) && SOUNDWIRE [=m]

In response to v1 of this patch [1], Arnd pointed out that there is
no compile-time dependency sof_sdw and the PINCTRL_CS42L43 driver.
After testing, I can confirm that the kernel compiled with
SND_SOC_INTEL_SOUNDWIRE_SOF_MACH enabled and PINCTRL_CS42L43 disabled.

This unmet dependency was detected by kconfirm, a static analysis
tool for Kconfig.

Link: https://lore.kernel.org/all/b8aecc71-1fed-4f52-9f6c-263fbe56d493@app.fastmail.com/ [1]
Fixes: c073f07576 ("ASoC: Intel: sof_sdw: select PINCTRL_CS42L43 and SPI_CS42L43")
Signed-off-by: Julian Braha <julianbraha@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://patch.msgid.link/20260325001522.1727678-1-julianbraha@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-31 14:05:17 +01:00
Sachin Mokashi
51e3eb3d07 ASoC: Intel: ehl_rt5660: Use the correct rtd->dev device in hw_params
In rt5660_hw_params(), the error path for snd_soc_dai_set_sysclk()
correctly uses rtd->dev as the logging device, but the error path for
snd_soc_dai_set_pll() uses codec_dai->dev instead.

These two devices are distinct:
- rtd->dev is the platform device of the PCM runtime (the Intel HDA/SSP
  controller, e.g. 0000:00:1f.3), which owns the machine driver callback.
- codec_dai->dev is the I2C device of the rt5660 codec itself
  (i2c-10EC5660:00).

Since hw_params is a machine driver operation and both calls are made
within the same function from the machine driver's context, all error
messages should be attributed to rtd->dev. Using codec_dai->dev for one
of them would suggest the error originates inside the codec driver,
which is misleading.

Align the PLL error log with the sysclk one to use rtd->dev, matching
the convention used by all other Intel board drivers in this directory.

Signed-off-by: Sachin Mokashi <sachin.mokashi@intel.com>
Link: https://patch.msgid.link/20260327131439.1330373-1-sachin.mokashi@intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-31 12:37:13 +01:00
Paolo Abeni
47ab2c12c8 Merge branch 'correct-bd-length-masks-and-bql-accounting-for-multi-bd-tx-packets'
Suraj Gupta says:

====================
Correct BD length masks and BQL accounting for multi-BD TX packets

This patch series fixes two issues in the Xilinx AXI Ethernet driver:
 1. Corrects the BD length masks to match the AXIDMA IP spec.
 2. Fixes BQL accounting for multi-BD TX packets.
====================

Link: https://patch.msgid.link/20260327073238.134948-1-suraj.gupta2@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-31 12:09:26 +02:00
Suraj Gupta
d1978d03e8 net: xilinx: axienet: Fix BQL accounting for multi-BD TX packets
When a TX packet spans multiple buffer descriptors (scatter-gather),
axienet_free_tx_chain sums the per-BD actual length from descriptor
status into a caller-provided accumulator. That sum is reset on each
NAPI poll. If the BDs for a single packet complete across different
polls, the earlier bytes are lost and never credited to BQL. This
causes BQL to think bytes are permanently in-flight, eventually
stalling the TX queue.

The SKB pointer is stored only on the last BD of a packet. When that
BD completes, use skb->len for the byte count instead of summing
per-BD status lengths. This matches netdev_sent_queue(), which debits
skb->len, and naturally survives across polls because no partial
packet contributes to the accumulator.

Fixes: c900e49d58 ("net: xilinx: axienet: Implement BQL")
Signed-off-by: Suraj Gupta <suraj.gupta2@amd.com>
Reviewed-by: Sean Anderson <sean.anderson@linux.dev>
Link: https://patch.msgid.link/20260327073238.134948-3-suraj.gupta2@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-31 12:09:12 +02:00
Suraj Gupta
393e0b4f17 net: xilinx: axienet: Correct BD length masks to match AXIDMA IP spec
The XAXIDMA_BD_CTRL_LENGTH_MASK and XAXIDMA_BD_STS_ACTUAL_LEN_MASK
macros were defined as 0x007FFFFF (23 bits), but the AXI DMA IP
product guide (PG021) specifies the buffer length field as bits 25:0
(26 bits). Update both masks to match the IP documentation.

In practice this had no functional impact, since Ethernet frames are
far smaller than 2^23 bytes and the extra bits were always zero, but
the masks should still reflect the hardware specification.

Fixes: 8a3b7a252d ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Suraj Gupta <suraj.gupta2@amd.com>
Reviewed-by: Sean Anderson <sean.anderson@linux.dev>
Link: https://patch.msgid.link/20260327073238.134948-2-suraj.gupta2@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-31 12:09:12 +02:00
Pengpeng Hou
30fe3f5f64 NFC: pn533: bound the UART receive buffer
pn532_receive_buf() appends every incoming byte to dev->recv_skb and
only resets the buffer after pn532_uart_rx_is_frame() recognizes a
complete frame. A continuous stream of bytes without a valid PN532 frame
header therefore keeps growing the skb until skb_put_u8() hits the tail
limit.

Drop the accumulated partial frame once the fixed receive buffer is full
so malformed UART traffic cannot grow the skb past
PN532_UART_SKB_BUFF_LEN.

Fixes: c656aa4c27 ("nfc: pn533: add UART phy driver")
Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Link: https://patch.msgid.link/20260326142033.82297-1-pengpeng@iscas.ac.cn
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-31 11:04:30 +02:00
Xiang Mei
2884bf72fb net: bonding: fix use-after-free in bond_xmit_broadcast()
bond_xmit_broadcast() reuses the original skb for the last slave
(determined by bond_is_last_slave()) and clones it for others.
Concurrent slave enslave/release can mutate the slave list during
RCU-protected iteration, changing which slave is "last" mid-loop.
This causes the original skb to be double-consumed (double-freed).

Replace the racy bond_is_last_slave() check with a simple index
comparison (i + 1 == slaves_count) against the pre-snapshot slave
count taken via READ_ONCE() before the loop.  This preserves the
zero-copy optimization for the last slave while making the "last"
determination stable against concurrent list mutations.

The UAF can trigger the following crash:

==================================================================
BUG: KASAN: slab-use-after-free in skb_clone
Read of size 8 at addr ffff888100ef8d40 by task exploit/147

CPU: 1 UID: 0 PID: 147 Comm: exploit Not tainted 7.0.0-rc3+ #4 PREEMPTLAZY
Call Trace:
 <TASK>
 dump_stack_lvl (lib/dump_stack.c:123)
 print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
 kasan_report (mm/kasan/report.c:597)
 skb_clone (include/linux/skbuff.h:1724 include/linux/skbuff.h:1792 include/linux/skbuff.h:3396 net/core/skbuff.c:2108)
 bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5334)
 bond_start_xmit (drivers/net/bonding/bond_main.c:5567 drivers/net/bonding/bond_main.c:5593)
 dev_hard_start_xmit (include/linux/netdevice.h:5325 include/linux/netdevice.h:5334 net/core/dev.c:3871 net/core/dev.c:3887)
 __dev_queue_xmit (include/linux/netdevice.h:3601 net/core/dev.c:4838)
 ip6_finish_output2 (include/net/neighbour.h:540 include/net/neighbour.h:554 net/ipv6/ip6_output.c:136)
 ip6_finish_output (net/ipv6/ip6_output.c:208 net/ipv6/ip6_output.c:219)
 ip6_output (net/ipv6/ip6_output.c:250)
 ip6_send_skb (net/ipv6/ip6_output.c:1985)
 udp_v6_send_skb (net/ipv6/udp.c:1442)
 udpv6_sendmsg (net/ipv6/udp.c:1733)
 __sys_sendto (net/socket.c:730 net/socket.c:742 net/socket.c:2206)
 __x64_sys_sendto (net/socket.c:2209)
 do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
 entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
 </TASK>

Allocated by task 147:

Freed by task 147:

The buggy address belongs to the object at ffff888100ef8c80
 which belongs to the cache skbuff_head_cache of size 224
The buggy address is located 192 bytes inside of
 freed 224-byte region [ffff888100ef8c80, ffff888100ef8d60)

Memory state around the buggy address:
 ffff888100ef8c00: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888100ef8c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff888100ef8d00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
                                                    ^
 ffff888100ef8d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb
 ffff888100ef8e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================

Fixes: 4e5bd03ae3 ("net: bonding: fix bond_xmit_broadcast return value error bug")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Link: https://patch.msgid.link/20260326075553.3960562-1-xmei5@asu.edu
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-31 10:49:25 +02:00
Takashi Iwai
75dc1980cf ALSA: ctxfi: Don't enumerate SPDIF1 at DAIO initialization
The recent refactoring of xfi driver changed the assignment of
atc->daios[] at atc_get_resources(); now it loops over all enum
DAIOTYP entries while it looped formerly only a part of them.
The problem is that the last entry, SPDIF1, is a special type that
is used only for hw20k1 CTSB073X model (as a replacement of SPDIFIO),
and there is no corresponding definition for hw20k2.  Due to the lack
of the info, it caused a kernel crash on hw20k2, which was already
worked around by the commit b045ab3dff ("ALSA: ctxfi: Fix missing
SPDIFI1 index handling").

This patch addresses the root cause of the regression above properly,
simply by skipping the incorrect SPDIF1 type in the parser loop.

For making the change clearer, the code is slightly arranged, too.

Fixes: a2dbaeb5c6 ("ALSA: ctxfi: Refactor resource alloc for sparse mappings")
Cc: <stable@vger.kernel.org>
Link: https://bugzilla.suse.com/show_bug.cgi?id=1259925
Link: https://patch.msgid.link/20260331081227.216134-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-31 10:13:10 +02:00
songxiebing
e6c8882022 ALSA: hda/realtek: Add quirk for Lenovo Yoga Slim 7 14AKP10
The Pin Complex 0x17 (bass/woofer speakers) is incorrectly reported as
unconnected in the BIOS (pin default 0x411111f0 = N/A). This causes the
kernel to configure speaker_outs=0, meaning only the tweeters (pin 0x14)
are used. The result is very low, tinny audio with no bass.

The existing quirk ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN (already present
in patch_realtek.c for SSID 0x17aa3801) fixes the issue completely.

Reported-by: Garcicasti <andresgarciacastilla@gmail.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=221298
Signed-off-by: songxiebing <songxiebing@kylinos.cn>
Link: https://patch.msgid.link/20260331033650.285601-1-songxiebing@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-31 08:49:58 +02:00
Zhang Heng
7204607223 ALSA: hda/realtek: add quirk for HP Laptop 15-fc0xxx
For the HP Laptop 15-fc0xxx with ALC236, the built-in mic 0x12 was
not set up, making it unusable; after adding it, it now works properly.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221233
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260331013536.13778-1-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-31 08:49:49 +02:00
Pengpeng Hou
4ee937107d bnxt_en: set backing store type from query type
bnxt_hwrm_func_backing_store_qcaps_v2() stores resp->type from the
firmware response in ctxm->type and later uses that value to index
fixed backing-store metadata arrays such as ctx_arr[] and
bnxt_bstore_to_trace[].

ctxm->type is fixed by the current backing-store query type and matches
the array index of ctx->ctx_arr. Set ctxm->type from the current loop
variable instead of depending on resp->type.

Also update the loop to advance type from next_valid_type in the for
statement, which keeps the control flow simpler for non-valid and
unchanged entries.

Fixes: 6a4d0774f0 ("bnxt_en: Add support for new backing store query firmware API")
Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Tested-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260328234357.43669-1-pengpeng@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-30 17:59:13 -07:00
Lorenzo Bianconi
cedc1bf327 net: airoha: Delay offloading until all net_devices are fully registered
Netfilter flowtable can theoretically try to offload flower rules as soon
as a net_device is registered while all the other ones are not
registered or initialized, triggering a possible NULL pointer dereferencing
of qdma pointer in airoha_ppe_set_cpu_port routine. Moreover, if
register_netdev() fails for a particular net_device, there is a small
race if Netfilter tries to offload flowtable rules before all the
net_devices are properly unregistered in airoha_probe() error patch,
triggering a NULL pointer dereferencing in airoha_ppe_set_cpu_port
routine. In order to avoid any possible race, delay offloading until
all net_devices are registered in the networking subsystem.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260329-airoha-regiser-race-fix-v2-1-f4ebb139277b@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-30 17:58:40 -07:00
Yochai Eisenrich
e6e3eb5ee8 net: sched: cls_api: fix tc_chain_fill_node to initialize tcm_info to zero to prevent an info-leak
When building netlink messages, tc_chain_fill_node() never initializes
the tcm_info field of struct tcmsg. Since the allocation is not zeroed,
kernel heap memory is leaked to userspace through this 4-byte field.

The fix simply zeroes tcm_info alongside the other fields that are
already initialized.

Fixes: 32a4f5ecd7 ("net: sched: introduce chain object to uapi")
Signed-off-by: Yochai Eisenrich <echelonh@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260328211436.1010152-1-echelonh@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-30 17:56:40 -07:00
Guoyu Su
ddc748a391 net: use skb_header_pointer() for TCPv4 GSO frag_off check
Syzbot reported a KMSAN uninit-value warning in gso_features_check()
called from netif_skb_features() [1].

gso_features_check() reads iph->frag_off to decide whether to clear
mangleid_features. Accessing the IPv4 header via ip_hdr()/inner_ip_hdr()
can rely on skb header offsets that are not always safe for direct
dereference on packets injected from PF_PACKET paths.

Use skb_header_pointer() for the TCPv4 frag_off check so the header read
is robust whether data is already linear or needs copying.

[1] https://syzkaller.appspot.com/bug?extid=1543a7d954d9c6d00407

Link: https://lore.kernel.org/netdev/willemdebruijn.kernel.1a9f35039caab@gmail.com/
Fixes: cbc53e08a7 ("GSO: Add GSO type for fixed IPv4 ID")
Reported-by: syzbot+1543a7d954d9c6d00407@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1543a7d954d9c6d00407
Tested-by: syzbot+1543a7d954d9c6d00407@syzkaller.appspotmail.com
Signed-off-by: Guoyu Su <yss2813483011xxl@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260327153507.39742-1-yss2813483011xxl@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-30 17:35:21 -07:00
Lorenzo Bianconi
514aac3599 net: airoha: Add missing cleanup bits in airoha_qdma_cleanup_rx_queue()
In order to properly cleanup hw rx QDMA queues and bring the device to
the initial state, reset rx DMA queue head/tail index. Moreover, reset
queued DMA descriptor fields.

Fixes: 23020f0493 ("net: airoha: Introduce ethernet support for EN7581 SoC")
Tested-by: Madhur Agrawal <Madhur.Agrawal@airoha.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260327-airoha_qdma_cleanup_rx_queue-fix-v1-1-369d6ab1511a@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-30 17:29:16 -07:00
Paolo Abeni
fd63f18597 ipv6: prevent possible UaF in addrconf_permanent_addr()
The mentioned helper try to warn the user about an exceptional
condition, but the message is delivered too late, accessing the ipv6
after its possible deletion.

Reorder the statement to avoid the possible UaF; while at it, place the
warning outside the idev->lock as it needs no protection.

Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://sashiko.dev/#/patchset/8c8bfe2e1a324e501f0e15fef404a77443fd8caf.1774365668.git.pabeni%40redhat.com
Fixes: f1705ec197 ("net: ipv6: Make address flushing on ifdown optional")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/ef973c3a8cb4f8f1787ed469f3e5391b9fe95aa0.1774601542.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-30 17:25:10 -07:00
Linus Torvalds
d0c3bcd5b8 Merge tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull crypto library fix from Eric Biggers:
 "Fix missing zeroization of the ChaCha state"

* tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  lib/crypto: chacha: Zeroize permuted_state before it leaves scope
2026-03-30 13:40:48 -07:00
Linus Torvalds
f1b24d8bdd Merge tag 'trace-rtla-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull rtla build fix from Steven Rostedt:

 - Fix build failure when libbpf does not exist

   RTLA supports building without BPF libraries, but a recent change
   added a libbpf.h include outside of the BPF protection which caused
   build failures when libbpf was not installed.

* tag 'trace-rtla-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  rtla: Fix build without libbpf header
2026-03-30 13:12:00 -07:00
Jihed Chaibi
622363757b ASoC: ep93xx: Fix unchecked clk_prepare_enable() and add rollback on failure
ep93xx_i2s_enable() calls clk_prepare_enable() on three clocks in
sequence (mclk, sclk, lrclk) without checking the return value of any
of them. If an intermediate enable fails, the clocks that were already
enabled are never rolled back, leaking them until the next disable cycle
— which may never come if the stream never started cleanly.

Change ep93xx_i2s_enable() from void to int. Add error checking after
each clk_prepare_enable() call and unwind already-enabled clocks on
failure. Propagate the error through ep93xx_i2s_startup() and
ep93xx_i2s_resume(), both of which already return int.

Signed-off-by: Jihed Chaibi <jihed.chaibi.dev@gmail.com>
Fixes: f4ff6b56bc ("ASoC: cirrus: i2s: Prepare clock before using it")
Link: https://patch.msgid.link/20260324210909.45494-1-jihed.chaibi.dev@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-30 19:38:22 +01:00
Tejun Heo
090d34f0f0 selftests/sched_ext: Add cyclic SCX_KICK_WAIT stress test
Add a test that creates a 3-CPU kick_wait cycle (A->B->C->A). A BPF
scheduler kicks the next CPU in the ring with SCX_KICK_WAIT on every
enqueue while userspace workers generate continuous scheduling churn via
sched_yield(). Without the preceding fix, this hangs the machine within seconds.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
2026-03-30 08:37:55 -10:00
Tejun Heo
415cb193bb sched_ext: Fix SCX_KICK_WAIT deadlock by deferring wait to balance callback
SCX_KICK_WAIT busy-waits in kick_cpus_irq_workfn() using
smp_cond_load_acquire() until the target CPU's kick_sync advances. Because
the irq_work runs in hardirq context, the waiting CPU cannot reschedule and
its own kick_sync never advances. If multiple CPUs form a wait cycle, all
CPUs deadlock.

Replace the busy-wait in kick_cpus_irq_workfn() with resched_curr() to
force the CPU through do_pick_task_scx(), which queues a balance callback
to perform the wait. The balance callback drops the rq lock and enables
IRQs following the sched_core_balance() pattern, so the CPU can process
IPIs while waiting. The local CPU's kick_sync is advanced on entry to
do_pick_task_scx() and continuously during the wait, ensuring any CPU that
starts waiting for us sees the advancement and cannot form cyclic
dependencies.

Fixes: 90e55164da ("sched_ext: Implement SCX_KICK_WAIT")
Cc: stable@vger.kernel.org # v6.12+
Reported-by: Christian Loehle <christian.loehle@arm.com>
Link: https://lore.kernel.org/r/20260316100249.1651641-1-christian.loehle@arm.com
Signed-off-by: Tejun Heo <tj@kernel.org>
Tested-by: Christian Loehle <christian.loehle@arm.com>
2026-03-30 08:37:27 -10:00
Kuninori Morimoto
b9eff9732c ASoC: soc-core: call missing INIT_LIST_HEAD() for card_aux_list
Component has "card_aux_list" which is added/deled in bind/unbind aux dev
function (A), and used in for_each_card_auxs() loop (B).

	static void soc_unbind_aux_dev(...)
	{
		...
		for_each_card_auxs_safe(...) {
			...
(A)			list_del(&component->card_aux_list);
		}			     ^^^^^^^^^^^^^
	}

	static int soc_bind_aux_dev(...)
	{
		...
		for_each_card_pre_auxs(...) {
			...
(A)			list_add(&component->card_aux_list, ...);
		}			     ^^^^^^^^^^^^^
		...
	}

	#define for_each_card_auxs(card, component)	\
(B)		list_for_each_entry(component, ..., card_aux_list)
						    ^^^^^^^^^^^^^

But it has been used without calling INIT_LIST_HEAD().

	> git grep card_aux_list sound/soc
	sound/soc/soc-core.c:           list_del(&component->card_aux_list);
	sound/soc/soc-core.c:           list_add(&component->card_aux_list, ...);

call missing INIT_LIST_HEAD() for it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://patch.msgid.link/87341mxa8l.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-30 18:24:56 +01:00
Tomas Glozar
2e8b1a1d12 rtla: Fix build without libbpf header
rtla supports building without libbpf. However, BPF actions
patchset [1] adds an include of bpf/libbpf.h into timerlat_bpf.h,
which breaks build on systems that don't have libbpf headers
installed.

This is a leftover from a draft version of the patchset where
timerlat_bpf_set_action() (which takes a struct bpf_program * argument)
was defined in the header. timerlat_bpf.c already includes bpf/libbpf.h
via timerlat.skel.h when libbpf is present.

Remove the redundant include to fix build on systems without libbpf
headers.

[1] https://lore.kernel.org/linux-trace-kernel/20251126144205.331954-1-tglozar@redhat.com/T/

Cc: John Kacur <jkacur@redhat.com>
Cc: Luis Goncalves <lgoncalv@redhat.com>
Cc: Crystal Wood <crwood@redhat.com>
Cc: Costa Shulyupin <costa.shul@redhat.com>
Link: https://patch.msgid.link/20260330091207.16184-1-tglozar@redhat.com
Reported-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Closes: https://lore.kernel.org/linux-trace-kernel/20260329122202.65a8b575@robin/
Fixes: 8cd0f08ac7 ("rtla/timerlat: Support tail call from BPF program")
Signed-off-by: Tomas Glozar <tglozar@redhat.com>
Reviewed-by: Wander Lairson Costa <wander@redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-30 12:44:48 -04:00
Takashi Iwai
ea31be8a2c ALSA: hda/realtek: Add quirk for Samsung Book2 Pro 360 (NP950QED)
There is another Book2 Pro model (NP950QED) that seems equipped with
the same speaker module as the non-360 model, which requires
ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS quirk.

Reported-by: Throw <zakkabj@gmail.com>
Link: https://patch.msgid.link/20260330162249.147665-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-30 18:24:16 +02:00
Gilson Marquato Júnior
8ec017cf31 ASoC: amd: yc: Add DMI entry for HP Laptop 15-fc0xxx
The HP Laptop 15-fc0xxx (subsystem ID 0x103c8dc9) has an internal
DMIC connected to the AMD ACP6x audio coprocessor. Add a DMI quirk
entry so the internal microphone is properly detected on this model.

Tested on HP Laptop 15-fc0237ns with Fedora 43 (kernel 6.19.9).

Signed-off-by: Gilson Marquato Júnior <gilsonmandalogo@hotmail.com>
Link: https://patch.msgid.link/20260330-hp-15-fc0xxx-dmic-v2-v1-1-6dd6f53a1917@hotmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-30 15:16:28 +01:00
Zhang Heng
27c2996984 ASoC: amd: yc: Add DMI quirk for ASUS Vivobook Pro 16X OLED M7601RM
Add a DMI quirk for the ASUS Vivobook Pro 16X OLED M7601RM fixing the
issue where the internal microphone was not detected.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221288
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260330095133.81786-1-zhangheng@kylinos.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-30 14:46:09 +01:00
Zhang Heng
f1af71d568 ALSA: hda/realtek: Add quirk for ASUS ROG Strix SCAR 15
ASUS ROG Strix SCAR 15, like the Strix G15, requires the
ALC285_FIXUP_ASUS_G533Z_PINS quirk to work properly.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221247
Cc: <stable@vger.kernel.org>
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260330075334.50962-2-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-30 13:45:14 +02:00
Dag Smedberg
f025ac8c69 ALSA: usb-audio: Exclude Scarlett Solo 1st Gen from SKIP_IFACE_SETUP
Same issue that the Scarlett 2i2 1st Gen had:
QUIRK_FLAG_SKIP_IFACE_SETUP causes distorted audio on the
Scarlett Solo 1st Gen (1235:801c).

Fixes: 38c322068a ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP")
Reported-by: Dag Smedberg <dag@dsmedberg.se>
Tested-by: Dag Smedberg <dag@dsmedberg.se>
Signed-off-by: Dag Smedberg <dag@dsmedberg.se>
Link: https://patch.msgid.link/20260329170420.4122-1-dag@dsmedberg.se
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-30 09:33:29 +02:00
Berk Cem Goksel
45424e871a ALSA: caiaq: fix stack out-of-bounds read in init_card
The loop creates a whitespace-stripped copy of the card shortname
where `len < sizeof(card->id)` is used for the bounds check. Since
sizeof(card->id) is 16 and the local id buffer is also 16 bytes,
writing 16 non-space characters fills the entire buffer,
overwriting the terminating nullbyte.

When this non-null-terminated string is later passed to
snd_card_set_id() -> copy_valid_id_string(), the function scans
forward with `while (*nid && ...)` and reads past the end of the
stack buffer, reading the contents of the stack.

A USB device with a product name containing many non-ASCII, non-space
characters (e.g. multibyte UTF-8) will reliably trigger this as follows:

  BUG: KASAN: stack-out-of-bounds in copy_valid_id_string
       sound/core/init.c:696 [inline]
  BUG: KASAN: stack-out-of-bounds in snd_card_set_id_no_lock+0x698/0x74c
       sound/core/init.c:718

The off-by-one has been present since commit bafeee5b1f ("ALSA:
snd_usb_caiaq: give better shortname") from June 2009 (v2.6.31-rc1),
which first introduced this whitespace-stripping loop. The original
code never accounted for the null terminator when bounding the copy.

Fix this by changing the loop bound to `sizeof(card->id) - 1`,
ensuring at least one byte remains as the null terminator.

Fixes: bafeee5b1f ("ALSA: snd_usb_caiaq: give better shortname")
Cc: stable@vger.kernel.org
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Reported-by: Berk Cem Goksel <berkcgoksel@gmail.com>
Signed-off-by: Berk Cem Goksel <berkcgoksel@gmail.com>
Link: https://patch.msgid.link/20260329133825.581585-1-berkcgoksel@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-30 09:31:35 +02:00
Keenan Dong
7081d46d32 xfrm: account XFRMA_IF_ID in aevent size calculation
xfrm_get_ae() allocates the reply skb with xfrm_aevent_msgsize(), then
build_aevent() appends attributes including XFRMA_IF_ID when x->if_id is
set.

xfrm_aevent_msgsize() does not include space for XFRMA_IF_ID. For states
with if_id, build_aevent() can fail with -EMSGSIZE and hit BUG_ON(err < 0)
in xfrm_get_ae(), turning a malformed netlink interaction into a kernel
panic.

Account XFRMA_IF_ID in the size calculation unconditionally and replace
the BUG_ON with normal error unwinding.

Fixes: 7e6526404a ("xfrm: Add a new lookup key to match xfrm interfaces.")
Reported-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-30 07:53:55 +02:00
Yasuaki Torimaru
71a98248c6 xfrm: clear trailing padding in build_polexpire()
build_expire() clears the trailing padding bytes of struct
xfrm_user_expire after setting the hard field via memset_after(),
but the analogous function build_polexpire() does not do this for
struct xfrm_user_polexpire.

The padding bytes after the __u8 hard field are left
uninitialized from the heap allocation, and are then sent to
userspace via netlink multicast to XFRMNLGRP_EXPIRE listeners,
leaking kernel heap memory contents.

Add the missing memset_after() call, matching build_expire().

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Yasuaki Torimaru <yasuakitorimaru@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-30 07:47:32 +02:00
Linus Torvalds
7aaa8047ea Linux 7.0-rc6 2026-03-29 15:40:00 -07:00
Linus Torvalds
d1384f70b2 Merge tag 'vfs-7.0-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:

 - Fix netfs_limit_iter() hitting BUG() when an ITER_KVEC iterator
   reaches it via core dump writes to 9P filesystems. Add ITER_KVEC
   handling following the same pattern as the existing ITER_BVEC code.

 - Fix a NULL pointer dereference in the netfs unbuffered write retry
   path when the filesystem (e.g., 9P) doesn't set the prepare_write
   operation.

 - Clear I_DIRTY_TIME in sync_lazytime for filesystems implementing
  ->sync_lazytime. Without this the flag stays set and may cause
   additional unnecessary calls during inode deactivation.

 - Increase tmpfs size in mount_setattr selftests. A recent commit
   bumped the ext4 image size to 2 GB but didn't adjust the tmpfs
   backing store, so mkfs.ext4 fails with ENOSPC writing metadata.

 - Fix an invalid folio access in iomap when i_blkbits matches the folio
   size but differs from the I/O granularity. The cur_folio pointer
   would not get invalidated and iomap_read_end() would still be called
   on it despite the IO helper owning it.

 - Fix hash_name() docstring.

 - Fix read abandonment during netfs retry where the subreq variable
   used for abandonment could be uninitialized on the first pass or
   point to a deleted subrequest on later passes.

 - Don't block sync for filesystems with no data integrity guarantees.
   Add a SB_I_NO_DATA_INTEGRITY superblock flag replacing the per-inode
   AS_NO_DATA_INTEGRITY mapping flag so sync kicks off writeback but
   doesn't wait for flusher threads. This fixes a suspend-to-RAM hang on
   fuse-overlayfs where the flusher thread blocks when the fuse daemon
   is frozen.

 - Fix a lockdep splat in iomap when reads fail. iomap_read_end_io()
   invokes fserror_report() which calls igrab() taking i_lock in hardirq
   context while i_lock is normally held with interrupts enabled. Kick
   failed read handling to a workqueue.

 - Remove the redundant netfs_io_stream::front member and use
   stream->subrequests.next instead, fixing a potential issue in the
   direct write code path.

* tag 'vfs-7.0-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  netfs: Fix the handling of stream->front by removing it
  iomap: fix lockdep complaint when reads fail
  writeback: don't block sync for filesystems with no data integrity guarantees
  netfs: Fix read abandonment during retry
  vfs: fix docstring of hash_name()
  iomap: fix invalid folio access when i_blkbits differs from I/O granularity
  selftests/mount_setattr: increase tmpfs size for idmapped mount tests
  fs: clear I_DIRTY_TIME in sync_lazytime
  netfs: Fix NULL pointer dereference in netfs_unbuffered_write() on retry
  netfs: Fix kernel BUG in netfs_limit_iter() for ITER_KVEC iterators
2026-03-29 15:24:28 -07:00
Linus Torvalds
fc9eae25ec Merge tag 'phy-fixes-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy
Pull phy fixes from Vinod Koul:

 - Qualcomm PCS table fix for ufs phy

 - TI device node reference fix

 - Common prop kconfig fix

 - lynx CDR lock workaround for lanes disabled

 - usb disconnect function fix of k1 driver

* tag 'phy-fixes-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy:
  phy: qcom: qmp-ufs: Fix SM8650 PCS table for Gear 4
  phy: ti: j721e-wiz: Fix device node reference leak in wiz_get_lane_phy_types()
  phy: k1-usb: add disconnect function support
  phy: lynx-28g: skip CDR lock workaround for lanes disabled in the device tree
  phy: make PHY_COMMON_PROPS Kconfig symbol conditionally user-selectable
2026-03-29 12:48:52 -07:00
Linus Torvalds
a516c618a6 Merge tag 'dmaengine-fix-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine
Pull dmaengine fixes from Vinod Koul:
 "A bunch of driver fixes with idxd ones being the biggest:

   - Xilinx regmap init error handling, dma_device directions, residue
     calculation, and reset related timeout fixes

   - Renesas CHCTRL updates and driver list fixes

   - DW HDMA cycle bits and MSI data programming fix

   - IDXD pile of fixes for memeory leak and FLR fixes"

* tag 'dmaengine-fix-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine: (21 commits)
  dmaengine: xilinx_dma: Fix reset related timeout with two-channel AXIDMA
  dmaengine: xilinx: xilinx_dma: Fix unmasked residue subtraction
  dmaengine: xilinx: xilinx_dma: Fix residue calculation for cyclic DMA
  dmaengine: xilinx: xilinx_dma: Fix dma_device directions
  dmaengine: sh: rz-dmac: Move CHCTRL updates under spinlock
  dmaengine: sh: rz-dmac: Protect the driver specific lists
  dmaengine: idxd: fix possible wrong descriptor completion in llist_abort_desc()
  dmaengine: xilinx: xdma: Fix regmap init error handling
  dmaengine: dw-edma: Fix multiple times setting of the CYCLE_STATE and CYCLE_BIT bits for HDMA.
  dmaengine: idxd: Fix leaking event log memory
  dmaengine: idxd: Fix freeing the allocated ida too late
  dmaengine: idxd: Fix memory leak when a wq is reset
  dmaengine: idxd: Fix not releasing workqueue on .release()
  dmaengine: idxd: Wait for submitted operations on .device_synchronize()
  dmaengine: idxd: Flush all pending descriptors
  dmaengine: idxd: Flush kernel workqueues on Function Level Reset
  dmaengine: idxd: Fix possible invalid memory access after FLR
  dmaengine: idxd: Fix crash when the event log is disabled
  dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config()
  dmaengine: dw-edma: fix MSI data programming for multi-IRQ case
  ...
2026-03-29 12:42:31 -07:00
Linus Torvalds
32ee88daf7 Merge tag 'i2c-for-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
Pull i2c fixes from Wolfram Sang:

 - designware: fix resume-probe race causing NULL-deref in amdisp

 - imx: fix timeout on repeated reads and extra clock at end

 - MAINTAINERS: drop outdated I2C website

* tag 'i2c-for-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux:
  MAINTAINERS: drop outdated I2C website
  i2c: designware: amdisp: Fix resume-probe race condition issue
  i2c: imx: ensure no clock is generated after last read
  i2c: imx: fix i2c issue when reading multiple messages
2026-03-29 12:27:13 -07:00
Linus Torvalds
ac354b5cb0 Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "s390:

   - Lots of small and not-so-small fixes for the newly rewritten gmap,
     mostly affecting the handling of nested guests.

  x86:

   - Fix an issue with shadow paging, which causes KVM to install an
     MMIO PTE in the shadow page tables without first zapping a non-MMIO
     SPTE if KVM didn't see the write that modified the shadowed guest
     PTE.

     While commit a54aa15c6b ("KVM: x86/mmu: Handle MMIO SPTEs
     directly in mmu_set_spte()") was right about it being impossible to
     miss such a write if it was coming from the guest, it failed to
     account for writes to guest memory that are outside the scope of
     KVM: if userspace modifies the guest PTE, and then the guest hits a
     relevant page fault, KVM will get confused"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/mmu: Only WARN in direct MMUs when overwriting shadow-present SPTE
  KVM: x86/mmu: Drop/zap existing present SPTE even when creating an MMIO SPTE
  KVM: s390: Fix KVM_S390_VCPU_FAULT ioctl
  KVM: s390: vsie: Fix guest page tables protection
  KVM: s390: vsie: Fix unshadowing while shadowing
  KVM: s390: vsie: Fix refcount overflow for shadow gmaps
  KVM: s390: vsie: Fix nested guest memory shadowing
  KVM: s390: Correctly handle guest mappings without struct page
  KVM: s390: Fix gmap_link()
  KVM: s390: vsie: Fix check for pre-existing shadow mapping
  KVM: s390: Remove non-atomic dat_crstep_xchg()
  KVM: s390: vsie: Fix dat_split_ste()
2026-03-29 11:58:47 -07:00
Linus Torvalds
b8a3bc8567 Merge tag 'for-linus-7.0a-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen fix from Juergen Gross:
 "A single fix for a very rare bug introduced in rc5"

* tag 'for-linus-7.0a-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/privcmd: unregister xenstore notifier on module exit
2026-03-29 11:51:37 -07:00
Linus Torvalds
f242ac4a09 Merge tag 'x86-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:

 - Fix an early boot crash in AMD SEV-SNP guests, caused by incorrect
   FSGSBASE init ordering (Nikunj A Dadhania)

 - Remove X86_CR4_FRED from the CR4 pinned bits mask, to fix a race
   window during the bootup of SEV-{ES,SNP} or TDX guests, which can
   crash them if they trigger exceptions in that window (Borislav
   Petkov)

 - Fix early boot failures on SEV-ES/SNP guests, due to incorrect early
   GHCB access (Nikunj A Dadhania)

 - Add clarifying comment to the CRn pinning logic, to avoid future
   confusion & bugs (Peter Zijlstra)

* tag 'x86-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu: Add comment clarifying CRn pinning
  x86/fred: Fix early boot failures on SEV-ES/SNP guests
  x86/cpu: Remove X86_CR4_FRED from the CR4 pinned bits mask
  x86/cpu: Enable FSGSBASE early in cpu_init_exception_handling()
2026-03-29 10:04:37 -07:00
Linus Torvalds
47e3f23f0e Merge tag 'timers-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fix from Ingo Molnar:
 "Fix an argument order bug in the alarm timer forwarding logic, which
  may cause missed expirations or incorrect overrun accounting"

* tag 'timers-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  alarmtimer: Fix argument order in alarm_timer_forward()
2026-03-29 10:02:38 -07:00
Linus Torvalds
f087b0bad4 Merge tag 'locking-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull futex fixes from Ingo Molnar:

 - Tighten up the sys_futex_requeue() ABI a bit, to disallow dissimilar
   futex flags and potential UaF access (Peter Zijlstra)

 - Fix UaF between futex_key_to_node_opt() and vma_replace_policy()
   (Hao-Yu Yang)

 - Clear stale exiting pointer in futex_lock_pi() retry path, which
   triggered a warning (and potential misbehavior) in stress-testing
   (Davidlohr Bueso)

* tag 'locking-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Clear stale exiting pointer in futex_lock_pi() retry path
  futex: Fix UaF between futex_key_to_node_opt() and vma_replace_policy()
  futex: Require sys_futex_requeue() to have identical flags
2026-03-29 09:59:46 -07:00
Linus Torvalds
21047b17b3 Merge tag 'irq-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq fixes from Ingo Molnar:

 - Fix TX completion signaling bug in the Qualcomm MPM irqchip driver

 - Fix probe error handling in the Renesas RZ/V2H(P) irqchip driver

* tag 'irq-urgent-2026-03-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  irqchip/renesas-rzv2h: Fix error path in rzv2h_icu_probe_common()
  irqchip/qcom-mpm: Add missing mailbox TX done acknowledgment
2026-03-29 09:53:01 -07:00
Linus Torvalds
a3d97d1d3f Merge tag 'ovl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs
Pull overlayfs fixes from Amir Goldstein:

 - Fix regression in 'xino' feature detection

   I clumsily introduced this regression myself when working on another
   subsystem (fsnotify). Both the regression and the fix have almost no
   visible impact on users except for some kmsg prints.

 - Fix to performance regression in v6.12.

   This regression was reported by Google COS developers.

   It is not uncommon these days for the year-old mature LTS to get
   adopted by distros and get exposed to many new workloads. We made a
   sub-smart move of making a behavior change in v6.12 which could
   impact performance, without making it opt-in. Fixing this mistake
   retroactively, to be picked by LTS.

* tag 'ovl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs:
  ovl: make fsync after metadata copy-up opt-in mount option
  ovl: fix wrong detection of 32bit inode numbers
2026-03-29 09:34:50 -07:00
Linus Torvalds
241d4ca15d Merge tag 'ext4_for_linus-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 fixes from Ted Ts'o:

 - Update the MAINTAINERS file to add reviewers for the ext4 file system

 - Add a test issue an ext4 warning (not a WARN_ON) if there are still
   dirty pages attached to an evicted inode.

 - Fix a number of Syzkaller issues

 - Fix memory leaks on error paths

 - Replace some BUG and WARN with EFSCORRUPTED reporting

 - Fix a potential crash when disabling discard via remount followed by
   an immediate unmount. (Found by Sashiko)

 - Fix a corner case which could lead to allocating blocks for an
   indirect-mapped inode block numbers > 2**32

 - Fix a race when reallocating a freed inode that could result in a
   deadlock

 - Fix a user-after-free in update_super_work when racing with umount

 - Fix build issues when trying to build ext4's kunit tests as a module

 - Fix a bug where ext4_split_extent_zeroout() could fail to pass back
   an error from ext4_ext_dirty()

 - Avoid allocating blocks from a corrupted block group in
   ext4_mb_find_by_goal()

 - Fix a percpu_counters list corruption BUG triggered by an ext4
   extents kunit

 - Fix a potetial crash caused by the fast commit flush path potentially
   accessing the jinode structure before it is fully initialized

 - Fix fsync(2) in no-journal mode to make sure the dirtied inode is
   write to storage

 - Fix a bug when in no-journal mode, when ext4 tries to avoid using
   recently deleted inodes, if lazy itable initialization is enabled,
   can lead to an unitialized inode getting skipped and triggering an
   e2fsck complaint

 - Fix journal credit calculation when setting an xattr when both the
   encryption and ea_inode feeatures are enabled

 - Fix corner cases which could result in stale xarray tags after
   writeback

 - Fix generic/475 failures caused by ENOSPC errors while creating a
   symlink when the system crashes resulting to a file system
   inconsistency when replaying the fast commit journal

* tag 'ext4_for_linus-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (27 commits)
  ext4: always drain queued discard work in ext4_mb_release()
  ext4: handle wraparound when searching for blocks for indirect mapped blocks
  ext4: skip split extent recovery on corruption
  ext4: fix iloc.bh leak in ext4_fc_replay_inode() error paths
  ext4: fix deadlock on inode reallocation
  ext4: fix use-after-free in update_super_work when racing with umount
  ext4: fix the might_sleep() warnings in kvfree()
  ext4: reject mount if bigalloc with s_first_data_block != 0
  ext4: fix extents-test.c is not compiled when EXT4_KUNIT_TESTS=M
  ext4: fix mballoc-test.c is not compiled when EXT4_KUNIT_TESTS=M
  ext4: introduce EXPORT_SYMBOL_FOR_EXT4_TEST() helper
  jbd2: gracefully abort on checkpointing state corruptions
  ext4: avoid infinite loops caused by residual data
  ext4: validate p_idx bounds in ext4_ext_correct_indexes
  ext4: test if inode's all dirty pages are submitted to disk
  ext4: minor fix for ext4_split_extent_zeroout()
  ext4: avoid allocate block from corrupted group in ext4_mb_find_by_goal()
  ext4: kunit: extents-test: lix percpu_counters list corruption
  ext4: publish jinode after initialization
  ext4: replace BUG_ON with proper error handling in ext4_read_inline_folio
  ...
2026-03-29 09:30:06 -07:00
Takashi Iwai
277c6960d4 ALSA: ctxfi: Check the error for index mapping
The ctxfi driver blindly assumed a proper value returned from
daio_device_index(), but it's not always true.  Add a proper error
check to deal with the error from the function.

Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/87cy149n6k.wl-tiwai@suse.de
Link: https://patch.msgid.link/20260329091240.420194-2-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-29 11:13:53 +02:00
Takashi Iwai
b045ab3dff ALSA: ctxfi: Fix missing SPDIFI1 index handling
SPDIF1 DAIO type isn't properly handled in daio_device_index() for
hw20k2, and it returned -EINVAL, which ended up with the out-of-bounds
array access.  Follow the hw20k1 pattern and return the proper index
for this type, too.

Reported-and-tested-by: Karsten Hohmeier <linux@hohmatik.de>
Closes: https://lore.kernel.org/20260315155004.15633-1-linux@hohmatik.de
Cc: <stable@vger.kernel.org>
Link: https://patch.msgid.link/20260329091240.420194-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-29 11:13:42 +02:00
Linus Torvalds
b51ad67773 Merge tag 'for-7.0-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
 "A few more fixes. There's one that stands out in size as it fixes an
  edge case in fsync.

   - fix issue on fsync where file with zero size appears as a non-zero
     after log replay

   - in zlib compression, handle a crash when data alignment causes
     folio reference issues

   - fix possible crash with enabled tracepoints on a overlayfs mount

   - handle device stats update error

   - on zoned filesystems, fix kobject leak on sub-block groups

   - fix super block offset in an error message in validation"

* tag 'for-7.0-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix lost error when running device stats on multiple devices fs
  btrfs: tracepoints: get correct superblock from dentry in event btrfs_sync_file()
  btrfs: zlib: handle page aligned compressed size correctly
  btrfs: fix leak of kobject name for sub-group space_info
  btrfs: fix zero size inode with non-zero size after log replay
  btrfs: fix super block offset in error message in btrfs_validate_super()
2026-03-28 15:23:03 -07:00
Linus Torvalds
0bcb517f0a Merge tag 'mm-hotfixes-stable-2026-03-28-10-45' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "10 hotfixes.  8 are cc:stable.  9 are for MM.

  There's a 3-patch series of DAMON fixes from Josh Law and SeongJae
  Park. The rest are singletons - please see the changelogs for details"

* tag 'mm-hotfixes-stable-2026-03-28-10-45' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/mseal: update VMA end correctly on merge
  bug: avoid format attribute warning for clang as well
  mm/pagewalk: fix race between concurrent split and refault
  mm/memory: fix PMD/PUD checks in follow_pfnmap_start()
  mm/damon/sysfs: check contexts->nr in repeat_call_fn
  mm/damon/sysfs: check contexts->nr before accessing contexts_arr[0]
  mm/damon/sysfs: fix param_ctx leak on damon_sysfs_new_test_ctx() failure
  mm/swap: fix swap cache memcg accounting
  MAINTAINERS, mailmap: update email address for Harry Yoo
  mm/huge_memory: fix folio isn't locked in softleaf_to_folio()
2026-03-28 14:19:55 -07:00
Wolfram Sang
b0faf733fc MAINTAINERS: drop outdated I2C website
As stated on the website: "This wiki has been archived and the content
is no longer updated." No need to reference it.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
2026-03-28 20:31:33 +01:00
Linus Torvalds
cbfffcca2b Merge tag 'trace-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing fixes from Steven Rostedt:

 - Fix potential deadlock in osnoise and hotplug

   The interface_lock can be called by a osnoise thread and the CPU
   shutdown logic of osnoise can wait for this thread to finish. But
   cpus_read_lock() can also be taken while holding the interface_lock.
   This produces a circular lock dependency and can cause a deadlock.

   Swap the ordering of cpus_read_lock() and the interface_lock to have
   interface_lock taken within the cpus_read_lock() context to prevent
   this circular dependency.

 - Fix freeing of event triggers in early boot up

   If the same trigger is added on the kernel command line, the second
   one will fail to be applied and the trigger created will be freed.
   This calls into the deferred logic and creates a kernel thread to do
   the freeing. But the command line logic is called before kernel
   threads can be created and this leads to a NULL pointer dereference.

   Delay freeing event triggers until late init.

* tag 'trace-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  tracing: Drain deferred trigger frees if kthread creation fails
  tracing: Fix potential deadlock in cpu hotplug with osnoise
2026-03-28 09:59:09 -07:00
Linus Torvalds
e522b75c44 Merge tag 's390-7.0-6' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 fixes from Vasily Gorbik:

 - Add array_index_nospec() to syscall dispatch table lookup to prevent
   limited speculative out-of-bounds access with user-controlled syscall
   number

 - Mark array_index_mask_nospec() __always_inline since GCC may emit an
   out-of-line call instead of the inline data dependency sequence the
   mitigation relies on

 - Clear r12 on kernel entry to prevent potential speculative use of
   user value in system_call, ext/io/mcck interrupt handlers

* tag 's390-7.0-6' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/entry: Scrub r12 register on kernel entry
  s390/syscalls: Add spectre boundary for syscall dispatch table
  s390/barrier: Make array_index_mask_nospec() __always_inline
2026-03-28 09:50:11 -07:00
Davidlohr Bueso
210d36d892 futex: Clear stale exiting pointer in futex_lock_pi() retry path
Fuzzying/stressing futexes triggered:

    WARNING: kernel/futex/core.c:825 at wait_for_owner_exiting+0x7a/0x80, CPU#11: futex_lock_pi_s/524

When futex_lock_pi_atomic() sees the owner is exiting, it returns -EBUSY
and stores a refcounted task pointer in 'exiting'.

After wait_for_owner_exiting() consumes that reference, the local pointer
is never reset to nil. Upon a retry, if futex_lock_pi_atomic() returns a
different error, the bogus pointer is passed to wait_for_owner_exiting().

  CPU0			     CPU1		       CPU2
  futex_lock_pi(uaddr)
  // acquires the PI futex
  exit()
    futex_cleanup_begin()
      futex_state = EXITING;
			     futex_lock_pi(uaddr)
			       futex_lock_pi_atomic()
				 attach_to_pi_owner()
				   // observes EXITING
				   *exiting = owner;  // takes ref
				   return -EBUSY
			       wait_for_owner_exiting(-EBUSY, owner)
				 put_task_struct();   // drops ref
			       // exiting still points to owner
			       goto retry;
			       futex_lock_pi_atomic()
				 lock_pi_update_atomic()
				   cmpxchg(uaddr)
					*uaddr ^= WAITERS // whatever
				   // value changed
				 return -EAGAIN;
			       wait_for_owner_exiting(-EAGAIN, exiting) // stale
				 WARN_ON_ONCE(exiting)

Fix this by resetting upon retry, essentially aligning it with requeue_pi.

Fixes: 3ef240eaff ("futex: Prevent exit livelock")
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260326001759.4129680-1-dave@stgolabs.net
2026-03-28 13:54:02 +01:00
Wesley Atwell
250ab25391 tracing: Drain deferred trigger frees if kthread creation fails
Boot-time trigger registration can fail before the trigger-data cleanup
kthread exists. Deferring those frees until late init is fine, but the
post-boot fallback must still drain the deferred list if kthread
creation never succeeds.

Otherwise, boot-deferred nodes can accumulate on
trigger_data_free_list, later frees fall back to synchronously freeing
only the current object, and the older queued entries are leaked
forever.

To trigger this, add the following to the kernel command line:

  trace_event=sched_switch trace_trigger=sched_switch.traceon,sched_switch.traceon

The second traceon trigger will fail and be freed. This triggers a NULL
pointer dereference and crashes the kernel.

Keep the deferred boot-time behavior, but when kthread creation fails,
drain the whole queued list synchronously. Do the same in the late-init
drain path so queued entries are not stranded there either.

Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260324221326.1395799-3-atwellwea@gmail.com
Fixes: 61d445af0a ("tracing: Add bulk garbage collection of freeing event_trigger_data")
Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-28 08:32:44 -04:00
Sourav Nayak
1fbf85dbf0 ALSA: hda/realtek: add quirk for HP Victus 15-fb0xxx
This adds a mute led quirck for HP Victus 15-fb0xxx (103c:8a3d) model

- As it used 0x8(full bright)/0x7f(little dim) for mute led on and other
  values as 0ff (0x0, 0x4, ...)

- So, use ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT insted for safer approach

Cc: <stable@vger.kernel.org>
Signed-off-by: Sourav Nayak <nonameblank007@gmail.com>
Link: https://patch.msgid.link/20260327142805.17139-1-nonameblank007@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-28 10:54:19 +01:00
Stuart Hayhurst
696b0a9bd2 ALSA: hda/intel: Add MSI X870E Tomahawk to denylist by DMI ID
This motherboard uses USB audio instead, causing this driver to complain
about "no codecs found!".
Add it to the denylist to silence the warning.

The first attempt only matched on the PCI device, but this caused issues
for some laptops, so DMI match against the board as well.

Signed-off-by: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
Link: https://patch.msgid.link/20260327155737.21818-2-stuart.a.hayhurst@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-28 10:32:53 +01:00
Dustin L. Howett
bac1e57adf ALSA: hda/realtek: add quirk for Framework F111:000F
Similar to commit 7b509910b3 ("ALSA hda/realtek: Add quirk for
Framework F111:000C") and previous quirks for Framework systems with
Realtek codecs.

000F is another new platform with an ALC285 which needs the same quirk.

Signed-off-by: Dustin L. Howett <dustin@howett.net>
Link: https://patch.msgid.link/20260327-framework-alsa-000f-v1-1-74013aba1c00@howett.net
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-28 10:31:30 +01:00
Phil Willoughby
bc5b4e5ae1 ALSA: usb-audio: Fix quirk flags for NeuralDSP Quad Cortex
The NeuralDSP Quad Cortex does not support DSD playback. We need
this product-specific entry with zero quirks because otherwise it
falls through to the vendor-specific entry which marks it as
supporting DSD playback.

Cc: Yue Wang <yuleopen@gmail.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Signed-off-by: Phil Willoughby <willerz@gmail.com>
Link: https://patch.msgid.link/20260328080921.3310-1-willerz@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-28 09:58:44 +01:00
Jakub Kicinski
dc9e9d61e3 Merge branch 'net-enetc-add-more-checks-to-enetc_set_rxfh'
Wei Fang says:

====================
net: enetc: add more checks to enetc_set_rxfh()

ENETC only supports Toeplitz algorithm, and VFs do not support setting
the RSS key, but enetc_set_rxfh() does not check these constraints and
silently accepts unsupported configurations. This may mislead users or
tools into believing that the requested RSS settings have been
successfully applied. So add checks to reject unsupported hash functions
and RSS key updates on VFs, and return "-EOPNOTSUPP" to user space.
====================

Link: https://patch.msgid.link/20260326075233.3628047-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:56:49 -07:00
Wei Fang
a142d13916 net: enetc: do not allow VF to configure the RSS key
VFs do not have privilege to configure the RSS key because the registers
are owned by the PF. Currently, if VF attempts to configure the RSS key,
enetc_set_rxfh() simply skips the configuration and does not generate a
warning, which may mislead users into thinking the feature is supported.
To improve this situation, add a check to reject RSS key configuration
on VFs.

Fixes: d382563f54 ("enetc: Add RFS and RSS support")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Clark Wang <xiaoning.wang@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Link: https://patch.msgid.link/20260326075233.3628047-3-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:56:46 -07:00
Wei Fang
d389954a6c net: enetc: check whether the RSS algorithm is Toeplitz
Both ENETC v1 and v4 only provide Toeplitz RSS support. This patch adds
a validation check to reject attempts to configure other RSS algorithms,
avoiding misleading configuration options for users.

Fixes: d382563f54 ("enetc: Add RFS and RSS support")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Clark Wang <xiaoning.wang@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Link: https://patch.msgid.link/20260326075233.3628047-2-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:56:45 -07:00
Marek Behún
eeee5a710f net: sfp: Fix Ubiquiti U-Fiber Instant SFP module on mvneta
In commit 8110633db4 ("net: sfp-bus: allow SFP quirks to override
Autoneg and pause bits") we moved the setting of Autoneg and pause bits
before the call to SFP quirk when parsing SFP module support.

Since the quirk for Ubiquiti U-Fiber Instant SFP module zeroes the
support bits and sets 1000baseX_Full only, the above mentioned commit
changed the overall computed support from
  1000baseX_Full, Autoneg, Pause, Asym_Pause
to just
  1000baseX_Full.

This broke the SFP module for mvneta, which requires Autoneg for
1000baseX since commit c762b7fac1 ("net: mvneta: deny disabling
autoneg for 802.3z modes").

Fix this by setting back the Autoneg, Pause and Asym_Pause bits in the
quirk.

Fixes: 8110633db4 ("net: sfp-bus: allow SFP quirks to override Autoneg and pause bits")
Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/20260326122038.2489589-1-kabel@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:55:52 -07:00
Lorenzo Stoakes (Oracle)
2697dd8ae7 mm/mseal: update VMA end correctly on merge
Previously we stored the end of the current VMA in curr_end, and then upon
iterating to the next VMA updated curr_start to curr_end to advance to the
next VMA.

However, this doesn't take into account the fact that a VMA might be
updated due to a merge by vma_modify_flags(), which can result in curr_end
being stale and thus, upon setting curr_start to curr_end, ending up with
an incorrect curr_start on the next iteration.

Resolve the issue by setting curr_end to vma->vm_end unconditionally to
ensure this value remains updated should this occur.

While we're here, eliminate this entire class of bug by simply setting
const curr_[start/end] to be clamped to the input range and VMAs, which
also happens to simplify the logic.

Link: https://lkml.kernel.org/r/20260327173104.322405-1-ljs@kernel.org
Fixes: 6c2da14ae1 ("mm/mseal: rework mseal apply logic")
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reported-by: Antonius <antonius@bluedragonsec.com>
Closes: https://lore.kernel.org/linux-mm/CAK8a0jwWGj9-SgFk0yKFh7i8jMkwKm5b0ao9=kmXWjO54veX2g@mail.gmail.com/
Suggested-by: David Hildenbrand (ARM) <david@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:38 -07:00
Arnd Bergmann
2598ab9d63 bug: avoid format attribute warning for clang as well
Like gcc, clang-22 now also warns about a function that it incorrectly
identifies as a printf-style format:

lib/bug.c:190:22: error: diagnostic behavior may be improved by adding the 'format(printf, 1, 0)' attribute to the declaration of '__warn_printf' [-Werror,-Wmissing-format-attribute]
  179 | static void __warn_printf(const char *fmt, struct pt_regs *regs)
      | __attribute__((format(printf, 1, 0)))
  180 | {
  181 |         if (!fmt)
  182 |                 return;
  183 |
  184 | #ifdef HAVE_ARCH_BUG_FORMAT_ARGS
  185 |         if (regs) {
  186 |                 struct arch_va_list _args;
  187 |                 va_list *args = __warn_args(&_args, regs);
  188 |
  189 |                 if (args) {
  190 |                         vprintk(fmt, *args);
      |                                           ^

Revert the change that added a gcc-specific workaround, and instead add
the generic annotation that avoid the warning.

Link: https://lkml.kernel.org/r/20260323205534.1284284-1-arnd@kernel.org
Fixes: d36067d6ea ("bug: Hush suggest-attribute=format for __warn_printf()")
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Suggested-by: Brendan Jackman <jackmanb@google.com>
Link: https://lore.kernel.org/all/20251208141618.2805983-1-andriy.shevchenko@linux.intel.com/T/#u
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Brendan Jackman <jackmanb@google.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Bill Wendling <morbo@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Justin Stitt <justinstitt@google.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:38 -07:00
Max Boone
3b89863c3f mm/pagewalk: fix race between concurrent split and refault
The splitting of a PUD entry in walk_pud_range() can race with a
concurrent thread refaulting the PUD leaf entry causing it to try walking
a PMD range that has disappeared.

An example and reproduction of this is to try reading numa_maps of a
process while VFIO-PCI is setting up DMA (specifically the
vfio_pin_pages_remote call) on a large BAR for that process.

This will trigger a kernel BUG:
vfio-pci 0000:03:00.0: enabling device (0000 -> 0002)
BUG: unable to handle page fault for address: ffffa23980000000
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP NOPTI
...
RIP: 0010:walk_pgd_range+0x3b5/0x7a0
Code: 8d 43 ff 48 89 44 24 28 4d 89 ce 4d 8d a7 00 00 20 00 48 8b 4c 24
28 49 81 e4 00 00 e0 ff 49 8d 44 24 ff 48 39 c8 4c 0f 43 e3 <49> f7 06
   9f ff ff ff 75 3b 48 8b 44 24 20 48 8b 40 28 48 85 c0 74
RSP: 0018:ffffac23e1ecf808 EFLAGS: 00010287
RAX: 00007f44c01fffff RBX: 00007f4500000000 RCX: 00007f44ffffffff
RDX: 0000000000000000 RSI: 000ffffffffff000 RDI: ffffffff93378fe0
RBP: ffffac23e1ecf918 R08: 0000000000000004 R09: ffffa23980000000
R10: 0000000000000020 R11: 0000000000000004 R12: 00007f44c0200000
R13: 00007f44c0000000 R14: ffffa23980000000 R15: 00007f44c0000000
FS:  00007fe884739580(0000) GS:ffff9b7d7a9c0000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffa23980000000 CR3: 000000c0650e2005 CR4: 0000000000770ef0
PKRU: 55555554
Call Trace:
 <TASK>
 __walk_page_range+0x195/0x1b0
 walk_page_vma+0x62/0xc0
 show_numa_map+0x12b/0x3b0
 seq_read_iter+0x297/0x440
 seq_read+0x11d/0x140
 vfs_read+0xc2/0x340
 ksys_read+0x5f/0xe0
 do_syscall_64+0x68/0x130
 ? get_page_from_freelist+0x5c2/0x17e0
 ? mas_store_prealloc+0x17e/0x360
 ? vma_set_page_prot+0x4c/0xa0
 ? __alloc_pages_noprof+0x14e/0x2d0
 ? __mod_memcg_lruvec_state+0x8d/0x140
 ? __lruvec_stat_mod_folio+0x76/0xb0
 ? __folio_mod_stat+0x26/0x80
 ? do_anonymous_page+0x705/0x900
 ? __handle_mm_fault+0xa8d/0x1000
 ? __count_memcg_events+0x53/0xf0
 ? handle_mm_fault+0xa5/0x360
 ? do_user_addr_fault+0x342/0x640
 ? arch_exit_to_user_mode_prepare.constprop.0+0x16/0xa0
 ? irqentry_exit_to_user_mode+0x24/0x100
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7fe88464f47e
Code: c0 e9 b6 fe ff ff 50 48 8d 3d be 07 0b 00 e8 69 01 02 00 66 0f 1f
84 00 00 00 00 00 64 8b 04 25 18 00 00 00 85 c0 75 14 0f 05 <48> 3d 00
   f0 ff ff 77 5a c3 66 0f 1f 84 00 00 00 00 00 48 83 ec 28
RSP: 002b:00007ffe6cd9a9b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fe88464f47e
RDX: 0000000000020000 RSI: 00007fe884543000 RDI: 0000000000000003
RBP: 00007fe884543000 R08: 00007fe884542010 R09: 0000000000000000
R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
 </TASK>

Fix this by validating the PUD entry in walk_pmd_range() using a stable
snapshot (pudp_get()).  If the PUD is not present or is a leaf, retry the
walk via ACTION_AGAIN instead of descending further.  This mirrors the
retry logic in walk_pte_range(), which lets walk_pmd_range() retry if the
PTE is not being got by pte_offset_map_lock().

Link: https://lkml.kernel.org/r/20260325-pagewalk-check-pmd-refault-v2-1-707bff33bc60@akamai.com
Fixes: f9e54c3a2f ("vfio/pci: implement huge_fault support")
Co-developed-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: Max Boone <mboone@akamai.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:38 -07:00
David Hildenbrand (Arm)
ffef67b93a mm/memory: fix PMD/PUD checks in follow_pfnmap_start()
follow_pfnmap_start() suffers from two problems:

(1) We are not re-fetching the pmd/pud after taking the PTL

Therefore, we are not properly stabilizing what the lock actually
protects.  If there is concurrent zapping, we would indicate to the
caller that we found an entry, however, that entry might already have
been invalidated, or contain a different PFN after taking the lock.

Properly use pmdp_get() / pudp_get() after taking the lock.

(2) pmd_leaf() / pud_leaf() are not well defined on non-present entries

pmd_leaf()/pud_leaf() could wrongly trigger on non-present entries.

There is no real guarantee that pmd_leaf()/pud_leaf() returns something
reasonable on non-present entries.  Most architectures indeed either
perform a present check or make it work by smart use of flags.

However, for example loongarch checks the _PAGE_HUGE flag in pmd_leaf(),
and always sets the _PAGE_HUGE flag in __swp_entry_to_pmd().  Whereby
pmd_trans_huge() explicitly checks pmd_present(), pmd_leaf() does not do
that.

Let's check pmd_present()/pud_present() before assuming "the is a present
PMD leaf" when spotting pmd_leaf()/pud_leaf(), like other page table
handling code that traverses user page tables does.

Given that non-present PMD entries are likely rare in VM_IO|VM_PFNMAP, (1)
is likely more relevant than (2).  It is questionable how often (1) would
actually trigger, but let's CC stable to be sure.

This was found by code inspection.

Link: https://lkml.kernel.org/r/20260323-follow_pfnmap_fix-v1-1-5b0ec10872b3@kernel.org
Fixes: 6da8e9634b ("mm: new follow_pfnmap API")
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:38 -07:00
Josh Law
6557004a8b mm/damon/sysfs: check contexts->nr in repeat_call_fn
damon_sysfs_repeat_call_fn() calls damon_sysfs_upd_tuned_intervals(),
damon_sysfs_upd_schemes_stats(), and
damon_sysfs_upd_schemes_effective_quotas() without checking contexts->nr. 
If nr_contexts is set to 0 via sysfs while DAMON is running, these
functions dereference contexts_arr[0] and cause a NULL pointer
dereference.  Add the missing check.

For example, the issue can be reproduced using DAMON sysfs interface and
DAMON user-space tool (damo) [1] like below.

    $ sudo damo start --refresh_interval 1s
    $ echo 0 | sudo tee \
            /sys/kernel/mm/damon/admin/kdamonds/0/contexts/nr_contexts

Link: https://patch.msgid.link/20260320163559.178101-3-objecting@objecting.org
Link: https://lkml.kernel.org/r/20260321175427.86000-4-sj@kernel.org
Link: https://github.com/damonitor/damo [1]
Fixes: d809a7c64b ("mm/damon/sysfs: implement refresh_ms file internal work")
Signed-off-by: Josh Law <objecting@objecting.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[6.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:38 -07:00
Josh Law
1bfe9fb5ed mm/damon/sysfs: check contexts->nr before accessing contexts_arr[0]
Multiple sysfs command paths dereference contexts_arr[0] without first
verifying that kdamond->contexts->nr == 1.  A user can set nr_contexts to
0 via sysfs while DAMON is running, causing NULL pointer dereferences.

In more detail, the issue can be triggered by privileged users like
below.

First, start DAMON and make contexts directory empty
(kdamond->contexts->nr == 0).

    # damo start
    # cd /sys/kernel/mm/damon/admin/kdamonds/0
    # echo 0 > contexts/nr_contexts

Then, each of below commands will cause the NULL pointer dereference.

    # echo update_schemes_stats > state
    # echo update_schemes_tried_regions > state
    # echo update_schemes_tried_bytes > state
    # echo update_schemes_effective_quotas > state
    # echo update_tuned_intervals > state

Guard all commands (except OFF) at the entry point of
damon_sysfs_handle_cmd().

Link: https://lkml.kernel.org/r/20260321175427.86000-3-sj@kernel.org
Fixes: 0ac32b8aff ("mm/damon/sysfs: support DAMOS stats")
Signed-off-by: Josh Law <objecting@objecting.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[5.18+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:38 -07:00
Josh Law
7fe000eb32 mm/damon/sysfs: fix param_ctx leak on damon_sysfs_new_test_ctx() failure
Patch series "mm/damon/sysfs: fix memory leak and NULL dereference
issues", v4.

DAMON_SYSFS can leak memory under allocation failure, and do NULL pointer
dereference when a privileged user make wrong sequences of control.  Fix
those.


This patch (of 3):

When damon_sysfs_new_test_ctx() fails in damon_sysfs_commit_input(),
param_ctx is leaked because the early return skips the cleanup at the out
label.  Destroy param_ctx before returning.

Link: https://lkml.kernel.org/r/20260321175427.86000-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20260321175427.86000-2-sj@kernel.org
Fixes: f0c5118ebb ("mm/damon/sysfs: catch commit test ctx alloc failure")
Signed-off-by: Josh Law <objecting@objecting.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[6.18+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:37 -07:00
Alexandre Ghiti
9e0d0ddfbc mm/swap: fix swap cache memcg accounting
The swap readahead path was recently refactored and while doing this, the
order between the charging of the folio in the memcg and the addition of
the folio in the swap cache was inverted.

Since the accounting of the folio is done while adding the folio to the
swap cache and the folio is not charged in the memcg yet, the accounting
is then done at the node level, which is wrong.

Fix this by charging the folio in the memcg before adding it to the swap cache.

Link: https://lkml.kernel.org/r/20260320050601.1833108-1-alex@ghiti.fr
Fixes: 2732acda82 ("mm, swap: use swap cache as the swap in synchronize layer")
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Acked-by: Kairui Song <kasong@tencent.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:37 -07:00
Harry Yoo (Oracle)
26d3dca201 MAINTAINERS, mailmap: update email address for Harry Yoo
Update my email address to harry@kernel.org.

Link: https://lkml.kernel.org/r/20260320125925.2259998-1-harry@kernel.org
Signed-off-by: Harry Yoo (Oracle) <harry@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:37 -07:00
Jinjiang Tu
4c5e7f0fcd mm/huge_memory: fix folio isn't locked in softleaf_to_folio()
On arm64 server, we found folio that get from migration entry isn't locked
in softleaf_to_folio().  This issue triggers when mTHP splitting and
zap_nonpresent_ptes() races, and the root cause is lack of memory barrier
in softleaf_to_folio().  The race is as follows:

	CPU0                                             CPU1

deferred_split_scan()                              zap_nonpresent_ptes()
  lock folio
  split_folio()
    unmap_folio()
      change ptes to migration entries
    __split_folio_to_order()                         softleaf_to_folio()
      set flags(including PG_locked) for tail pages    folio = pfn_folio(softleaf_to_pfn(entry))
      smp_wmb()                                        VM_WARN_ON_ONCE(!folio_test_locked(folio))
      prep_compound_page() for tail pages

In __split_folio_to_order(), smp_wmb() guarantees page flags of tail pages
are visible before the tail page becomes non-compound.  smp_wmb() should
be paired with smp_rmb() in softleaf_to_folio(), which is missed.  As a
result, if zap_nonpresent_ptes() accesses migration entry that stores tail
pfn, softleaf_to_folio() may see the updated compound_head of tail page
before page->flags.

This issue will trigger VM_WARN_ON_ONCE() in pfn_swap_entry_folio()
because of the race between folio split and zap_nonpresent_ptes()
leading to a folio incorrectly undergoing modification without a folio
lock being held.

This is a BUG_ON() before commit 93976a2034 ("mm: eliminate further
swapops predicates"), which in merged in v6.19-rc1.

To fix it, add missing smp_rmb() if the softleaf entry is migration entry
in softleaf_to_folio() and softleaf_to_page().

[tujinjiang@huawei.com: update function name and comments]
  Link: https://lkml.kernel.org/r/20260321075214.3305564-1-tujinjiang@huawei.com
Link: https://lkml.kernel.org/r/20260319012541.4158561-1-tujinjiang@huawei.com
Fixes: e9b61f1985 ("thp: reintroduce split_huge_page()")
Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Barry Song <baohua@kernel.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nanyong Sun <sunnanyong@huawei.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-27 20:48:37 -07:00
Xiang Mei
5d17af9eb2 selftests/tc-testing: add test for HFSC divide-by-zero in rtsc_min()
Add a regression test for the divide-by-zero in rtsc_min() triggered
when m2sm() converts a large m1 value (e.g. 32gbit) to a u64 scaled
slope reaching 2^32. rtsc_min() stores the difference of two such u64
values (sm1 - sm2) in a u32 variable `dsm`, truncating 2^32 to zero
and causing a divide-by-zero oops in the concave-curve intersection
path. The test configures an HFSC class with m1=32gbit d=1ms m2=0bit,
sends a packet to activate the class, waits for it to drain and go
idle, then sends another packet to trigger reactivation through
rtsc_min().

Signed-off-by: Xiang Mei <xmei5@asu.edu>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260326204310.1549327-2-xmei5@asu.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:41:11 -07:00
Xiang Mei
4576100b8c net/sched: sch_hfsc: fix divide-by-zero in rtsc_min()
m2sm() converts a u32 slope to a u64 scaled value.  For large inputs
(e.g. m1=4000000000), the result can reach 2^32.  rtsc_min() stores
the difference of two such u64 values in a u32 variable `dsm` and
uses it as a divisor.  When the difference is exactly 2^32 the
truncation yields zero, causing a divide-by-zero oops in the
concave-curve intersection path:

  Oops: divide error: 0000
  RIP: 0010:rtsc_min (net/sched/sch_hfsc.c:601)
  Call Trace:
   init_ed (net/sched/sch_hfsc.c:629)
   hfsc_enqueue (net/sched/sch_hfsc.c:1569)
   [...]

Widen `dsm` to u64 and replace do_div() with div64_u64() so the full
difference is preserved.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260326204310.1549327-1-xmei5@asu.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:41:11 -07:00
Theodore Ts'o
9ee29d20aa ext4: always drain queued discard work in ext4_mb_release()
While reviewing recent ext4 patch[1], Sashiko raised the following
concern[2]:

> If the filesystem is initially mounted with the discard option,
> deleting files will populate sbi->s_discard_list and queue
> s_discard_work. If it is then remounted with nodiscard, the
> EXT4_MOUNT_DISCARD flag is cleared, but the pending s_discard_work is
> neither cancelled nor flushed.

[1] https://lore.kernel.org/r/20260319094545.19291-1-qiang.zhang@linux.dev/
[2] https://sashiko.dev/#/patchset/20260319094545.19291-1-qiang.zhang%40linux.dev

The concern was valid, but it had nothing to do with the patch[1].
One of the problems with Sashiko in its current (early) form is that
it will detect pre-existing issues and report it as a problem with the
patch that it is reviewing.

In practice, it would be hard to hit deliberately (unless you are a
malicious syzkaller fuzzer), since it would involve mounting the file
system with -o discard, and then deleting a large number of files,
remounting the file system with -o nodiscard, and then immediately
unmounting the file system before the queued discard work has a change
to drain on its own.

Fix it because it's a real bug, and to avoid Sashiko from raising this
concern when analyzing future patches to mballoc.c.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Fixes: 55cdd0af2b ("ext4: get discard out of jbd2 commit kthread contex")
Cc: stable@kernel.org
2026-03-27 23:39:10 -04:00
Theodore Ts'o
bb81702370 ext4: handle wraparound when searching for blocks for indirect mapped blocks
Commit 4865c768b5 ("ext4: always allocate blocks only from groups
inode can use") restricts what blocks will be allocated for indirect
block based files to block numbers that fit within 32-bit block
numbers.

However, when using a review bot running on the latest Gemini LLM to
check this commit when backporting into an LTS based kernel, it raised
this concern:

   If ac->ac_g_ex.fe_group is >= ngroups (for instance, if the goal
   group was populated via stream allocation from s_mb_last_groups),
   then start will be >= ngroups.

   Does this allow allocating blocks beyond the 32-bit limit for
   indirect block mapped files? The commit message mentions that
   ext4_mb_scan_groups_linear() takes care to not select unsupported
   groups. However, its loop uses group = *start, and the very first
   iteration will call ext4_mb_scan_group() with this unsupported
   group because next_linear_group() is only called at the end of the
   iteration.

After reviewing the code paths involved and considering the LLM
review, I determined that this can happen when there is a file system
where some files/directories are extent-mapped and others are
indirect-block mapped.  To address this, add a safety clamp in
ext4_mb_scan_groups().

Fixes: 4865c768b5 ("ext4: always allocate blocks only from groups inode can use")
Cc: Jan Kara <jack@suse.cz>
Reviewed-by: Baokun Li <libaokun@linux.alibaba.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Link: https://patch.msgid.link/20260326045834.1175822-1-tytso@mit.edu
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:39:02 -04:00
hongao
3ceda17325 ext4: skip split extent recovery on corruption
ext4_split_extent_at() retries after ext4_ext_insert_extent() fails by
refinding the original extent and restoring its length. That recovery is
only safe for transient resource failures such as -ENOSPC, -EDQUOT, and
-ENOMEM.

When ext4_ext_insert_extent() fails because the extent tree is already
corrupted, ext4_find_extent() can return a leaf path without p_ext.
ext4_split_extent_at() then dereferences path[depth].p_ext while trying to
fix up the original extent length, causing a NULL pointer dereference while
handling a pre-existing filesystem corruption.

Do not enter the recovery path for corruption errors, and validate p_ext
after refinding the extent before touching it. This keeps the recovery path
limited to cases it can actually repair and turns the syzbot-triggered crash
into a proper corruption report.

Fixes: 716b9c23b8 ("ext4: refactor split and convert extents")
Reported-by: syzbot+1ffa5d865557e51cb604@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1ffa5d865557e51cb604
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Signed-off-by: hongao <hongao@uniontech.com>
Link: https://patch.msgid.link/EF77870F23FF9C90+20260324015815.35248-1-hongao@uniontech.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:38:52 -04:00
Baokun Li
ec0a7500d8 ext4: fix iloc.bh leak in ext4_fc_replay_inode() error paths
During code review, Joseph found that ext4_fc_replay_inode() calls
ext4_get_fc_inode_loc() to get the inode location, which holds a
reference to iloc.bh that must be released via brelse().

However, several error paths jump to the 'out' label without
releasing iloc.bh:

 - ext4_handle_dirty_metadata() failure
 - sync_dirty_buffer() failure
 - ext4_mark_inode_used() failure
 - ext4_iget() failure

Fix this by introducing an 'out_brelse' label placed just before
the existing 'out' label to ensure iloc.bh is always released.

Additionally, make ext4_fc_replay_inode() propagate errors
properly instead of always returning 0.

Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Fixes: 8016e29f43 ("ext4: fast commit recovery path")
Signed-off-by: Baokun Li <libaokun@linux.alibaba.com>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260323060836.3452660-1-libaokun@linux.alibaba.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:38:01 -04:00
Jan Kara
0c90eed1b9 ext4: fix deadlock on inode reallocation
Currently there is a race in ext4 when reallocating freed inode
resulting in a deadlock:

Task1					Task2
ext4_evict_inode()
  handle = ext4_journal_start();
  ...
  if (IS_SYNC(inode))
    handle->h_sync = 1;
  ext4_free_inode()
					ext4_new_inode()
					  handle = ext4_journal_start()
					  finds the bit in inode bitmap
					    already clear
					  insert_inode_locked()
					    waits for inode to be
					      removed from the hash.
  ext4_journal_stop(handle)
    jbd2_journal_stop(handle)
      jbd2_log_wait_commit(journal, tid);
        - deadlocks waiting for transaction handle Task2 holds

Fix the problem by removing inode from the hash already in
ext4_clear_inode() by which time all IO for the inode is done so reuse
is already fine but we are still before possibly blocking on transaction
commit.

Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
Link: https://lore.kernel.org/all/abNvb2PcrKj1FBeC@ly-workstation
Fixes: 88ec797c46 ("fs: make insert_inode_locked() wait for inode destruction")
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260320090428.24899-2-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:37:48 -04:00
Jiayuan Chen
d15e4b0a41 ext4: fix use-after-free in update_super_work when racing with umount
Commit b98535d091 ("ext4: fix bug_on in start_this_handle during umount
filesystem") moved ext4_unregister_sysfs() before flushing s_sb_upd_work
to prevent new error work from being queued via /proc/fs/ext4/xx/mb_groups
reads during unmount. However, this introduced a use-after-free because
update_super_work calls ext4_notify_error_sysfs() -> sysfs_notify() which
accesses the kobject's kernfs_node after it has been freed by kobject_del()
in ext4_unregister_sysfs():

  update_super_work                ext4_put_super
  -----------------                --------------
                                   ext4_unregister_sysfs(sb)
                                     kobject_del(&sbi->s_kobj)
                                       __kobject_del()
                                         sysfs_remove_dir()
                                           kobj->sd = NULL
                                         sysfs_put(sd)
                                           kernfs_put()  // RCU free
  ext4_notify_error_sysfs(sbi)
    sysfs_notify(&sbi->s_kobj)
      kn = kobj->sd              // stale pointer
      kernfs_get(kn)             // UAF on freed kernfs_node
                                   ext4_journal_destroy()
                                     flush_work(&sbi->s_sb_upd_work)

Instead of reordering the teardown sequence, fix this by making
ext4_notify_error_sysfs() detect that sysfs has already been torn down
by checking s_kobj.state_in_sysfs, and skipping the sysfs_notify() call
in that case. A dedicated mutex (s_error_notify_mutex) serializes
ext4_notify_error_sysfs() against kobject_del() in ext4_unregister_sysfs()
to prevent TOCTOU races where the kobject could be deleted between the
state_in_sysfs check and the sysfs_notify() call.

Fixes: b98535d091 ("ext4: fix bug_on in start_this_handle during umount filesystem")
Cc: Jiayuan Chen <jiayuan.chen@linux.dev>
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260319120336.157873-1-jiayuan.chen@linux.dev
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:37:39 -04:00
Jakub Kicinski
1a6fdb3518 Merge branch 'bridge-vxlan-harden-nd-option-parsing-paths'
Yang Yang says:

====================
bridge/vxlan: harden ND option parsing paths

This series hardens ND option parsing in bridge and vxlan paths.

Patch 1 linearizes the request skb in br_nd_send() before walking ND
options. Patch 2 adds explicit ND option length validation in
br_nd_send(). Patch 3 adds matching ND option length validation in
vxlan_na_create().
====================

Link: https://patch.msgid.link/20260326034441.2037420-1-n05ec@lzu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:37:17 -07:00
afa9a05e6c vxlan: validate ND option lengths in vxlan_na_create
vxlan_na_create() walks ND options according to option-provided
lengths. A malformed option can make the parser advance beyond the
computed option span or use a too-short source LLADDR option payload.

Validate option lengths against the remaining NS option area before
advancing, and only read source LLADDR when the option is large enough
for an Ethernet address.

Fixes: 4b29dba9c0 ("vxlan: fix nonfunctional neigh_reduce()")
Cc: stable@vger.kernel.org
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Tested-by: Ao Zhou <n05ec@lzu.edu.cn>
Co-developed-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Yang Yang <n05ec@lzu.edu.cn>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260326034441.2037420-4-n05ec@lzu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:37:14 -07:00
850837965a bridge: br_nd_send: validate ND option lengths
br_nd_send() walks ND options according to option-provided lengths.
A malformed option can make the parser advance beyond the computed
option span or use a too-short source LLADDR option payload.

Validate option lengths against the remaining NS option area before
advancing, and only read source LLADDR when the option is large enough
for an Ethernet address.

Fixes: ed842faeb2 ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports")
Cc: stable@vger.kernel.org
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Tested-by: Ao Zhou <n05ec@lzu.edu.cn>
Co-developed-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Yang Yang <n05ec@lzu.edu.cn>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260326034441.2037420-3-n05ec@lzu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:37:14 -07:00
a01aee7caf bridge: br_nd_send: linearize skb before parsing ND options
br_nd_send() parses neighbour discovery options from ns->opt[] and
assumes that these options are in the linear part of request.

Its callers only guarantee that the ICMPv6 header and target address
are available, so the option area can still be non-linear. Parsing
ns->opt[] in that case can access data past the linear buffer.

Linearize request before option parsing and derive ns from the linear
network header.

Fixes: ed842faeb2 ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Tested-by: Ao Zhou <n05ec@lzu.edu.cn>
Co-developed-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Yang Yang <n05ec@lzu.edu.cn>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260326034441.2037420-2-n05ec@lzu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:37:14 -07:00
Zqiang
496bb99b7e ext4: fix the might_sleep() warnings in kvfree()
Use the kvfree() in the RCU read critical section can trigger
the following warnings:

EXT4-fs (vdb): unmounting filesystem cd983e5b-3c83-4f5a-a136-17b00eb9d018.

WARNING: suspicious RCU usage

./include/linux/rcupdate.h:409 Illegal context switch in RCU read-side critical section!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1

Call Trace:
 <TASK>
 dump_stack_lvl+0xbb/0xd0
 dump_stack+0x14/0x20
 lockdep_rcu_suspicious+0x15a/0x1b0
 __might_resched+0x375/0x4d0
 ? put_object.part.0+0x2c/0x50
 __might_sleep+0x108/0x160
 vfree+0x58/0x910
 ? ext4_group_desc_free+0x27/0x270
 kvfree+0x23/0x40
 ext4_group_desc_free+0x111/0x270
 ext4_put_super+0x3c8/0xd40
 generic_shutdown_super+0x14c/0x4a0
 ? __pfx_shrinker_free+0x10/0x10
 kill_block_super+0x40/0x90
 ext4_kill_sb+0x6d/0xb0
 deactivate_locked_super+0xb4/0x180
 deactivate_super+0x7e/0xa0
 cleanup_mnt+0x296/0x3e0
 __cleanup_mnt+0x16/0x20
 task_work_run+0x157/0x250
 ? __pfx_task_work_run+0x10/0x10
 ? exit_to_user_mode_loop+0x6a/0x550
 exit_to_user_mode_loop+0x102/0x550
 do_syscall_64+0x44a/0x500
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
 </TASK>

BUG: sleeping function called from invalid context at mm/vmalloc.c:3441
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556, name: umount
preempt_count: 1, expected: 0
CPU: 3 UID: 0 PID: 556 Comm: umount
Call Trace:
 <TASK>
 dump_stack_lvl+0xbb/0xd0
 dump_stack+0x14/0x20
 __might_resched+0x275/0x4d0
 ? put_object.part.0+0x2c/0x50
 __might_sleep+0x108/0x160
 vfree+0x58/0x910
 ? ext4_group_desc_free+0x27/0x270
 kvfree+0x23/0x40
 ext4_group_desc_free+0x111/0x270
 ext4_put_super+0x3c8/0xd40
 generic_shutdown_super+0x14c/0x4a0
 ? __pfx_shrinker_free+0x10/0x10
 kill_block_super+0x40/0x90
 ext4_kill_sb+0x6d/0xb0
 deactivate_locked_super+0xb4/0x180
 deactivate_super+0x7e/0xa0
 cleanup_mnt+0x296/0x3e0
 __cleanup_mnt+0x16/0x20
 task_work_run+0x157/0x250
 ? __pfx_task_work_run+0x10/0x10
 ? exit_to_user_mode_loop+0x6a/0x550
 exit_to_user_mode_loop+0x102/0x550
 do_syscall_64+0x44a/0x500
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

The above scenarios occur in initialization failures and teardown
paths, there are no parallel operations on the resources released
by kvfree(), this commit therefore remove rcu_read_lock/unlock() and
use rcu_access_pointer() instead of rcu_dereference() operations.

Fixes: 7c990728b9 ("ext4: fix potential race between s_flex_groups online resizing and access")
Fixes: df3da4ea5a ("ext4: fix potential race between s_group_info online resizing and access")
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Reviewed-by: Baokun Li <libaokun@linux.alibaba.com>
Link: https://patch.msgid.link/20260319094545.19291-1-qiang.zhang@linux.dev
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:36:20 -04:00
Helen Koike
3822743dc2 ext4: reject mount if bigalloc with s_first_data_block != 0
bigalloc with s_first_data_block != 0 is not supported, reject mounting
it.

Signed-off-by: Helen Koike <koike@igalia.com>
Suggested-by: Theodore Ts'o <tytso@mit.edu>
Reported-by: syzbot+b73703b873a33d8eb8f6@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=b73703b873a33d8eb8f6
Link: https://patch.msgid.link/20260317142325.135074-1-koike@igalia.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:36:11 -04:00
Ye Bin
9e1b14320b ext4: fix extents-test.c is not compiled when EXT4_KUNIT_TESTS=M
Now, only EXT4_KUNIT_TESTS=Y testcase will be compiled in 'extents.c'.
To solve this issue, the ext4 test code needs to be decoupled. The
'extents-test' module is compiled into 'ext4-test' module.

Fixes: cb1e0c1d1f ("ext4: kunit tests for extent splitting and conversion")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260314075258.1317579-4-yebin@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2026-03-27 23:36:06 -04:00
Ye Bin
519b76ac0b ext4: fix mballoc-test.c is not compiled when EXT4_KUNIT_TESTS=M
Now, only EXT4_KUNIT_TESTS=Y testcase will be compiled in 'mballoc.c'.
To solve this issue, the ext4 test code needs to be decoupled. The ext4
test module is compiled into a separate module.

Reported-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Closes: https://patchwork.kernel.org/project/cifs-client/patch/20260118091313.1988168-2-chenxiaosong.chenxiaosong@linux.dev/
Fixes: 7c9fa399a3 ("ext4: add first unit test for ext4_mb_new_blocks_simple in mballoc")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260314075258.1317579-3-yebin@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2026-03-27 23:36:02 -04:00
Ye Bin
49504a5125 ext4: introduce EXPORT_SYMBOL_FOR_EXT4_TEST() helper
Introduce EXPORT_SYMBOL_FOR_EXT4_TEST() helper for kuint test.

Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260314075258.1317579-2-yebin@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2026-03-27 23:34:19 -04:00
Milos Nikic
bac3190a8e jbd2: gracefully abort on checkpointing state corruptions
This patch targets two internal state machine invariants in checkpoint.c
residing inside functions that natively return integer error codes.

- In jbd2_cleanup_journal_tail(): A blocknr of 0 indicates a severely
corrupted journal superblock. Replaced the J_ASSERT with a WARN_ON_ONCE
and a graceful journal abort, returning -EFSCORRUPTED.

- In jbd2_log_do_checkpoint(): Replaced the J_ASSERT_BH checking for
an unexpected buffer_jwrite state. If the warning triggers, we
explicitly drop the just-taken get_bh() reference and call __flush_batch()
to safely clean up any previously queued buffers in the j_chkpt_bhs array,
preventing a memory leak before returning -EFSCORRUPTED.

Signed-off-by: Milos Nikic <nikic.milos@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Baokun Li <libaokun@linux.alibaba.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260311041548.159424-1-nikic.milos@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:34:09 -04:00
Edward Adam Davis
5422fe71d2 ext4: avoid infinite loops caused by residual data
On the mkdir/mknod path, when mapping logical blocks to physical blocks,
if inserting a new extent into the extent tree fails (in this example,
because the file system disabled the huge file feature when marking the
inode as dirty), ext4_ext_map_blocks() only calls ext4_free_blocks() to
reclaim the physical block without deleting the corresponding data in
the extent tree. This causes subsequent mkdir operations to reference
the previously reclaimed physical block number again, even though this
physical block is already being used by the xattr block. Therefore, a
situation arises where both the directory and xattr are using the same
buffer head block in memory simultaneously.

The above causes ext4_xattr_block_set() to enter an infinite loop about
"inserted" and cannot release the inode lock, ultimately leading to the
143s blocking problem mentioned in [1].

If the metadata is corrupted, then trying to remove some extent space
can do even more harm. Also in case EXT4_GET_BLOCKS_DELALLOC_RESERVE
was passed, remove space wrongly update quota information.
Jan Kara suggests distinguishing between two cases:

1) The error is ENOSPC or EDQUOT - in this case the filesystem is fully
consistent and we must maintain its consistency including all the
accounting. However these errors can happen only early before we've
inserted the extent into the extent tree. So current code works correctly
for this case.

2) Some other error - this means metadata is corrupted. We should strive to
do as few modifications as possible to limit damage. So I'd just skip
freeing of allocated blocks.

[1]
INFO: task syz.0.17:5995 blocked for more than 143 seconds.
Call Trace:
 inode_lock_nested include/linux/fs.h:1073 [inline]
 __start_dirop fs/namei.c:2923 [inline]
 start_dirop fs/namei.c:2934 [inline]

Reported-by: syzbot+512459401510e2a9a39f@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1659aaaaa8d9d11265d7
Tested-by: syzbot+1659aaaaa8d9d11265d7@syzkaller.appspotmail.com
Reported-by: syzbot+1659aaaaa8d9d11265d7@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=512459401510e2a9a39f
Tested-by: syzbot+1659aaaaa8d9d11265d7@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: syzbot+512459401510e2a9a39f@syzkaller.appspotmail.com
Link: https://patch.msgid.link/tencent_43696283A68450B761D76866C6F360E36705@qq.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:33:56 -04:00
Tejas Bharambe
2acb5c12eb ext4: validate p_idx bounds in ext4_ext_correct_indexes
ext4_ext_correct_indexes() walks up the extent tree correcting
index entries when the first extent in a leaf is modified. Before
accessing path[k].p_idx->ei_block, there is no validation that
p_idx falls within the valid range of index entries for that
level.

If the on-disk extent header contains a corrupted or crafted
eh_entries value, p_idx can point past the end of the allocated
buffer, causing a slab-out-of-bounds read.

Fix this by validating path[k].p_idx against EXT_LAST_INDEX() at
both access sites: before the while loop and inside it. Return
-EFSCORRUPTED if the index pointer is out of range, consistent
with how other bounds violations are handled in the ext4 extent
tree code.

Reported-by: syzbot+04c4e65cab786a2e5b7e@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=04c4e65cab786a2e5b7e
Signed-off-by: Tejas Bharambe <tejas.bharambe@outlook.com>
Link: https://patch.msgid.link/JH0PR06MB66326016F9B6AD24097D232B897CA@JH0PR06MB6632.apcprd06.prod.outlook.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:33:46 -04:00
Ye Bin
73bf12adbe ext4: test if inode's all dirty pages are submitted to disk
The commit aa373cf550 ("writeback: stop background/kupdate works from
livelocking other works") introduced an issue where unmounting a filesystem
in a multi-logical-partition scenario could lead to batch file data loss.
This problem was not fixed until the commit d92109891f ("fs/writeback:
bail out if there is no more inodes for IO and queued once"). It took
considerable time to identify the root cause. Additionally, in actual
production environments, we frequently encountered file data loss after
normal system reboots. Therefore, we are adding a check in the inode
release flow to verify whether all dirty pages have been flushed to disk,
in order to determine whether the data loss is caused by a logic issue in
the filesystem code.

Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260303012242.3206465-1-yebin@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:33:34 -04:00
Ojaswin Mujoo
c4a48e9eee ext4: minor fix for ext4_split_extent_zeroout()
We missed storing the error which triggerd smatch warning:

	fs/ext4/extents.c:3369 ext4_split_extent_zeroout()
	warn: duplicate zero check 'err' (previous on line 3363)

fs/ext4/extents.c
    3361
    3362         err = ext4_ext_get_access(handle, inode, path + depth);
    3363         if (err)
    3364                 return err;
    3365
    3366         ext4_ext_mark_initialized(ex);
    3367
    3368         ext4_ext_dirty(handle, inode, path + depth);
--> 3369         if (err)
    3370                 return err;
    3371
    3372         return 0;
    3373 }

Fix it by correctly storing the err value from ext4_ext_dirty().

Link: https://lore.kernel.org/all/aYXvVgPnKltX79KE@stanley.mountain/
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Fixes: a985e07c26 ("ext4: refactor zeroout path and handle all cases")
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Baokun Li <libaokun@linux.alibaba.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Link: https://patch.msgid.link/20260302143811.605174-1-ojaswin@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:33:22 -04:00
Ye Bin
46066e3a06 ext4: avoid allocate block from corrupted group in ext4_mb_find_by_goal()
There's issue as follows:
...
EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117
EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost

EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117
EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost

EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117
EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost

EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117
EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost

EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 2243 at logical offset 0 with max blocks 1 with error 117
EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost

EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 2239 at logical offset 0 with max blocks 1 with error 117
EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost

EXT4-fs (mmcblk0p1): error count since last fsck: 1
EXT4-fs (mmcblk0p1): initial error at time 1765597433: ext4_mb_generate_buddy:760
EXT4-fs (mmcblk0p1): last error at time 1765597433: ext4_mb_generate_buddy:760
...

According to the log analysis, blocks are always requested from the
corrupted block group. This may happen as follows:
ext4_mb_find_by_goal
  ext4_mb_load_buddy
   ext4_mb_load_buddy_gfp
     ext4_mb_init_cache
      ext4_read_block_bitmap_nowait
      ext4_wait_block_bitmap
       ext4_validate_block_bitmap
        if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
         return -EFSCORRUPTED; // There's no logs.
 if (err)
  return err;  // Will return error
ext4_lock_group(ac->ac_sb, group);
  if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) // Unreachable
   goto out;

After commit 9008a58e5d ("ext4: make the bitmap read routines return
real error codes") merged, Commit 163a203ddb ("ext4: mark block group
as corrupt on block bitmap error") is no real solution for allocating
blocks from corrupted block groups. This is because if
'EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)' is true, then
'ext4_mb_load_buddy()' may return an error. This means that the block
allocation will fail.
Therefore, check block group if corrupted when ext4_mb_load_buddy()
returns error.

Fixes: 163a203ddb ("ext4: mark block group as corrupt on block bitmap error")
Fixes: 9008a58e5d ("ext4: make the bitmap read routines return real error codes")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260302134619.3145520-1-yebin@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:33:08 -04:00
Ritesh Harjani (IBM)
afe376d2c1 ext4: kunit: extents-test: lix percpu_counters list corruption
commit 82f80e2e3b ("ext4: add extent status cache support to kunit tests"),
added ext4_es_register_shrinker() in extents_kunit_init() function but
failed to add the unregister shrinker routine in extents_kunit_exit().

This could cause the following percpu_counters list corruption bug.

         ok 1 split unwrit extent to 2 extents and convert 1st half writ
  slab kmalloc-4k start c0000002007ff000 pointer offset 1448 size 4096
 list_add corruption. next->prev should be prev (c000000004bc9e60), but was 0000000000000000. (next=c0000002007ff5a8).
 ------------[ cut here ]------------
 kernel BUG at lib/list_debug.c:29!
cpu 0x2: Vector: 700 (Program Check) at [c000000241927a30]
    pc: c000000000f26ed0: __list_add_valid_or_report+0x120/0x164
    lr: c000000000f26ecc: __list_add_valid_or_report+0x11c/0x164
    sp: c000000241927cd0
   msr: 800000000282b033
  current = 0xc000000241215200
  paca    = 0xc0000003fffff300   irqmask: 0x03   irq_happened: 0x09
    pid   = 258, comm = kunit_try_catch
kernel BUG at lib/list_debug.c:29!
enter ? for help
 __percpu_counter_init_many+0x148/0x184
 ext4_es_register_shrinker+0x74/0x23c
 extents_kunit_init+0x100/0x308
 kunit_try_run_case+0x78/0x1f8
 kunit_generic_run_threadfn_adapter+0x40/0x70
 kthread+0x190/0x1a0
 start_kernel_thread+0x14/0x18
2:mon>

This happens because:

extents_kunit_init(test N):
  ext4_es_register_shrinker(sbi)
    percpu_counters_init() x 4; // this adds 4 list nodes to global percpu_counters list
      list_add(&fbc->list, &percpu_counters);
    shrinker_register();

extents_kunit_exit(test N):
  kfree(sbi);			// frees sbi w/o removing those 4 list nodes.
  				// So, those list node now becomes dangling pointers

extents_kunit_init(test N+1):
  kzalloc_obj(ext4_sb_info)	// allocator returns same page, but zeroed.
  ext4_es_register_shrinker(sbi)
    percpu_counters_init()
      list_add(&fbc->list, &percpu_counters);
        __list_add_valid(new, prev, next);
	next->prev != prev 		// list corruption bug detected, since next->prev = NULL

Fixes: 82f80e2e3b ("ext4: add extent status cache support to kunit tests")
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Link: https://patch.msgid.link/5bb9041471dab8ce870c191c19cbe4df57473be8.1772381213.git.ritesh.list@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:32:23 -04:00
Li Chen
1aec30021e ext4: publish jinode after initialization
ext4_inode_attach_jinode() publishes ei->jinode to concurrent users.
It used to set ei->jinode before jbd2_journal_init_jbd_inode(),
allowing a reader to observe a non-NULL jinode with i_vfs_inode
still unset.

The fast commit flush path can then pass this jinode to
jbd2_wait_inode_data(), which dereferences i_vfs_inode->i_mapping and
may crash.

Below is the crash I observe:
```
BUG: unable to handle page fault for address: 000000010beb47f4
PGD 110e51067 P4D 110e51067 PUD 0
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 1 UID: 0 PID: 4850 Comm: fc_fsync_bench_ Not tainted 6.18.0-00764-g795a690c06a5 #1 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.17.0-2-2 04/01/2014
RIP: 0010:xas_find_marked+0x3d/0x2e0
Code: e0 03 48 83 f8 02 0f 84 f0 01 00 00 48 8b 47 08 48 89 c3 48 39 c6 0f 82 fd 01 00 00 48 85 c9 74 3d 48 83 f9 03 77 63 4c 8b 0f <49> 8b 71 08 48 c7 47 18 00 00 00 00 48 89 f1 83 e1 03 48 83 f9 02
RSP: 0018:ffffbbee806e7bf0 EFLAGS: 00010246
RAX: 000000000010beb4 RBX: 000000000010beb4 RCX: 0000000000000003
RDX: 0000000000000001 RSI: 0000002000300000 RDI: ffffbbee806e7c10
RBP: 0000000000000001 R08: 0000002000300000 R09: 000000010beb47ec
R10: ffff9ea494590090 R11: 0000000000000000 R12: 0000002000300000
R13: ffffbbee806e7c90 R14: ffff9ea494513788 R15: ffffbbee806e7c88
FS: 00007fc2f9e3e6c0(0000) GS:ffff9ea6b1444000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000010beb47f4 CR3: 0000000119ac5000 CR4: 0000000000750ef0
PKRU: 55555554
Call Trace:
<TASK>
filemap_get_folios_tag+0x87/0x2a0
__filemap_fdatawait_range+0x5f/0xd0
? srso_alias_return_thunk+0x5/0xfbef5
? __schedule+0x3e7/0x10c0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? preempt_count_sub+0x5f/0x80
? srso_alias_return_thunk+0x5/0xfbef5
? cap_safe_nice+0x37/0x70
? srso_alias_return_thunk+0x5/0xfbef5
? preempt_count_sub+0x5f/0x80
? srso_alias_return_thunk+0x5/0xfbef5
filemap_fdatawait_range_keep_errors+0x12/0x40
ext4_fc_commit+0x697/0x8b0
? ext4_file_write_iter+0x64b/0x950
? srso_alias_return_thunk+0x5/0xfbef5
? preempt_count_sub+0x5f/0x80
? srso_alias_return_thunk+0x5/0xfbef5
? vfs_write+0x356/0x480
? srso_alias_return_thunk+0x5/0xfbef5
? preempt_count_sub+0x5f/0x80
ext4_sync_file+0xf7/0x370
do_fsync+0x3b/0x80
? syscall_trace_enter+0x108/0x1d0
__x64_sys_fdatasync+0x16/0x20
do_syscall_64+0x62/0x2c0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
...
```

Fix this by initializing the jbd2_inode first.
Use smp_wmb() and WRITE_ONCE() to publish ei->jinode after
initialization. Readers use READ_ONCE() to fetch the pointer.

Fixes: a361293f5f ("jbd2: Fix oops in jbd2_journal_file_inode()")
Cc: stable@vger.kernel.org
Signed-off-by: Li Chen <me@linux.beauty>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260225082617.147957-1-me@linux.beauty
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:32:07 -04:00
Yuto Ohnuki
356227096e ext4: replace BUG_ON with proper error handling in ext4_read_inline_folio
Replace BUG_ON() with proper error handling when inline data size
exceeds PAGE_SIZE. This prevents kernel panic and allows the system to
continue running while properly reporting the filesystem corruption.

The error is logged via ext4_error_inode(), the buffer head is released
to prevent memory leak, and -EFSCORRUPTED is returned to indicate
filesystem corruption.

Signed-off-by: Yuto Ohnuki <ytohnuki@amazon.com>
Link: https://patch.msgid.link/20260223123345.14838-2-ytohnuki@amazon.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:31:52 -04:00
Jan Kara
1308255bbf ext4: fix fsync(2) for nojournal mode
When inode metadata is changed, we sometimes just call
ext4_mark_inode_dirty() to track modified metadata. This copies inode
metadata into block buffer which is enough when we are journalling
metadata. However when we are running in nojournal mode we currently
fail to write the dirtied inode buffer during fsync(2) because the inode
is not marked as dirty. Use explicit ext4_write_inode() call to make
sure the inode table buffer is written to the disk. This is a band aid
solution but proper solution requires a much larger rewrite including
changes in metadata bh tracking infrastructure.

Reported-by: Free Ekanayaka <free.ekanayaka@gmail.com>
Link: https://lore.kernel.org/all/87il8nhxdm.fsf@x1.mail-host-address-is-not-set/
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20260216164848.3074-4-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:31:43 -04:00
Jan Kara
bd060afa7c ext4: make recently_deleted() properly work with lazy itable initialization
recently_deleted() checks whether inode has been used in the near past.
However this can give false positive result when inode table is not
initialized yet and we are in fact comparing to random garbage (or stale
itable block of a filesystem before mkfs). Ultimately this results in
uninitialized inodes being skipped during inode allocation and possibly
they are never initialized and thus e2fsck complains.  Verify if the
inode has been initialized before checking for dtime.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20260216164848.3074-3-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:30:37 -04:00
Jakub Kicinski
c11c731a68 Merge branch 'fix-page-fragment-handling-when-page_size-4k'
Dimitri Daskalakis says:

====================
Fix page fragment handling when PAGE_SIZE > 4K

FBNIC operates on fixed size descriptors (4K). When the OS supports pages
larger than 4K, we fragment the page across multiple descriptors.

While performance testing, I found several issues with our page fragment
handling, resulting in low throughput and potential RX stalls.
====================

Link: https://patch.msgid.link/20260324195123.3486219-1-dimitri.daskalakis1@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:30:26 -07:00
Simon Weber
b1d682f199 ext4: fix journal credit check when setting fscrypt context
Fix an issue arising when ext4 features has_journal, ea_inode, and encrypt
are activated simultaneously, leading to ENOSPC when creating an encrypted
file.

Fix by passing XATTR_CREATE flag to xattr_set_handle function if a handle
is specified, i.e., when the function is called in the control flow of
creating a new inode. This aligns the number of jbd2 credits set_handle
checks for with the number allocated for creating a new inode.

ext4_set_context must not be called with a non-null handle (fs_data) if
fscrypt context xattr is not guaranteed to not exist yet. The only other
usage of this function currently is when handling the ioctl
FS_IOC_SET_ENCRYPTION_POLICY, which calls it with fs_data=NULL.

Fixes: c1a5d5f6ab ("ext4: improve journal credit handling in set xattr paths")

Co-developed-by: Anthony Durrer <anthonydev@fastmail.com>
Signed-off-by: Anthony Durrer <anthonydev@fastmail.com>
Signed-off-by: Simon Weber <simon.weber.39@gmail.com>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Link: https://patch.msgid.link/20260207100148.724275-4-simon.weber.39@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:30:25 -04:00
Dimitri Daskalakis
f3567dd428 eth: fbnic: Fix debugfs output for BDQ's with page frags
The rings size_mask represents the number of pages, so we need to
determine the number of page frags when dumping the descriptors.

Fixes: df04373b0d ("eth fbnic: Add debugfs hooks for tx/rx rings")
Signed-off-by: Dimitri Daskalakis <daskald@meta.com>
Link: https://patch.msgid.link/20260324195123.3486219-3-dimitri.daskalakis1@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:30:23 -07:00
Dimitri Daskalakis
b38c55320b eth: fbnic: Account for page fragments when updating BDQ tail
FBNIC supports fixed size buffers of 4K. When PAGE_SIZE > 4K, we
fragment the page across multiple descriptors (FBNIC_BD_FRAG_COUNT).
When refilling the BDQ, the correct number of entries are populated,
but tail was only incremented by one. So on a system with 64K pages,
HW would get one descriptor refilled for every 16 we populate.

Additionally, we program the ring size in the HW when enabling the BDQ.
This was not accounting for page fragments, so on systems with 64K pages,
the HW used 1/16th of the ring.

Fixes: 0cb4c0a137 ("eth: fbnic: Implement Rx queue alloc/start/stop/free")
Signed-off-by: Dimitri Daskalakis <daskald@meta.com>
Link: https://patch.msgid.link/20260324195123.3486219-2-dimitri.daskalakis1@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:30:23 -07:00
Deepanshu Kartikey
ed9356a30e ext4: convert inline data to extents when truncate exceeds inline size
Add a check in ext4_setattr() to convert files from inline data storage
to extent-based storage when truncate() grows the file size beyond the
inline capacity. This prevents the filesystem from entering an
inconsistent state where the inline data flag is set but the file size
exceeds what can be stored inline.

Without this fix, the following sequence causes a kernel BUG_ON():

1. Mount filesystem with inode that has inline flag set and small size
2. truncate(file, 50MB) - grows size but inline flag remains set
3. sendfile() attempts to write data
4. ext4_write_inline_data() hits BUG_ON(write_size > inline_capacity)

The crash occurs because ext4_write_inline_data() expects inline storage
to accommodate the write, but the actual inline capacity (~60 bytes for
i_block + ~96 bytes for xattrs) is far smaller than the file size and
write request.

The fix checks if the new size from setattr exceeds the inode's actual
inline capacity (EXT4_I(inode)->i_inline_size) and converts the file to
extent-based storage before proceeding with the size change.

This addresses the root cause by ensuring the inline data flag and file
size remain consistent during truncate operations.

Reported-by: syzbot+7de5fe447862fc37576f@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=7de5fe447862fc37576f
Tested-by: syzbot+7de5fe447862fc37576f@syzkaller.appspotmail.com
Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
Link: https://patch.msgid.link/20260207043607.1175976-1-kartikey406@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:29:54 -04:00
Jan Kara
f4a2b42e78 ext4: fix stale xarray tags after writeback
There are cases where ext4_bio_write_page() gets called for a page which
has no buffers to submit. This happens e.g. when the part of the file is
actually a hole, when we cannot allocate blocks due to being called from
jbd2, or in data=journal mode when checkpointing writes the buffers
earlier. In these cases we just return from ext4_bio_write_page()
however if the page didn't need redirtying, we will leave stale DIRTY
and/or TOWRITE tags in xarray because those get cleared only in
__folio_start_writeback(). As a result we can leave these tags set in
mappings even after a final sync on filesystem that's getting remounted
read-only or that's being frozen. Various assertions can then get upset
when writeback is started on such filesystems (Gerald reported assertion
in ext4_journal_check_start() firing).

Fix the problem by cycling the page through writeback state even if we
decide nothing needs to be written for it so that xarray tags get
properly updated. This is slightly silly (we could update the xarray
tags directly) but I don't think a special helper messing with xarray
tags is really worth it in this relatively rare corner case.

Reported-by: Gerald Yang <gerald.yang@canonical.com>
Link: https://lore.kernel.org/all/20260128074515.2028982-1-gerald.yang@canonical.com
Fixes: dff4ac75ee ("ext4: move keep_towrite handling to ext4_bio_write_page()")
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260205092223.21287-2-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:29:39 -04:00
Eric Dumazet
2edfa31769 ip6_tunnel: clear skb2->cb[] in ip4ip6_err()
Oskar Kjos reported the following problem.

ip4ip6_err() calls icmp_send() on a cloned skb whose cb[] was written
by the IPv6 receive path as struct inet6_skb_parm. icmp_send() passes
IPCB(skb2) to __ip_options_echo(), which interprets that cb[] region
as struct inet_skb_parm (IPv4). The layouts differ: inet6_skb_parm.nhoff
at offset 14 overlaps inet_skb_parm.opt.rr, producing a non-zero rr
value. __ip_options_echo() then reads optlen from attacker-controlled
packet data at sptr[rr+1] and copies that many bytes into dopt->__data,
a fixed 40-byte stack buffer (IP_OPTIONS_DATA_FIXED_SIZE).

To fix this we clear skb2->cb[], as suggested by Oskar Kjos.

Also add minimal IPv4 header validation (version == 4, ihl >= 5).

Fixes: c4d3efafcc ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.")
Reported-by: Oskar Kjos <oskar.kjos@hotmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20260326155138.2429480-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:24:12 -07:00
Eric Dumazet
86ab3e5567 ipv6: icmp: clear skb2->cb[] in ip6_err_gen_icmpv6_unreach()
Sashiko AI-review observed:

  In ip6_err_gen_icmpv6_unreach(), the skb is an outer IPv4 ICMP error packet
  where its cb contains an IPv4 inet_skb_parm. When skb is cloned into skb2
  and passed to icmp6_send(), it uses IP6CB(skb2).

  IP6CB interprets the IPv4 inet_skb_parm as an inet6_skb_parm. The cipso
  offset in inet_skb_parm.opt directly overlaps with dsthao in inet6_skb_parm
  at offset 18.

  If an attacker sends a forged ICMPv4 error with a CIPSO IP option, dsthao
  would be a non-zero offset. Inside icmp6_send(), mip6_addr_swap() is called
  and uses ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO).

  This would scan the inner, attacker-controlled IPv6 packet starting at that
  offset, potentially returning a fake TLV without checking if the remaining
  packet length can hold the full 18-byte struct ipv6_destopt_hao.

  Could mip6_addr_swap() then perform a 16-byte swap that extends past the end
  of the packet data into skb_shared_info?

  Should the cb array also be cleared in ip6_err_gen_icmpv6_unreach() and
  ip6ip6_err() to prevent this?

This patch implements the first suggestion.

I am not sure if ip6ip6_err() needs to be changed.
A separate patch would be better anyway.

Fixes: ca15a078bd ("sit: generate icmpv6 error when receiving icmpv4 error")
Reported-by: Ido Schimmel <idosch@nvidia.com>
Closes: https://sashiko.dev/#/patchset/20260326155138.2429480-1-edumazet%40google.com
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Oskar Kjos <oskar.kjos@hotmail.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20260326202608.2976021-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-27 20:22:46 -07:00
Zhang Yi
84e21e3fb8 ext4: do not check fast symlink during orphan recovery
Commit '5f920d5d6083 ("ext4: verify fast symlink length")' causes the
generic/475 test to fail during orphan cleanup of zero-length symlinks.

  generic/475  84s ... _check_generic_filesystem: filesystem on /dev/vde is inconsistent

The fsck reports are provided below:

  Deleted inode 9686 has zero dtime.
  Deleted inode 158230 has zero dtime.
  ...
  Inode bitmap differences:  -9686 -158230
  Orphan file (inode 12) block 13 is not clean.
  Failed to initialize orphan file.

In ext4_symlink(), a newly created symlink can be added to the orphan
list due to ENOSPC. Its data has not been initialized, and its size is
zero. Therefore, we need to disregard the length check of the symbolic
link when cleaning up orphan inodes. Instead, we should ensure that the
nlink count is zero.

Fixes: 5f920d5d60 ("ext4: verify fast symlink length")
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260131091156.1733648-1-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
2026-03-27 23:14:25 -04:00
Theodore Ts'o
82c4f23d27 Update MAINTAINERS file to add reviewers for ext4
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
2026-03-27 23:14:20 -04:00
Linus Torvalds
be762d8b6d Merge tag 'hwmon-for-v7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
Pull hwmon fixes from Guenter Roeck:

 - PMBus driver fixes:
     - Add mutex protection for regulator operations
     - Fix reading from "write-only" attributes
     - Mark lowest/average/highest/rated attributes as read-only
     - isl68137: Add mutex protection for AVS enable sysfs attributes
     - ina233:  Fix error handling and sign extension when reading shunt voltage

 - adm1177: Fix sysfs ABI violation and current unit conversion

 - peci: Fix off-by-one in cputemp_is_visible(), and crit_hyst returning
   delta instead of absolute temperature

* tag 'hwmon-for-v7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging:
  hwmon: (pmbus/core) Protect regulator operations with mutex
  hwmon: (pmbus) Introduce the concept of "write-only" attributes
  hwmon: (pmbus) Mark lowest/average/highest/rated attributes as read-only
  hwmon: (adm1177) fix sysfs ABI violation and current unit conversion
  hwmon: (peci/cputemp) Fix off-by-one in cputemp_is_visible()
  hwmon: (peci/cputemp) Fix crit_hyst returning delta instead of absolute temperature
  hwmon: (pmbus/isl68137) Add mutex protection for AVS enable sysfs attributes
  hwmon: (pmbus/ina233) Fix error handling and sign extension in shunt voltage read
2026-03-27 20:02:34 -07:00
Linus Torvalds
afb54c1404 Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI fixes from James Bottomley:
 "Driver (and enclosure) only fixes. Most are obvious. The big change is
  in the tcm_loop driver to add command draining to error handling (the
  lack of which was causing hangs with the potential for double use
  crashes)"

* tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi:
  scsi: target: file: Use kzalloc_flex for aio_cmd
  scsi: scsi_transport_sas: Fix the maximum channel scanning issue
  scsi: target: tcm_loop: Drain commands in target_reset handler
  scsi: ibmvfc: Fix OOB access in ibmvfc_discover_targets_done()
  scsi: ses: Handle positive SCSI error from ses_recv_diag()
2026-03-27 19:58:22 -07:00
Linus Torvalds
26df51adf3 Merge tag 'drm-fixes-2026-03-28-1' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "Weekly fixes, still a bit busy, but the usual suspects amdgpu and
  i915/xe have a bunch of small fixes, and otherwise it's just a few
  minor driver fixes.

  loognsoon:
   - update MAINTAINERS

  shmem:
   - fault handler fix

  syncobj:
   - fix GFP flags

  amdgpu:
   - DSC fix
   - Module parameter parsing fix
   - PASID reuse fix
   - drm_edid leak fix
   - SMU 13.x fixes
   - SMU 14.x fix
   - Fence fix in amdgpu_amdkfd_submit_ib()
   - LVDS fixes
   - GPU page fault fix for non-4K pages

  amdkfd:
   - Ordering fix in kfd_ioctl_create_process()

  i915/display:
   - DP tunnel error handling fix
   - Spurious GMBUS timeout fix
   - Unlink NV12 planes earlier
   - Order OP vs. timeout correctly in __wait_for()

  xe:
   - Fix UAF in SRIOV migration restore
   - Updates to HW W/a
   - VMBind remap fix

  ivpu:
   - poweroff fix

  mediatek:
   - fix register ordering"

* tag 'drm-fixes-2026-03-28-1' of https://gitlab.freedesktop.org/drm/kernel: (25 commits)
  MAINTAINERS: Update GPU driver maintainer information
  drm/xe: always keep track of remap prev/next
  drm/syncobj: Fix xa_alloc allocation flags
  drm/amd/display: Fix DCE LVDS handling
  drm/amdgpu: Handle GPU page faults correctly on non-4K page systems
  drm/amd/pm: disable OD_FAN_CURVE if temp or pwm range invalid for smu v14
  drm/amdkfd: Fix NULL pointer check order in kfd_ioctl_create_process
  drm/amd/display: check if ext_caps is valid in BL setup
  drm/amdgpu: Fix fence put before wait in amdgpu_amdkfd_submit_ib
  drm/xe: Implement recent spec updates to Wa_16025250150
  accel/ivpu: Add disable clock relinquish workaround for NVL-A0
  drm/i915/dp_tunnel: Fix error handling when clearing stream BW in atomic state
  drm/amd/pm: disable OD_FAN_CURVE if temp or pwm range invalid for smu v13
  drm/amd/pm: Return -EOPNOTSUPP for unsupported OD_MCLK on smu_v13_0_6
  drm/amd/pm: Skip redundant UCLK restore in smu_v13_0_6
  drm/amd/display: Fix drm_edid leak in amdgpu_dm
  drm/amdgpu: prevent immediate PASID reuse case
  drm/amdgpu: fix strsep() corrupting lockup_timeout on multi-GPU (v3)
  drm/amd/display: Do not skip unrelated mode changes in DSC validation
  drm/xe/pf: Fix use-after-free in migration restore
  ...
2026-03-27 17:21:37 -07:00
Vasily Gorbik
0738d395aa s390/entry: Scrub r12 register on kernel entry
Before commit f33f2d4c7c ("s390/bp: remove TIF_ISOLATE_BP"),
all entry handlers loaded r12 with the current task pointer
(lg %r12,__LC_CURRENT) for use by the BPENTER/BPEXIT macros. That
commit removed TIF_ISOLATE_BP, dropping both the branch prediction
macros and the r12 load, but did not add r12 to the register clearing
sequence.

Add the missing xgr %r12,%r12 to make the register scrub consistent
across all entry points.

Fixes: f33f2d4c7c ("s390/bp: remove TIF_ISOLATE_BP")
Cc: stable@kernel.org
Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-28 00:43:39 +01:00
Greg Kroah-Hartman
48b8814e25 s390/syscalls: Add spectre boundary for syscall dispatch table
The s390 syscall number is directly controlled by userspace, but does
not have an array_index_nospec() boundary to prevent access past the
syscall function pointer tables.

Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Fixes: 56e62a7370 ("s390: convert to generic entry")
Cc: stable@kernel.org
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Link: https://lore.kernel.org/r/2026032404-sterling-swoosh-43e6@gregkh
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-28 00:43:39 +01:00
Vasily Gorbik
c5c0a268b3 s390/barrier: Make array_index_mask_nospec() __always_inline
Mark array_index_mask_nospec() as __always_inline to guarantee the
mitigation is emitted inline regardless of compiler inlining decisions.

Fixes: e2dd833389 ("s390: add optimized array_index_mask_nospec")
Cc: stable@kernel.org
Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-28 00:43:24 +01:00
Linus Torvalds
335c9017e3 Merge tag 'spi-fix-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
Pull spi fixes from Mark Brown:
 "There are two core fixes here. One is from Johan dealing with an issue
  introduced by a devm_ API usage update causing things to be freed
  earlier than they had earlier when we fail to register a device,
  another from Danilo avoids unlocked acccess to data by converting to
  use a driver core API.

  We also have a few relatively minor driver specific fixes"

* tag 'spi-fix-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi:
  spi: spi-fsl-lpspi: fix teardown order issue (UAF)
  spi: fix use-after-free on managed registration failure
  spi: use generic driver_override infrastructure
  spi: meson-spicc: Fix double-put in remove path
  spi: sn-f-ospi: Use devm_mutex_init() to simplify code
  spi: sn-f-ospi: Fix resource leak in f_ospi_probe()
2026-03-27 16:38:55 -07:00
Linus Torvalds
cd0bbd5a66 Merge tag 'regulator-fix-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
Pull regulator fix from Mark Brown:
 "A fix from Alice for the rust bindings, they didn't handle the stub
  implementation of the C API used when CONFIG_REGULATOR is disabled
  leading to undefined behaviour"

* tag 'regulator-fix-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator:
  rust: regulator: do not assume that regulator_get() returns non-null
2026-03-27 16:36:23 -07:00
Linus Torvalds
30052002e6 Merge tag 'regmap-fix-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap
Pull regmap fix from Mark Brown:
 "A fix from Andy Shevchenko for an issue with caching of page selector
  registers which are located inside the page they are switching"

* tag 'regmap-fix-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap:
  regmap: Synchronize cache for the page selector
2026-03-27 16:34:25 -07:00
Linus Torvalds
dd09eb4433 Merge tag 'tsm-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/devsec/tsm
Pull tsm fix from Dan Williams:

 - Fix a VMM controlled buffer length used to emit TDX attestation
   reports

* tag 'tsm-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/devsec/tsm:
  virt: tdx-guest: Fix handling of host controlled 'quote' buffer length
2026-03-27 16:19:51 -07:00
Linus Torvalds
faf44e54f6 Merge tag 'vfio-v7.0-rc6' of https://github.com/awilliam/linux-vfio
Pull VFIO fix from Alex Williamson:

 - Fix double-free and reference count underflow if dma-buf file
   allocation fails (Alex Williamson)

* tag 'vfio-v7.0-rc6' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Fix double free in dma-buf feature
2026-03-27 15:59:30 -07:00
Linus Torvalds
56bea42415 Merge tag 'efi-fixes-for-v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi
Pull EFI fix from Ard Biesheuvel:
 "Fix a potential buffer overrun issue introduced by the previous fix
  for EFI boot services region reservations on x86"

* tag 'efi-fixes-for-v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi:
  x86/efi: efi_unmap_boot_services: fix calculation of ranges_to_free size
2026-03-27 15:55:25 -07:00
Linus Torvalds
a361474ba3 Merge tag 'loongarch-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
Pull LoongArch fixes from Huacai Chen:
 "Fix missing NULL checks for kstrdup(), workaround LS2K/LS7A GPU
  DMA hang bug, emit GNU_EH_FRAME for vDSO correctly, and fix some
  KVM-related bugs"

* tag 'loongarch-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson:
  LoongArch: KVM: Fix base address calculation in kvm_eiointc_regs_access()
  LoongArch: KVM: Handle the case that EIOINTC's coremap is empty
  LoongArch: KVM: Make kvm_get_vcpu_by_cpuid() more robust
  LoongArch: vDSO: Emit GNU_EH_FRAME correctly
  LoongArch: Workaround LS2K/LS7A GPU DMA hang bug
  LoongArch: Fix missing NULL checks for kstrdup()
2026-03-27 15:39:41 -07:00
Linus Torvalds
196ef74abd Merge tag 'io_uring-7.0-20260327' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:
 "Just two small fixes, both fixing regressions added in the fdinfo code
  in 6.19 with the SQE mixed size support"

* tag 'io_uring-7.0-20260327' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/fdinfo: fix OOB read in SQE_MIXED wrap check
  io_uring/fdinfo: fix SQE_MIXED SQE displaying
2026-03-27 15:35:38 -07:00
Dave Airlie
5ba61d8a25 Merge tag 'mediatek-drm-fixes-20260323' of https://git.kernel.org/pub/scm/linux/kernel/git/chunkuang.hu/linux into drm-fixes
Mediatek DRM Fixes - 20260323

1. dsi: Store driver data before invoking mipi_dsi_host_register

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Link: https://patch.msgid.link/20260323160135.39609-1-chunkuang.hu@kernel.org
2026-03-28 08:05:36 +10:00
Sean Christopherson
df83746075 KVM: x86/mmu: Only WARN in direct MMUs when overwriting shadow-present SPTE
Adjust KVM's sanity check against overwriting a shadow-present SPTE with a
another SPTE with a different target PFN to only apply to direct MMUs,
i.e. only to MMUs without shadowed gPTEs.  While it's impossible for KVM
to overwrite a shadow-present SPTE in response to a guest write, writes
from outside the scope of KVM, e.g. from host userspace, aren't detected
by KVM's write tracking and so can break KVM's shadow paging rules.

  ------------[ cut here ]------------
  pfn != spte_to_pfn(*sptep)
  WARNING: arch/x86/kvm/mmu/mmu.c:3069 at mmu_set_spte+0x1e4/0x440 [kvm], CPU#0: vmx_ept_stale_r/872
  Modules linked in: kvm_intel kvm irqbypass
  CPU: 0 UID: 1000 PID: 872 Comm: vmx_ept_stale_r Not tainted 7.0.0-rc2-eafebd2d2ab0-sink-vm #319 PREEMPT
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:mmu_set_spte+0x1e4/0x440 [kvm]
  Call Trace:
   <TASK>
   ept_page_fault+0x535/0x7f0 [kvm]
   kvm_mmu_do_page_fault+0xee/0x1f0 [kvm]
   kvm_mmu_page_fault+0x8d/0x620 [kvm]
   vmx_handle_exit+0x18c/0x5a0 [kvm_intel]
   kvm_arch_vcpu_ioctl_run+0xc55/0x1c20 [kvm]
   kvm_vcpu_ioctl+0x2d5/0x980 [kvm]
   __x64_sys_ioctl+0x8a/0xd0
   do_syscall_64+0xb5/0x730
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
   </TASK>
  ---[ end trace 0000000000000000 ]---

Fixes: 11d4517511 ("KVM: x86/mmu: Warn if PFN changes on shadow-present SPTE in shadow MMU")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
2026-03-27 22:33:33 +01:00
Sean Christopherson
aad885e774 KVM: x86/mmu: Drop/zap existing present SPTE even when creating an MMIO SPTE
When installing an emulated MMIO SPTE, do so *after* dropping/zapping the
existing SPTE (if it's shadow-present).  While commit a54aa15c6b was
right about it being impossible to convert a shadow-present SPTE to an
MMIO SPTE due to a _guest_ write, it failed to account for writes to guest
memory that are outside the scope of KVM.

E.g. if host userspace modifies a shadowed gPTE to switch from a memslot
to emulted MMIO and then the guest hits a relevant page fault, KVM will
install the MMIO SPTE without first zapping the shadow-present SPTE.

  ------------[ cut here ]------------
  is_shadow_present_pte(*sptep)
  WARNING: arch/x86/kvm/mmu/mmu.c:484 at mark_mmio_spte+0xb2/0xc0 [kvm], CPU#0: vmx_ept_stale_r/4292
  Modules linked in: kvm_intel kvm irqbypass
  CPU: 0 UID: 1000 PID: 4292 Comm: vmx_ept_stale_r Not tainted 7.0.0-rc2-eafebd2d2ab0-sink-vm #319 PREEMPT
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:mark_mmio_spte+0xb2/0xc0 [kvm]
  Call Trace:
   <TASK>
   mmu_set_spte+0x237/0x440 [kvm]
   ept_page_fault+0x535/0x7f0 [kvm]
   kvm_mmu_do_page_fault+0xee/0x1f0 [kvm]
   kvm_mmu_page_fault+0x8d/0x620 [kvm]
   vmx_handle_exit+0x18c/0x5a0 [kvm_intel]
   kvm_arch_vcpu_ioctl_run+0xc55/0x1c20 [kvm]
   kvm_vcpu_ioctl+0x2d5/0x980 [kvm]
   __x64_sys_ioctl+0x8a/0xd0
   do_syscall_64+0xb5/0x730
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
  RIP: 0033:0x47fa3f
   </TASK>
  ---[ end trace 0000000000000000 ]---

Reported-by: Alexander Bulekov <bkov@amazon.com>
Debugged-by: Alexander Bulekov <bkov@amazon.com>
Suggested-by: Fred Griffoul <fgriffo@amazon.co.uk>
Fixes: a54aa15c6b ("KVM: x86/mmu: Handle MMIO SPTEs directly in mmu_set_spte()")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
2026-03-27 22:33:33 +01:00
Paolo Bonzini
6c6ba54895 Merge tag 'kvm-s390-master-7.0-2' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: s390: More memory management fixes

Lots of small and not-so-small fixes for the newly rewritten gmap,
mostly affecting the handling of nested guests.
2026-03-27 22:30:44 +01:00
Eric Biggers
e5046823f8 lib/crypto: chacha: Zeroize permuted_state before it leaves scope
Since the ChaCha permutation is invertible, the local variable
'permuted_state' is sufficient to compute the original 'state', and thus
the key, even after the permutation has been done.

While the kernel is quite inconsistent about zeroizing secrets on the
stack (and some prominent userspace crypto libraries don't bother at all
since it's not guaranteed to work anyway), the kernel does try to do it
as a best practice, especially in cases involving the RNG.

Thus, explicitly zeroize 'permuted_state' before it goes out of scope.

Fixes: c08d0e6473 ("crypto: chacha20 - Add a generic ChaCha20 stream cipher implementation")
Cc: stable@vger.kernel.org
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260326032920.39408-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-27 13:35:35 -07:00
Linus Torvalds
7df48e3631 Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe:

 - Quite a few irdma bug fixes, several user triggerable

 - Fix a 0 SMAC header in ionic

 - Tolerate FW errors for RAAS in bng_re

 - Don't UAF in efa when printing error events

 - Better handle pool exhaustion in the new bvec paths

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/irdma: Harden depth calculation functions
  RDMA/irdma: Return EINVAL for invalid arp index error
  RDMA/irdma: Fix deadlock during netdev reset with active connections
  RDMA/irdma: Remove reset check from irdma_modify_qp_to_err()
  RDMA/irdma: Clean up unnecessary dereference of event->cm_node
  RDMA/irdma: Remove a NOP wait_event() in irdma_modify_qp_roce()
  RDMA/irdma: Update ibqp state to error if QP is already in error state
  RDMA/irdma: Initialize free_qp completion before using it
  RDMA/efa: Fix possible deadlock
  RDMA/rw: Fix MR pool exhaustion in bvec RDMA READ path
  RDMA/rw: Fall back to direct SGE on MR pool exhaustion
  RDMA/efa: Fix use of completion ctx after free
  RDMA/bng_re: Fix silent failure in HWRM version query
  RDMA/ionic: Preserve and set Ethernet source MAC after ib_ud_header_init()
  RDMA/irdma: Fix double free related to rereg_user_mr
2026-03-27 13:30:04 -07:00
Linus Torvalds
8af4fad545 Merge tag 'pci-v7.0-fixes-5' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
Pull pci fixes from Bjorn Helgaas:

 - Remove power-off from pwrctrl drivers since this is now done directly
   by the PCI controller drivers (Chen-Yu Tsai)

 - Fix pwrctrl device node leak (Felix Gu)

 - Document a TLP header decoder for AER log messages (Lukas Wunner)

* tag 'pci-v7.0-fixes-5' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci:
  Documentation: PCI: Document PCIe TLP Header decoder for AER messages
  PCI/pwrctrl: Fix pci_pwrctrl_is_required() device node leak
  PCI/pwrctrl: Do not power off on pwrctrl device removal
2026-03-27 13:25:58 -07:00
Linus Torvalds
83ce1c753f Merge tag 'sound-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
Pull sound fixes from Takashi Iwai:
 "This became slightly big partly due to my time off in the last week.
  But all changes are about device-specific fixes, so it should be
  safely applicable.

  ASoC:
   - Fix double free in sma1307
   - Fix uninitialized variables in simple-card-utils/imx-card
   - Address clock leaks and error propagation in ADAU1372
   - Add DMI quirks and ACP/SDW support for ASUS
   - Fix Intel CATPT DMA mask
   - Fix SOF topology parsing
   - Fix DT bindings for RK3576 SPDIF, STM32 SAI and WCD934x

  HD-audio:
   - Quirks for Lenovo, ASUS, and various HP models, as well as
     a speaker pop fix on Star Labs StarFighter
   - Revert MSI X870E Tomahawk denylist again

  USB-Audio:
   - Fix distorted audio on Focusrite Scarlett 2i2/2i4 1st Gen
   - Add iface reset quirk for AB17X
   - Update Qualcomm USB audio Kconfig dependencies and license

  Misc:
   - Fix minor compile warnings for firewire and asihpi drivers"

* tag 'sound-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound: (35 commits)
  Revert "ALSA: hda/intel: Add MSI X870E Tomahawk to denylist"
  ALSA: usb-audio: Add iface reset and delay quirk for AB17X USB Audio
  ALSA: hda/realtek: add HP Laptop 15-fd0xxx mute LED quirk
  ALSA: usb-audio: Exclude Scarlett 2i4 1st Gen from SKIP_IFACE_SETUP
  ALSA: hda/realtek: Add mute LED quirk for HP Pavilion 15-eg0xxx
  ALSA: hda/realtek - Fixed Speaker Mute LED for HP EliteBoard G1a platform
  ASoC: SOF: ipc4-topology: Allow bytes controls without initial payload
  ASoC: adau1372: Fix clock leak on PLL lock failure
  ASoC: adau1372: Fix unchecked clk_prepare_enable() return value
  ASoC: SDCA: fix finding wrong entity
  ASoC: SDCA: remove the max count of initialization table
  ASoC: codecs: wcd934x: fix typo in dt parsing
  ASoC: dt-bindings: stm32: Fix incorrect compatible string in stm32h7-sai match
  ASoC: Intel: catpt: Fix the device initialization
  ASoC: amd: acp: add ASUS HN7306EA quirk for legacy SDW machine
  ASoC: SOF: topology: reject invalid vendor array size in token parser
  ASoC: tas2781: Add null check for calibration data
  ALSA: asihpi: avoid write overflow check warning
  ASoC: fsl: imx-card: initialize playback_only and capture_only
  ASoC: simple-card-utils: Check value of is_playback_only and is_capture_only
  ...
2026-03-27 13:16:40 -07:00
Linus Torvalds
f44c65111e Merge tag 'media/v7.0-6' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
Pull media fixes from Mauro Carvalho Chehab:

 - uvcvideo may cause OOPS when out of memory

 - remove a deadlock in the ccs driver

* tag 'media/v7.0-6' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media:
  media: ccs: Avoid deadlock in ccs_init_state()
  media: uvcvideo: Fix bug in error path of uvc_alloc_urb_buffers
2026-03-27 13:10:49 -07:00
Linus Torvalds
0b8bf3b64e Merge tag 'sysctl-7.00-fixes-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl
Pull sysctl fix from Joel Granados:
 "Fix uninitialized variable error when writing to a sysctl bitmap

  Removed the possibility of returning an unjustified -EINVAL when
  writing to a sysctl bitmap"

* tag 'sysctl-7.00-fixes-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl:
  sysctl: fix uninitialized variable in proc_do_large_bitmap
2026-03-27 13:04:34 -07:00
Linus Torvalds
3577cfd738 Merge tag 'xfs-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino:
 "This includes a few important bug fixes, and some code refactoring
  that was necessary for one of the fixes"

* tag 'xfs-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: remove file_path tracepoint data
  xfs: don't irele after failing to iget in xfs_attri_recover_work
  xfs: remove redundant validation in xlog_recover_attri_commit_pass2
  xfs: fix ri_total validation in xlog_recover_attri_commit_pass2
  xfs: close crash window in attr dabtree inactivation
  xfs: factor out xfs_attr3_leaf_init
  xfs: factor out xfs_attr3_node_entry_remove
  xfs: only assert new size for datafork during truncate extents
  xfs: annotate struct xfs_attr_list_context with __counted_by_ptr
  xfs: cleanup buftarg handling in XFS_IOC_VERIFY_MEDIA
  xfs: scrub: unlock dquot before early return in quota scrub
  xfs: refactor xfsaild_push loop into helper
  xfs: save ailp before dropping the AIL lock in push callbacks
  xfs: avoid dereferencing log items after push callbacks
  xfs: stop reclaim before pushing AIL during unmount
2026-03-27 12:22:45 -07:00
Luo Haiyang
1f98857322 tracing: Fix potential deadlock in cpu hotplug with osnoise
The following sequence may leads deadlock in cpu hotplug:

    task1        task2        task3
    -----        -----        -----

 mutex_lock(&interface_lock)

            [CPU GOING OFFLINE]

            cpus_write_lock();
            osnoise_cpu_die();
              kthread_stop(task3);
                wait_for_completion();

                      osnoise_sleep();
                        mutex_lock(&interface_lock);

 cpus_read_lock();

 [DEAD LOCK]

Fix by swap the order of cpus_read_lock() and mutex_lock(&interface_lock).

Cc: stable@vger.kernel.org
Cc: <mathieu.desnoyers@efficios.com>
Cc: <zhang.run@zte.com.cn>
Cc: <yang.tao172@zte.com.cn>
Cc: <ran.xiaokai@zte.com.cn>
Fixes: bce29ac9ce ("trace: Add osnoise tracer")
Link: https://patch.msgid.link/20260326141953414bVSj33dAYktqp9Oiyizq8@zte.com.cn
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Luo Haiyang <luo.haiyang@zte.com.cn>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-27 15:18:06 -04:00
Linus Torvalds
34892992d0 Merge tag 'v7.0-rc5-ksmbd-srv-fixes' of git://git.samba.org/ksmbd
Pull smb server fixes from Steve French:

 - Fix out of bounds write

 - Fix for better calculating max output buffers

 - Fix memory leaks in SMB2/SMB3 lock

 - Fix use after free

 - Multichannel fix

* tag 'v7.0-rc5-ksmbd-srv-fixes' of git://git.samba.org/ksmbd:
  ksmbd: fix potencial OOB in get_file_all_info() for compound requests
  ksmbd: replace hardcoded hdr2_len with offsetof() in smb2_calc_max_out_buf_len()
  ksmbd: fix memory leaks and NULL deref in smb2_lock()
  ksmbd: fix use-after-free and NULL deref in smb_grant_oplock()
  ksmbd: do not expire session on binding failure
2026-03-27 12:03:39 -07:00
Jan Kara
102e57d56f udf: Fix race between file type conversion and writeback
udf_setsize() can race with udf_writepages() as follows:

udf_setsize()			udf_writepages()
				  if (iinfo->i_alloc_type ==
						ICBTAG_FLAG_AD_IN_ICB)
  err = udf_expand_file_adinicb(inode);
  err = udf_extend_file(inode, newsize);
				    udf_adinicb_writepages()
				      memcpy_from_file_folio() - crash
					because inode size is too big.

Fix the problem by checking the file type under folio lock in
udf_handle_page_wb() handler called from __mpage_writepages() which
properly serializes with udf_expand_file_adinicb().

Reported-by: Jianzhou Zhao <luckd0g@163.com>
Link: https://lore.kernel.org/all/f622c01.67ac.19cdbdd777d.Coremail.luckd0g@163.com
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260326140635.15895-4-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
2026-03-27 17:01:40 +01:00
Jan Kara
fffca572f9 mpage: Provide variant of mpage_writepages() with own optional folio handler
Some filesystems need to treat some folios specially (for example for
inodes with inline data). Doing the handling in their .writepages method
in a race-free manner results in duplicating some of the writeback
internals. So provide generalized version of mpage_writepages() that
allows filesystem to provide a handler called for each folio which can
handle the folio in a special way.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260326140635.15895-3-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
2026-03-27 17:01:36 +01:00
Zhang Heng
73ff3916d8 ALSA: hda/realtek: change quirk for HP OmniBook 7 Laptop 16-bh0xxx
HP OmniBook 7 Laptop 16-bh0xxx has the same PCI subsystem ID 0x103c8e60,
and the ALC245 on it needs this quirk to control the mute LED.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221214
Cc: <stable@vger.kernel.org>
Tested-by: Artem S. Tashkinov <aros@gmx.com>
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260327101215.481108-1-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-27 16:27:22 +01:00
Wolfram Sang
4c10830fda Merge tag 'i2c-host-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux into i2c/for-current
i2c-fixes for v7.0-rc6

designware: fix resume-probe race causing NULL-deref in amdisp
imx: fix timeout on repeated reads and extra clock at end
2026-03-27 16:20:24 +01:00
Pratap Nirujogi
e2f1ada8e0 i2c: designware: amdisp: Fix resume-probe race condition issue
Identified resume-probe race condition in kernel v7.0 with the commit
38fa29b01a ("i2c: designware: Combine the init functions"),but this
issue existed from the beginning though not detected.

The amdisp i2c device requires ISP to be in power-on state for probe
to succeed. To meet this requirement, this device is added to genpd
to control ISP power using runtime PM. The pm_runtime_get_sync() called
before i2c_dw_probe() triggers PM resume, which powers on ISP and also
invokes the amdisp i2c runtime resume before the probe completes resulting
in this race condition and a NULL dereferencing issue in v7.0

Fix this race condition by using the genpd APIs directly during probe:
  - Call dev_pm_genpd_resume() to Power ON ISP before probe
  - Call dev_pm_genpd_suspend() to Power OFF ISP after probe
  - Set the device to suspended state with pm_runtime_set_suspended()
  - Enable runtime PM only after the device is fully initialized

Fixes: d6263c468a ("i2c: amd-isp: Add ISP i2c-designware driver")
Co-developed-by: Bin Du <bin.du@amd.com>
Signed-off-by: Bin Du <bin.du@amd.com>
Signed-off-by: Pratap Nirujogi <pratap.nirujogi@amd.com>
Cc: <stable@vger.kernel.org> # v6.16+
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20260320201302.3490570-1-pratap.nirujogi@amd.com
2026-03-27 13:51:21 +01:00
Stefan Eichenberger
13101db735 i2c: imx: ensure no clock is generated after last read
When reading from the I2DR register, right after releasing the bus by
clearing MSTA and MTX, the I2C controller might still generate an
additional clock cycle which can cause devices to misbehave. Ensure to
only read from I2DR after the bus is not busy anymore. Because this
requires polling, the read of the last byte is moved outside of the
interrupt handler.

An example for such a failing transfer is this:
i2ctransfer -y -a 0 w1@0x00 0x02 r1
Error: Sending messages failed: Connection timed out
It does not happen with every device because not all devices react to
the additional clock cycle.

Fixes: 5f5c2d4579 ("i2c: imx: prevent rescheduling in non dma mode")
Cc: stable@vger.kernel.org # v6.13+
Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20260218150940.131354-3-eichest@gmail.com
2026-03-27 13:51:21 +01:00
Stefan Eichenberger
f88e2e748a i2c: imx: fix i2c issue when reading multiple messages
When reading multiple messages, meaning a repeated start is required,
polling the bus busy bit must be avoided. This must only be done for
the last message. Otherwise, the driver will timeout.

Here an example of such a sequence that fails with an error:
i2ctransfer -y -a 0 w1@0x00 0x02 r1 w1@0x00 0x02 r1
Error: Sending messages failed: Connection timed out

Fixes: 5f5c2d4579 ("i2c: imx: prevent rescheduling in non dma mode")
Cc: stable@vger.kernel.org # v6.13+
Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20260218150940.131354-2-eichest@gmail.com
2026-03-27 13:51:20 +01:00
David Carlier
5597dd284f net: ti: icssg-prueth: fix missing data copy and wrong recycle in ZC RX dispatch
emac_dispatch_skb_zc() allocates a new skb via napi_alloc_skb() but
never copies the packet data from the XDP buffer into it. The skb is
passed up the stack containing uninitialized heap memory instead of
the actual received packet, leaking kernel heap contents to userspace.

Copy the received packet data from the XDP buffer into the skb using
skb_copy_to_linear_data().

Additionally, remove the skb_mark_for_recycle() call since the skb is
backed by the NAPI page frag allocator, not page_pool. Marking a
non-page_pool skb for recycle causes the free path to return pages to
a page_pool that does not own them, corrupting page_pool state.

The non-ZC path (emac_rx_packet) does not have these issues because it
uses napi_build_skb() to wrap the existing page_pool page directly,
requiring no copy, and correctly marks for recycle since the page comes
from page_pool_dev_alloc_pages().

Fixes: 7a64bb388d ("net: ti: icssg-prueth: Add AF_XDP zero copy for RX")
Signed-off-by: David Carlier <devnexen@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2026-03-27 12:08:26 +00:00
Thomas Bogendoerfer
bb417456c7 tg3: Fix race for querying speed/duplex
When driver signals carrier up via netif_carrier_on() its internal
link_up state isn't updated immediately. This leads to inconsistent
speed/duplex in /proc/net/bonding/bondX where the speed and duplex
is shown as unknown while ethtool shows correct values. Fix this by
using netif_carrier_ok() for link checking in get_ksettings function.

Fixes: 84421b99ce ("tg3: Update link_up flag for phylib devices")
Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2026-03-27 12:06:38 +00:00
Pengpeng Hou
5e67ba9bb5 net/ipv6: ioam6: prevent schema length wraparound in trace fill
ioam6_fill_trace_data() stores the schema contribution to the trace
length in a u8. With bit 22 enabled and the largest schema payload,
sclen becomes 1 + 1020 / 4, wraps from 256 to 0, and bypasses the
remaining-space check. __ioam6_fill_trace_data() then positions the
write cursor without reserving the schema area but still copies the
4-byte schema header and the full schema payload, overrunning the trace
buffer.

Keep sclen in an unsigned int so the remaining-space check and the write
cursor calculation both see the full schema length.

Fixes: 8c6f6fa677 ("ipv6: ioam: IOAM Generic Netlink API")
Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Reviewed-by: Justin Iurman <justin.iurman@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2026-03-27 12:05:36 +00:00
Fei Lv
1f6ee9be92 ovl: make fsync after metadata copy-up opt-in mount option
Commit 7d6899fb69 ("ovl: fsync after metadata copy-up") was done to
fix durability of overlayfs copy up on an upper filesystem which does
not enforce ordering on storing of metadata changes (e.g. ubifs).

In an earlier revision of the regressing commit by Lei Lv, the metadata
fsync behavior was opt-in via a new "fsync=strict" mount option.
We were hoping that the opt-in mount option could be avoided, so the
change was only made to depend on metacopy=off, in the hope of not
hurting performance of metadata heavy workloads, which are more likely
to be using metacopy=on.

This hope was proven wrong by a performance regression report from Google
COS workload after upgrade to kernel 6.12.

This is an adaptation of Lei's original "fsync=strict" mount option
to the existing upstream code.

The new mount option is mutually exclusive with the "volatile" mount
option, so the latter is now an alias to the "fsync=volatile" mount
option.

Reported-by: Chenglong Tang <chenglongtang@google.com>
Closes: https://lore.kernel.org/linux-unionfs/CAOdxtTadAFH01Vui1FvWfcmQ8jH1O45owTzUcpYbNvBxnLeM7Q@mail.gmail.com/
Link: https://lore.kernel.org/linux-unionfs/CAOQ4uxgKC1SgjMWre=fUb00v8rxtd6sQi-S+dxR8oDzAuiGu8g@mail.gmail.com/
Fixes: 7d6899fb69 ("ovl: fsync after metadata copy-up")
Depends: 50e638beb6 ("ovl: Use str_on_off() helper in ovl_show_options()")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Fei Lv <feilv@asrmicro.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
2026-03-27 12:48:10 +01:00
Mario Limonciello
ed4da361bf Revert "ALSA: hda/intel: Add MSI X870E Tomahawk to denylist"
commit 30b3211aa2 ("ALSA: hda/intel: Add MSI X870E Tomahawk
to denylist") was added to silence a warning, but this effectively
reintroduced commit df42ee7e22 ("ALSA: hda: Add ASRock
X670E Taichi to denylist") which was already reported to cause
problems and reverted in commit ee8f161359 ("Revert "ALSA: hda:
Add ASRock X670E Taichi to denylist"")

Revert it yet again.

Cc: stable@vger.kernel.org
Reported-by: Juhyun Song <juju6985@outlook.kr>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221274
Cc: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/20260326190542.524515-1-mario.limonciello@amd.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-27 10:54:04 +01:00
Lianqin Hu
ee6c551a7d ALSA: usb-audio: Add iface reset and delay quirk for AB17X USB Audio
Setting up the interface when suspended/resumeing fail on this card.
Adding a reset and delay quirk will eliminate this problem.

usb 1-1: new full-speed USB device number 2 using xhci-hcd
usb 1-1: New USB device found, idVendor=001f, idProduct=0b23
usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
usb 1-1: Product: AB17X USB Audio
usb 1-1: Manufacturer: Generic
usb 1-1: SerialNumber: 20241228172028

Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/PUZPR06MB6224CA59AD2B26054120B276D249A@PUZPR06MB6224.apcprd06.prod.outlook.com
2026-03-27 10:50:57 +01:00
Kshamendra Kumar Mishra
faceb5cf5d ALSA: hda/realtek: add HP Laptop 15-fd0xxx mute LED quirk
HP Laptop 15-fd0xxx with ALC236 codec does not handle the toggling of
the mute LED.
This patch adds a quirk entry for subsystem ID 0x8dd7 using
ALC236_FIXUP_HP_MUTE_LED_COEFBIT2 fixup, enabling correct mute LED
behavior.

Signed-off-by: Kshamendra Kumar Mishra <kshamendrakumarmishra@gmail.com>
Link: https://patch.msgid.link/DHAB51ISUM96.2K9SZIABIDEQ0@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-27 10:43:56 +01:00
Geoffrey D. Bennett
990a8b0732 ALSA: usb-audio: Exclude Scarlett 2i4 1st Gen from SKIP_IFACE_SETUP
Same issue that the Scarlett 2i2 1st Gen had:
QUIRK_FLAG_SKIP_IFACE_SETUP causes distorted/flanging audio on the
Scarlett 2i4 1st Gen (1235:800a).

Fixes: 38c322068a ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP")
Reported-by: dcferreira [https://github.com/geoffreybennett/linux-fcp/issues/54]
Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://patch.msgid.link/acEkEbftzyNe8W7C@m.b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-27 10:43:14 +01:00
César Montoya
2f388b4e8f ALSA: hda/realtek: Add mute LED quirk for HP Pavilion 15-eg0xxx
The HP Pavilion 15-eg0xxx with subsystem ID 0x103c87cb uses a Realtek
ALC287 codec with a mute LED wired to GPIO pin 4 (mask 0x10). The
existing ALC287_FIXUP_HP_GPIO_LED fixup already handles this correctly,
but the subsystem ID was missing from the quirk table.

GPIO pin confirmed via manual hda-verb testing:
  hda-verb SET_GPIO_MASK 0x10
  hda-verb SET_GPIO_DIRECTION 0x10
  hda-verb SET_GPIO_DATA 0x10

Signed-off-by: César Montoya <sprit152009@gmail.com>
Link: https://patch.msgid.link/20260321153603.12771-1-sprit152009@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-27 10:31:47 +01:00
Kailang Yang
d3be95efc6 ALSA: hda/realtek - Fixed Speaker Mute LED for HP EliteBoard G1a platform
On the HP EliteBoard G1a platform (models without a headphone jack).
the speaker mute LED failed to function. The Sysfs ctl-led info showed
empty values because the standard LED registration couldn't correctly
bind to the master switch.
Adding this patch will fix and enable the speaker mute LED feature.

Tested-by: Chris Chiu <chris.chiu@canonical.com>
Signed-off-by: Kailang Yang <kailang@realtek.com>
Link: https://lore.kernel.org/279e929e884849df84687dbd67f20037@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-27 10:27:58 +01:00
Takashi Iwai
50c8f83c41 Merge tag 'asoc-fix-v7.0-rc5' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v7.0

This is two week's worth of fixes and quirks so it's a bit larger than
you might expect, there's nothing too exciting individually and nothing
in core code.
2026-03-27 10:16:52 +01:00
Guangshuo Li
7f138de156 auxdisplay: line-display: fix NULL dereference in linedisp_release
linedisp_release() currently retrieves the enclosing struct linedisp via
to_linedisp(). That lookup depends on the attachment list, but the
attachment may already have been removed before put_device() invokes the
release callback. This can happen in linedisp_unregister(), and can also
be reached from some linedisp_register() error paths.

In that case, to_linedisp() returns NULL and linedisp_release()
dereferences it while freeing the display resources.

The struct device released here is the embedded linedisp->dev used by
linedisp_register(), so retrieve the enclosing object directly with
container_of() instead.

Fixes: 66c9380948 ("auxdisplay: linedisp: encapsulate container_of usage within to_linedisp")
Cc: stable@vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
2026-03-27 09:54:31 +01:00
Sherry Yang
8b72aa5704 iommupt/amdv1: mark amdv1pt_install_leaf_entry as __always_inline
After enabling CONFIG_GCOV_KERNEL and CONFIG_GCOV_PROFILE_ALL, following
build failure is observed under GCC 14.2.1:

In function 'amdv1pt_install_leaf_entry',
    inlined from '__do_map_single_page' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:650:3,
    inlined from '__map_single_page0' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:661:1,
    inlined from 'pt_descend' at drivers/iommu/generic_pt/fmt/../pt_iter.h:391:9,
    inlined from '__do_map_single_page' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:657:10,
    inlined from '__map_single_page1.constprop' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:661:1:
././include/linux/compiler_types.h:706:45: error: call to '__compiletime_assert_71' declared with attribute error: FIELD_PREP: value too large for the field
  706 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |

......

drivers/iommu/generic_pt/fmt/amdv1.h:220:26: note: in expansion of macro 'FIELD_PREP'
  220 |                          FIELD_PREP(AMDV1PT_FMT_OA,
      |                          ^~~~~~~~~~

In the path '__do_map_single_page()', level 0 always invokes
'pt_install_leaf_entry(&pts, map->oa, PAGE_SHIFT, …)'. At runtime that
lands in the 'if (oasz_lg2 == isz_lg2)' arm of 'amdv1pt_install_leaf_entry()';
the contiguous-only 'else' block is unreachable for 4 KiB pages.

With CONFIG_GCOV_KERNEL + CONFIG_GCOV_PROFILE_ALL, the extra
instrumentation changes GCC's inlining so that the "dead" 'else' branch
still gets instantiated. The compiler constant-folds the contiguous OA
expression, runs the 'FIELD_PREP()' compile-time check, and produces:

    FIELD_PREP: value too large for the field

gcov-enabled builds therefore fail even though the code path never executes.

Fix this by marking amdv1pt_install_leaf_entry as __always_inline.

Fixes: dcd6a011a8 ("iommupt: Add map_pages op")
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sherry Yang <sherry.yang@oracle.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-27 09:41:48 +01:00
Jason Gunthorpe
ee6e69d032 iommupt: Fix short gather if the unmap goes into a large mapping
unmap has the odd behavior that it can unmap more than requested if the
ending point lands within the middle of a large or contiguous IOPTE.

In this case the gather should flush everything unmapped which can be
larger than what was requested to be unmapped. The gather was only
flushing the range requested to be unmapped, not extending to the extra
range, resulting in a short invalidation if the caller hits this special
condition.

This was found by the new invalidation/gather test I am adding in
preparation for ARMv8. Claude deduced the root cause.

As far as I remember nothing relies on unmapping a large entry, so this is
likely not a triggerable bug.

Cc: stable@vger.kernel.org
Fixes: 7c53f4238a ("iommupt: Add unmap_pages op")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-27 09:07:13 +01:00
Jason Gunthorpe
90c5def10b iommu: Do not call drivers for empty gathers
An empty gather is coded with start=U64_MAX, end=0 and several drivers go
on to convert that to a size with:

 end - start + 1

Which gives 2 for an empty gather. This then causes Weird Stuff to
happen (for example an UBSAN splat in VT-d) that is hopefully harmless,
but maybe not.

Prevent drivers from being called right in iommu_iotlb_sync().

Auditing shows that AMD, Intel, Mediatek and RSIC-V drivers all do things
on these empty gathers.

Further, there are several callers that can trigger empty gathers,
especially in unusual conditions. For example iommu_map_nosync() will call
a 0 size unmap on some error paths. Also in VFIO, iommupt and other
places.

Cc: stable@vger.kernel.org
Reported-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Closes: https://lore.kernel.org/r/11145826.aFP6jjVeTY@jkrzyszt-mobl2.ger.corp.intel.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-27 09:07:13 +01:00
Dave Airlie
83318d0c1f Merge tag 'drm-xe-fixes-2026-03-26' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
- Fix UAF in SRIOV migration restore (Winiarski)
- Updates to HW W/a (Roper)
- VMBind remap fix (Auld)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/acUgq2q2DrCUzFql@intel.com
2026-03-27 17:48:48 +10:00
Dave Airlie
aab01a8808 Merge tag 'drm-misc-fixes-2026-03-26' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
A page mapping fix for shmem fault handler, a power-off fix for ivpu, a
GFP_* flag fix for syncobj, and a MAINTAINERS update.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patch.msgid.link/20260326-lush-cuddly-limpet-ab2aa9@houat
2026-03-27 17:46:26 +10:00
Dave Airlie
355223cb84 Merge tag 'drm-intel-fixes-2026-03-26' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-fixes
- DP tunnel error handling fix
- Spurious GMBUS timeout fix
- Unlink NV12 planes earlier
- Order OP vs. timeout correctly in __wait_for()

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patch.msgid.link/acTdjAoOGkzl3dcc@jlahtine-mobl
2026-03-27 17:19:34 +10:00
Dave Airlie
7261c2fceb Merge tag 'amd-drm-fixes-7.0-2026-03-25' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.0-2026-03-25:

amdgpu:
- DSC fix
- Module parameter parsing fix
- PASID reuse fix
- drm_edid leak fix
- SMU 13.x fixes
- SMU 14.x fix
- Fence fix in amdgpu_amdkfd_submit_ib()
- LVDS fixes
- GPU page fault fix for non-4K pages

amdkfd:
- Ordering fix in kfd_ioctl_create_process()

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260325155600.4184877-1-alexander.deucher@amd.com
2026-03-27 16:57:05 +10:00
Yochai Eisenrich
ae05340cca net: ipv6: ndisc: fix ndisc_ra_useropt to initialize nduseropt_padX fields to zero to prevent an info-leak
When processing Router Advertisements with user options the kernel
builds an RTM_NEWNDUSEROPT netlink message. The nduseroptmsg struct
has three padding fields that are never zeroed and can leak kernel data

The fix is simple, just zeroes the padding fields.

Fixes: 31910575a9 ("[IPv6]: Export userland ND options through netlink (RDNSS support)")
Signed-off-by: Yochai Eisenrich <echelonh@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260324224925.2437775-1-echelonh@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:38:35 -07:00
Jiayuan Chen
2428083101 net: qrtr: replace qrtr_tx_flow radix_tree with xarray to fix memory leak
__radix_tree_create() allocates and links intermediate nodes into the
tree one by one. If a subsequent allocation fails, the already-linked
nodes remain in the tree with no corresponding leaf entry. These orphaned
internal nodes are never reclaimed because radix_tree_for_each_slot()
only visits slots containing leaf values.

The radix_tree API is deprecated in favor of xarray. As suggested by
Matthew Wilcox, migrate qrtr_tx_flow from radix_tree to xarray instead
of fixing the radix_tree itself [1]. xarray properly handles cleanup of
internal nodes — xa_destroy() frees all internal xarray nodes when the
qrtr_node is released, preventing the leak.

[1] https://lore.kernel.org/all/20260225071623.41275-1-jiayuan.chen@linux.dev/T/
Reported-by: syzbot+006987d1be3586e13555@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/000000000000bfba3a060bf4ffcf@google.com/T/
Fixes: 5fdeb0d372 ("net: qrtr: Implement outgoing flow control")
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260324080645.290197-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:22:38 -07:00
Jakub Kicinski
04f272188f Merge branch 'net-enetc-safely-reinitialize-tx-bd-ring-when-it-has-unsent-frames'
Wei Fang says:

====================
net: enetc: safely reinitialize TX BD ring when it has unsent frames

Currently the driver does not reset the producer index register (PIR) and
consumer index register (CIR) when initializing a TX BD ring. The driver
only reads the PIR and CIR and initializes the software indexes. If the
TX BD ring is reinitialized when it still contains unsent frames, its PIR
and CIR will not be equal after the reinitialization. However, the BDs
between CIR and PIR have been freed and become invalid and this can lead
to a hardware malfunction, causing the TX BD ring will not work properly.

Since the PIR and CIR are sofeware-configurable on ENETC v4. Therefore,
the driver must reset them if they are not equal when reinitializing
the TX BD ring.

However, resetting the PIR and CIR alone is insufficient, it cannot
completely solve the problem. When a link-down event occurs while the TX
BD ring is transmitting frames, subsequent reinitialization of the TX BD
ring may cause it to malfunction. Because enetc4_pl_mac_link_down() only
clears PMa_COMMAND_CONFIG[TX_EN] to disable MAC transmit data path. It
doesn't set PORT[TXDIS] to 1 to flush the TX BD ring. Therefore, it is
not safe to reinitialize the TX BD ring at this point.

To safely reinitialize the TX BD ring after a link-down event, we checked
with the NETC IP team, a proper Ethernet MAC graceful stop is necessary.
Therefore, add the Ethernet MAC graceful stop to the link-down event
handler enetc4_pl_mac_link_down(). Note that this patch set is not
applicable to ENETC v1 (LS1028A).
====================

Link: https://patch.msgid.link/20260324062121.2745033-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:19:09 -07:00
Wei Fang
f2df9567b1 net: enetc: do not access non-existent registers on pseudo MAC
The ENETC4_PM_IEVENT and ENETC4_PM_CMD_CFG registers do not exist on the
ENETC pseudo MAC, so the driver should prevent from accessing them.

Fixes: 5175c1e4ad ("net: enetc: add basic support for the ENETC with pseudo MAC for i.MX94")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Tested-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260324062121.2745033-4-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:19:06 -07:00
Wei Fang
2725d84efe net: enetc: add graceful stop to safely reinitialize the TX Ring
For ENETC v4, the PIR and CIR will be reset if they are not equal when
reinitializing the TX BD ring. However, resetting the PIR and CIR alone
is insufficient. When a link-down event occurs while the TX BD ring is
transmitting frames, subsequent reinitialization of the TX BD ring may
cause it to malfunction. For example, the below steps can reproduce the
problem.

1. Unplug the cable when the TX BD ring is busy transmitting frames.
2. Disable the network interface (ifconfig eth0 down).
3. Re-enable the network interface (ifconfig eth0 up).
4. Plug in the cable, the TX BD ring may fail to transmit packets.

When the link-down event occurs, enetc4_pl_mac_link_down() only clears
PMa_COMMAND_CONFIG[TX_EN] to disable MAC transmit data path. It doesn't
set PORT[TXDIS] to 1 to flush the TX BD ring. Therefore, reinitializing
the TX BD ring at this point is unsafe. To safely reinitialize the TX BD
ring after a link-down event, we checked with the NETC IP team, a proper
Ethernet MAC graceful stop is necessary. Therefore, add the Ethernet MAC
graceful stop to the link-down event handler enetc4_pl_mac_link_down().

Fixes: 99100d0d99 ("net: enetc: add preliminary support for i.MX95 ENETC PF")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260324062121.2745033-3-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:19:06 -07:00
Wei Fang
0239fd701d net: enetc: reset PIR and CIR if they are not equal when initializing TX ring
Currently the driver does not reset the producer index register (PIR) and
consumer index register (CIR) when initializing a TX BD ring. The driver
only reads the PIR and CIR and initializes the software indexes. If the
TX BD ring is reinitialized when it still contains unsent frames, its PIR
and CIR will not be equal after the reinitialization. However, the BDs
between CIR and PIR have been freed and become invalid and this can lead
to a hardware malfunction, causing the TX BD ring will not work properly.

For ENETC v4, it supports software to set the PIR and CIR, so the driver
can reset these two registers if they are not equal when reinitializing
the TX BD ring. Therefore, add this solution for ENETC v4. Note that this
patch does not work for ENETC v1 because it does not support software to
set the PIR and CIR.

Fixes: 99100d0d99 ("net: enetc: add preliminary support for i.MX95 ENETC PF")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260324062121.2745033-2-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:19:05 -07:00
Buday Csaba
e8e44c98f7 net: fec: fix the PTP periodic output sysfs interface
When the PPS channel configuration was implemented, the channel
index for the periodic outputs was configured as the hardware
channel number.

The sysfs interface uses a logical channel index, and rejects numbers
greater than `n_per_out` (see period_store() in ptp_sysfs.c).
That property was left at 1, since the driver implements channel
selection, not simultaneous operation of multiple PTP hardware timer
channels.

A second check in fec_ptp_enable() returns -EOPNOTSUPP when the two
channel numbers disagree, making channels 1..3 unusable from sysfs.

Fix by removing this redundant check in the FEC PTP driver.

Fixes: 566c2d8388 ("net: fec: make PPS channel configurable")
Signed-off-by: Buday Csaba <buday.csaba@prolan.hu>
Link: https://patch.msgid.link/8ec2afe88423c2231f9cf8044d212ce57846670e.1774359059.git.buday.csaba@prolan.hu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:10:15 -07:00
Qingfang Deng
57a04a13aa netdevsim: fix build if SKB_EXTENSIONS=n
__skb_ext_put() is not declared if SKB_EXTENSIONS is not enabled, which
causes a build error:

drivers/net/netdevsim/netdev.c: In function 'nsim_forward_skb':
drivers/net/netdevsim/netdev.c:114:25: error: implicit declaration of function '__skb_ext_put'; did you mean 'skb_ext_put'? [-Werror=implicit-function-declaration]
  114 |                         __skb_ext_put(psp_ext);
      |                         ^~~~~~~~~~~~~
      |                         skb_ext_put
cc1: some warnings being treated as errors

Add a stub to fix the build.

Fixes: 7d9351435e ("netdevsim: drop PSP ext ref on forward failure")
Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Link: https://patch.msgid.link/20260324140857.783-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 20:08:58 -07:00
Nicholas Carlini
5170efd9c3 io_uring/fdinfo: fix OOB read in SQE_MIXED wrap check
__io_uring_show_fdinfo() iterates over pending SQEs and, for 128-byte
SQEs on an IORING_SETUP_SQE_MIXED ring, needs to detect when the second
half of the SQE would be past the end of the sq_sqes array. The current
check tests (++sq_head & sq_mask) == 0, but sq_head is only incremented
when a 128-byte SQE is encountered, not on every iteration. The actual
array index is sq_idx = (i + sq_head) & sq_mask, which can be sq_mask
(the last slot) while the wrap check passes.

Fix by checking sq_idx directly. Keep the sq_head increment so the loop
still skips the second half of the 128-byte SQE on the next iteration.

Fixes: 1cba30bf9f ("io_uring: add support for IORING_SETUP_SQE_MIXED")
Signed-off-by: Nicholas Carlini <nicholas@carlini.com>
Link: https://patch.msgid.link/20260327021823.3138396-1-nicholas@carlini.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-26 20:28:28 -06:00
Sven Eckelmann (Plasma Cloud)
976ff48c2a net: ethernet: mtk_ppe: avoid NULL deref when gmac0 is disabled
If the gmac0 is disabled, the precheck for a valid ingress device will
cause a NULL pointer deref and crash the system. This happens because
eth->netdev[0] will be NULL but the code will directly try to access
netdev_ops.

Instead of just checking for the first net_device, it must be checked if
any of the mtk_eth net_devices is matching the netdev_ops of the ingress
device.

Cc: stable@vger.kernel.org
Fixes: 73cfd947db ("net: ethernet: mtk_eth_soc: ppe: prevent ppe update for non-mtk devices")
Signed-off-by: Sven Eckelmann (Plasma Cloud) <se@simonwunderlich.de>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260324-wed-crash-gmac0-disabled-v1-1-3bc388aee565@simonwunderlich.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 19:01:25 -07:00
Dipayaan Roy
f73896b419 net: mana: Fix RX skb truesize accounting
MANA passes rxq->alloc_size to napi_build_skb() for all RX buffers.
It is correct for fragment-backed RX buffers, where alloc_size matches
the actual backing allocation used for each packet buffer. However, in
the non-fragment RX path mana allocates a full page, or a higher-order
page, per RX buffer. In that case alloc_size only reflects the usable
packet area and not the actual backing memory.

This causes napi_build_skb() to underestimate the skb backing allocation
in the single-buffer RX path, so skb->truesize is derived from a value
smaller than the real RX buffer allocation.

Fix this by updating alloc_size in the non-fragment RX path to the
actual backing allocation size before it is passed to napi_build_skb().

Fixes: 730ff06d3f ("net: mana: Use page pool fragments for RX buffers instead of full pages to improve memory efficiency.")
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Link: https://patch.msgid.link/acLUhLpLum6qrD/N@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 18:57:07 -07:00
Sabrina Dubroca
629ec78ef8 mpls: add seqcount to protect the platform_label{,s} pair
The RCU-protected codepaths (mpls_forward, mpls_dump_routes) can have
an inconsistent view of platform_labels vs platform_label in case of a
concurrent resize (resize_platform_label_table, under
platform_mutex). This can lead to OOB accesses.

This patch adds a seqcount, so that we get a consistent snapshot.

Note that mpls_label_ok is also susceptible to this, so the check
against RTA_DST in rtm_to_route_config, done outside platform_mutex,
is not sufficient. This value gets passed to mpls_label_ok once more
in both mpls_route_add and mpls_route_del, so there is no issue, but
that additional check must not be removed.

Reported-by: Yuan Tan <tanyuan98@outlook.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Fixes: 7720c01f3f ("mpls: Add a sysctl to control the size of the mpls label table")
Fixes: dde1b38e87 ("mpls: Convert mpls_dump_routes() to RCU.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/cd8fca15e3eb7e212b094064cd83652e20fd9d31.1774284088.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 18:32:14 -07:00
Jakub Kicinski
45dbf8fcea Merge tag 'wireless-2026-03-26' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:

====================
Couple more fixes:

 - virt_wifi: remove SET_NETDEV_DEV to avoid UAF on teardown
 - iwlwifi:
   - fix (some) devices that don't have 6 GHz (WiFi6E)
   - fix potential OOB read of firmware notification
   - set WiFi generation for firmware to avoid packet drops
   - fix multi-link scan timing
 - wilc1000: fix integer overflow
 - ath11k/ath12k: fix TID during A-MPDU session teardown
 - wl1251: don't trust firmware TX status response index

* tag 'wireless-2026-03-26' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
  wifi: virt_wifi: remove SET_NETDEV_DEV to avoid use-after-free
  wifi: iwlwifi: mvm: fix potential out-of-bounds read in iwl_mvm_nd_match_info_handler()
  wifi: wl1251: validate packet IDs before indexing tx_frames
  wifi: wilc1000: fix u8 overflow in SSID scan buffer size calculation
  wifi: ath12k: Pass the correct value of each TID during a stop AMPDU session
  wifi: ath11k: Pass the correct value of each TID during a stop AMPDU session
  wifi: iwlwifi: mld: correctly set wifi generation data
  wifi: iwlwifi: mvm: don't send a 6E related command when not supported
  wifi: iwlwifi: mld: Fix MLO scan timing
====================

Link: https://patch.msgid.link/20260326093329.77815-3-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-26 17:51:39 -07:00
Linus Torvalds
46b5132504 Merge tag 'v7.0-rc5-smb3-client-fix' of git://git.samba.org/sfrench/cifs-2.6
Pull smb client fix from Steve French:

 - Fix rebuild of mapping table

* tag 'v7.0-rc5-smb3-client-fix' of git://git.samba.org/sfrench/cifs-2.6:
  smb/client: ensure smb2_mapping_table rebuild on cmd changes
2026-03-26 14:01:26 -07:00
Linus Torvalds
d813f42193 Merge tag 'pm-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki:
 "These fix two cpufreq issues, one in the core and one in the
  conservative governor, and two issues related to system sleep:

   - Restore the cpufreq core behavior changed inadvertently during the
     6.19 development cycle to call cpufreq_frequency_table_cpuinfo()
     for cpufreq policies getting re-initialized which ensures that
     policy->max and policy->cpuinfo_max_freq will be valid going
     forward (Viresh Kumar)

   - Adjust the cached requested frequency in the conservative cpufreq
     governor on policy limits changes to prevent it from becoming stale
     in some cases (Viresh Kumar)

   - Prevent pm_restore_gfp_mask() from triggering a WARN_ON() in some
     code paths in which it is legitimately called without invoking
     pm_restrict_gfp_mask() previously (Youngjun Park)

   - Update snapshot_write_finalize() to take trailing zero pages into
     account properly which prevents user space restore from failing
     subsequently in some cases (Alberto Garcia)"

* tag 'pm-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  PM: sleep: Drop spurious WARN_ON() from pm_restore_gfp_mask()
  PM: hibernate: Drain trailing zero pages on userspace restore
  cpufreq: conservative: Reset requested_freq on limits change
  cpufreq: Don't skip cpufreq_frequency_table_cpuinfo()
2026-03-26 12:42:28 -07:00
Linus Torvalds
9c2b23a275 Merge tag 'thermal-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull thermal control fix from Rafael Wysocki:
 "This prevents the int340x thermal driver from taking the power slider
  offset parameter into account incorrectly in some cases (Srinivas
  Pandruvada)"

* tag 'thermal-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  thermal: intel: int340x: soc_slider: Set offset only for balanced mode
2026-03-26 12:27:17 -07:00
Linus Torvalds
2d74bd3c58 Merge tag 'acpi-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI support fix from Rafael Wysocki:
 "Prevent use-after-free from occurring on reduced-hardware ACPI
  platforms when -EPROBE_DEFER is returned by ec_install_handlers()
  during ACPI EC driver initialization (Weiming Shi)"

* tag 'acpi-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  ACPI: EC: clean up handlers on probe failure in acpi_ec_setup()
2026-03-26 12:06:40 -07:00
Linus Torvalds
25b69ebe28 Merge tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux
Pull Landlock fixes from Mickaël Salaün:
 "This mainly fixes Landlock TSYNC issues related to interrupts and
  unexpected task exit.

  Other fixes touch documentation and sample, and a new test extends
  coverage"

* tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
  landlock: Expand restrict flags example for ABI version 8
  selftests/landlock: Test tsync interruption and cancellation paths
  landlock: Clean up interrupted thread logic in TSYNC
  landlock: Serialize TSYNC thread restriction
  samples/landlock: Bump ABI version to 8
  landlock: Improve TSYNC types
  landlock: Fully release unused TSYNC work entries
  landlock: Fix formatting
2026-03-26 12:03:37 -07:00
Rafael J. Wysocki
042f99c493 Merge branch 'pm-sleep'
Merge fixes related to system sleep for 7.0-rc6:

 - Prevent pm_restore_gfp_mask() from triggering a WARN_ON() in some
   code paths in which it is legitimately called without invoking
   pm_restrict_gfp_mask() previously (Youngjun Park)

 - Update snapshot_write_finalize() to take trailing zero pages into
   account properly which prevents user space restore from failing
   subsequently in some cases (Alberto Garcia)

* pm-sleep:
  PM: sleep: Drop spurious WARN_ON() from pm_restore_gfp_mask()
  PM: hibernate: Drain trailing zero pages on userspace restore
2026-03-26 18:44:46 +01:00
Linus Torvalds
453a4a5f97 Merge tag 'net-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
 "Including fixes from Bluetooth, CAN, IPsec and Netfilter.

  Notably, this includes the fix for the Bluetooth regression that you
  were notified about. I'm not aware of any other pending regressions.

  Current release - regressions:

    - bluetooth:
       - fix stack-out-of-bounds read in l2cap_ecred_conn_req
       - fix regressions caused by reusing ident

    - netfilter: revisit array resize logic

    - eth: ice: set max queues in alloc_etherdev_mqs()

  Previous releases - regressions:

    - core: correctly handle tunneled traffic on IPV6_CSUM GSO fallback

    - bluetooth:
       - fix dangling pointer on mgmt_add_adv_patterns_monitor_complete
       - fix deadlock in l2cap_conn_del()

    - sched: codel: fix stale state for empty flows in fq_codel

    - ipv6: remove permanent routes from tb6_gc_hlist when all exceptions expire.

    - xfrm: fix skb_put() panic on non-linear skb during reassembly

    - openvswitch:
       - avoid releasing netdev before teardown completes
       - validate MPLS set/set_masked payload length

    - eth: iavf: fix out-of-bounds writes in iavf_get_ethtool_stats()

  Previous releases - always broken:

    - bluetooth: fix null-ptr-deref on l2cap_sock_ready_cb

    - udp: fix wildcard bind conflict check when using hash2

    - netfilter: fix use of uninitialized rtp_addr in process_sdp

    - tls: Purge async_hold in tls_decrypt_async_wait()

    - xfrm:
       - prevent policy_hthresh.work from racing with netns teardown
       - fix skb leak with espintcp and async crypto

    - smc: fix double-free of smc_spd_priv when tee() duplicates splice pipe buffer

    - can:
       - add missing error handling to call can_ctrlmode_changelink()
       - fix OOB heap access in cgw_csum_crc8_rel()

    - eth:
       - mana: fix use-after-free in add_adev() error path
       - virtio-net: fix for VIRTIO_NET_F_GUEST_HDRLEN
       - bcmasp: fix double free of WoL irq"

* tag 'net-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (90 commits)
  net: macb: use the current queue number for stats
  netfilter: ctnetlink: use netlink policy range checks
  netfilter: nf_conntrack_sip: fix use of uninitialized rtp_addr in process_sdp
  netfilter: nf_conntrack_expect: skip expectations in other netns via proc
  netfilter: nf_conntrack_expect: store netns and zone in expectation
  netfilter: ctnetlink: ensure safe access to master conntrack
  netfilter: nf_conntrack_expect: use expect->helper
  netfilter: nf_conntrack_expect: honor expectation helper field
  netfilter: nft_set_rbtree: revisit array resize logic
  netfilter: ip6t_rt: reject oversized addrnr in rt_mt6_check()
  netfilter: nfnetlink_log: fix uninitialized padding leak in NFULA_PAYLOAD
  tls: Purge async_hold in tls_decrypt_async_wait()
  selftests: netfilter: nft_concat_range.sh: add check for flush+reload bug
  netfilter: nft_set_pipapo_avx2: don't return non-matching entry on expiry
  Bluetooth: btusb: clamp SCO altsetting table indices
  Bluetooth: L2CAP: Fix ERTM re-init and zero pdu_len infinite loop
  Bluetooth: L2CAP: Fix deadlock in l2cap_conn_del()
  Bluetooth: btintel: serialize btintel_hw_error() with hci_req_sync_lock
  Bluetooth: L2CAP: Fix send LE flow credits in ACL link
  net: mana: fix use-after-free in add_adev() error path
  ...
2026-03-26 09:53:08 -07:00
Linus Torvalds
75c78a4faa Merge tag 'pinctrl-v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
Pull pin control fixes from Linus Walleij:

 - Implement .get_direction() in the spmi-gpio gpio_chip

   Recent changes makes this start to print warnings and it's not nice,
   let's just fix it

 - Clamp the return value of gpio_get() in the Renesas RZA1 driver

 - Add the GPIO_GENERIC dependency to the STM32 HDP driver

 - Modify the Mediatek driver to accept devices that do not use external
   interrupts (EINT) at all

 - Fix flag propagation in the Sunxi driver, so that we can fix an issue
   with uninitialized pins in a follow-up patch using said flags

* tag 'pinctrl-v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl:
  pinctrl: sunxi: fix gpiochip_lock_as_irq() failure when pinmux is unknown
  pinctrl: sunxi: pass down flags to pinctrl routines
  pinctrl: mediatek: common: Fix probe failure for devices without EINT
  pinctrl: stm32: fix HDP driver dependency on GPIO_GENERIC
  pinctrl: renesas: rza1: Normalize return value of gpio_get()
  pinctrl: qcom: spmi-gpio: implement .get_direction()
  pinctrl: renesas: rzt2h: Fix invalid wait context
  pinctrl: renesas: rzt2h: Fix device node leak in rzt2h_gpio_register()
2026-03-26 08:35:51 -07:00
Linus Torvalds
dabb83ecf4 Merge tag 'dma-mapping-7.0-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux
Pull dma-mapping fixes from Marek Szyprowski:
 "A set of fixes for DMA-mapping subsystem, which resolve false-
  positive warnings from KMSAN and DMA-API debug (Shigeru Yoshida
  and Leon Romanovsky) as well as a simple build fix (Miguel Ojeda)"

* tag 'dma-mapping-7.0-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux:
  dma-mapping: add missing `inline` for `dma_free_attrs`
  mm/hmm: Indicate that HMM requires DMA coherency
  RDMA/umem: Tell DMA mapping that UMEM requires coherency
  iommu/dma: add support for DMA_ATTR_REQUIRE_COHERENT attribute
  dma-direct: prevent SWIOTLB path when DMA_ATTR_REQUIRE_COHERENT is set
  dma-mapping: Introduce DMA require coherency attribute
  dma-mapping: Clarify valid conditions for CPU cache line overlap
  dma-mapping: handle DMA_ATTR_CPU_CACHE_CLEAN in trace output
  dma-debug: Allow multiple invocations of overlapping entries
  dma: swiotlb: add KMSAN annotations to swiotlb_bounce()
2026-03-26 08:22:07 -07:00
Hao-Yu Yang
190a8c48ff futex: Fix UaF between futex_key_to_node_opt() and vma_replace_policy()
During futex_key_to_node_opt() execution, vma->vm_policy is read under
speculative mmap lock and RCU. Concurrently, mbind() may call
vma_replace_policy() which frees the old mempolicy immediately via
kmem_cache_free().

This creates a race where __futex_key_to_node() dereferences a freed
mempolicy pointer, causing a use-after-free read of mpol->mode.

[  151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
[  151.414046] Read of size 2 at addr ffff888001c49634 by task e/87

[  151.415969] Call Trace:

[  151.416732]  __asan_load2 (mm/kasan/generic.c:271)
[  151.416777]  __futex_key_to_node (kernel/futex/core.c:349)
[  151.416822]  get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)

Fix by adding rcu to __mpol_put().

Fixes: c042c50521 ("futex: Implement FUTEX2_MPOL")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Link: https://patch.msgid.link/20260324174418.GB1850007@noisy.programming.kicks-ass.net
2026-03-26 16:13:48 +01:00
Peter Zijlstra
19f94b3905 futex: Require sys_futex_requeue() to have identical flags
Nicholas reported that his LLM found it was possible to create a UaF
when sys_futex_requeue() is used with different flags. The initial
motivation for allowing different flags was the variable sized futex,
but since that hasn't been merged (yet), simply mandate the flags are
identical, as is the case for the old style sys_futex() requeue
operations.

Fixes: 0f4b5f9722 ("futex: Add sys_futex_requeue()")
Reported-by: Nicholas Carlini <npc@anthropic.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
2026-03-26 16:13:48 +01:00
Claudio Imbrenda
0a28e06575 KVM: s390: Fix KVM_S390_VCPU_FAULT ioctl
A previous commit changed the behaviour of the KVM_S390_VCPU_FAULT
ioctl. The current (wrong) implementation will trigger a guest
addressing exception if the requested address lies outside of a
memslot, unless the VM is UCONTROL.

Restore the previous behaviour by open coding the fault-in logic.

Fixes: 3762e905ec ("KVM: s390: use __kvm_faultin_pfn()")
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:38 +01:00
Claudio Imbrenda
a12cc7e3d6 KVM: s390: vsie: Fix guest page tables protection
When shadowing, the guest page tables are write-protected, in order to
trap changes and properly unshadow the shadow mapping for the nested
guest. Already shadowed levels are skipped, so that only the needed
levels are write protected.

Currently the levels that get write protected are exactly one level too
deep: the last level (nested guest memory) gets protected in the wrong
way, and will be protected again correctly a few lines afterwards; most
importantly, the highest non-shadowed level does *not* get write
protected.

Moreover, if the nested guest is running in a real address space, there
are no DAT tables to shadow.

Write protect the correct levels, so that all the levels that need to
be protected are protected, and avoid double protecting the last level;
skip attempting to shadow the DAT tables when the nested guest is
running in a real address space.

Fixes: e38c884df9 ("KVM: s390: Switch to new gmap")
Tested-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:34 +01:00
Claudio Imbrenda
19d6c5b804 KVM: s390: vsie: Fix unshadowing while shadowing
If shadowing causes the shadow gmap to get unshadowed, exit early to
prevent an attempt to dereference the parent pointer, which at this
point is NULL.

Opportunistically add some more checks to prevent NULL parents.

Fixes: a2c17f9270 ("KVM: s390: New gmap code")
Fixes: e5f98a6899 ("KVM: s390: Add some helper functions needed for vSIE")
Fixes: e38c884df9 ("KVM: s390: Switch to new gmap")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:30 +01:00
Claudio Imbrenda
0ec456b8a5 KVM: s390: vsie: Fix refcount overflow for shadow gmaps
In most cases gmap_put() was not called when it should have.

Add the missing gmap_put() in vsie_run().

Fixes: e38c884df9 ("KVM: s390: Switch to new gmap")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:25 +01:00
Claudio Imbrenda
fd7bc612cf KVM: s390: vsie: Fix nested guest memory shadowing
Fix _do_shadow_pte() to use the correct pointer (guest pte instead of
nested guest) to set up the new pte.

Add a check to return -EOPNOTSUPP if the mapping for the nested guest
is writeable but the same page in the guest is only read-only.

Fixes: e38c884df9 ("KVM: s390: Switch to new gmap")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:21 +01:00
Claudio Imbrenda
0f2b760a17 KVM: s390: Correctly handle guest mappings without struct page
Introduce a new special softbit for large pages, like already presend
for normal pages, and use it to mark guest mappings that do not have
struct pages.

Whenever a leaf DAT entry becomes dirty, check the special softbit and
only call SetPageDirty() if there is an actual struct page.

Move the logic to mark pages dirty inside _gmap_ptep_xchg() and
_gmap_crstep_xchg_atomic(), to avoid needlessly duplicating the code.

Fixes: 5a74e3d934 ("KVM: s390: KVM-specific bitfields and helper functions")
Fixes: a2c17f9270 ("KVM: s390: New gmap code")
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:18 +01:00
Claudio Imbrenda
45921d0212 KVM: s390: Fix gmap_link()
The slow path of the fault handler ultimately called gmap_link(), which
assumed the fault was a major fault, and blindly called dat_link().

In case of minor faults, things were not always handled properly; in
particular the prefix and vsie marker bits were ignored.

Move dat_link() into gmap.c, renaming it accordingly. Once moved, the
new _gmap_link() function will be able to correctly honour the prefix
and vsie markers.

This will cause spurious unshadows in some uncommon cases.

Fixes: 94fd9b16cc ("KVM: s390: KVM page table management functions: lifecycle management")
Fixes: a2c17f9270 ("KVM: s390: New gmap code")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:13 +01:00
Claudio Imbrenda
6f93d1ed6f KVM: s390: vsie: Fix check for pre-existing shadow mapping
When shadowing a nested guest, a check is performed and no shadowing is
attempted if the nested guest is already shadowed.

The existing check was incomplete; fix it by also checking whether the
leaf DAT table entry in the existing shadow gmap has the same protection
as the one specified in the guest DAT entry.

Fixes: e38c884df9 ("KVM: s390: Switch to new gmap")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:07 +01:00
Claudio Imbrenda
b827ef02f4 KVM: s390: Remove non-atomic dat_crstep_xchg()
In practice dat_crstep_xchg() is racy and hard to use correctly. Simply
remove it and replace its uses with dat_crstep_xchg_atomic().

This solves some actual races that lead to system hangs / crashes.

Opportunistically fix an alignment issue in _gmap_crstep_xchg_atomic().

Fixes: 589071eaaa ("KVM: s390: KVM page table management functions: clear and replace")
Fixes: 94fd9b16cc ("KVM: s390: KVM page table management functions: lifecycle management")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:12:03 +01:00
Biju Das
897cf98926 irqchip/renesas-rzv2h: Fix error path in rzv2h_icu_probe_common()
Replace pm_runtime_put() with pm_runtime_put_sync() when
irq_domain_create_hierarchy() fails to ensure the device suspends
synchronously before devres cleanup disables runtime PM via
pm_runtime_disable().

Fixes: 5ec8cabc3b ("irqchip/renesas-rzv2h: Use devm_pm_runtime_enable()")
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260323124917.41602-1-biju.das.jz@bp.renesas.com
2026-03-26 16:12:01 +01:00
Claudio Imbrenda
0f54755343 KVM: s390: vsie: Fix dat_split_ste()
If the guest misbehaves and puts the page tables for its nested guest
inside the memory of the nested guest itself, and the guest and nested
guest are being mapped with large pages, the shadow mapping will
lose synchronization with the actual mapping, since this will cause the
large page with the vsie notification bit to be split, but the
vsie notification bit will not be propagated to the resulting small
pages.

Fix this by propagating the vsie_notif bit from large pages to normal
pages when splitting a large page.

Fixes: 2db149a0a6 ("KVM: s390: KVM page table management functions: walks")
Reviewed-by: Christoph Schlameuss <schlameuss@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-26 16:11:58 +01:00
Jassi Brar
cfe02147e8 irqchip/qcom-mpm: Add missing mailbox TX done acknowledgment
The mbox_client for qcom-mpm sends NULL doorbell messages via
mbox_send_message() but never signals TX completion.

Set knows_txdone=true and call mbox_client_txdone() after a successful
send, matching the pattern used by other Qualcomm mailbox clients (smp2p,
smsm, qcom_aoss etc).

Fixes: a6199bb514 "irqchip: Add Qualcomm MPM controller driver"
Signed-off-by: Jassi Brar <jassisinghbrar@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260322171533.608436-1-jassisinghbrar@gmail.com
2026-03-26 16:11:53 +01:00
Paolo Abeni
db472c34a7 Merge tag 'nf-26-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says:

====================
Netfilter for net

This is v3, I kept back an ipset fix and another to tigthen the xtables
interface to reject invalid combinations with the NFPROTO_ARP family.
They need a bit more discussion. I fixed the issues reported by AI on
patch 9 (add #ifdef to access ct zone, update nf_conntrack_broadcast
and patch 10 (use better Fixes: tag). Thanks!

The following patchset contains Netfilter fixes for *net*.

Note that most bugs fixed here stem from 2.6 days, the large PR is not
due to an increase in regressions.

1) Fix incorrect reject of set updates with nf_tables pipapo set
   avx2 backend.  This comes with a regression test in patch 2.
   From Florian Westphal.

2) nfnetlink_log needs to zero padding to prevent infoleak to userspace,
   from Weiming Shi.

3) xtables ip6t_rt module never validated that addrnr length is within the
   allowed array boundary. Reject bogus values.  From Ren Wei.

4) Fix high memory usage in rbtree set backend that was unwanted side-effect
   of the recently added binary search blob. From Pablo Neira Ayuso.

5) Patches 5 to 10, also from Pablo, address long-standing RCU safety bugs
   in conntracks handling of expectations: We can never safely defer
   a conntrack extension area without holding a reference. Yet expectation
   handling does so in multiple places.  Fix this by avoiding the need to
   look into the master conntrack to begin with and by extending locked
   sections in a few places.

11) Fix use of uninitialized rtp_addr in the sip conntrack helper,
    also from Weiming Shi.

12) Add stricter netlink policy checks in ctnetlink, from David Carlier.
    This avoids undefined behaviour when userspace provides huge wscale
    value.

netfilter pull request 26-03-26

* tag 'nf-26-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: ctnetlink: use netlink policy range checks
  netfilter: nf_conntrack_sip: fix use of uninitialized rtp_addr in process_sdp
  netfilter: nf_conntrack_expect: skip expectations in other netns via proc
  netfilter: nf_conntrack_expect: store netns and zone in expectation
  netfilter: ctnetlink: ensure safe access to master conntrack
  netfilter: nf_conntrack_expect: use expect->helper
  netfilter: nf_conntrack_expect: honor expectation helper field
  netfilter: nft_set_rbtree: revisit array resize logic
  netfilter: ip6t_rt: reject oversized addrnr in rt_mt6_check()
  netfilter: nfnetlink_log: fix uninitialized padding leak in NFULA_PAYLOAD
  selftests: netfilter: nft_concat_range.sh: add check for flush+reload bug
  netfilter: nft_set_pipapo_avx2: don't return non-matching entry on expiry
====================

Link: https://patch.msgid.link/20260326125153.685915-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-26 15:38:14 +01:00
David Howells
0e764b9d46 netfs: Fix the handling of stream->front by removing it
The netfs_io_stream::front member is meant to point to the subrequest
currently being collected on a stream, but it isn't actually used this way
by direct write (which mostly ignores it).  However, there's a tracepoint
which looks at it.  Further, stream->front is actually redundant with
stream->subrequests.next.

Fix the potential problem in the direct code by just removing the member
and using stream->subrequests.next instead, thereby also simplifying the
code.

Fixes: a0b4c7a491 ("netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence")
Reported-by: Paulo Alcantara <pc@manguebit.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/4158599.1774426817@warthog.procyon.org.uk
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-26 15:18:45 +01:00
Paolo Abeni
deec4f7b41 Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
For ice:
Michal corrects call to alloc_etherdev_mqs() to provide maximum number
of queues supported rather than currently allocated number of queues.

Petr Oros fixes issues related to some ethtool operations in switchdev
mode.

For iavf:
Kohei Enju corrects number of reported queues for ethtool statistics to
absolute max as using current number could race and cause out-of-bounds
issues.

For idpf:
Josh NULLs cdev_info pointer after freeing to prevent possible subsequent
improper access. He also defers setting of refillqs value until after
allocation to prevent possible NULL pointer dereference.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  idpf: only assign num refillqs if allocation was successful
  idpf: clear stale cdev_info ptr
  iavf: fix out-of-bounds writes in iavf_get_ethtool_stats()
  ice: use ice_update_eth_stats() for representor stats
  ice: fix inverted ready check for VF representors
  ice: set max queues in alloc_etherdev_mqs()
====================

Link: https://patch.msgid.link/20260323205843.624704-1-anthony.l.nguyen@intel.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-26 15:14:52 +01:00
Darrick J. Wong
e31c53a806 xfs: remove file_path tracepoint data
The xfile/xmbuf shmem file descriptions are no longer as detailed as
they were when online fsck was first merged, because moving to static
strings in commit 60382993a2 ("xfs: get rid of the
xchk_xfile_*_descr calls") removed a memory allocation and hence a
source of failure.

However this makes encoding the description in the tracepoints sort of a
waste of memory.  David Laight also points out that file_path doesn't
zero the whole buffer which causes exposure of stale trace bytes, and
Steven Rostedt wonders why we're not using a dynamic array for the file
path.

I don't think this is worth fixing, so let's just rip it out.

Cc: rostedt@goodmis.org
Cc: david.laight.linux@gmail.com
Link: https://lore.kernel.org/linux-xfs/20260323172204.work.979-kees@kernel.org/
Cc: stable@vger.kernel.org # v6.11
Fixes: 19ebc8f84e ("xfs: fix file_path handling in tracepoints")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-26 14:25:23 +01:00
Darrick J. Wong
70685c291e xfs: don't irele after failing to iget in xfs_attri_recover_work
xlog_recovery_iget* never set @ip to a valid pointer if they return
an error, so this irele will walk off a dangling pointer.  Fix that.

Cc: stable@vger.kernel.org # v6.10
Fixes: ae673f534a ("xfs: record inode generation in xattr update log intent items")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-26 14:25:06 +01:00
Jens Axboe
b59efde9e6 io_uring/fdinfo: fix SQE_MIXED SQE displaying
When displaying pending SQEs for a MIXED ring, each 128-byte SQE
increments sq_head to skip the second slot, but the loop counter is not
adjusted. This can cause the loop to read past sq_tail by one entry for
each 128-byte SQE encountered, displaying SQEs that haven't been made
consumable yet by the application.

Match the kernel's own consumption logic in io_init_req() which
decrements what's left when consuming the extra slot.

Fixes: 1cba30bf9f ("io_uring: add support for IORING_SETUP_SQE_MIXED")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-26 07:15:55 -06:00
Paolo Valerio
72d96e4e24 net: macb: use the current queue number for stats
There's a potential mismatch between the memory reserved for statistics
and the amount of memory written.

gem_get_sset_count() correctly computes the number of stats based on the
active queues, whereas gem_get_ethtool_stats() indiscriminately copies
data using the maximum number of queues, and in the case the number of
active queues is less than MACB_MAX_QUEUES, this results in a OOB write
as observed in the KASAN splat.

==================================================================
BUG: KASAN: vmalloc-out-of-bounds in gem_get_ethtool_stats+0x54/0x78
  [macb]
Write of size 760 at addr ffff80008080b000 by task ethtool/1027

CPU: [...]
Tainted: [E]=UNSIGNED_MODULE
Hardware name: raspberrypi rpi/rpi, BIOS 2025.10 10/01/2025
Call trace:
 show_stack+0x20/0x38 (C)
 dump_stack_lvl+0x80/0xf8
 print_report+0x384/0x5e0
 kasan_report+0xa0/0xf0
 kasan_check_range+0xe8/0x190
 __asan_memcpy+0x54/0x98
 gem_get_ethtool_stats+0x54/0x78 [macb
   926c13f3af83b0c6fe64badb21ec87d5e93fcf65]
 dev_ethtool+0x1220/0x38c0
 dev_ioctl+0x4ac/0xca8
 sock_do_ioctl+0x170/0x1d8
 sock_ioctl+0x484/0x5d8
 __arm64_sys_ioctl+0x12c/0x1b8
 invoke_syscall+0xd4/0x258
 el0_svc_common.constprop.0+0xb4/0x240
 do_el0_svc+0x48/0x68
 el0_svc+0x40/0xf8
 el0t_64_sync_handler+0xa0/0xe8
 el0t_64_sync+0x1b0/0x1b8

The buggy address belongs to a 1-page vmalloc region starting at
  0xffff80008080b000 allocated at dev_ethtool+0x11f0/0x38c0
The buggy address belongs to the physical page:
page: refcount:1 mapcount:0 mapping:0000000000000000
  index:0xffff00000a333000 pfn:0xa333
flags: 0x7fffc000000000(node=0|zone=0|lastcpupid=0x1ffff)
raw: 007fffc000000000 0000000000000000 dead000000000122 0000000000000000
raw: ffff00000a333000 0000000000000000 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff80008080b080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff80008080b100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>ffff80008080b180: 00 00 00 00 00 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
                                  ^
 ffff80008080b200: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
 ffff80008080b280: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
==================================================================

Fix it by making sure the copied size only considers the active number of
queues.

Fixes: 512286bbd4 ("net: macb: Added some queue statistics")
Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
Reviewed-by: Nicolai Buchwitz <nb@tipi-net.de>
Link: https://patch.msgid.link/20260323191634.2185840-1-pvalerio@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-26 13:48:21 +01:00
Paolo Abeni
aa637b2cf3 Merge tag 'for-net-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
Luiz Augusto von Dentz says:

====================
bluetooth pull request for net:

 - L2CAP: Fix deadlock in l2cap_conn_del()
 - L2CAP: Fix ERTM re-init and zero pdu_len infinite loop
 - L2CAP: Fix send LE flow credits in ACL link
 - btintel: serialize btintel_hw_error() with hci_req_sync_lock
 - btusb: clamp SCO altsetting table indices

* tag 'for-net-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth:
  Bluetooth: btusb: clamp SCO altsetting table indices
  Bluetooth: L2CAP: Fix ERTM re-init and zero pdu_len infinite loop
  Bluetooth: L2CAP: Fix deadlock in l2cap_conn_del()
  Bluetooth: btintel: serialize btintel_hw_error() with hci_req_sync_lock
  Bluetooth: L2CAP: Fix send LE flow credits in ACL link
====================

Link: https://patch.msgid.link/20260325194358.618892-1-luiz.dentz@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-26 13:46:55 +01:00
Alex Williamson
e98137f0a8 vfio/pci: Fix double free in dma-buf feature
The error path through vfio_pci_core_feature_dma_buf() ignores its
own advice to only use dma_buf_put() after dma_buf_export(), instead
falling through the entire unwind chain.  In the unlikely event that
we encounter file descriptor exhaustion, this can result in an
unbalanced refcount on the vfio device and double free of allocated
objects.

Avoid this by moving the "put" directly into the error path and return
the errno rather than entering the unwind chain.

Reported-by: Renato Marziano <renato@marziano.top>
Fixes: 5d74781ebc ("vfio/pci: Add dma-buf export support for MMIO regions")
Cc: stable@vger.kernel.org
Acked-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@nvidia.com>
Link: https://lore.kernel.org/r/20260323215659.2108191-3-alex.williamson@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex@shazbot.org>
2026-03-26 06:38:27 -06:00
David Carlier
8f15b5071b netfilter: ctnetlink: use netlink policy range checks
Replace manual range and mask validations with netlink policy
annotations in ctnetlink code paths, so that the netlink core rejects
invalid values early and can generate extack errors.

- CTA_PROTOINFO_TCP_STATE: reject values > TCP_CONNTRACK_SYN_SENT2 at
  policy level, removing the manual >= TCP_CONNTRACK_MAX check.
- CTA_PROTOINFO_TCP_WSCALE_ORIGINAL/REPLY: reject values > TCP_MAX_WSCALE
  (14). The normal TCP option parsing path already clamps to this value,
  but the ctnetlink path accepted 0-255, causing undefined behavior when
  used as a u32 shift count.
- CTA_FILTER_ORIG_FLAGS/REPLY_FLAGS: use NLA_POLICY_MASK with
  CTA_FILTER_F_ALL, removing the manual mask checks.
- CTA_EXPECT_FLAGS: use NLA_POLICY_MASK with NF_CT_EXPECT_MASK, adding
  a new mask define grouping all valid expect flags.

Extracted from a broader nf-next patch by Florian Westphal, scoped to
ctnetlink for the fixes tree.

Fixes: c8e2078cfe ("[NETFILTER]: ctnetlink: add support for internal tcp connection tracking flags handling")
Signed-off-by: David Carlier <devnexen@gmail.com>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:28:17 +01:00
Weiming Shi
6a2b724460 netfilter: nf_conntrack_sip: fix use of uninitialized rtp_addr in process_sdp
process_sdp() declares union nf_inet_addr rtp_addr on the stack and
passes it to the nf_nat_sip sdp_session hook after walking the SDP
media descriptions. However rtp_addr is only initialized inside the
media loop when a recognized media type with a non-zero port is found.

If the SDP body contains no m= lines, only inactive media sections
(m=audio 0 ...) or only unrecognized media types, rtp_addr is never
assigned. Despite that, the function still calls hooks->sdp_session()
with &rtp_addr, causing nf_nat_sdp_session() to format the stale stack
value as an IP address and rewrite the SDP session owner and connection
lines with it.

With CONFIG_INIT_STACK_ALL_ZERO (default on most distributions) this
results in the session-level o= and c= addresses being rewritten to
0.0.0.0 for inactive SDP sessions. Without stack auto-init the
rewritten address is whatever happened to be on the stack.

Fix this by pre-initializing rtp_addr from the session-level connection
address (caddr) when available, and tracking via a have_rtp_addr flag
whether any valid address was established. Skip the sdp_session hook
entirely when no valid address exists.

Fixes: 4ab9e64e5e ("[NETFILTER]: nf_nat_sip: split up SDP mangling")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:28:17 +01:00
Pablo Neira Ayuso
3db5647984 netfilter: nf_conntrack_expect: skip expectations in other netns via proc
Skip expectations that do not reside in this netns.

Similar to e77e6ff502 ("netfilter: conntrack: do not dump other netns's
conntrack entries via proc").

Fixes: 9b03f38d04 ("netfilter: netns nf_conntrack: per-netns expectations")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:28:03 +01:00
Marc Kleine-Budde
b341c1176f spi: spi-fsl-lpspi: fix teardown order issue (UAF)
There is a teardown order issue in the driver. The SPI controller is
registered using devm_spi_register_controller(), which delays
unregistration of the SPI controller until after the fsl_lpspi_remove()
function returns.

As the fsl_lpspi_remove() function synchronously tears down the DMA
channels, a running SPI transfer triggers the following NULL pointer
dereference due to use after free:

| fsl_lpspi 42550000.spi: I/O Error in DMA RX
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
[...]
| Call trace:
|  fsl_lpspi_dma_transfer+0x260/0x340 [spi_fsl_lpspi]
|  fsl_lpspi_transfer_one+0x198/0x448 [spi_fsl_lpspi]
|  spi_transfer_one_message+0x49c/0x7c8
|  __spi_pump_transfer_message+0x120/0x420
|  __spi_sync+0x2c4/0x520
|  spi_sync+0x34/0x60
|  spidev_message+0x20c/0x378 [spidev]
|  spidev_ioctl+0x398/0x750 [spidev]
[...]

Switch from devm_spi_register_controller() to spi_register_controller() in
fsl_lpspi_probe() and add the corresponding spi_unregister_controller() in
fsl_lpspi_remove().

Fixes: 5314987de5 ("spi: imx: add lpspi bus driver")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Link: https://patch.msgid.link/20260319-spi-fsl-lpspi-fixes-v1-1-b433e435b2d8@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-26 12:27:42 +00:00
Pablo Neira Ayuso
02a3231b6d netfilter: nf_conntrack_expect: store netns and zone in expectation
__nf_ct_expect_find() and nf_ct_expect_find_get() are called under
rcu_read_lock() but they dereference the master conntrack via
exp->master.

Since the expectation does not hold a reference on the master conntrack,
this could be dying conntrack or different recycled conntrack than the
real master due to SLAB_TYPESAFE_RCU.

Store the netns, the master_tuple and the zone in struct
nf_conntrack_expect as a safety measure.

This patch is required by the follow up fix not to dump expectations
that do not belong to this netns.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:24:40 +01:00
Pablo Neira Ayuso
bffcaad9af netfilter: ctnetlink: ensure safe access to master conntrack
Holding reference on the expectation is not sufficient, the master
conntrack object can just go away, making exp->master invalid.

To access exp->master safely:

- Grab the nf_conntrack_expect_lock, this gets serialized with
  clean_from_lists() which also holds this lock when the master
  conntrack goes away.

- Hold reference on master conntrack via nf_conntrack_find_get().
  Not so easy since the master tuple to look up for the master conntrack
  is not available in the existing problematic paths.

This patch goes for extending the nf_conntrack_expect_lock section
to address this issue for simplicity, in the cases that are described
below this is just slightly extending the lock section.

The add expectation command already holds a reference to the master
conntrack from ctnetlink_create_expect().

However, the delete expectation command needs to grab the spinlock
before looking up for the expectation. Expand the existing spinlock
section to address this to cover the expectation lookup. Note that,
the nf_ct_expect_iterate_net() calls already grabs the spinlock while
iterating over the expectation table, which is correct.

The get expectation command needs to grab the spinlock to ensure master
conntrack does not go away. This also expands the existing spinlock
section to cover the expectation lookup too. I needed to move the
netlink skb allocation out of the spinlock to keep it GFP_KERNEL.

For the expectation events, the IPEXP_DESTROY event is already delivered
under the spinlock, just move the delivery of IPEXP_NEW under the
spinlock too because the master conntrack event cache is reached through
exp->master.

While at it, add lockdep notations to help identify what codepaths need
to grab the spinlock.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:18:32 +01:00
Pablo Neira Ayuso
f017941060 netfilter: nf_conntrack_expect: use expect->helper
Use expect->helper in ctnetlink and /proc to dump the helper name.
Using nfct_help() without holding a reference to the master conntrack
is unsafe.

Use exp->master->helper in ctnetlink path if userspace does not provide
an explicit helper when creating an expectation to retain the existing
behaviour. The ctnetlink expectation path holds the reference on the
master conntrack and nf_conntrack_expect lock and the nfnetlink glue
path refers to the master ct that is attached to the skb.

Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:18:31 +01:00
Pablo Neira Ayuso
9c42bc9db9 netfilter: nf_conntrack_expect: honor expectation helper field
The expectation helper field is mostly unused. As a result, the
netfilter codebase relies on accessing the helper through exp->master.

Always set on the expectation helper field so it can be used to reach
the helper.

nf_ct_expect_init() is called from packet path where the skb owns
the ct object, therefore accessing exp->master for the newly created
expectation is safe. This saves a lot of updates in all callsites
to pass the ct object as parameter to nf_ct_expect_init().

This is a preparation patches for follow up fixes.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:18:31 +01:00
Pablo Neira Ayuso
fafdd92b9e netfilter: nft_set_rbtree: revisit array resize logic
Chris Arges reports high memory consumption with thousands of
containers, this patch revisits the array allocation logic.

For anonymous sets, start by 16 slots (which takes 256 bytes on x86_64).
Expand it by x2 until threshold of 512 slots is reached, over that
threshold, expand it by x1.5.

For non-anonymous set, start by 1024 slots in the array (which takes 16
Kbytes initially on x86_64). Expand it by x1.5.

Use set->ndeact to subtract deactivated elements when calculating the
number of the slots in the array, otherwise the array size array gets
increased artifically. Add special case shrink logic to deal with flush
set too.

The shrink logic is skipped by anonymous sets.

Use check_add_overflow() to calculate the new array size.

Add a WARN_ON_ONCE check to make sure elements fit into the new array
size.

Reported-by: Chris Arges <carges@cloudflare.com>
Fixes: 7e43e0a114 ("netfilter: nft_set_rbtree: translate rbtree to array for binary search")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:18:31 +01:00
9d3f027327 netfilter: ip6t_rt: reject oversized addrnr in rt_mt6_check()
Reject rt match rules whose addrnr exceeds IP6T_RT_HOPS.

rt_mt6() expects addrnr to stay within the bounds of rtinfo->addrs[].
Validate addrnr during rule installation so malformed rules are rejected
before the match logic can use an out-of-range value.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Tested-by: Yuhang Zheng <z1652074432@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:18:31 +01:00
Weiming Shi
52025ebaa2 netfilter: nfnetlink_log: fix uninitialized padding leak in NFULA_PAYLOAD
__build_packet_message() manually constructs the NFULA_PAYLOAD netlink
attribute using skb_put() and skb_copy_bits(), bypassing the standard
nla_reserve()/nla_put() helpers. While nla_total_size(data_len) bytes
are allocated (including NLA alignment padding), only data_len bytes
of actual packet data are copied. The trailing nla_padlen(data_len)
bytes (1-3 when data_len is not 4-byte aligned) are never initialized,
leaking stale heap contents to userspace via the NFLOG netlink socket.

Replace the manual attribute construction with nla_reserve(), which
handles the tailroom check, header setup, and padding zeroing via
__nla_reserve(). The subsequent skb_copy_bits() fills in the payload
data on top of the properly initialized attribute.

Fixes: df6fb868d6 ("[NETFILTER]: nfnetlink: convert to generic netlink attribute functions")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-26 13:15:46 +01:00
Sakari Ailus
7587fbf5ad media: ccs: Avoid deadlock in ccs_init_state()
The sub-device state lock has been already acquired when ccs_init_state()
is called. Do not try to acquire it again.

Reported-by: David Heidelberg <david@ixit.cz>
Fixes: a88883d120 ("media: ccs: Rely on sub-device state locking")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
2026-03-26 13:14:07 +01:00
Ricardo Ribalda
7c39f48568 media: uvcvideo: Fix bug in error path of uvc_alloc_urb_buffers
Recent cleanup introduced a bug in the error path of
uvc_alloc_urb_buffers(). If there is not enough memory for the
allocation the following error will be triggered:

[  739.196672] UBSAN: shift-out-of-bounds in mm/page_alloc.c:1403:22
[  739.196710] shift exponent 52 is too large for 32-bit type 'int'

Resulting in:
[  740.464422] BUG: unable to handle page fault for address: fffffac1c0800000

The reason for the bug is that usb_free_noncoherent is called with an
invalid size (0) instead of the actual size of the urb.

This patch takes care of that.

Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Closes: https://lore.kernel.org/linux-media/abycbXzYupZpGkvR@hyeyoo/T/#t
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Fixes: c824345288 ("media: uvcvideo: Pass allocation size directly to uvc_alloc_urb_buffer")
Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://patch.msgid.link/20260320-uvc-urb-free-error-v1-1-b12cc3762a19@chromium.org
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
2026-03-26 13:14:07 +01:00
Peter Ujfalusi
d40a198e2b ASoC: SOF: ipc4-topology: Allow bytes controls without initial payload
It is unexpected, but allowed to have no initial payload for a bytes
control and the code is prepared to handle this case, but the size check
missed this corner case.

Update the check for minimal size to allow the initial size to be 0.

Cc: stable@vger.kernel.org
Fixes: a653820700 ("ASoC: SOF: ipc4-topology: Correct the allocation size for bytes controls")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
Reviewed-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Link: https://patch.msgid.link/20260326075618.1603-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-26 11:16:03 +00:00
Johan Hovold
8d2e0cb322 spi: fix use-after-free on managed registration failure
The SPI API is asymmetric and the controller is freed as part of
deregistration (unless it has been allocated using
devm_spi_alloc_host/target()).

A recent change converting the managed registration function to use
devm_add_action_or_reset() inadvertently introduced a (mostly
theoretical) regression where a non-devres managed controller could be
freed as part of failed registration. This in turn would lead to
use-after-free in controller driver error paths.

Fix this by taking another reference before calling
devm_add_action_or_reset() and not releasing it on errors for
non-devres allocated controllers.

An alternative would be a partial revert of the offending commit, but
it is better to handle this explicitly until the API has been fixed
(e.g. see 5e844cc37a ("spi: Introduce device-managed SPI controller
allocation")).

Fixes: b6376dbed8 ("spi: Simplify devm_spi_*_controller()")
Reported-by: Felix Gu <ustc.gu@gmail.com>
Link: https://lore.kernel.org/all/20260324145548.139952-1-ustc.gu@gmail.com/
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20260325145319.1132072-1-johan@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-26 10:45:28 +00:00
Mark Brown
c6eea4ff84 ASoC: adau1372: Fix error handling in adau1372_set_power()
Jihed Chaibi <jihed.chaibi.dev@gmail.com> says:

adau1372_set_power() had two related error handling issues in its enable
path: clk_prepare_enable() was called but its return value discarded, and
adau1372_enable_pll() was a void function that silently swallowed lock
failures, leaving mclk enabled and adau1372->enabled set to true despite
the device being in a broken state.

Patch 1 fixes the unchecked clk_prepare_enable() by making
adau1372_set_power() return int and propagating the error.

Patch 2 converts adau1372_enable_pll() to return int and adds a full
unwind in adau1372_set_power() if PLL lock fails, reversing the regcache,
GPIO power-down, and clock state.
2026-03-26 10:33:38 +00:00
Jihed Chaibi
bfe6a264ef ASoC: adau1372: Fix clock leak on PLL lock failure
adau1372_enable_pll() was a void function that logged a dev_err() on
PLL lock timeout but did not propagate the error. As a result,
adau1372_set_power() would continue with adau1372->enabled set to true
despite the PLL being unlocked, and the mclk left enabled with no
corresponding disable on the error path.

Convert adau1372_enable_pll() to return int, using -ETIMEDOUT on lock
timeout and propagating regmap errors directly. In adau1372_set_power(),
check the return value and unwind in reverse order: restore regcache to
cache-only mode, reassert GPIO power-down, and disable the clock before
returning the error.

Signed-off-by: Jihed Chaibi <jihed.chaibi.dev@gmail.com>
Fixes: 6cd4c6459e ("ASoC: Add ADAU1372 audio CODEC support")
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://patch.msgid.link/20260325210704.76847-3-jihed.chaibi.dev@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-26 10:33:36 +00:00
Jihed Chaibi
326fe8104a ASoC: adau1372: Fix unchecked clk_prepare_enable() return value
adau1372_set_power() calls clk_prepare_enable() but discards the return
value. If the clock enable fails, the driver proceeds to access registers
on unpowered hardware, potentially causing silent corruption.

Make adau1372_set_power() return int and propagate the error from
clk_prepare_enable(). Update adau1372_set_bias_level() to return the
error directly for the STANDBY and OFF cases.

Signed-off-by: Jihed Chaibi <jihed.chaibi.dev@gmail.com>
Fixes: 6cd4c6459e ("ASoC: Add ADAU1372 audio CODEC support")
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://patch.msgid.link/20260325210704.76847-2-jihed.chaibi.dev@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-26 10:33:35 +00:00
Chuck Lever
84a8335d83 tls: Purge async_hold in tls_decrypt_async_wait()
The async_hold queue pins encrypted input skbs while
the AEAD engine references their scatterlist data. Once
tls_decrypt_async_wait() returns, every AEAD operation
has completed and the engine no longer references those
skbs, so they can be freed unconditionally.

A subsequent patch adds batch async decryption to
tls_sw_read_sock(), introducing a new call site that
must drain pending AEAD operations and release held
skbs. Move __skb_queue_purge(&ctx->async_hold) into
tls_decrypt_async_wait() so the purge is centralized
and every caller -- recvmsg's drain path, the -EBUSY
fallback in tls_do_decryption(), and the new read_sock
batch path -- releases held skbs on synchronization
without each site managing the purge independently.

This fixes a leak when tls_strp_msg_hold() fails part-way through,
after having added some cloned skbs to the async_hold
queue. tls_decrypt_sg() will then call tls_decrypt_async_wait() to
process all pending decrypts, and drop back to synchronous mode, but
tls_sw_recvmsg() only flushes the async_hold queue when one record has
been processed in "fully-async" mode, which may not be the case here.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Fixes: b8a6ff84ab ("tls: wait for pending async decryptions if tls_strp_msg_hold fails")
Link: https://patch.msgid.link/20260324-tls-read-sock-v5-1-5408befe5774@oracle.com
[pabeni@redhat.com: added leak comment]
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-26 09:55:53 +01:00
Marc Buerg
f63a9df7e3 sysctl: fix uninitialized variable in proc_do_large_bitmap
proc_do_large_bitmap() does not initialize variable c, which is expected
to be set to a trailing character by proc_get_long().

However, proc_get_long() only sets c when the input buffer contains a
trailing character after the parsed value.

If c is not initialized it may happen to contain a '-'. If this is the
case proc_do_large_bitmap() expects to be able to parse a second part of
the input buffer. If there is no second part an unjustified -EINVAL will
be returned.

Initialize c to 0 to prevent returning -EINVAL on valid input.

Fixes: 9f977fb7ae ("sysctl: add proc_do_large_bitmap")
Signed-off-by: Marc Buerg <buermarc@googlemail.com>
Reviewed-by: Joel Granados <joel.granados@kernel.org>
Signed-off-by: Joel Granados <joel.granados@kernel.org>
2026-03-26 09:32:19 +01:00
GuoHan Zhao
cd7e1fef5a xen/privcmd: unregister xenstore notifier on module exit
Commit 453b8fb68f ("xen/privcmd: restrict usage in
unprivileged domU") added a xenstore notifier to defer setting the
restriction target until Xenstore is ready.

XEN_PRIVCMD can be built as a module, but privcmd_exit() leaves that
notifier behind. Balance the notifier lifecycle by unregistering it on
module exit.

This is harmless even if xenstore was already ready at registration
time and the notifier was never queued on the chain.

Fixes: 453b8fb68f ("xen/privcmd: restrict usage in unprivileged domU")
Signed-off-by: GuoHan Zhao <zhaoguohan@kylinos.cn>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260325120246.252899-1-zhaoguohan@kylinos.cn>
2026-03-26 08:57:51 +01:00
Bibo Mao
6bcfb7f46d LoongArch: KVM: Fix base address calculation in kvm_eiointc_regs_access()
In function kvm_eiointc_regs_access(), the register base address is
caculated from array base address plus offset, the offset is absolute
value from the base address. The data type of array base address is
u64, it should be converted into the "void *" type and then plus the
offset.

Cc: <stable@vger.kernel.org>
Fixes: d3e43a1f34 ("LoongArch: KVM: Use 64-bit register definition for EIOINTC").
Reported-by: Aurelien Jarno <aurel32@debian.org>
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1131431
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-26 14:29:09 +08:00
Huacai Chen
b97bd69eb0 LoongArch: KVM: Handle the case that EIOINTC's coremap is empty
EIOINTC's coremap in eiointc_update_sw_coremap() can be empty, currently
we get a cpuid with -1 in this case, but we actually need 0 because it's
similar as the case that cpuid >= 4.

This fix an out-of-bounds access to kvm_arch::phyid_map::phys_map[].

Cc: <stable@vger.kernel.org>
Fixes: 3956a52bc0 ("LoongArch: KVM: Add EIOINTC read and write functions")
Reported-by: Aurelien Jarno <aurel32@debian.org>
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1131431
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-26 14:29:09 +08:00
Huacai Chen
2db06c15d8 LoongArch: KVM: Make kvm_get_vcpu_by_cpuid() more robust
kvm_get_vcpu_by_cpuid() takes a cpuid parameter whose type is int, so
cpuid can be negative. Let kvm_get_vcpu_by_cpuid() return NULL for this
case so as to make it more robust.

This fix an out-of-bounds access to kvm_arch::phyid_map::phys_map[].

Cc: <stable@vger.kernel.org>
Fixes: 73516e9da5 ("LoongArch: KVM: Add vcpu mapping from physical cpuid")
Reported-by: Aurelien Jarno <aurel32@debian.org>
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1131431
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-26 14:29:09 +08:00
Xi Ruoyao
e4878c37f6 LoongArch: vDSO: Emit GNU_EH_FRAME correctly
With -fno-asynchronous-unwind-tables and --no-eh-frame-hdr (the default
of the linker), the GNU_EH_FRAME segment (specified by vdso.lds.S) is
empty.  This is not valid, as the current DWARF specification mandates
the first byte of the EH frame to be the version number 1.  It causes
some unwinders to complain, for example the ClickHouse query profiler
spams the log with messages:

    clickhouse-server[365854]: libunwind: unsupported .eh_frame_hdr
    version: 127 at 7ffffffb0000

Here "127" is just the byte located at the p_vaddr (0, i.e. the
beginning of the vDSO) of the empty GNU_EH_FRAME segment. Cross-
checking with /proc/365854/maps has also proven 7ffffffb0000 is the
start of vDSO in the process VM image.

In LoongArch the -fno-asynchronous-unwind-tables option seems just a
MIPS legacy, and MIPS only uses this option to satisfy the MIPS-specific
"genvdso" program, per the commit cfd75c2db1 ("MIPS: VDSO: Explicitly
use -fno-asynchronous-unwind-tables").  IIRC it indicates some inherent
limitation of the MIPS ELF ABI and has nothing to do with LoongArch.  So
we can simply flip it over to -fasynchronous-unwind-tables and pass
--eh-frame-hdr for linking the vDSO, allowing the profilers to unwind the
stack for statistics even if the sample point is taken when the PC is in
the vDSO.

However simply adjusting the options above would exploit an issue: when
the libgcc unwinder saw the invalid GNU_EH_FRAME segment, it silently
falled back to a machine-specific routine to match the code pattern of
rt_sigreturn() and extract the registers saved in the sigframe if the
code pattern is matched.  As unwinding from signal handlers is vital for
libgcc to support pthread cancellation etc., the fall-back routine had
been silently keeping the LoongArch Linux systems functioning since
Linux 5.19.  But when we start to emit GNU_EH_FRAME with the correct
format, fall-back routine will no longer be used and libgcc will fail
to unwind the sigframe, and unwinding from signal handlers will no
longer work, causing dozens of glibc test failures.  To make it possible
to unwind from signal handlers again, it's necessary to code the unwind
info in __vdso_rt_sigreturn via .cfi_* directives.

The offsets in the .cfi_* directives depend on the layout of struct
sigframe, notably the offset of sigcontext in the sigframe.  To use the
offset in the assembly file, factor out struct sigframe into a header to
allow asm-offsets.c to output the offset for assembly.

To work around a long-term issue in the libgcc unwinder (the pc is
unconditionally substracted by 1: doing so is technically incorrect for
a signal frame), a nop instruction is included with the two real
instructions in __vdso_rt_sigreturn in the same FDE PC range.  The same
hack has been used on x86 for a long time.

Cc: stable@vger.kernel.org
Fixes: c6b99bed6b ("LoongArch: Add VDSO and VSYSCALL support")
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-26 14:29:09 +08:00
Huacai Chen
95db0c9f52 LoongArch: Workaround LS2K/LS7A GPU DMA hang bug
1. Hardware limitation: GPU, DC and VPU are typically PCI device 06.0,
06.1 and 06.2. They share some hardware resources, so when configure the
PCI 06.0 device BAR1, DMA memory access cannot be performed through this
BAR, otherwise it will cause hardware abnormalities.

2. In typical scenarios of reboot or S3/S4, DC access to memory through
BAR is not prohibited, resulting in GPU DMA hangs.

3. Workaround method: When configuring the 06.0 device BAR1, turn off
the memory access of DC, GPU and VPU (via DC's CRTC registers).

Cc: stable@vger.kernel.org
Signed-off-by: Qianhai Wu <wuqianhai@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-26 14:29:09 +08:00
Li Jun
3a28daa9b7 LoongArch: Fix missing NULL checks for kstrdup()
1. Replace "of_find_node_by_path("/")" with "of_root" to avoid multiple
calls to "of_node_put()".

2. Fix a potential kernel oops during early boot when memory allocation
fails while parsing CPU model from device tree.

Cc: stable@vger.kernel.org
Signed-off-by: Li Jun <lijun01@kylinos.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-26 14:29:08 +08:00
Linus Torvalds
0138af2472 Merge tag 'erofs-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs fixes from Gao Xiang:

 - Mark I/Os as failed when encountering short reads on file-backed
   mounts

 - Label GFP_NOIO in the BIO completion when the completion is in the
   process context, and directly call into the decompression to avoid
   deadlocks

 - Improve Kconfig descriptions to better highlight the overall efforts

 - Fix .fadvise() for page cache sharing

* tag 'erofs-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: fix .fadvise() for page cache sharing
  erofs: update the Kconfig description
  erofs: add GFP_NOIO in the bio completion if needed
  erofs: set fileio bio failed in short read case
2026-03-25 18:41:35 -07:00
Linus Torvalds
aba9da0905 Merge tag 'rcu-fixes.v7.0-20260325a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux
Pull RCU fixes from Boqun Feng:
 "Fix a regression introduced by commit c27cea4416 ("rcu: Re-implement
  RCU Tasks Trace in terms of SRCU-fast"): BPF contexts can run with
  preemption disabled or scheduler locks held, so call_srcu() must work
  in all such contexts.

  Fix this by converting SRCU's spinlocks to raw spinlocks and avoiding
  scheduler lock acquisition in call_srcu() by deferring to an irq_work
  (similar to call_rcu_tasks_generic()), for both tree SRCU and tiny
  SRCU.

  Also fix a follow-on lockdep splat caused by srcu_node allocation
  under the newly introduced raw spinlock by deferring the allocation to
  grace-period worker context"

* tag 'rcu-fixes.v7.0-20260325a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux:
  srcu: Use irq_work to start GP in tiny SRCU
  rcu: Use an intermediate irq_work to start process_srcu()
  srcu: Push srcu_node allocation to GP when non-preemptible
  srcu: Use raw spinlocks so call_srcu() can be used under preempt_disable()
2026-03-25 18:14:19 -07:00
Tejun Heo
4c56a8ac68 cgroup: Fix cgroup_drain_dying() testing the wrong condition
cgroup_drain_dying() was using cgroup_is_populated() to test whether there are
dying tasks to wait for. cgroup_is_populated() tests nr_populated_csets,
nr_populated_domain_children and nr_populated_threaded_children, but
cgroup_drain_dying() only needs to care about this cgroup's own tasks - whether
there are children is cgroup_destroy_locked()'s concern.

This caused hangs during shutdown. When systemd tried to rmdir a cgroup that had
no direct tasks but had a populated child, cgroup_drain_dying() would enter its
wait loop because cgroup_is_populated() was true from
nr_populated_domain_children. The task iterator found nothing to wait for, yet
the populated state never cleared because it was driven by live tasks in the
child cgroup.

Fix it by using cgroup_has_tasks() which only tests nr_populated_csets.

v3: Fix cgroup_is_populated() -> cgroup_has_tasks() (Sebastian).

v2: https://lore.kernel.org/r/20260323200205.1063629-1-tj@kernel.org

Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Fixes: 1b164b876c ("cgroup: Wait for dying tasks to leave on rmdir")
Signed-off-by: Tejun Heo <tj@kernel.org>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
2026-03-25 14:08:04 -10:00
Namjae Jeon
beef2634f8 ksmbd: fix potencial OOB in get_file_all_info() for compound requests
When a compound request consists of QUERY_DIRECTORY + QUERY_INFO
(FILE_ALL_INFORMATION) and the first command consumes nearly the entire
max_trans_size, get_file_all_info() would blindly call smbConvertToUTF16()
with PATH_MAX, causing out-of-bounds write beyond the response buffer.
In get_file_all_info(), there was a missing validation check for
the client-provided OutputBufferLength before copying the filename into
FileName field of the smb2_file_all_info structure.
If the filename length exceeds the available buffer space, it could lead to
potential buffer overflows or memory corruption during smbConvertToUTF16
conversion. This calculating the actual free buffer size using
smb2_calc_max_out_buf_len() and returning -EINVAL if the buffer is
insufficient and updating smbConvertToUTF16 to use the actual filename
length (clamped by PATH_MAX) to ensure a safe copy operation.

Cc: stable@vger.kernel.org
Fixes: e2b76ab8b5 ("ksmbd: add support for read compound")
Reported-by: Asim Viladi Oglu Manizada <manizada@pm.me>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-25 18:58:40 -05:00
Linus Torvalds
d2a43e7f89 Merge tag 'hardening-v7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull hardening fixes from Kees Cook:

 - fix required Clang version for CC_HAS_COUNTED_BY_PTR (Nathan
   Chancellor)

 - update Coccinelle script used for kmalloc_obj

* tag 'hardening-v7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  init/Kconfig: Require a release version of clang-22 for CC_HAS_COUNTED_BY_PTR
  coccinelle: kmalloc_obj: Remove default GFP_KERNEL arg
2026-03-25 14:47:18 -07:00
Linus Torvalds
51088b9d5f Merge tag 'platform-drivers-x86-v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86
Pull x86 platform driver fixes from Ilpo Järvinen:
 "Fixes and New HW Support. The trivial drop of unused gz_chain_head is
  not exactly fixes material but it allows other work to avoid problems
  so I decided to take it in along with the fixes.

   - amd/hsmp: Fix typo in error message

   - asus-armoury: Add support for G614FP, GA503QM, GZ302EAC, and GZ302EAC

   - asus-nb-wmi: Add DMI quirk for ASUS ROG Flow Z13-KJP GZ302EAC

   - hp-wmi: Support for Omen 16-k0xxx, 16-wf1xxx, 16-xf0xxx

   - intel-hid: Disable wakeup_mode during hibernation

   - ISST:
      - Check HWP support before MSR access
      - Correct locked bit width

   - lenovo: wmi-gamezone: Drop unused gz_chain_head

   - olpc-xo175-ec: Fix overflow error message"

* tag 'platform-drivers-x86-v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86:
  platform/x86: ISST: Correct locked bit width
  platform/x86: intel-hid: disable wakeup_mode during hibernation
  platform/x86: asus-armoury: add support for GZ302EA and GZ302EAC
  platform/x86: asus-nb-wmi: add DMI quirk for ASUS ROG Flow Z13-KJP GZ302EAC
  platform/x86/amd/hsmp: Fix typo in error message
  platform/olpc: olpc-xo175-ec: Fix overflow error message to print inlen
  platform/x86: lenovo: wmi-gamezone: Drop gz_chain_head
  platform/x86: ISST: Check HWP support before MSR access
  platform/x86: hp-wmi: Add support for Omen 16-k0xxx (8A4D)
  platform/x86: hp-wmi: Add support for Omen 16-wf1xxx (8C76)
  platform/x86: hp-wmi: Add Omen 16-xf0xxx (8BCA) support
  platform/x86: asus-armoury: add support for G614FP
  platform/x86: asus-armoury: add support for GA503QM
  MAINTAINERS: change email address of Denis Benato
2026-03-25 14:43:06 -07:00
Florian Westphal
6caefcd949 selftests: netfilter: nft_concat_range.sh: add check for flush+reload bug
This test will fail without
the preceding commit ("netfilter: nft_set_pipapo_avx2: fix match retart if found element is expired"):

  reject overlapping range on add       0s                              [ OK ]
  reload with flush                 /dev/stdin:59:32-52: Error: Could not process rule: File exists
add element inet filter test { 10.0.0.29 . 10.0.2.29 }

Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-25 21:40:47 +01:00
Florian Westphal
d3c0037ffe netfilter: nft_set_pipapo_avx2: don't return non-matching entry on expiry
New test case fails unexpectedly when avx2 matching functions are used.

The test first loads a ranomly generated pipapo set
with 'ipv4 . port' key, i.e.  nft -f foo.

This works.  Then, it reloads the set after a flush:
(echo flush set t s; cat foo) | nft -f -

This is expected to work, because its the same set after all and it was
already loaded once.

But with avx2, this fails: nft reports a clashing element.

The reported clash is of following form:

    We successfully re-inserted
      a . b
      c . d

Then we try to insert a . d

avx2 finds the already existing a . d, which (due to 'flush set') is marked
as invalid in the new generation.  It skips the element and moves to next.

Due to incorrect masking, the skip-step finds the next matching
element *only considering the first field*,

i.e. we return the already reinserted "a . b", even though the
last field is different and the entry should not have been matched.

No such error is reported for the generic c implementation (no avx2) or when
the last field has to use the 'nft_pipapo_avx2_lookup_slow' fallback.

Bisection points to
7711f4bb4b ("netfilter: nft_set_pipapo: fix range overlap detection")
but that fix merely uncovers this bug.

Before this commit, the wrong element is returned, but erronously
reported as a full, identical duplicate.

The root-cause is too early return in the avx2 match functions.
When we process the last field, we should continue to process data
until the entire input size has been consumed to make sure no stale
bits remain in the map.

Link: https://lore.kernel.org/netfilter-devel/20260321152506.037f68c0@elisabeth/
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2026-03-25 21:38:27 +01:00
Guenter Roeck
754bd2b4a0 hwmon: (pmbus/core) Protect regulator operations with mutex
The regulator operations pmbus_regulator_get_voltage(),
pmbus_regulator_set_voltage(), and pmbus_regulator_list_voltage()
access PMBus registers and shared data but were not protected by
the update_lock mutex. This could lead to race conditions.

However, adding mutex protection directly to these functions causes
a deadlock because pmbus_regulator_notify() (which calls
regulator_notifier_call_chain()) is often called with the mutex
already held (e.g., from pmbus_fault_handler()). If a regulator
callback then calls one of the now-protected voltage functions,
it will attempt to acquire the same mutex.

Rework pmbus_regulator_notify() to utilize a worker function to
send notifications outside of the mutex protection. Events are
stored as atomics in a per-page bitmask and processed by the worker.

Initialize the worker and its associated data during regulator
registration, and ensure it is cancelled on device removal using
devm_add_action_or_reset().

While at it, remove the unnecessary include of linux/of.h.

Cc: Sanman Pradhan <psanman@juniper.net>
Fixes: ddbb4db4ce ("hwmon: (pmbus) Add regulator support")
Reviewed-by: Sanman Pradhan <psanman@juniper.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-25 13:32:37 -07:00
Guenter Roeck
cd658475e7 hwmon: (pmbus) Introduce the concept of "write-only" attributes
Attributes intended to clear sensor history are intended to be writeable
only. Reading those attributes today results in reporting more or less
random values. To avoid ABI surprises, have those attributes explicitly
return 0 when reading.

Fixes: 787c095eda ("hwmon: (pmbus/core) Add support for rated attributes")
Reviewed-by: Sanman Pradhan <psanman@juniper.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-25 13:32:33 -07:00
Guenter Roeck
805a5bd1c3 hwmon: (pmbus) Mark lowest/average/highest/rated attributes as read-only
Writing those attributes is not supported, so mark them as read-only.

Prior to this change, attempts to write into these attributes returned
an error.

Mark boolean fields in struct pmbus_limit_attr and in struct
pmbus_sensor_attr as bit fields to reduce configuration data size.
The data is scanned only while probing, so performance is not a concern.

Fixes: 6f183d33a0 ("hwmon: (pmbus) Add support for peak attributes")
Reviewed-by: Sanman Pradhan <psanman@juniper.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-25 13:32:24 -07:00
Alexander Popov
789b06f9f3 wifi: virt_wifi: remove SET_NETDEV_DEV to avoid use-after-free
Currently we execute `SET_NETDEV_DEV(dev, &priv->lowerdev->dev)` for
the virt_wifi net devices. However, unregistering a virt_wifi device in
netdev_run_todo() can happen together with the device referenced by
SET_NETDEV_DEV().

It can result in use-after-free during the ethtool operations performed
on a virt_wifi device that is currently being unregistered. Such a net
device can have the `dev.parent` field pointing to the freed memory,
but ethnl_ops_begin() calls `pm_runtime_get_sync(dev->dev.parent)`.

Let's remove SET_NETDEV_DEV for virt_wifi to avoid bugs like this:

 ==================================================================
 BUG: KASAN: slab-use-after-free in __pm_runtime_resume+0xe2/0xf0
 Read of size 2 at addr ffff88810cfc46f8 by task pm/606

 Call Trace:
  <TASK>
  dump_stack_lvl+0x4d/0x70
  print_report+0x170/0x4f3
  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
  kasan_report+0xda/0x110
  ? __pm_runtime_resume+0xe2/0xf0
  ? __pm_runtime_resume+0xe2/0xf0
  __pm_runtime_resume+0xe2/0xf0
  ethnl_ops_begin+0x49/0x270
  ethnl_set_features+0x23c/0xab0
  ? __pfx_ethnl_set_features+0x10/0x10
  ? kvm_sched_clock_read+0x11/0x20
  ? local_clock_noinstr+0xf/0xf0
  ? local_clock+0x10/0x30
  ? kasan_save_track+0x25/0x60
  ? __kasan_kmalloc+0x7f/0x90
  ? genl_family_rcv_msg_attrs_parse.isra.0+0x150/0x2c0
  genl_family_rcv_msg_doit+0x1e7/0x2c0
  ? __pfx_genl_family_rcv_msg_doit+0x10/0x10
  ? __pfx_cred_has_capability.isra.0+0x10/0x10
  ? stack_trace_save+0x8e/0xc0
  genl_rcv_msg+0x411/0x660
  ? __pfx_genl_rcv_msg+0x10/0x10
  ? __pfx_ethnl_set_features+0x10/0x10
  netlink_rcv_skb+0x121/0x380
  ? __pfx_genl_rcv_msg+0x10/0x10
  ? __pfx_netlink_rcv_skb+0x10/0x10
  ? __pfx_down_read+0x10/0x10
  genl_rcv+0x23/0x30
  netlink_unicast+0x60f/0x830
  ? __pfx_netlink_unicast+0x10/0x10
  ? __pfx___alloc_skb+0x10/0x10
  netlink_sendmsg+0x6ea/0xbc0
  ? __pfx_netlink_sendmsg+0x10/0x10
  ? __futex_queue+0x10b/0x1f0
  ____sys_sendmsg+0x7a2/0x950
  ? copy_msghdr_from_user+0x26b/0x430
  ? __pfx_____sys_sendmsg+0x10/0x10
  ? __pfx_copy_msghdr_from_user+0x10/0x10
  ___sys_sendmsg+0xf8/0x180
  ? __pfx____sys_sendmsg+0x10/0x10
  ? __pfx_futex_wait+0x10/0x10
  ? fdget+0x2e4/0x4a0
  __sys_sendmsg+0x11f/0x1c0
  ? __pfx___sys_sendmsg+0x10/0x10
  do_syscall_64+0xe2/0x570
  ? exc_page_fault+0x66/0xb0
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
  </TASK>

This fix may be combined with another one in the ethtool subsystem:
https://lore.kernel.org/all/20260322075917.254874-1-alex.popov@linux.com/T/#u

Fixes: d43c65b05b ("ethtool: runtime-resume netdev parent in ethnl_ops_begin")
Cc: stable@vger.kernel.org
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Breno Leitao <leitao@debian.org>
Link: https://patch.msgid.link/20260324224607.374327-1-alex.popov@linux.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-25 20:46:57 +01:00
Pengpeng Hou
129fa608b6 Bluetooth: btusb: clamp SCO altsetting table indices
btusb_work() maps the number of active SCO links to USB alternate
settings through a three-entry lookup table when CVSD traffic uses
transparent voice settings. The lookup currently indexes alts[] with
data->sco_num - 1 without first constraining sco_num to the number of
available table entries.

While the table only defines alternate settings for up to three SCO
links, data->sco_num comes from hci_conn_num() and is used directly.
Cap the lookup to the last table entry before indexing it so the
driver keeps selecting the highest supported alternate setting without
reading past alts[].

Fixes: baac6276c0 ("Bluetooth: btusb: handle mSBC audio over USB Endpoints")
Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-25 15:32:55 -04:00
Hyunwoo Kim
25f420a0d4 Bluetooth: L2CAP: Fix ERTM re-init and zero pdu_len infinite loop
l2cap_config_req() processes CONFIG_REQ for channels in BT_CONNECTED
state to support L2CAP reconfiguration (e.g. MTU changes). However,
since both CONF_INPUT_DONE and CONF_OUTPUT_DONE are already set from
the initial configuration, the reconfiguration path falls through to
l2cap_ertm_init(), which re-initializes tx_q, srej_q, srej_list, and
retrans_list without freeing the previous allocations and sets
chan->sdu to NULL without freeing the existing skb. This leaks all
previously allocated ERTM resources.

Additionally, l2cap_parse_conf_req() does not validate the minimum
value of remote_mps derived from the RFC max_pdu_size option. A zero
value propagates to l2cap_segment_sdu() where pdu_len becomes zero,
causing the while loop to never terminate since len is never
decremented, exhausting all available memory.

Fix the double-init by skipping l2cap_ertm_init() and
l2cap_chan_ready() when the channel is already in BT_CONNECTED state,
while still allowing the reconfiguration parameters to be updated
through l2cap_parse_conf_req(). Also add a pdu_len zero check in
l2cap_segment_sdu() as a safeguard.

Fixes: 96298f6401 ("Bluetooth: L2CAP: handle l2cap config request during open state")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-25 15:32:32 -04:00
Hyunwoo Kim
00fdebbbc5 Bluetooth: L2CAP: Fix deadlock in l2cap_conn_del()
l2cap_conn_del() calls cancel_delayed_work_sync() for both info_timer
and id_addr_timer while holding conn->lock. However, the work functions
l2cap_info_timeout() and l2cap_conn_update_id_addr() both acquire
conn->lock, creating a potential AB-BA deadlock if the work is already
executing when l2cap_conn_del() takes the lock.

Move the work cancellations before acquiring conn->lock and use
disable_delayed_work_sync() to additionally prevent the works from
being rearmed after cancellation, consistent with the pattern used in
hci_conn_del().

Fixes: ab4eedb790 ("Bluetooth: L2CAP: Fix corrupted list in hci_chan_del")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-25 15:32:09 -04:00
Cen Zhang
94d8e6fe5d Bluetooth: btintel: serialize btintel_hw_error() with hci_req_sync_lock
btintel_hw_error() issues two __hci_cmd_sync() calls (HCI_OP_RESET
and Intel exception-info retrieval) without holding
hci_req_sync_lock().  This lets it race against
hci_dev_do_close() -> btintel_shutdown_combined(), which also runs
__hci_cmd_sync() under the same lock.  When both paths manipulate
hdev->req_status/req_rsp concurrently, the close path may free the
response skb first, and the still-running hw_error path hits a
slab-use-after-free in kfree_skb().

Wrap the whole recovery sequence in hci_req_sync_lock/unlock so it
is serialized with every other synchronous HCI command issuer.

Below is the data race report and the kasan report:

  BUG: data-race in __hci_cmd_sync_sk / btintel_shutdown_combined

  read of hdev->req_rsp at net/bluetooth/hci_sync.c:199
  by task kworker/u17:1/83:
   __hci_cmd_sync_sk+0x12f2/0x1c30 net/bluetooth/hci_sync.c:200
   __hci_cmd_sync+0x55/0x80 net/bluetooth/hci_sync.c:223
   btintel_hw_error+0x114/0x670 drivers/bluetooth/btintel.c:254
   hci_error_reset+0x348/0xa30 net/bluetooth/hci_core.c:1030

  write/free by task ioctl/22580:
   btintel_shutdown_combined+0xd0/0x360
    drivers/bluetooth/btintel.c:3648
   hci_dev_close_sync+0x9ae/0x2c10 net/bluetooth/hci_sync.c:5246
   hci_dev_do_close+0x232/0x460 net/bluetooth/hci_core.c:526

  BUG: KASAN: slab-use-after-free in
   sk_skb_reason_drop+0x43/0x380 net/core/skbuff.c:1202
  Read of size 4 at addr ffff888144a738dc
  by task kworker/u17:1/83:
   __hci_cmd_sync_sk+0x12f2/0x1c30 net/bluetooth/hci_sync.c:200
   __hci_cmd_sync+0x55/0x80 net/bluetooth/hci_sync.c:223
   btintel_hw_error+0x186/0x670 drivers/bluetooth/btintel.c:260

Fixes: 973bb97e5a ("Bluetooth: btintel: Add generic function for handling hardware errors")
Signed-off-by: Cen Zhang <zzzccc427@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-25 15:31:48 -04:00
Zhang Chen
f39f905e55 Bluetooth: L2CAP: Fix send LE flow credits in ACL link
When the L2CAP channel mode is L2CAP_MODE_ERTM/L2CAP_MODE_STREAMING,
l2cap_publish_rx_avail will be called and le flow credits will be sent in
l2cap_chan_rx_avail, even though the link type is ACL.

The logs in question as follows:
> ACL Data RX: Handle 129 flags 0x02 dlen 12
      L2CAP: Unknown (0x16) ident 4 len 4
        40 00 ed 05
< ACL Data TX: Handle 129 flags 0x00 dlen 10
      L2CAP: Command Reject (0x01) ident 4 len 2
        Reason: Command not understood (0x0000)

Bluetooth: Unknown BR/EDR signaling command 0x16
Bluetooth: Wrong link type (-22)

Fixes: ce60b9231b ("Bluetooth: compute LE flow credits based on recvbuf space")
Signed-off-by: Zhang Chen <zhangchen01@kylinos.cn>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-25 15:24:02 -04:00
Joel Fernandes
a6fc88b22b srcu: Use irq_work to start GP in tiny SRCU
Tiny SRCU's srcu_gp_start_if_needed() directly calls schedule_work(),
which acquires the workqueue pool->lock.

This causes a lockdep splat when call_srcu() is called with a scheduler
lock held, due to:

  call_srcu() [holding pi_lock]
    srcu_gp_start_if_needed()
      schedule_work() -> pool->lock

  workqueue_init() / create_worker() [holding pool->lock]
    wake_up_process() -> try_to_wake_up() -> pi_lock

Also add irq_work_sync() to cleanup_srcu_struct() to prevent a
use-after-free if a queued irq_work fires after cleanup begins.

Tested with rcutorture SRCU-T and no lockdep warnings.

[ Thanks to Boqun for similar fix in patch "rcu: Use an intermediate irq_work
to start process_srcu()" ]

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
2026-03-25 09:00:05 -07:00
Boqun Feng
7c405fb327 rcu: Use an intermediate irq_work to start process_srcu()
Since commit c27cea4416 ("rcu: Re-implement RCU Tasks Trace in terms
of SRCU-fast") we switched to SRCU in BPF. However as BPF instrument can
happen basically everywhere (including where a scheduler lock is held),
call_srcu() now needs to avoid acquiring scheduler lock because
otherwise it could cause deadlock [1]. Fix this by following what the
previous RCU Tasks Trace did: using an irq_work to delay the queuing of
the work to start process_srcu().

[boqun: Apply Joel's feedback]
[boqun: Apply Andrea's test feedback]

Reported-by: Andrea Righi <arighi@nvidia.com>
Closes: https://lore.kernel.org/all/abjzvz_tL_siV17s@gpd4/
Fixes: commit c27cea4416 ("rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast")
Link: https://lore.kernel.org/rcu/3c4c5a29-24ea-492d-aeee-e0d9605b4183@nvidia.com/ [1]
Suggested-by: Zqiang <qiang.zhang@linux.dev>
Tested-by: Andrea Righi <arighi@nvidia.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Boqun Feng <boqun@kernel.org>
2026-03-25 08:59:59 -07:00
Paul E. McKenney
61bbcfb505 srcu: Push srcu_node allocation to GP when non-preemptible
When the srcutree.convert_to_big and srcutree.big_cpu_lim kernel boot
parameters specify initialization-time allocation of the srcu_node
tree for statically allocated srcu_struct structures (for example, in
DEFINE_SRCU() at build time instead of init_srcu_struct() at runtime),
init_srcu_struct_nodes() will attempt to dynamically allocate this tree
at the first run-time update-side use of this srcu_struct structure,
but while holding a raw spinlock.  Because the memory allocator can
acquire non-raw spinlocks, this can result in lockdep splats.

This commit therefore uses the same SRCU_SIZE_ALLOC trick that is used
when the first run-time update-side use of this srcu_struct structure
happens before srcu_init() is called.  The actual allocation then takes
place from workqueue context at the ends of upcoming SRCU grace periods.

[boqun: Adjust the sha1 of the Fixes tag]

Fixes: 175b45ed34 ("srcu: Use raw spinlocks so call_srcu() can be used under preempt_disable()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
2026-03-25 08:59:02 -07:00
Paul E. McKenney
175b45ed34 srcu: Use raw spinlocks so call_srcu() can be used under preempt_disable()
Tree SRCU has used non-raw spinlocks for many years, motivated by a desire
to avoid unnecessary real-time latency and the absence of any reason to
use raw spinlocks.  However, the recent use of SRCU in tracing as the
underlying implementation of RCU Tasks Trace means that call_srcu()
is invoked from preemption-disabled regions of code, which in turn
requires that any locks acquired by call_srcu() or its callees must be
raw spinlocks.

This commit therefore converts SRCU's spinlocks to raw spinlocks.

[boqun: Add Fixes tag]

Reported-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Fixes: c27cea4416 ("rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
2026-03-25 08:55:50 -07:00
Petr Mladek
e398978ddf workqueue: Better describe stall check
Try to be more explicit why the workqueue watchdog does not take
pool->lock by default. Spin locks are full memory barriers which
delay anything. Obviously, they would primary delay operations
on the related worker pools.

Explain why it is enough to prevent the false positive by re-checking
the timestamp under the pool->lock.

Finally, make it clear what would be the alternative solution in
__queue_work() which is a hotter path.

Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-25 05:51:02 -10:00
Shuming Fan
c673efd5db ASoC: SDCA: fix finding wrong entity
This patch fixes an issue like:
where searching for the entity 'FU 11' could incorrectly match 'FU 113' first.
The driver should first perform an exact match on the full string name.
If no exact match is found, it can then fall back to a partial match.

Fixes: 48fa77af2f ("ASoC: SDCA: Add terminal type into input/output widget name")
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Shuming Fan <shumingf@realtek.com>
Link: https://patch.msgid.link/20260325110406.3232420-1-shumingf@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-25 15:37:51 +00:00
Jianmin Lv
87a70013be MAINTAINERS: Update GPU driver maintainer information
I and Qianhai are GPU R&D engineers at Loongson, specializing
in kernel driver development. We understand that the current
Loongson GPU driver lacks dedicated maintenance resources
because of some reasons.

As Loongson GPU driver developers, we have both the capability
and the responsibility to continuously maintain the Loongson
GPU driver, ensuring minimal impact on its users. After internal
discussions, our team has decided to recommend me and Qianhai
to take over the maintenance responsibilities, and recommend
Huacai, Mingcong and Ruoyao to help to review.

And We'll continue to maintain it for current supported chips
and drive future updates according to chip support plan.

Signed-off-by: Jianmin Lv <lvjianmin@loongson.cn>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patch.msgid.link/20260320101012.22714-1-lvjianmin@loongson.cn
2026-03-25 15:10:30 +01:00
Sanman Pradhan
bf08749a6a hwmon: (adm1177) fix sysfs ABI violation and current unit conversion
The adm1177 driver exposes the current alert threshold through
hwmon_curr_max_alarm. This violates the hwmon sysfs ABI, where
*_alarm attributes are read-only status flags and writable thresholds
must use currN_max.

The driver also stores the threshold internally in microamps, while
currN_max is defined in milliamps. Convert the threshold accordingly
on both the read and write paths.

Widen the cached threshold and related calculations to 64 bits so
that small shunt resistor values do not cause truncation or overflow.
Also use 64-bit arithmetic for the mA/uA conversions, clamp writes
to the range the hardware can represent, and propagate failures from
adm1177_write_alert_thr() instead of silently ignoring them.

Update the hwmon documentation to reflect the attribute rename and
the correct units returned by the driver.

Fixes: 09b08ac9e8 ("hwmon: (adm1177) Add ADM1177 Hot Swap Controller and Digital Power Monitor driver")
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Acked-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20260325051246.28262-1-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-25 06:50:13 -07:00
Shuming Fan
c991ca3238 ASoC: SDCA: remove the max count of initialization table
The number of the initialization table may exceed 2048.
Therefore, this patch removes the limitation and allows the driver to
allocate memory dynamically based on the size of the initialization table.

Signed-off-by: Shuming Fan <shumingf@realtek.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260325092017.3221640-1-shumingf@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-25 12:17:43 +00:00
Matthew Auld
bfe9e314d7 drm/xe: always keep track of remap prev/next
During 3D workload, user is reporting hitting:

[  413.361679] WARNING: drivers/gpu/drm/xe/xe_vm.c:1217 at vm_bind_ioctl_ops_unwind+0x1e2/0x2e0 [xe], CPU#7: vkd3d_queue/9925
[  413.361944] CPU: 7 UID: 1000 PID: 9925 Comm: vkd3d_queue Kdump: loaded Not tainted 7.0.0-070000rc3-generic #202603090038 PREEMPT(lazy)
[  413.361949] RIP: 0010:vm_bind_ioctl_ops_unwind+0x1e2/0x2e0 [xe]
[  413.362074] RSP: 0018:ffffd4c25c3df930 EFLAGS: 00010282
[  413.362077] RAX: 0000000000000000 RBX: ffff8f3ee817ed10 RCX: 0000000000000000
[  413.362078] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[  413.362079] RBP: ffffd4c25c3df980 R08: 0000000000000000 R09: 0000000000000000
[  413.362081] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8f41fbf99380
[  413.362082] R13: ffff8f3ee817e968 R14: 00000000ffffffef R15: ffff8f43d00bd380
[  413.362083] FS:  00000001040ff6c0(0000) GS:ffff8f4696d89000(0000) knlGS:00000000330b0000
[  413.362085] CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
[  413.362086] CR2: 00007ddfc4747000 CR3: 00000002e6262005 CR4: 0000000000f72ef0
[  413.362088] PKRU: 55555554
[  413.362089] Call Trace:
[  413.362092]  <TASK>
[  413.362096]  xe_vm_bind_ioctl+0xa9a/0xc60 [xe]

Which seems to hint that the vma we are re-inserting for the ops unwind
is either invalid or overlapping with something already inserted in the
vm. It shouldn't be invalid since this is a re-insertion, so must have
worked before. Leaving the likely culprit as something already placed
where we want to insert the vma.

Following from that, for the case where we do something like a rebind in
the middle of a vma, and one or both mapped ends are already compatible,
we skip doing the rebind of those vma and set next/prev to NULL. As well
as then adjust the original unmap va range, to avoid unmapping the ends.
However, if we trigger the unwind path, we end up with three va, with
the two ends never being removed and the original va range in the middle
still being the shrunken size.

If this occurs, one failure mode is when another unwind op needs to
interact with that range, which can happen with a vector of binds. For
example, if we need to re-insert something in place of the original va.
In this case the va is still the shrunken version, so when removing it
and then doing a re-insert it can overlap with the ends, which were
never removed, triggering a warning like above, plus leaving the vm in a
bad state.

With that, we need two things here:

 1) Stop nuking the prev/next tracking for the skip cases. Instead
    relying on checking for skip prev/next, where needed. That way on the
    unwind path, we now correctly remove both ends.

 2) Undo the unmap va shrinkage, on the unwind path. With the two ends
    now removed the unmap va should expand back to the original size again,
    before re-insertion.

v2:
  - Update the explanation in the commit message, based on an actual IGT of
    triggering this issue, rather than conjecture.
  - Also undo the unmap shrinkage, for the skip case. With the two ends
    now removed, the original unmap va range should expand back to the
    original range.
v3:
  - Track the old start/range separately. vma_size/start() uses the va
    info directly.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7602
Fixes: 8f33b4f054 ("drm/xe: Avoid doing rebinds")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260318100208.78097-2-matthew.auld@intel.com
(cherry picked from commit aec6969f75afbf4e01fd5fb5850ed3e9c27043ac)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-25 08:05:33 -04:00
Tvrtko Ursulin
06f4297134 drm/syncobj: Fix xa_alloc allocation flags
The xarray conversion blindly and wrongly replaced idr_alloc with xa_alloc
and kept the GFP_NOWAIT. It should have been GFP_KERNEL to account for
idr_preload it removed. Fix it.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Fixes: fec2c3c01f ("drm/syncobj: Convert syncobj idr to xarray")
Reported-by: Himanshu Girotra <himanshu.girotra@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Himanshu Girotra <himanshu.girotra@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Link: https://lore.kernel.org/r/20260324111019.22467-1-tvrtko.ursulin@igalia.com
2026-03-25 08:05:35 +00:00
Miguel Ojeda
2cdaff22ed dma-mapping: add missing inline for dma_free_attrs
Under an UML build for an upcoming series [1], I got `-Wstatic-in-inline`
for `dma_free_attrs`:

      BINDGEN rust/bindings/bindings_generated.rs - due to target missing
    In file included from rust/helpers/helpers.c:59:
    rust/helpers/dma.c:17:2: warning: static function 'dma_free_attrs' is used in an inline function with external linkage [-Wstatic-in-inline]
       17 |         dma_free_attrs(dev, size, cpu_addr, dma_handle, attrs);
          |         ^
    rust/helpers/dma.c:12:1: note: use 'static' to give inline function 'rust_helper_dma_free_attrs' internal linkage
       12 | __rust_helper void rust_helper_dma_free_attrs(struct device *dev, size_t size,
          | ^
          | static

The issue is that `dma_free_attrs` was not marked `inline` when it was
introduced alongside the rest of the stubs.

Thus mark it.

Fixes: ed6ccf10f2 ("dma-mapping: properly stub out the DMA API for !CONFIG_HAS_DMA")
Closes: https://lore.kernel.org/rust-for-linux/20260322194616.89847-1-ojeda@kernel.org/ [1]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260325015548.70912-1-ojeda@kernel.org
2026-03-25 08:01:21 +01:00
Guangshuo Li
c4ea7d8907 net: mana: fix use-after-free in add_adev() error path
If auxiliary_device_add() fails, add_adev() jumps to add_fail and calls
auxiliary_device_uninit(adev).

The auxiliary device has its release callback set to adev_release(),
which frees the containing struct mana_adev. Since adev is embedded in
struct mana_adev, the subsequent fall-through to init_fail and access
to adev->id may result in a use-after-free.

Fix this by saving the allocated auxiliary device id in a local
variable before calling auxiliary_device_add(), and use that saved id
in the cleanup path after auxiliary_device_uninit().

Fixes: a69839d432 ("net: mana: Add support for auxiliary device")
Cc: stable@vger.kernel.org
Reviewed-by: Long Li <longli@microsoft.com>
Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
Link: https://patch.msgid.link/20260323165730.945365-1-lgs201920130244@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-24 21:07:58 -07:00
Jonas Köppeler
815980fe6d net_sched: codel: fix stale state for empty flows in fq_codel
When codel_dequeue() finds an empty queue, it resets vars->dropping
but does not reset vars->first_above_time.  The reference CoDel
algorithm (Nichols & Jacobson, ACM Queue 2012) resets both:

  dodeque_result codel_queue_t::dodeque(time_t now) {
      ...
      if (r.p == NULL) {
          first_above_time = 0;   // <-- Linux omits this
      }
      ...
  }

Note that codel_should_drop() does reset first_above_time when called
with a NULL skb, but codel_dequeue() returns early before ever calling
codel_should_drop() in the empty-queue case.  The post-drop code paths
do reach codel_should_drop(NULL) and correctly reset the timer, so a
dropped packet breaks the cycle -- but the next delivered packet
re-arms first_above_time and the cycle repeats.

For sparse flows such as ICMP ping (one packet every 200ms-1s), the
first packet arms first_above_time, the flow goes empty, and the
second packet arrives after the interval has elapsed and gets dropped.
The pattern repeats, producing sustained loss on flows that are not
actually congested.

Test: veth pair, fq_codel, BQL disabled, 30000 iptables rules in the
consumer namespace (NAPI-64 cycle ~14ms, well above fq_codel's 5ms
target), ping at 5 pps under UDP flood:

  Before fix:  26% ping packet loss
  After fix:    0% ping packet loss

Fix by resetting first_above_time to zero in the empty-queue path
of codel_dequeue(), matching the reference algorithm.

Fixes: 76e3cc126b ("codel: Controlled Delay AQM")
Fixes: d068ca2ae2 ("codel: split into multiple files")
Co-developed-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Reported-by: Chris Arges <carges@cloudflare.com>
Tested-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/all/20260318134826.1281205-7-hawk@kernel.org/
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260323174920.253526-1-hawk@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-24 20:57:57 -07:00
Sabrina Dubroca
09474055f2 rtnetlink: fix leak of SRCU struct in rtnl_link_register
Commit 6b57ff21a3 ("rtnetlink: Protect link_ops by mutex.") swapped
the EEXIST check with the init_srcu_struct, but didn't add cleanup of
the SRCU struct we just allocated in case of error.

Fixes: 6b57ff21a3 ("rtnetlink: Protect link_ops by mutex.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/e77fe499f9a58c547b33b5212b3596dad417cec6.1774025341.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-24 20:56:02 -07:00
Thangaraj Samynathan
7139970787 net: lan743x: fix duplex configuration in mac_link_up
The driver does not explicitly configure the MAC duplex mode when
bringing the link up. As a result, the MAC may retain a stale duplex
setting from a previous link state, leading to duplex mismatches with
the link partner and degraded network performance.

Update lan743x_phylink_mac_link_up() to set or clear the MAC_CR_DPX_
bit according to the negotiated duplex mode.

This ensures the MAC configuration is consistent with the phylink
resolved state.

Fixes: a5f199a8d8 ("net: lan743x: Migrate phylib to phylink")
Signed-off-by: Thangaraj Samynathan <thangaraj.s@microchip.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/20260323065345.144915-1-thangaraj.s@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-24 20:48:25 -07:00
xietangxin
ba8bda9a08 virtio_net: Fix UAF on dst_ops when IFF_XMIT_DST_RELEASE is cleared and napi_tx is false
A UAF issue occurs when the virtio_net driver is configured with napi_tx=N
and the device's IFF_XMIT_DST_RELEASE flag is cleared
(e.g., during the configuration of tc route filter rules).

When IFF_XMIT_DST_RELEASE is removed from the net_device, the network stack
expects the driver to hold the reference to skb->dst until the packet
is fully transmitted and freed. In virtio_net with napi_tx=N,
skbs may remain in the virtio transmit ring for an extended period.

If the network namespace is destroyed while these skbs are still pending,
the corresponding dst_ops structure has freed. When a subsequent packet
is transmitted, free_old_xmit() is triggered to clean up old skbs.
It then calls dst_release() on the skb associated with the stale dst_entry.
Since the dst_ops (referenced by the dst_entry) has already been freed,
a UAF kernel paging request occurs.

fix it by adds skb_dst_drop(skb) in start_xmit to explicitly release
the dst reference before the skb is queued in virtio_net.

Call Trace:
 Unable to handle kernel paging request at virtual address ffff80007e150000
 CPU: 2 UID: 0 PID: 6236 Comm: ping Kdump: loaded Not tainted 7.0.0-rc1+ #6 PREEMPT
  ...
  percpu_counter_add_batch+0x3c/0x158 lib/percpu_counter.c:98 (P)
  dst_release+0xe0/0x110  net/core/dst.c:177
  skb_release_head_state+0xe8/0x108 net/core/skbuff.c:1177
  sk_skb_reason_drop+0x54/0x2d8 net/core/skbuff.c:1255
  dev_kfree_skb_any_reason+0x64/0x78 net/core/dev.c:3469
  napi_consume_skb+0x1c4/0x3a0 net/core/skbuff.c:1527
  __free_old_xmit+0x164/0x230  drivers/net/virtio_net.c:611 [virtio_net]
  free_old_xmit drivers/net/virtio_net.c:1081 [virtio_net]
  start_xmit+0x7c/0x530 drivers/net/virtio_net.c:3329 [virtio_net]
  ...

Reproduction Steps:
NETDEV="enp3s0"

config_qdisc_route_filter() {
    tc qdisc del dev $NETDEV root
    tc qdisc add dev $NETDEV root handle 1: prio
    tc filter add dev $NETDEV parent 1:0 \
	protocol ip prio 100 route to 100 flowid 1:1
    ip route add 192.168.1.100/32 dev $NETDEV realm 100
}

test_ns() {
    ip netns add testns
    ip link set $NETDEV netns testns
    ip netns exec testns ifconfig $NETDEV  10.0.32.46/24
    ip netns exec testns ping -c 1 10.0.32.1
    ip netns del testns
}

config_qdisc_route_filter

test_ns
sleep 2
test_ns

Fixes: f2fc6a5458 ("[NETNS][IPV6] route6 - move ip6_dst_ops inside the network namespace")
Cc: stable@vger.kernel.org
Signed-off-by: xietangxin <xietangxin@yeah.net>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Fixes: 0287587884 ("net: better IFF_XMIT_DST_RELEASE support")
Link: https://patch.msgid.link/20260312025406.15641-1-xietangxin@yeah.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-24 20:46:08 -07:00
Gao Xiang
2f0407ed92 erofs: fix .fadvise() for page cache sharing
Currently, .fadvise() doesn't work well if page cache sharing is on
since shared inodes belong to a pseudo fs generated with init_pseudo(),
and sb->s_bdi is the default one &noop_backing_dev_info.

Then, generic_fadvise() will just behave as a no-op if sb->s_bdi is
&noop_backing_dev_info, but as the bdev fs (the bdev fs changes
inode_to_bdi() instead), it's actually NOT a pure memfs.

Let's generate a real bdi for erofs_ishare_mnt instead.

Fixes: d86d7817c0 ("erofs: implement .fadvise for page cache share")
Reviewed-by: Hongbo Li <lihongbo22@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2026-03-25 10:40:02 +08:00
Linus Torvalds
bbeb83d318 Merge tag 'kbuild-fixes-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux
Pull Kbuild fixes from Nathan Chancellor:
 "This mostly addresses some issues with the awk conversion in
  scripts/kconfig/merge_config.sh.

   - Fix typo to ensure .builtin-dtbs.S is properly cleaned

   - Fix '==' bashism in scripts/kconfig/merge_config.sh

   - Fix awk error in scripts/kconfig/merge_config.sh when base
     configuration is empty

   - Fix inconsistent indentation in scripts/kconfig/merge_config.sh"

* tag 'kbuild-fixes-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux:
  scripts: kconfig: merge_config.sh: fix indentation
  scripts: kconfig: merge_config.sh: pass output file as awk variable
  scripts: kconfig: merge_config.sh: fix unexpected operator warning
  kbuild: Delete .builtin-dtbs.S when running make clean
2026-03-24 16:48:14 -07:00
Zhan Xusheng
5d16467ae5 alarmtimer: Fix argument order in alarm_timer_forward()
alarm_timer_forward() passes arguments to alarm_forward() in the wrong
order:

  alarm_forward(alarm, timr->it_interval, now);

However, alarm_forward() is defined as:

  u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval);

and uses the second argument as the current time:

  delta = ktime_sub(now, alarm->node.expires);

Passing the interval as "now" results in incorrect delta computation,
which can lead to missed expirations or incorrect overrun accounting.

This issue has been present since the introduction of
alarm_timer_forward().

Fix this by swapping the arguments.

Fixes: e7561f1633 ("alarmtimer: Implement forward callback")
Signed-off-by: Zhan Xusheng <zhanxusheng@xiaomi.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260323061130.29991-1-zhanxusheng@xiaomi.com
2026-03-24 23:17:14 +01:00
Tejun Heo
6680c162b4 selftests/cgroup: Don't require synchronous populated update on task exit
test_cgcore_populated (test_core) and test_cgkill_{simple,tree,forkbomb}
(test_kill) check cgroup.events "populated 0" immediately after reaping
child tasks with waitpid(). This used to work because cgroup_task_exit() in
do_exit() unlinked tasks from css_sets before exit_notify() woke up
waitpid().

d245698d72 ("cgroup: Defer task cgroup unlink until after the task is done
switching out") moved the unlink to cgroup_task_dead() in
finish_task_switch(), which runs after exit_notify(). The populated counter
is now decremented after the parent's waitpid() can return, so there is no
longer a synchronous ordering guarantee. On PREEMPT_RT, where
cgroup_task_dead() is further deferred through lazy irq_work, the race
window is even larger.

The synchronous populated transition was never part of the cgroup interface
contract - it was an implementation artifact. Use cg_read_strcmp_wait() which
retries for up to 1 second, matching what these tests actually need to
verify: that the cgroup eventually becomes unpopulated after all tasks exit.

Fixes: d245698d72 ("cgroup: Defer task cgroup unlink until after the task is done switching out")
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Christian Brauner <brauner@kernel.org>
Cc: cgroups@vger.kernel.org
2026-03-24 10:21:57 -10:00
Tejun Heo
1b164b876c cgroup: Wait for dying tasks to leave on rmdir
a72f73c4dd ("cgroup: Don't expose dead tasks in cgroup") hid PF_EXITING
tasks from cgroup.procs so that systemd doesn't see tasks that have already
been reaped via waitpid(). However, the populated counter (nr_populated_csets)
is only decremented when the task later passes through cgroup_task_dead() in
finish_task_switch(). This means cgroup.procs can appear empty while the
cgroup is still populated, causing rmdir to fail with -EBUSY.

Fix this by making cgroup_rmdir() wait for dying tasks to fully leave. If the
cgroup is populated but all remaining tasks have PF_EXITING set (the task
iterator returns none due to the existing filter), wait for a kick from
cgroup_task_dead() and retry. The wait is brief as tasks are removed from the
cgroup's css_set between PF_EXITING assertion in do_exit() and
cgroup_task_dead() in finish_task_switch().

v2: cgroup_is_populated() true to false transition happens under css_set_lock
    not cgroup_mutex, so retest under css_set_lock before sleeping to avoid
    missed wakeups (Sebastian).

Fixes: a72f73c4dd ("cgroup: Don't expose dead tasks in cgroup")
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202603222104.2c81684e-lkp@intel.com
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Michal Koutny <mkoutny@suse.com>
Cc: cgroups@vger.kernel.org
2026-03-24 10:21:40 -10:00
Linus Torvalds
24f9515de8 Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Clear the pending exception state from a vcpu coming out of reset,
     as it could otherwise affect the first instruction executed in the
     guest

   - Fix pointer arithmetic in address translation emulation, so that
     the Hardware Access bit is set on the correct PTE instead of some
     other location

  s390:

   - Fix deadlock in new memory management

   - Properly handle kernel faults on donated memory

   - Fix bounds checking for irq routing, with selftest

   - Fix invalid machine checks and log all of them"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: arm64: Fix the descriptor address in __kvm_at_swap_desc()
  KVM: s390: vsie: Avoid injecting machine check on signal
  KVM: s390: log machine checks more aggressively
  KVM: s390: selftests: Add IRQ routing address offset tests
  KVM: s390: Limit adapter indicator access to mapped page
  s390/mm: Add missing secure storage access fixups for donated memory
  KVM: arm64: Discard PC update state on vcpu reset
  KVM: s390: Fix a deadlock
2026-03-24 13:11:26 -07:00
Panagiotis "Ivory" Vasilopoulos
a23811061a landlock: Expand restrict flags example for ABI version 8
Add LANDLOCK_RESTRICT_SELF_TSYNC to the backwards compatibility example
for restrict flags. This introduces completeness, similar to that of
the ruleset attributes example. However, as the new example can impact
enforcement in certain cases, an appropriate warning is also included.

Additionally, I modified the two comments of the example to make them
more consistent with the ruleset attributes example's.

Signed-off-by: Panagiotis "Ivory" Vasilopoulos <git@n0toose.net>
Co-developed-by: Dan Cojocaru <dan@dcdev.ro>
Signed-off-by: Dan Cojocaru <dan@dcdev.ro>
Reviewed-by: Günther Noack <gnoack@google.com>
Link: https://lore.kernel.org/r/20260304-landlock-docs-add-tsync-example-v4-1-819a276f05c5@n0toose.net
[mic: Update date, improve comments consistency, fix newline issue]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-24 20:55:55 +01:00
Linus Torvalds
45f667ebb0 Merge tag 'cxl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull Compute Express Link (CXL) fixes from Dave Jiang:

 - Adjust the startup priority of cxl_pmem to be higher than that of
   cxl_acpi

 - Use proper endpoint validity check upon sanitize

 - Avoid incorrect DVSEC fallback when HDM decoders are enabled

 - Fix CXL_ACPI and CXL_PMEM Kconfig tristate mismatch

 - Fix leakage in __construct_region()

 - Fix use after free of parent_port in cxl_detach_ep()

* tag 'cxl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl: Adjust the startup priority of cxl_pmem to be higher than that of cxl_acpi
  cxl/mbox: Use proper endpoint validity check upon sanitize
  cxl/hdm: Avoid incorrect DVSEC fallback when HDM decoders are enabled
  cxl/acpi: Fix CXL_ACPI and CXL_PMEM Kconfig tristate mismatch
  cxl/region: Fix leakage in __construct_region()
  cxl/port: Fix use after free of parent_port in cxl_detach_ep()
2026-03-24 12:41:29 -07:00
Srinivas Pandruvada
7dfe984601 thermal: intel: int340x: soc_slider: Set offset only for balanced mode
The slider offset can be set via debugfs for balanced mode. The offset
should be only applicable in balanced mode. For other modes, it should
be 0 when writing to MMIO offset,

Fixes: 8306bcaba0 ("thermal: intel: int340x: Add module parameter to change slider offset")
Tested-by: Erin Park <erin.park@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: 6.18+ <stable@vger.kernel.org> # 6.18+
[ rjw: Subject and changelog tweaks ]
Link: https://patch.msgid.link/20260324172346.3317145-1-srinivas.pandruvada@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-24 19:00:12 +01:00
Alex Deucher
90d239cc53 drm/amd/display: Fix DCE LVDS handling
LVDS does not use an HPD pin so it may be invalid.  Handle
this case correctly in link encoder creation.

Fixes: 7c8fb3b8e9 ("drm/amd/display: Add hpd_source index check for DCE60/80/100/110/112/120 link encoders")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5012
Cc: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Cc: Roman Li <roman.li@amd.com>
Reviewed-by: Roman Li <roman.li@amd.com>
Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 3b5620f7ee688177fcf65cf61588c5435bce1872)
Cc: stable@vger.kernel.org
2026-03-24 13:55:47 -04:00
Donet Tom
4e9597f22a drm/amdgpu: Handle GPU page faults correctly on non-4K page systems
During a GPU page fault, the driver restores the SVM range and then maps it
into the GPU page tables. The current implementation passes a GPU-page-size
(4K-based) PFN to svm_range_restore_pages() to restore the range.

SVM ranges are tracked using system-page-size PFNs. On systems where the
system page size is larger than 4K, using GPU-page-size PFNs to restore the
range causes two problems:

Range lookup fails:
Because the restore function receives PFNs in GPU (4K) units, the SVM
range lookup does not find the existing range. This will result in a
duplicate SVM range being created.

VMA lookup failure:
The restore function also tries to locate the VMA for the faulting address.
It converts the GPU-page-size PFN into an address using the system page
size, which results in an incorrect address on non-4K page-size systems.
As a result, the VMA lookup fails with the message: "address 0xxxx VMA is
removed".

This patch passes the system-page-size PFN to svm_range_restore_pages() so
that the SVM range is restored correctly on non-4K page systems.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Donet Tom <donettom@linux.ibm.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 074fe395fb13247b057f60004c7ebcca9f38ef46)
2026-03-24 13:55:29 -04:00
Yang Wang
28922a43fd drm/amd/pm: disable OD_FAN_CURVE if temp or pwm range invalid for smu v14
Forcibly disable the OD_FAN_CURVE feature when temperature or PWM range is invalid,
otherwise PMFW will reject this configuration on smu v14.0.2/14.0.3.

example:
$ sudo cat /sys/bus/pci/devices/<BDF>/gpu_od/fan_ctrl/fan_curve

OD_FAN_CURVE:
0: 0C 0%
1: 0C 0%
2: 0C 0%
3: 0C 0%
4: 0C 0%
OD_RANGE:
FAN_CURVE(hotspot temp): 0C 0C
FAN_CURVE(fan speed): 0% 0%

$ echo "0 50 40" | sudo tee fan_curve

kernel log:
[  969.761627] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]!
[ 1010.897800] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]!

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ab4905d466b60f170d85e19ca2a5d2b159aeb780)
Cc: stable@vger.kernel.org
2026-03-24 13:54:42 -04:00
Srinivasan Shanmugam
429aec2bc0 drm/amdkfd: Fix NULL pointer check order in kfd_ioctl_create_process
In kfd_ioctl_create_process(), the pointer 'p' is used before checking
if it is NULL.

The code accesses p->context_id before validating 'p'. This can lead
to a possible NULL pointer dereference.

Move the NULL check before using 'p' so that the pointer is validated
before access.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_chardev.c:3177 kfd_ioctl_create_process() warn: variable dereferenced before check 'p' (see line 3174)

Fixes: cc6b66d661 ("amdkfd: introduce new ioctl AMDKFD_IOC_CREATE_PROCESS")
Cc: Zhu Lingshan <lingshan.zhu@amd.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 19d4149b22f57094bfc4b86b742381b3ca394ead)
2026-03-24 13:54:19 -04:00
Alex Deucher
9da4f9964a drm/amd/display: check if ext_caps is valid in BL setup
LVDS connectors don't have extended backlight caps so check
if the pointer is valid before accessing it.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5012
Fixes: 1454642960 ("drm/amd: Re-introduce property to control adaptive backlight modulation")
Cc: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 3f797396d7f4eb9bb6eded184bbc6f033628a6f6)
Cc: stable@vger.kernel.org
2026-03-24 13:53:46 -04:00
Srinivasan Shanmugam
7150850146 drm/amdgpu: Fix fence put before wait in amdgpu_amdkfd_submit_ib
amdgpu_amdkfd_submit_ib() submits a GPU job and gets a fence
from amdgpu_ib_schedule(). This fence is used to wait for job
completion.

Currently, the code drops the fence reference using dma_fence_put()
before calling dma_fence_wait().

If dma_fence_put() releases the last reference, the fence may be
freed before dma_fence_wait() is called. This can lead to a
use-after-free.

Fix this by waiting on the fence first and releasing the reference
only after dma_fence_wait() completes.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c:697 amdgpu_amdkfd_submit_ib() warn: passing freed memory 'f' (line 696)

Fixes: 9ae55f030d ("drm/amdgpu: Follow up change to previous drm scheduler change.")
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 8b9e5259adc385b61a6590a13b82ae0ac2bd3482)
2026-03-24 13:53:33 -04:00
Weiming Shi
f6484cadbc ACPI: EC: clean up handlers on probe failure in acpi_ec_setup()
When ec_install_handlers() returns -EPROBE_DEFER on reduced-hardware
platforms, it has already started the EC and installed the address
space handler with the struct acpi_ec pointer as handler context.
However, acpi_ec_setup() propagates the error without any cleanup.

The caller acpi_ec_add() then frees the struct acpi_ec for non-boot
instances, leaving a dangling handler context in ACPICA.

Any subsequent AML evaluation that accesses an EC OpRegion field
dispatches into acpi_ec_space_handler() with the freed pointer,
causing a use-after-free:

 BUG: KASAN: slab-use-after-free in mutex_lock (kernel/locking/mutex.c:289)
 Write of size 8 at addr ffff88800721de38 by task init/1
 Call Trace:
  <TASK>
  mutex_lock (kernel/locking/mutex.c:289)
  acpi_ec_space_handler (drivers/acpi/ec.c:1362)
  acpi_ev_address_space_dispatch (drivers/acpi/acpica/evregion.c:293)
  acpi_ex_access_region (drivers/acpi/acpica/exfldio.c:246)
  acpi_ex_field_datum_io (drivers/acpi/acpica/exfldio.c:509)
  acpi_ex_extract_from_field (drivers/acpi/acpica/exfldio.c:700)
  acpi_ex_read_data_from_field (drivers/acpi/acpica/exfield.c:327)
  acpi_ex_resolve_node_to_value (drivers/acpi/acpica/exresolv.c:392)
  </TASK>

 Allocated by task 1:
  acpi_ec_alloc (drivers/acpi/ec.c:1424)
  acpi_ec_add (drivers/acpi/ec.c:1692)

 Freed by task 1:
  kfree (mm/slub.c:6876)
  acpi_ec_add (drivers/acpi/ec.c:1751)

The bug triggers on reduced-hardware EC platforms (ec->gpe < 0)
when the GPIO IRQ provider defers probing. Once the stale handler
exists, any unprivileged sysfs read that causes AML to touch an
EC OpRegion (battery, thermal, backlight) exercises the dangling
pointer.

Fix this by calling ec_remove_handlers() in the error path of
acpi_ec_setup() before clearing first_ec. ec_remove_handlers()
checks each EC_FLAGS_* bit before acting, so it is safe to call
regardless of how far ec_install_handlers() progressed:

  -ENODEV  (handler not installed): only calls acpi_ec_stop()
  -EPROBE_DEFER (handler installed): removes handler, stops EC

Fixes: 03e9a0e057 ("ACPI: EC: Consolidate event handler installation code")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260324165458.1337233-2-bestswngs@gmail.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-24 18:47:10 +01:00
Paolo Bonzini
52dad81e4b Merge tag 'kvmarm-fixes-7.0-4' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 7.0, take #4

- Clear the pending exception state from a vcpu coming out of
  reset, as it could otherwise affect the first instruction
  executed in the guest.

- Fix the address translation emulation icode to set the Hardware
  Access bit on the correct PTE instead of some other location.
2026-03-24 17:32:30 +01:00
Paolo Bonzini
12fd965871 Merge tag 'kvm-s390-master-7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: s390: Fixes for 7.0

- fix deadlock in new memory management
- handle kernel faults on donated memory properly
- fix bounds checking for irq routing + selftest
- fix invalid machine checks + logging
2026-03-24 17:32:13 +01:00
Linus Torvalds
e3c33bc767 Merge tag 'mm-hotfixes-stable-2026-03-23-17-56' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM fixes from Andrew Morton:
 "6 hotfixes.  2 are cc:stable.  All are for MM.

  All are singletons - please see the changelogs for details"

* tag 'mm-hotfixes-stable-2026-03-23-17-56' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/damon/stat: monitor all System RAM resources
  mm/zswap: add missing kunmap_local()
  mailmap: update email address for Muhammad Usama Anjum
  zram: do not slot_free() written-back slots
  mm/damon/core: avoid use of half-online-committed context
  mm/rmap: clear vma->anon_vma on error
2026-03-24 09:12:45 -07:00
Gao Xiang
938c418422 erofs: update the Kconfig description
Refine the description to better highlight its features and use cases.

In addition, add instructions for building it as a module and clarify
the compression option.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2026-03-25 00:04:41 +08:00
Linus Torvalds
26a01984dd Merge tag 'perf-tools-fixes-for-v7.0-2-2026-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix parsing 'overwrite' in command line event definitions in
   big-endian machines by writing correct union member

 - Fix finding default metric in 'perf stat'

 - Fix relative paths for including headers in 'perf kvm stat'

 - Sync header copies with the kernel sources: msr-index.h, kvm,
   build_bug.h

* tag 'perf-tools-fixes-for-v7.0-2-2026-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools:
  tools headers: Synchronize linux/build_bug.h with the kernel sources
  tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  perf kvm stat: Fix relative paths for including headers
  perf parse-events: Fix big-endian 'overwrite' by writing correct union member
  perf metricgroup: Fix metricgroup__has_metric_or_groups()
  tools headers: Skip arm64 cputype.h check
2026-03-24 08:58:38 -07:00
Linus Torvalds
97a48d1aab Merge tag 'media/v7.0-5' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
Pull media fixes from Mauro Carvalho Chehab:

 - rkvdec: fix stack usage with clang and improve handling missing
   short/long term RPS

 - synopsys: fix a Kconfig issue and an out-of-bounds check

 - verisilicon: Fix kernel panic due to __initconst misuse

 - media core: serialize REINIT and REQBUFS with req_queue_mutex

* tag 'media/v7.0-5' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media:
  media: verisilicon: Fix kernel panic due to __initconst misuse
  media: rkvdec: reduce stack usage in rkvdec_init_v4l2_vp9_count_tbl()
  media: rkvdec: reduce excessive stack usage in assemble_hw_pps()
  media: rkvdec: Improve handling missing short/long term RPS
  media: mc, v4l2: serialize REINIT and REQBUFS with req_queue_mutex
  media: synopsys: csi2rx: add missing kconfig dependency
  media: synopsys: csi2rx: fix out-of-bounds check for formats array
2026-03-24 08:56:36 -07:00
Amir Goldstein
53a7c171e9 ovl: fix wrong detection of 32bit inode numbers
The implicit FILEID_INO32_GEN encoder was changed to be explicit,
so we need to fix the detection.

When mounting overlayfs with upperdir and lowerdir on different ext4
filesystems, the expected kmsg log is:

  overlayfs: "xino" feature enabled using 32 upper inode bits.

But instead, since the regressing commit, the kmsg log was:

  overlayfs: "xino" feature enabled using 2 upper inode bits.

Fixes: e21fc2038c ("exportfs: make ->encode_fh() a mandatory method for NFS export")
Cc: stable@vger.kernel.org # v6.7+
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
2026-03-24 16:17:26 +01:00
Alexey Velichayshiy
744fabc338 wifi: iwlwifi: mvm: fix potential out-of-bounds read in iwl_mvm_nd_match_info_handler()
The memcpy function assumes the dynamic array notif->matches is at least
as large as the number of bytes to copy. Otherwise, results->matches may
contain unwanted data. To guarantee safety, extend the validation in one
of the checks to ensure sufficient packet length.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Cc: stable@vger.kernel.org
Fixes: 5ac54afd4d ("wifi: iwlwifi: mvm: Add handling for scan offload match info notification")
Signed-off-by: Alexey Velichayshiy <a.velichayshiy@ispras.ru>
Link: https://patch.msgid.link/20260207150335.1013646-1-a.velichayshiy@ispras.ru
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-24 16:03:48 +01:00
Danilo Krummrich
cc34d77dd4 spi: use generic driver_override infrastructure
When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Also note that we do not enable the driver_override feature of struct
bus_type, as SPI - in contrast to most other buses - passes "" to
sysfs_emit() when the driver_override pointer is NULL. Thus, printing
"\n" instead of "(null)\n".

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 5039563e7c ("spi: Add driver_override SPI device attribute")
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patch.msgid.link/20260324005919.2408620-12-dakr@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-24 15:00:08 +00:00
Sanman Pradhan
b0c9d8ae71 hwmon: (peci/cputemp) Fix off-by-one in cputemp_is_visible()
cputemp_is_visible() validates the channel index against
CPUTEMP_CHANNEL_NUMS, but currently uses '>' instead of '>='.
As a result, channel == CPUTEMP_CHANNEL_NUMS is not rejected even though
valid indices are 0 .. CPUTEMP_CHANNEL_NUMS - 1.

Fix the bounds check by using '>=' so invalid channel indices are
rejected before indexing the core bitmap.

Fixes: bf3608f338 ("hwmon: peci: Add cputemp driver")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260323002352.93417-3-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-24 07:55:34 -07:00
Sanman Pradhan
0adc752b4f hwmon: (peci/cputemp) Fix crit_hyst returning delta instead of absolute temperature
The hwmon sysfs ABI expects tempN_crit_hyst to report the temperature at
which the critical condition clears, not the hysteresis delta from the
critical limit.

The peci cputemp driver currently returns tjmax - tcontrol for
crit_hyst_type, which is the hysteresis margin rather than the
corresponding absolute temperature.

Return tcontrol directly, and update the documentation accordingly.

Fixes: bf3608f338 ("hwmon: peci: Add cputemp driver")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260323002352.93417-2-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-24 07:55:34 -07:00
Sanman Pradhan
3075a3951f hwmon: (pmbus/isl68137) Add mutex protection for AVS enable sysfs attributes
The custom avs0_enable and avs1_enable sysfs attributes access PMBus
registers through the exported API helpers (pmbus_read_byte_data,
pmbus_read_word_data, pmbus_write_word_data, pmbus_update_byte_data)
without holding the PMBus update_lock mutex. These exported helpers do
not acquire the mutex internally, unlike the core's internal callers
which hold the lock before invoking them.

The store callback is especially vulnerable: it performs a multi-step
read-modify-write sequence (read VOUT_COMMAND, write VOUT_COMMAND, then
update OPERATION) where concurrent access from another thread could
interleave and corrupt the register state.

Add pmbus_lock_interruptible()/pmbus_unlock() around both the show and
store callbacks to serialize PMBus register access with the rest of the
driver.

Fixes: 038a9c3d1e ("hwmon: (pmbus/isl68137) Add driver for Intersil ISL68137 PWM Controller")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260319173055.125271-3-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-24 07:55:34 -07:00
Sanman Pradhan
f7e775c469 hwmon: (pmbus/ina233) Fix error handling and sign extension in shunt voltage read
ina233_read_word_data() reads MFR_READ_VSHUNT via pmbus_read_word_data()
but has two issues:

1. The return value is not checked for errors before being used in
   arithmetic. A negative error code from a failed I2C transaction is
   passed directly to DIV_ROUND_CLOSEST(), producing garbage data.

2. MFR_READ_VSHUNT is a 16-bit two's complement value. Negative shunt
   voltages (values with bit 15 set) are treated as large positive
   values since pmbus_read_word_data() returns them zero-extended in an
   int. This leads to incorrect scaling in the VIN coefficient
   conversion.

Fix both issues by adding an error check, casting to s16 for proper
sign extension, and clamping the result to a valid non-negative range.
The clamp is necessary because read_word_data callbacks must return
non-negative values on success (negative values indicate errors to the
pmbus core).

Fixes: b64b6cb163 ("hwmon: Add driver for TI INA233 Current and Power Monitor")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260319173055.125271-2-sanman.pradhan@hpe.com
[groeck: Fixed clamp to avoid losing the sign bit]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-24 07:55:08 -07:00
Johannes Berg
6525a549ec Merge tag 'ath-current-20260324' of git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath
Jeff Johnson says:
==================
ath.git update for v7.0-rc6

For both ath11k and ath12k use the correct TID when stopping an AMPDU
session.
==================

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-24 15:41:09 +01:00
Johannes Berg
249ddd5fe2 Merge tag 'iwlwifi-fixes-2026-03-24' of https://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-next
Miri Korenblit says:
====================
wifi: iwlwifi: fixes - 2026-03-24

- Fix MLO scan timing (record the scan start in FW)
- don't send a 6E related command when not supported
- correctly set wifi generation data
====================

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-24 15:40:34 +01:00
Pengpeng Hou
0fd56fad9c wifi: wl1251: validate packet IDs before indexing tx_frames
wl1251_tx_packet_cb() uses the firmware completion ID directly to index
the fixed 16-entry wl->tx_frames[] array. The ID is a raw u8 from the
completion block, and the callback does not currently verify that it
fits the array before dereferencing it.

Reject completion IDs that fall outside wl->tx_frames[] and keep the
existing NULL check in the same guard. This keeps the fix local to the
trust boundary and avoids touching the rest of the completion flow.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
Link: https://patch.msgid.link/20260323080845.40033-1-pengpeng@iscas.ac.cn
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-24 15:32:31 +01:00
Yasuaki Torimaru
d049e56b17 wifi: wilc1000: fix u8 overflow in SSID scan buffer size calculation
The variable valuesize is declared as u8 but accumulates the total
length of all SSIDs to scan. Each SSID contributes up to 33 bytes
(IEEE80211_MAX_SSID_LEN + 1), and with WILC_MAX_NUM_PROBED_SSID (10)
SSIDs the total can reach 330, which wraps around to 74 when stored
in a u8.

This causes kmalloc to allocate only 75 bytes while the subsequent
memcpy writes up to 331 bytes into the buffer, resulting in a 256-byte
heap buffer overflow.

Widen valuesize from u8 to u32 to accommodate the full range.

Fixes: c5c77ba18e ("staging: wilc1000: Add SDIO/SPI 802.11 driver")
Cc: stable@vger.kernel.org
Signed-off-by: Yasuaki Torimaru <yasuakitorimaru@gmail.com>
Link: https://patch.msgid.link/20260324100624.983458-1-yasuakitorimaru@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-24 15:32:14 +01:00
Paolo Abeni
51a209ee33 Merge tag 'ipsec-2026-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
Steffen Klassert says:

====================
pull request (net): ipsec 2026-03-23

1) Add missing extack for XFRMA_SA_PCPU in add_acquire and allocspi.
   From Sabrina Dubroca.

2) Fix the condition on x->pcpu_num in xfrm_sa_len by using the
   proper check. From Sabrina Dubroca.

3) Call xdo_dev_state_delete during state update to properly cleanup
   the xdo device state. From Sabrina Dubroca.

4) Fix a potential skb leak in espintcp when async crypto is used.
   From Sabrina Dubroca.

5) Validate inner IPv4 header length in IPTFS payload to avoid
   parsing malformed packets. From Roshan Kumar.

6) Fix skb_put() panic on non-linear skb during IPTFS reassembly.
   From Fernando Fernandez Mancera.

7) Silence various sparse warnings related to RCU, state, and policy
   handling. From Sabrina Dubroca.

8) Fix work re-schedule race after cancel in xfrm_nat_keepalive_net_fini().
   From Hyunwoo Kim.

9) Prevent policy_hthresh.work from racing with netns teardown by using
   a proper cleanup mechanism. From Minwoo Ra.

10) Validate that the family of the source and destination addresses match
    in pfkey_send_migrate(). From Eric Dumazet.

11) Only publish mode_data after the clone is setup in the IPTFS receive path.
    This prevents leaving x->mode_data pointing at freed memory on error.
    From Paul Moses.

Please pull or let me know if there are problems.

ipsec-2026-03-23

* tag 'ipsec-2026-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec:
  xfrm: iptfs: only publish mode_data after clone setup
  af_key: validate families in pfkey_send_migrate()
  xfrm: prevent policy_hthresh.work from racing with netns teardown
  xfrm: Fix work re-schedule after cancel in xfrm_nat_keepalive_net_fini()
  xfrm: avoid RCU warnings around the per-netns netlink socket
  xfrm: add rcu_access_pointer to silence sparse warning for xfrm_input_afinfo
  xfrm: policy: silence sparse warning in xfrm_policy_unregister_afinfo
  xfrm: policy: fix sparse warnings in xfrm_policy_{init,fini}
  xfrm: state: silence sparse warnings during netns exit
  xfrm: remove rcu/state_hold from xfrm_state_lookup_spi_proto
  xfrm: state: add xfrm_state_deref_prot to state_by* walk under lock
  xfrm: state: fix sparse warnings around XFRM_STATE_INSERT
  xfrm: state: fix sparse warnings in xfrm_state_init
  xfrm: state: fix sparse warnings on xfrm_state_hold_rcu
  xfrm: iptfs: fix skb_put() panic on non-linear skb during reassembly
  xfrm: iptfs: validate inner IPv4 header length in IPTFS payload
  esp: fix skb leak with espintcp and async crypto
  xfrm: call xdo_dev_state_delete during state update
  xfrm: fix the condition on x->pcpu_num in xfrm_sa_len
  xfrm: add missing extack for XFRMA_SA_PCPU in add_acquire and allocspi
====================

Link: https://patch.msgid.link/20260323083440.2741292-1-steffen.klassert@secunet.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 15:16:28 +01:00
Reshma Immaculate Rajkumar
4242625f27 wifi: ath12k: Pass the correct value of each TID during a stop AMPDU session
With traffic ongoing for data TID [TID 0], an DELBA request to
stop AMPDU for the BA session was received on management TID [TID 4].
The corresponding TID number was incorrectly passed to stop the BA session,
resulting in the BA session for data TIDs being stopped and the BA size
being reduced to 1, causing an overall dip in TCP throughput.

Fix this issue by passing the correct argument from
ath12k_dp_rx_ampdu_stop() to ath12k_dp_arch_peer_rx_tid_reo_update()
during an AMPDU stop session. Instead of passing peer->dp_peer->rx_tid,
which is the base address of the array, corresponding to TID 0, pass
the value of &peer->dp_peer->rx_tid[params->tid]. With this, the
different TID numbers are accounted for.

Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1

Fixes: d889913205 ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices")
Signed-off-by: Reshma Immaculate Rajkumar <reshma.rajkumar@oss.qualcomm.com>
Reviewed-by: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Reviewed-by: Vasanthakumar Thiagarajan <vasanthakumar.thiagarajan@oss.qualcomm.com>
Link: https://patch.msgid.link/20260227110123.3726354-1-reshma.rajkumar@oss.qualcomm.com
Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
2026-03-24 06:44:00 -07:00
Reshma Immaculate Rajkumar
e225b36f83 wifi: ath11k: Pass the correct value of each TID during a stop AMPDU session
During ongoing traffic, a request to stop an AMPDU session
for one TID could incorrectly affect other active sessions.
This can happen because an incorrect TID reference would be
passed when updating the BA session state, causing the wrong
session to be stopped. As a result, the affected session would
be reduced to a minimal BA size, leading to a noticeable
throughput degradation.

Fix this issue by passing the correct argument from
ath11k_dp_rx_ampdu_stop() to ath11k_peer_rx_tid_reo_update()
during a stop AMPDU session. Instead of passing peer->tx_tid, which
is the base address of the array, corresponding to TID 0; pass
the value of &peer->rx_tid[params->tid], where the different TID numbers
are accounted for.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1

Fixes: d5c65159f2 ("ath11k: driver for Qualcomm IEEE 802.11ax devices")
Signed-off-by: Reshma Immaculate Rajkumar <reshma.rajkumar@oss.qualcomm.com>
Reviewed-by: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Reviewed-by: Vasanthakumar Thiagarajan <vasanthakumar.thiagarajan@oss.qualcomm.com>
Link: https://patch.msgid.link/20260319065608.2408179-1-reshma.rajkumar@oss.qualcomm.com
Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
2026-03-24 06:44:00 -07:00
Matt Roper
56781a4597 drm/xe: Implement recent spec updates to Wa_16025250150
The hardware teams noticed that the originally documented workaround
steps for Wa_16025250150 may not be sufficient to fully avoid a hardware
issue.  The workaround documentation has been augmented to suggest
programming one additional register; make the corresponding change in
the driver.

Fixes: 7654d51f1f ("drm/xe/xe2hpg: Add Wa_16025250150")
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Link: https://patch.msgid.link/20260319-wa_16025250150_part2-v1-1-46b1de1a31b2@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit a31566762d4075646a8a2214586158b681e94305)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-24 09:29:10 -04:00
Srinivas Kandagatla
cfb385a8dc ASoC: codecs: wcd934x: fix typo in dt parsing
Looks like we ended up with a typo during device tree data parsing
as part of 4f16b6351b ("ASoC: codecs: wcd: add common helper for wcd
codecs") patch.
 This will result in not parsing the device tree data and results in
zero mic bias values.

Fix this by calling wcd_dt_parse_micbias_info instead of
wcd_dt_parse_mbhc_data.

Fixes: 4f16b6351b ("ASoC: codecs: wcd: add common helper for wcd codecs")
Cc: Stable@vger.kernel.org
Reported-by: Joel Selvaraj <foss@joelselvaraj.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Link: https://patch.msgid.link/20260323231748.2217967-1-srinivas.kandagatla@oss.qualcomm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-24 13:09:09 +00:00
Alice Ryhl
8121353a4b rust: regulator: do not assume that regulator_get() returns non-null
The Rust `Regulator` abstraction uses `NonNull` to wrap the underlying
`struct regulator` pointer. When `CONFIG_REGULATOR` is disabled, the C
stub for `regulator_get` returns `NULL`. `from_err_ptr` does not treat
`NULL` as an error, so it was passed to `NonNull::new_unchecked`,
causing undefined behavior.

Fix this by using a raw pointer `*mut bindings::regulator` instead of
`NonNull`. This allows `inner` to be `NULL` when `CONFIG_REGULATOR` is
disabled, and leverages the C stubs which are designed to handle `NULL`
or are no-ops.

Fixes: 9b614ceada ("rust: regulator: add a bare minimum regulator abstraction")
Reported-by: Miguel Ojeda <ojeda@kernel.org>
Closes: https://lore.kernel.org/r/20260322193830.89324-1-ojeda@kernel.org
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Daniel Almeida <daniel.almeida@collabora.com>
Link: https://patch.msgid.link/20260324-regulator-fix-v1-1-a5244afa3c15@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-24 13:08:21 +00:00
Jihed Chaibi
91049ec2e1 ASoC: dt-bindings: stm32: Fix incorrect compatible string in stm32h7-sai match
The conditional block that defines clock constraints for the stm32h7-sai
variant references "st,stm32mph7-sai", which does not match any compatible
string in the enum. As a result, clock validation for the h7 variant is
silently skipped. Correct the compatible string to "st,stm32h7-sai".

Fixes: 8509bb1f11 ("ASoC: dt-bindings: add stm32mp25 support for sai")
Signed-off-by: Jihed Chaibi <jihed.chaibi.dev@gmail.com>
Reviewed-by: Olivier Moysan <olivier.moysan@foss.st.com>
Link: https://patch.msgid.link/20260321012011.125791-1-jihed.chaibi.dev@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-24 12:58:35 +00:00
Kevin Hao
647b8a2fe4 net: macb: Use dev_consume_skb_any() to free TX SKBs
The napi_consume_skb() function is not intended to be called in an IRQ
disabled context. However, after commit 6bc8a5098b ("net: macb: Fix
tx_ptr_lock locking"), the freeing of TX SKBs is performed with IRQs
disabled. To resolve the following call trace, use dev_consume_skb_any()
for freeing TX SKBs:
   WARNING: kernel/softirq.c:430 at __local_bh_enable_ip+0x174/0x188, CPU#0: ksoftirqd/0/15
   Modules linked in:
   CPU: 0 UID: 0 PID: 15 Comm: ksoftirqd/0 Not tainted 7.0.0-rc4-next-20260319-yocto-standard-dirty #37 PREEMPT
   Hardware name: ZynqMP ZCU102 Rev1.1 (DT)
   pstate: 200000c5 (nzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
   pc : __local_bh_enable_ip+0x174/0x188
   lr : local_bh_enable+0x24/0x38
   sp : ffff800082b3bb10
   x29: ffff800082b3bb10 x28: ffff0008031f3c00 x27: 000000000011ede0
   x26: ffff000800a7ff00 x25: ffff800083937ce8 x24: 0000000000017a80
   x23: ffff000803243a78 x22: 0000000000000040 x21: 0000000000000000
   x20: ffff000800394c80 x19: 0000000000000200 x18: 0000000000000001
   x17: 0000000000000001 x16: ffff000803240000 x15: 0000000000000000
   x14: ffffffffffffffff x13: 0000000000000028 x12: ffff000800395650
   x11: ffff8000821d1528 x10: ffff800081c2bc08 x9 : ffff800081c1e258
   x8 : 0000000100000301 x7 : ffff8000810426ec x6 : 0000000000000000
   x5 : 0000000000000001 x4 : 0000000000000001 x3 : 0000000000000000
   x2 : 0000000000000008 x1 : 0000000000000200 x0 : ffff8000810428dc
   Call trace:
    __local_bh_enable_ip+0x174/0x188 (P)
    local_bh_enable+0x24/0x38
    skb_attempt_defer_free+0x190/0x1d8
    napi_consume_skb+0x58/0x108
    macb_tx_poll+0x1a4/0x558
    __napi_poll+0x50/0x198
    net_rx_action+0x1f4/0x3d8
    handle_softirqs+0x16c/0x560
    run_ksoftirqd+0x44/0x80
    smpboot_thread_fn+0x1d8/0x338
    kthread+0x120/0x150
    ret_from_fork+0x10/0x20
   irq event stamp: 29751
   hardirqs last  enabled at (29750): [<ffff8000813be184>] _raw_spin_unlock_irqrestore+0x44/0x88
   hardirqs last disabled at (29751): [<ffff8000813bdf60>] _raw_spin_lock_irqsave+0x38/0x98
   softirqs last  enabled at (29150): [<ffff8000800f1aec>] handle_softirqs+0x504/0x560
   softirqs last disabled at (29153): [<ffff8000800f2fec>] run_ksoftirqd+0x44/0x80

Fixes: 6bc8a5098b ("net: macb: Fix tx_ptr_lock locking")
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Cc: stable@vger.kernel.org
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260321-macb-tx-v1-1-b383a58dd4e6@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 13:24:40 +01:00
Johannes Berg
687a95d204 wifi: iwlwifi: mld: correctly set wifi generation data
In each MAC context, the firmware expects the wifi generation
data, i.e. whether or not HE/EHT (and in the future UHR) is
enabled on that MAC.

However, this is currently handled wrong in two ways:
 - EHT is only enabled when the interface is also an MLD, but
   we currently allow (despite the spec) connecting with EHT
   but without MLO.
 - when HE or EHT are used by TDLS peers, the firmware needs
   to have them enabled regardless of the AP

Fix this by iterating setting up the data depending on the
interface type:
 - for AP, just set it according to the BSS configuration
 - for monitor, set it according to HW capabilities
 - otherwise, particularly for client, iterate all stations
   and then their links on the interface in question and set
   according to their capabilities, this handles the AP and
   TDLS peers. Re-calculate this whenever a TDLS station is
   marked associated or removed so that it's kept updated,
   for the AP it's already updated on assoc/disassoc.

Fixes: d1e879ec60 ("wifi: iwlwifi: add iwlmld sub-driver")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260319110722.404713b22177.Ic972b5e557d011a5438f8f97c1e793cc829e2ea9@changeid
Link: https://patch.msgid.link/20260324093333.2953495-1-miriam.rachel.korenblit@intel.com
2026-03-24 13:55:53 +02:00
Emmanuel Grumbach
323156c354 wifi: iwlwifi: mvm: don't send a 6E related command when not supported
MCC_ALLOWED_AP_TYPE_CMD is related to 6E support. Do not send it if the
device doesn't support 6E.
Apparently, the firmware is mistakenly advertising support for this
command even on AX201 which does not support 6E and then the firmware
crashes.

Fixes: 0d2fc8821a ("wifi: iwlwifi: nvm: parse the VLP/AFC bit from regulatory")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220804
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260324113316.e171f0163f2a.I0c444d1f82d1773054e7ffc391ad49697d58f44e@changeid
2026-03-24 13:55:53 +02:00
Pagadala Yesu Anjaneyulu
ec66ec6a5a wifi: iwlwifi: mld: Fix MLO scan timing
Calculate MLO scan start time based on actual
scan start notification from firmware instead of recording
time when scan command is sent.

Currently, MLO scan start time was captured immediately
after sending the scan command to firmware. However, the
actual scan start time may differ due to the FW being busy
with a previous scan.

In that case, the link selection code will think that the MLO
scan is too old, and will warn.

To fix it, Implement start scan notification handling to
capture the precise moment when firmware begins the scan
operation.

Fixes: 9324731b99 ("wifi: iwlwifi: mld: avoid selecting bad links")
Signed-off-by: Pagadala Yesu Anjaneyulu <pagadala.yesu.anjaneyulu@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260324113316.4c56b8bac533.I6e656d8cc30bb82c96aabadedd62bd67f4c46bf9@changeid
2026-03-24 13:55:53 +02:00
Willem de Bruijn
c4336a07eb net: correctly handle tunneled traffic on IPV6_CSUM GSO fallback
NETIF_F_IPV6_CSUM only advertises support for checksum offload of
packets without IPv6 extension headers. Packets with extension
headers must fall back onto software checksumming. Since TSO
depends on checksum offload, those must revert to GSO.

The below commit introduces that fallback. It always checks
network header length. For tunneled packets, the inner header length
must be checked instead. Extend the check accordingly.

A special case is tunneled packets without inner IP protocol. Such as
RFC 6951 SCTP in UDP. Those are not standard IPv6 followed by
transport header either, so also must revert to the software GSO path.

Cc: stable@vger.kernel.org
Fixes: 864e339697 ("net: gso: Forbid IPv6 TSO with extensions on devices with only IPV6_CSUM")
Reported-by: Tangxin Xie <xietangxin@yeah.net>
Closes: https://lore.kernel.org/netdev/0414e7e2-9a1c-4d7c-a99d-b9039cf68f40@yeah.net/
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260320190148.2409107-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 12:35:19 +01:00
Paolo Abeni
d9c2a509c9 Merge tag 'linux-can-fixes-for-7.0-20260323' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can
Marc Kleine-Budde says:

====================
pull-request: can 2026-03-23

this is a pull request of 5 patches for net/main.

The first patch is by me and adds missing error handling to the CAN
netlink device configuration code.

Wenyuan Li contributes a patch for the mcp251x drier to add missing
error handling for power enabling in th open and resume functions.

Oliver Hartkopp's patch adds missing atomic access in hot path for the
CAN procfs statistics.

A series by Ali Norouzi and Oliver Hartkopp fix a can-Out-of-Bounds
Heap R/W in the can-gw protocol and a UAF in the CAN isotp protocol.

linux-can-fixes-for-7.0-20260323

* tag 'linux-can-fixes-for-7.0-20260323' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can:
  can: isotp: fix tx.buf use-after-free in isotp_sendmsg()
  can: gw: fix OOB heap access in cgw_csum_crc8_rel()
  can: statistics: add missing atomic access in hot path
  can: mcp251x: add error handling for power enable in open and resume
  can: netlink: can_changelink(): add missing error handling to call can_ctrlmode_changelink()
====================

Link: https://patch.msgid.link/20260323103224.218099-1-mkl@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 12:22:52 +01:00
David Carlier
eb8c426c98 net: ti: icssg-prueth: fix use-after-free of CPPI descriptor in RX path
cppi5_hdesc_get_psdata() returns a pointer into the CPPI descriptor.
In both emac_rx_packet() and emac_rx_packet_zc(), the descriptor is
freed via k3_cppi_desc_pool_free() before the psdata pointer is used
by emac_rx_timestamp(), which dereferences psdata[0] and psdata[1].
This constitutes a use-after-free on every received packet that goes
through the timestamp path.

Defer the descriptor free until after all accesses through the psdata
pointer are complete. For emac_rx_packet(), move the free into the
requeue label so both early-exit and success paths free the descriptor
after all accesses are done. For emac_rx_packet_zc(), move the free to
the end of the loop body after emac_dispatch_skb_zc() (which calls
emac_rx_timestamp()) has returned.

Fixes: 46eeb90f03 ("net: ti: icssg-prueth: Use page_pool API for RX buffer allocation")
Signed-off-by: David Carlier <devnexen@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260320174439.41080-1-devnexen@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 12:19:48 +01:00
Srinivas Pandruvada
fbddf68d7b platform/x86: ISST: Correct locked bit width
SST-PP locked bit width is set to three bits. It should be only one bit.
Use SST_PP_LOCK_WIDTH define instead of SST_PP_LEVEL_WIDTH.

Fixes: ea009e4769 ("platform/x86: ISST: Add SST-PP support via TPMI")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260323153635.3263828-1-srinivas.pandruvada@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-24 12:31:10 +02:00
Paolo Abeni
25f5463c91 Merge branch 'team-fix-header_ops-type-confusion-and-add-selftest'
Jiayuan Chen says:

====================
team: fix header_ops type confusion and add selftest

Hi,

This patch series fixes a panic reported by syzkaller in the team/bond/gre
stacked non-Ethernet configuration:
https://syzkaller.appspot.com/bug?extid=3d8bc31c45e11450f24c

The first patch fixes the header_ops type confusion / parse recursion
context issue in team. The second patch adds a selftest to reproduce the
reported scenario and prevent regressions in the future.

v1: https://lore.kernel.org/netdev/20260314062306.212765-1-jiayuan.chen@linux.dev/
v2: https://lore.kernel.org/netdev/20260317124606.157035-1-jiayuan.chen@linux.dev/
====================

Link: https://patch.msgid.link/20260320072139.134249-1-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 11:26:34 +01:00
Jiayuan Chen
56063823b9 selftests: team: add non-Ethernet header_ops reproducer
Add a team selftest that sets up:
  g0 (gre) -> b0 (bond) -> t0 (team)

and triggers IPv6 traffic on t0. This reproduces the non-Ethernet
header_ops confusion scenario and protects against regressions in stacked
team/bond/gre configurations.

Using this script, the panic reported by syzkaller can be reproduced [1].

After the fix:

  # ./non_ether_header_ops.sh
  PASS: non-Ethernet header_ops stacking did not crash

[1] https://syzkaller.appspot.com/bug?extid=3d8bc31c45e11450f24c

Cc: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260320072139.134249-3-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 11:26:32 +01:00
Jiayuan Chen
425000dbf1 team: fix header_ops type confusion with non-Ethernet ports
Similar to commit 950803f725 ("bonding: fix type confusion in
bond_setup_by_slave()") team has the same class of header_ops type
confusion.

For non-Ethernet ports, team_setup_by_port() copies port_dev->header_ops
directly. When the team device later calls dev_hard_header() or
dev_parse_header(), these callbacks can run with the team net_device
instead of the real lower device, so netdev_priv(dev) is interpreted as
the wrong private type and can crash.

The syzbot report shows a crash in bond_header_create(), but the root
cause is in team: the topology is gre -> bond -> team, and team calls
the inherited header_ops with its own net_device instead of the lower
device, so bond_header_create() receives a team device and interprets
netdev_priv() as bonding private data, causing a type confusion crash.

Fix this by introducing team header_ops wrappers for create/parse,
selecting a team port under RCU, and calling the lower device callbacks
with port->dev, so each callback always sees the correct net_device
context.

Also pass the selected lower device to the lower parse callback, so
recursion is bounded in stacked non-Ethernet topologies and parse
callbacks always run with the correct device context.

Fixes: 1d76efe157 ("team: add support for non-ethernet devices")
Reported-by: syzbot+3d8bc31c45e11450f24c@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69b46af7.050a0220.36eb34.000e.GAE@google.com/T/
Cc: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260320072139.134249-2-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 11:26:32 +01:00
Paolo Abeni
673bb63d20 Merge branch 'virtio-net-fix-for-virtio_net_f_guest_hdrlen'
Xuan Zhuo says:

====================
virtio-net: fix for VIRTIO_NET_F_GUEST_HDRLEN

The commit be50da3e9d ("net: virtio_net: implement exact header length
guest feature") introduces support for the VIRTIO_NET_F_GUEST_HDRLEN
feature in virtio-net.

This feature requires virtio-net to set hdr_len to the actual header
length of the packet when transmitting, the number of
bytes from the start of the packet to the beginning of the
transport-layer payload.

However, in practice, hdr_len was being set using skb_headlen(skb),
which is clearly incorrect. This path set fixes that issue.

As discussed in [0], this version checks the VIRTIO_NET_F_GUEST_HDRLEN is
negotiated.

[0]: http://lore.kernel.org/all/20251029030913.20423-1-xuanzhuo@linux.alibaba.com

v10: fix http://lore.kernel.org/all/202603122214.8Anoxrmq-lkp@intel.com
====================

Link: https://patch.msgid.link/20260320021818.111741-1-xuanzhuo@linux.alibaba.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 11:12:11 +01:00
Xuan Zhuo
6c860dc02a virtio-net: correct hdr_len handling for tunnel gso
The commit a2fb4bc4e2 ("net: implement virtio helpers to handle UDP
GSO tunneling.") introduces support for the UDP GSO tunnel feature in
virtio-net.

The virtio spec says:

    If the \field{gso_type} has the VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 bit or
    VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6 bit set, \field{hdr_len} accounts for
    all the headers up to and including the inner transport.

The commit did not update the hdr_len to include the inner transport.

I observed that the "hdr_len" is 116 for this packet:

    17:36:18.241105 52:55:00:d1:27:0a > 2e:2c:df:46:a9:e1, ethertype IPv4 (0x0800), length 2912: (tos 0x0, ttl 64, id 45197, offset 0, flags [none], proto UDP (17), length 2898)
        192.168.122.100.50613 > 192.168.122.1.4789: [bad udp cksum 0x8106 -> 0x26a0!] VXLAN, flags [I] (0x08), vni 1
    fa:c3:ba:82:05:ee > ce:85:0c:31:77:e5, ethertype IPv4 (0x0800), length 2862: (tos 0x0, ttl 64, id 14678, offset 0, flags [DF], proto TCP (6), length 2848)
        192.168.3.1.49880 > 192.168.3.2.9898: Flags [P.], cksum 0x9266 (incorrect -> 0xaa20), seq 515667:518463, ack 1, win 64, options [nop,nop,TS val 2990048824 ecr 2798801412], length 2796

116 = 14(mac) + 20(ip) + 8(udp) + 8(vxlan) + 14(inner mac) + 20(inner ip) + 32(innner tcp)

Fixes: a2fb4bc4e2 ("net: implement virtio helpers to handle UDP GSO tunneling.")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20260320021818.111741-3-xuanzhuo@linux.alibaba.com
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 11:12:08 +01:00
Xuan Zhuo
38ec410b99 virtio-net: correct hdr_len handling for VIRTIO_NET_F_GUEST_HDRLEN
The commit be50da3e9d ("net: virtio_net: implement exact header length
guest feature") introduces support for the VIRTIO_NET_F_GUEST_HDRLEN
feature in virtio-net.

This feature requires virtio-net to set hdr_len to the actual header
length of the packet when transmitting, the number of
bytes from the start of the packet to the beginning of the
transport-layer payload.

However, in practice, hdr_len was being set using skb_headlen(skb),
which is clearly incorrect. This commit fixes that issue.

Fixes: be50da3e9d ("net: virtio_net: implement exact header length guest feature")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20260320021818.111741-2-xuanzhuo@linux.alibaba.com
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-24 11:12:07 +01:00
Karol Wachowski
e8ab57b564 accel/ivpu: Add disable clock relinquish workaround for NVL-A0
Turn on disable clock relinquish workaround for Nova Lake A0.
Without this workaround NPU may not power off correctly after
inference, leading to unexpected system behavior.

Fixes: 550f4dd2ce ("accel/ivpu: Add support for Nova Lake's NPU")
Cc: <stable@vger.kernel.org> # v6.19+

Reviewed-by: Lizhi.hou <lizhi.hou@amd.com>
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Link: https://patch.msgid.link/20260323095029.64613-1-karol.wachowski@linux.intel.com
2026-03-24 09:29:28 +01:00
Darrick J. Wong
f621324dfb iomap: fix lockdep complaint when reads fail
Zorro Lang reported the following lockdep splat:

"While running fstests xfs/556 on kernel 7.0.0-rc4+ (HEAD=04a9f1766954),
a lockdep warning was triggered indicating an inconsistent lock state
for sb->s_type->i_lock_key.

"The deadlock might occur because iomap_read_end_io (called from a
hardware interrupt completion path) invokes fserror_report, which then
calls igrab.  igrab attempts to acquire the i_lock spinlock. However,
the i_lock is frequently acquired in process context with interrupts
enabled. If an interrupt occurs while a process holds the i_lock, and
that interrupt handler calls fserror_report, the system deadlocks.

"I hit this warning several times by running xfs/556 (mostly) or
generic/648 on xfs. More details refer to below console log."

along with this dmesg, for which I've cleaned up the stacktraces:

 run fstests xfs/556 at 2026-03-18 20:05:30
 XFS (sda3): Mounting V5 Filesystem 396e9164-c45a-4e05-be9d-b38c2c5c6477
 XFS (sda3): Ending clean mount
 XFS (sda3): Unmounting Filesystem 396e9164-c45a-4e05-be9d-b38c2c5c6477
 XFS (sda3): Mounting V5 Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e
 XFS (sda3): Ending clean mount
 XFS (sda3): Unmounting Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e
 XFS (dm-0): Mounting V5 Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e
 XFS (dm-0): Ending clean mount
 device-mapper: table: 253:0: adding target device (start sect 209 len 1) caused an alignment inconsistency
 device-mapper: table: 253:0: adding target device (start sect 210 len 62914350) caused an alignment inconsistency
 buffer_io_error: 6 callbacks suppressed
 Buffer I/O error on dev dm-0, logical block 209, async page read
 Buffer I/O error on dev dm-0, logical block 209, async page read
 XFS (dm-0): Unmounting Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e
 XFS (dm-0): Mounting V5 Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e
 XFS (dm-0): Ending clean mount

 ================================
 WARNING: inconsistent lock state
 7.0.0-rc4+ #1 Tainted: G S      W
 --------------------------------
 inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
 od/2368602 [HC1[1]:SC0[0]:HE0:SE1] takes:
 ff1100069f2b4a98 (&sb->s_type->i_lock_key#31){?.+.}-{3:3}, at: igrab+0x28/0x1a0
 {HARDIRQ-ON-W} state was registered at:
   __lock_acquire+0x40d/0xbd0
   lock_acquire.part.0+0xbd/0x260
   _raw_spin_lock+0x37/0x80
   unlock_new_inode+0x66/0x2a0
   xfs_iget+0x67b/0x7b0 [xfs]
   xfs_mountfs+0xde4/0x1c80 [xfs]
   xfs_fs_fill_super+0xe86/0x17a0 [xfs]
   get_tree_bdev_flags+0x312/0x590
   vfs_get_tree+0x8d/0x2f0
   vfs_cmd_create+0xb2/0x240
   __do_sys_fsconfig+0x3d8/0x9a0
   do_syscall_64+0x13a/0x1520
   entry_SYSCALL_64_after_hwframe+0x76/0x7e
 irq event stamp: 3118
 hardirqs last  enabled at (3117): [<ffffffffb54e4ad8>] _raw_spin_unlock_irq+0x28/0x50
 hardirqs last disabled at (3118): [<ffffffffb54b84c9>] common_interrupt+0x19/0xe0
 softirqs last  enabled at (3040): [<ffffffffb290ca28>] handle_softirqs+0x6b8/0x950
 softirqs last disabled at (3023): [<ffffffffb290ce4d>] __irq_exit_rcu+0xfd/0x250

 other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock(&sb->s_type->i_lock_key#31);
   <Interrupt>
     lock(&sb->s_type->i_lock_key#31);

  *** DEADLOCK ***

 1 lock held by od/2368602:
  #0: ff1100069f2b4b58 (&sb->s_type->i_mutex_key#19){++++}-{4:4}, at: xfs_ilock+0x324/0x4b0 [xfs]

 stack backtrace:
 CPU: 15 UID: 0 PID: 2368602 Comm: od Kdump: loaded Tainted: G S      W           7.0.0-rc4+ #1 PREEMPT(full)
 Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN
 Hardware name: Dell Inc. PowerEdge R660/0R5JJC, BIOS 2.1.5 03/14/2024
 Call Trace:
  <IRQ>
  dump_stack_lvl+0x6f/0xb0
  print_usage_bug.part.0+0x230/0x2c0
  mark_lock_irq+0x3ce/0x5b0
  mark_lock+0x1cb/0x3d0
  mark_usage+0x109/0x120
  __lock_acquire+0x40d/0xbd0
  lock_acquire.part.0+0xbd/0x260
  _raw_spin_lock+0x37/0x80
  igrab+0x28/0x1a0
  fserror_report+0x127/0x2d0
  iomap_finish_folio_read+0x13c/0x280
  iomap_read_end_io+0x10e/0x2c0
  clone_endio+0x37e/0x780 [dm_mod]
  blk_update_request+0x448/0xf00
  scsi_end_request+0x74/0x750
  scsi_io_completion+0xe9/0x7c0
  _scsih_io_done+0x6ba/0x1ca0 [mpt3sas]
  _base_process_reply_queue+0x249/0x15b0 [mpt3sas]
  _base_interrupt+0x95/0xe0 [mpt3sas]
  __handle_irq_event_percpu+0x1f0/0x780
  handle_irq_event+0xa9/0x1c0
  handle_edge_irq+0x2ef/0x8a0
  __common_interrupt+0xa0/0x170
  common_interrupt+0xb7/0xe0
  </IRQ>
  <TASK>
  asm_common_interrupt+0x26/0x40
 RIP: 0010:_raw_spin_unlock_irq+0x2e/0x50
 Code: 0f 1f 44 00 00 53 48 8b 74 24 08 48 89 fb 48 83 c7 18 e8 b5 73 5e fd 48 89 df e8 ed e2 5e fd e8 08 78 8f fd fb bf 01 00 00 00 <e8> 8d 56 4d fd 65 8b 05 46 d5 1d 03 85 c0 74 06 5b c3 cc cc cc cc
 RSP: 0018:ffa0000027d07538 EFLAGS: 00000206
 RAX: 0000000000000c2d RBX: ffffffffb6614bc8 RCX: 0000000000000080
 RDX: 0000000000000000 RSI: ffffffffb6306a01 RDI: 0000000000000001
 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
 R10: ffffffffb75efc67 R11: 0000000000000001 R12: ff1100015ada0000
 R13: 0000000000000083 R14: 0000000000000002 R15: ffffffffb6614c10
  folio_wait_bit_common+0x407/0x780
  filemap_update_page+0x8e7/0xbd0
  filemap_get_pages+0x904/0xc50
  filemap_read+0x320/0xc20
  xfs_file_buffered_read+0x2aa/0x380 [xfs]
  xfs_file_read_iter+0x263/0x4a0 [xfs]
  vfs_read+0x6cb/0xb70
  ksys_read+0xf9/0x1d0
  do_syscall_64+0x13a/0x1520

Zorro's diagnosis makes sense, so the solution is to kick the failed
read handling to a workqueue much like we added for writeback ioends in
commit 294f54f849 ("fserror: fix lockdep complaint when igrabbing
inode").

Cc: Zorro Lang <zlang@redhat.com>
Link: https://lore.kernel.org/linux-xfs/20260319194303.efw4wcu7c4idhthz@doltdoltdolt/
Fixes: a9d573ee88 ("iomap: report file I/O errors to the VFS")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Link: https://patch.msgid.link/20260323210017.GL6223@frogsfrogsfrogs
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-24 09:14:46 +01:00
Imre Deak
77fcf58df1 drm/i915/dp_tunnel: Fix error handling when clearing stream BW in atomic state
Clearing the DP tunnel stream BW in the atomic state involves getting
the tunnel group state, which can fail. Handle the error accordingly.

This fixes at least one issue where drm_dp_tunnel_atomic_set_stream_bw()
failed to get the tunnel group state returning -EDEADLK, which wasn't
handled. This lead to the ctx->contended warn later in modeset_lock()
while taking a WW mutex for another object in the same atomic state, and
thus within the same already contended WW context.

Moving intel_crtc_state_alloc() later would avoid freeing saved_state on
the error path; this stable patch leaves that simplification for a
follow-up.

Cc: Uma Shankar <uma.shankar@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.9+
Fixes: a4efae87ec ("drm/i915/dp: Compute DP tunnel BW during encoder state computation")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7617
Reviewed-by: Michał Grzelak <michal.grzelak@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patch.msgid.link/20260320092900.13210-1-imre.deak@intel.com
(cherry picked from commit fb69d0076e687421188bc8103ab0e8e5825b1df1)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-24 08:00:00 +02:00
Linus Torvalds
a0124352d5 Merge tag 'xsa482-7.0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen fixes from Juergen Gross:
 "Restrict the xen privcmd driver in unprivileged domU to only allow
  hypercalls to target domain when using secure boot"

* tag 'xsa482-7.0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/privcmd: add boot control for restricted usage in domU
  xen/privcmd: restrict usage in unprivileged domU
2026-03-23 21:30:14 -07:00
Wei Fang
70b439bf06 net: enetc: fix the output issue of 'ethtool --show-ring'
Currently, enetc_get_ringparam() only provides rx_pending and tx_pending,
but 'ethtool --show-ring' no longer displays these fields. Because the
ringparam retrieval path has moved to the new netlink interface, where
rings_fill_reply() emits the *x_pending only if the *x_max_pending values
are non-zero. So rx_max_pending and tx_max_pending to are added to
enetc_get_ringparam() to fix the issue.

Note that the maximum tx/rx ring size of hardware is 64K, but we haven't
added set_ringparam() to make the ring size configurable. To avoid users
mistakenly believing that the ring size can be increased, so set
the *x_max_pending to priv->*x_bd_count.

Fixes: e4a1717b67 ("ethtool: provide ring sizes with RINGS_GET request")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Link: https://patch.msgid.link/20260320094222.706339-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 18:48:19 -07:00
Martin KaFai Lau
e537dd15d0 udp: Fix wildcard bind conflict check when using hash2
When binding a udp_sock to a local address and port, UDP uses
two hashes (udptable->hash and udptable->hash2) for collision
detection. The current code switches to "hash2" when
hslot->count > 10.

"hash2" is keyed by local address and local port.
"hash" is keyed by local port only.

The issue can be shown in the following bind sequence (pseudo code):

bind(fd1,  "[fd00::1]:8888")
bind(fd2,  "[fd00::2]:8888")
bind(fd3,  "[fd00::3]:8888")
bind(fd4,  "[fd00::4]:8888")
bind(fd5,  "[fd00::5]:8888")
bind(fd6,  "[fd00::6]:8888")
bind(fd7,  "[fd00::7]:8888")
bind(fd8,  "[fd00::8]:8888")
bind(fd9,  "[fd00::9]:8888")
bind(fd10, "[fd00::10]:8888")

/* Correctly return -EADDRINUSE because "hash" is used
 * instead of "hash2". udp_lib_lport_inuse() detects the
 * conflict.
 */
bind(fail_fd, "[::]:8888")

/* After one more socket is bound to "[fd00::11]:8888",
 * hslot->count exceeds 10 and "hash2" is used instead.
 */
bind(fd11, "[fd00::11]:8888")
bind(fail_fd, "[::]:8888")      /* succeeds unexpectedly */

The same issue applies to the IPv4 wildcard address "0.0.0.0"
and the IPv4-mapped wildcard address "::ffff:0.0.0.0". For
example, if there are existing sockets bound to
"192.168.1.[1-11]:8888", then binding "0.0.0.0:8888" or
"[::ffff:0.0.0.0]:8888" can also miss the conflict when
hslot->count > 10.

TCP inet_csk_get_port() already has the correct check in
inet_use_bhash2_on_bind(). Rename it to
inet_use_hash2_on_bind() and move it to inet_hashtables.h
so udp.c can reuse it in this fix.

Fixes: 30fff9231f ("udp: bind() optimisation")
Reported-by: Andrew Onyshchuk <oandrew@meta.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260319181817.1901357-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 18:46:45 -07:00
Arnd Bergmann
3f0f591b44 net: b44: always select CONFIG_FIXED_PHY
When CONFIG_FIXED_PHY=m but CONFIG_B44=y, the kernel fails to link:

ld.lld: error: undefined symbol: fixed_phy_unregister
>>> referenced by b44.c
>>>               drivers/net/ethernet/broadcom/b44.o:(b44_remove_one) in archive vmlinux.a

ld.lld: error: undefined symbol: fixed_phy_register_100fd
>>> referenced by b44.c
>>>               drivers/net/ethernet/broadcom/b44.o:(b44_register_phy_one) in archive vmlinux.a

The fixed phy support is small enough that just always enabling it
for b44 is the simplest solution, and it avoids adding ugly #ifdef
checks.

Fixes: 10d2f15afb ("net: b44: register a fixed phy using fixed_phy_register_100fd if needed")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://patch.msgid.link/20260320154927.674555-1-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 17:47:14 -07:00
Qingfang Deng
1065913ded net: airoha: add RCU lock around dev_fill_forward_path
Since 0417adf367 ("ppp: fix race conditions in ppp_fill_forward_path")
dev_fill_forward_path() should be called with RCU read lock held. This
fix was applied to net, while the Airoha flowtable commit was applied to
net-next, so it hadn't been an issue until net was merged into net-next.

Fixes: a8bdd935d1 ("net: airoha: Add wlan flowtable TX offload")
Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260320094315.525126-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 17:45:40 -07:00
Yochai Eisenrich
42156f93d1 net: fix fanout UAF in packet_release() via NETDEV_UP race
`packet_release()` has a race window where `NETDEV_UP` can re-register a
socket into a fanout group's `arr[]` array. The re-registration is not
cleaned up by `fanout_release()`, leaving a dangling pointer in the fanout
array.
`packet_release()` does NOT zero `po->num` in its `bind_lock` section.
After releasing `bind_lock`, `po->num` is still non-zero and `po->ifindex`
still matches the bound device. A concurrent `packet_notifier(NETDEV_UP)`
that already found the socket in `sklist` can re-register the hook.
For fanout sockets, this re-registration calls `__fanout_link(sk, po)`
which adds the socket back into `f->arr[]` and increments `f->num_members`,
but does NOT increment `f->sk_ref`.

The fix sets `po->num` to zero in `packet_release` while `bind_lock` is
held to prevent NETDEV_UP from linking, preventing the race window.

This bug was found following an additional audit with Claude Code based
on CVE-2025-38617.

Fixes: ce06b03e60 ("packet: Add helpers to register/unregister ->prot_hook")
Link: https://blog.calif.io/p/a-race-within-a-race-exploiting-cve
Signed-off-by: Yochai Eisenrich <echelonh@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260319200610.25101-1-echelonh@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 17:07:19 -07:00
Jakub Kicinski
b1791180a2 Merge branch 'ipv6-fix-two-gc-issues-with-permanent-routes'
Kuniyuki Iwashima says:

====================
ipv6: Fix two GC issues with permanent routes.

Patch 1 fixes the unbounded growth of tb6_gc_hlist due to
permanent routes whose exception routes have all expired.

Patch 2 fixes an issue where exception routes tied to
permanent routes are not properly aged.

Patch 3 is a selftest for the issue fixed by Patch 2.
====================

Link: https://patch.msgid.link/20260320072317.2561779-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 16:59:36 -07:00
Kuniyuki Iwashima
3e9e84e92c selftest: net: Add GC test for temporary routes with exceptions.
Without the prior commit, IPv6 GC cannot track exceptions tied
to permanent routes if they were originally added as temporary
routes.

Let's add a test case for the issue.

  1. Add temporary routes
  2. Create exceptions for the temporary routes
  3. Promote the routes to permanent routes
  4. Check if GC can find and purge the exceptions

A few notes:

  + At step 4, unlike other test cases, we cannot wait for
    $GC_WAIT_TIME.  While the exceptions are always iterable via
    netlink (since it traverses the entire fib tree instead of
    tb6_gc_hlist), rt6_nh_dump_exceptions() skips expired entries.

    If we waited for the expiration time, we would be unable to
    distinguish whether the exceptions were truly purged by GC or
    just hidden due to being expired.

  + For the same reason, at step 2, we use ICMPv6 redirect message
    instead of Packet Too Big message.  This is because MTU exceptions
    always have RTF_EXPIRES, and rt6_age_examine_exception() does not
    respect the period specified by net.ipv6.route.flush=1.

  + We add a neighbour entry for the redirect target with NTF_ROUTER.
    Without this, the exceptions would be removed at step 3 when the
    fib6_may_remove_gc_list() is called.

Without the fix, the exceptions remain even after GC is triggered
by sysctl -wq net.ipv6.route.flush=1.

  FAIL: Expected 0 routes, got 5
      TEST: ipv6 route garbage collection (promote to permanent routes)   [FAIL]

With the fix, GC purges the exceptions properly.

      TEST: ipv6 route garbage collection (promote to permanent routes)   [ OK ]

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260320072317.2561779-4-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 16:59:31 -07:00
Kuniyuki Iwashima
4be7b99c25 ipv6: Don't remove permanent routes with exceptions from tb6_gc_hlist.
The cited commit mechanically put fib6_remove_gc_list()
just after every fib6_clean_expires() call.

When a temporary route is promoted to a permanent route,
there may already be exception routes tied to it.

If fib6_remove_gc_list() removes the route from tb6_gc_hlist,
such exception routes will no longer be aged.

Let's replace fib6_remove_gc_list() with a new helper
fib6_may_remove_gc_list() and use fib6_age_exceptions() there.

Note that net->ipv6 is only compiled when CONFIG_IPV6 is
enabled, so fib6_{add,remove,may_remove}_gc_list() are guarded.

Fixes: 5eb902b8e7 ("net/ipv6: Remove expired routes with a separated list of routes.")
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260320072317.2561779-3-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 16:59:31 -07:00
Kuniyuki Iwashima
6af51e9f31 ipv6: Remove permanent routes from tb6_gc_hlist when all exceptions expire.
Commit 5eb902b8e7 ("net/ipv6: Remove expired routes with a
separated list of routes.") introduced a per-table GC list and
changed GC to iterate over that list instead of traversing
the entire route table.

However, it forgot to add permanent routes to tb6_gc_hlist
when exception routes are added.

Commit cfe82469a0 ("ipv6: add exception routes to GC list
in rt6_insert_exception") fixed that issue but introduced
another one.

Even after all exception routes expire, the permanent routes
remain in tb6_gc_hlist, potentially negating the performance
benefits intended by the initial change.

Let's count gc_args->more before and after rt6_age_exceptions()
and remove the permanent route when the delta is 0.

Note that the next patch will reuse fib6_age_exceptions().

Fixes: cfe82469a0 ("ipv6: add exception routes to GC list in rt6_insert_exception")
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260320072317.2561779-2-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-23 16:59:31 -07:00
Lukas Wunner
05f643d6f7 Documentation: PCI: Document PCIe TLP Header decoder for AER messages
The prefix/header of a TLP that caused an error may be recorded in the AER
Capability and emitted to the kernel log in raw hex format.  Document the
existence and usage of tlp-tool, which decodes the TLP Header into
human-readable form.

The TLP Header hints at the root cause of an error, yet is often ignored
because of its seeming opaqueness.  Instead, PCIe errors are frequently
worked around by a change in the kernel without fully understanding the
actual source of the problem.  With more documentation on available tools
we'll hopefully come up with better solutions.

There are also wireshark dissectors for TLPs, but it seems they expect a
complete TLP, not just the header, and they cannot grok the hex format
emitted by the kernel directly.  tlp-tool appears to be the most cut and
dried solution out there.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Maciej Grochowski <mx2pg@pm.me>
Link: https://patch.msgid.link/bf826c41b4c1d255c7dcb16e266b52f774d944ed.1774246067.git.lukas@wunner.de
2026-03-23 15:58:02 -05:00
Felix Gu
70bb843794 PCI/pwrctrl: Fix pci_pwrctrl_is_required() device node leak
The for_each_endpoint_of_node() macro requires calling of_node_put() on the
endpoint node when breaking out of the loop early.

Add of_node_put(endpoint) before the early return to release the reference.

Fixes: cf3287fb2c ("PCI/pwrctrl: Ensure that remote endpoint node parent has supply requirement")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://patch.msgid.link/20260323-pwctrl-v1-1-f5c03a2df7fb@gmail.com
2026-03-23 15:57:43 -05:00
Joshua Hay
b5e5797e3c idpf: only assign num refillqs if allocation was successful
As reported by AI review [1], if the refillqs allocation fails, refillqs
will be NULL but num_refillqs will be non-zero. The release function
will then dereference refillqs since it thinks the refillqs are present,
resulting in a NULL ptr dereference.

Only assign the num refillqs if the allocation was successful. This will
prevent the release function from entering the loop and accessing
refillqs.

[1] https://lore.kernel.org/netdev/20260227035625.2632753-1-kuba@kernel.org/

Fixes: 95af467d9a ("idpf: configure resources for RX queues")
Signed-off-by: Joshua Hay <joshua.a.hay@intel.com>
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-23 13:29:51 -07:00
Joshua Hay
1eb0db7e39 idpf: clear stale cdev_info ptr
Deinit calls idpf_idc_deinit_core_aux_device to free the cdev_info
memory, but leaves the adapter->cdev_info field with a stale pointer
value. This will bypass subsequent "if (!cdev_info)" checks if cdev_info
is not reallocated. For example, if idc_init fails after a reset,
cdev_info will already have been freed during the reset handling, but it
will not have been reallocated. The next reset or rmmod will result in a
crash.

[  +0.000008] BUG: kernel NULL pointer dereference, address: 00000000000000d0
[  +0.000033] #PF: supervisor read access in kernel mode
[  +0.000020] #PF: error_code(0x0000) - not-present page
[  +0.000017] PGD 2097dfa067 P4D 0
[  +0.000017] Oops: Oops: 0000 [#1] SMP NOPTI
...
[  +0.000018] RIP: 0010:device_del+0x3e/0x3d0
[  +0.000010] Call Trace:
[  +0.000010]  <TASK>
[  +0.000012]  idpf_idc_deinit_core_aux_device+0x36/0x70 [idpf]
[  +0.000034]  idpf_vc_core_deinit+0x3e/0x180 [idpf]
[  +0.000035]  idpf_remove+0x40/0x1d0 [idpf]
[  +0.000035]  pci_device_remove+0x42/0xb0
[  +0.000020]  device_release_driver_internal+0x19c/0x200
[  +0.000024]  driver_detach+0x48/0x90
[  +0.000018]  bus_remove_driver+0x6d/0x100
[  +0.000023]  pci_unregister_driver+0x2e/0xb0
[  +0.000022]  __do_sys_delete_module.isra.0+0x18c/0x2b0
[  +0.000025]  ? kmem_cache_free+0x2c2/0x390
[  +0.000023]  do_syscall_64+0x107/0x7d0
[  +0.000023]  entry_SYSCALL_64_after_hwframe+0x76/0x7e

Pass the adapter struct into idpf_idc_deinit_core_aux_device instead and
clear the cdev_info ptr.

Fixes: f4312e6bfa ("idpf: implement core RDMA auxiliary dev create, init, and destroy")
Signed-off-by: Joshua Hay <joshua.a.hay@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-23 13:29:50 -07:00
Kohei Enju
fecacfc95f iavf: fix out-of-bounds writes in iavf_get_ethtool_stats()
iavf incorrectly uses real_num_tx_queues for ETH_SS_STATS. Since the
value could change in runtime, we should use num_tx_queues instead.

Moreover iavf_get_ethtool_stats() uses num_active_queues while
iavf_get_sset_count() and iavf_get_stat_strings() use
real_num_tx_queues, which triggers out-of-bounds writes when we do
"ethtool -L" and "ethtool -S" simultaneously [1].

For example when we change channels from 1 to 8, Thread 3 could be
scheduled before Thread 2, and out-of-bounds writes could be triggered
in Thread 3:

Thread 1 (ethtool -L)       Thread 2 (work)        Thread 3 (ethtool -S)
iavf_set_channels()
...
iavf_alloc_queues()
-> num_active_queues = 8
iavf_schedule_finish_config()
                                                   iavf_get_sset_count()
                                                   real_num_tx_queues: 1
                                                   -> buffer for 1 queue
                                                   iavf_get_ethtool_stats()
                                                   num_active_queues: 8
                                                   -> out-of-bounds!
                            iavf_finish_config()
                            -> real_num_tx_queues = 8

Use immutable num_tx_queues in all related functions to avoid the issue.

[1]
 BUG: KASAN: vmalloc-out-of-bounds in iavf_add_one_ethtool_stat+0x200/0x270
 Write of size 8 at addr ffffc900031c9080 by task ethtool/5800

 CPU: 1 UID: 0 PID: 5800 Comm: ethtool Not tainted 6.19.0-enjuk-08403-g8137e3db7f1c #241 PREEMPT(full)
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
 Call Trace:
  <TASK>
  dump_stack_lvl+0x6f/0xb0
  print_report+0x170/0x4f3
  kasan_report+0xe1/0x180
  iavf_add_one_ethtool_stat+0x200/0x270
  iavf_get_ethtool_stats+0x14c/0x2e0
  __dev_ethtool+0x3d0c/0x5830
  dev_ethtool+0x12d/0x270
  dev_ioctl+0x53c/0xe30
  sock_do_ioctl+0x1a9/0x270
  sock_ioctl+0x3d4/0x5e0
  __x64_sys_ioctl+0x137/0x1c0
  do_syscall_64+0xf3/0x690
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
 RIP: 0033:0x7f7da0e6e36d
 ...
  </TASK>

 The buggy address belongs to a 1-page vmalloc region starting at 0xffffc900031c9000 allocated at __dev_ethtool+0x3cc9/0x5830
 The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000
 index:0xffff88813a013de0 pfn:0x13a013
 flags: 0x200000000000000(node=0|zone=2)
 raw: 0200000000000000 0000000000000000 dead000000000122 0000000000000000
 raw: ffff88813a013de0 0000000000000000 00000001ffffffff 0000000000000000
 page dumped because: kasan: bad access detected

 Memory state around the buggy address:
  ffffc900031c8f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
  ffffc900031c9000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 >ffffc900031c9080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
                    ^
  ffffc900031c9100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
  ffffc900031c9180: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8

Fixes: 64430f70ba ("iavf: Fix displaying queue statistics shown by ethtool")
Signed-off-by: Kohei Enju <kohei@enjuk.jp>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-23 13:29:50 -07:00
Petr Oros
2526e440df ice: use ice_update_eth_stats() for representor stats
ice_repr_get_stats64() and __ice_get_ethtool_stats() call
ice_update_vsi_stats() on the VF's src_vsi. This always returns early
because ICE_VSI_DOWN is permanently set for VF VSIs - ice_up() is never
called on them since queues are managed by iavf through virtchnl.

In __ice_get_ethtool_stats() the original code called
ice_update_vsi_stats() for all VSIs including representors, iterated
over ice_gstrings_vsi_stats[] to populate the data, and then bailed out
with an early return before the per-queue ring stats section. That early
return was necessary because representor VSIs have no rings on the PF
side - the rings belong to the VF driver (iavf), so accessing per-queue
stats would be invalid.

Move the representor handling to the top of __ice_get_ethtool_stats()
and call ice_update_eth_stats() directly to read the hardware GLV_*
counters. This matches ice_get_vf_stats() which already uses
ice_update_eth_stats() for the same VF VSI in legacy mode. Apply the
same fix to ice_repr_get_stats64().

Note that ice_gstrings_vsi_stats[] contains five software ring counters
(rx_buf_failed, rx_page_failed, tx_linearize, tx_busy, tx_restart) that
are always zero for representors since the PF never processes packets on
VF rings. This is pre-existing behavior unchanged by this patch.

Fixes: 7aae80cef7 ("ice: add port representor ethtool ops and stats")
Signed-off-by: Petr Oros <poros@redhat.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Patryk Holda <patryk.holda@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-23 13:29:07 -07:00
Chen-Yu Tsai
2d8c5098b8 PCI/pwrctrl: Do not power off on pwrctrl device removal
With the move to explicit pwrctrl power on/off APIs, the caller, i.e., the
PCI controller driver, should manage the power state. The pwrctrl drivers
should not try to clean up or power off when they are removed, as this
might end up disabling an already disabled regulator, causing a big
warning.  This can be triggered if a PCI controller driver's .remove()
callback calls pci_pwrctrl_destroy_devices() after
pci_pwrctrl_power_off_devices().

Drop the devm cleanup parts that turn off regulators from the pwrctrl
drivers.

Fixes: b921aa3f8d ("PCI/pwrctrl: Switch to pwrctrl create, power on/off, destroy APIs")
Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://patch.msgid.link/20260226092234.3859740-1-wenst@chromium.org
2026-03-23 15:25:32 -05:00
Petr Oros
ad85de0fc0 ice: fix inverted ready check for VF representors
Commit 0f00a897c9 ("ice: check if SF is ready in ethtool ops")
refactored the VF readiness check into a generic repr->ops.ready()
callback but implemented ice_repr_ready_vf() with inverted logic:

  return !ice_check_vf_ready_for_cfg(repr->vf);

ice_check_vf_ready_for_cfg() returns 0 on success, so the negation
makes ready() return non-zero when the VF is ready. All callers treat
non-zero as "not ready, skip", causing ndo_get_stats64, get_drvinfo,
get_strings and get_ethtool_stats to always bail out in switchdev mode.

Remove the erroneous negation. The SF variant ice_repr_ready_sf() is
already correct (returns !active, i.e. non-zero when not active).

Fixes: 0f00a897c9 ("ice: check if SF is ready in ethtool ops")
Signed-off-by: Petr Oros <poros@redhat.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Tested-by: Patryk Holda <patryk.holda@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-23 12:54:23 -07:00
Michal Swiatkowski
c7fcd269e1 ice: set max queues in alloc_etherdev_mqs()
When allocating netdevice using alloc_etherdev_mqs() the maximum
supported queues number should be passed. The vsi->alloc_txq/rxq is
storing current number of queues, not the maximum ones.

Use the same function for getting max Tx and Rx queues which is used
during ethtool -l call to set maximum number of queues during netdev
allocation.

Reproduction steps:
$ethtool -l $pf # says current 16, max 64
$ethtool -S $pf # fine
$ethtool -L $pf combined 40 # crash

[491187.472594] Call Trace:
[491187.472829]  <TASK>
[491187.473067]  netif_set_xps_queue+0x26/0x40
[491187.473305]  ice_vsi_cfg_txq+0x265/0x3d0 [ice]
[491187.473619]  ice_vsi_cfg_lan_txqs+0x68/0xa0 [ice]
[491187.473918]  ice_vsi_cfg_lan+0x2b/0xa0 [ice]
[491187.474202]  ice_vsi_open+0x71/0x170 [ice]
[491187.474484]  ice_vsi_recfg_qs+0x17f/0x230 [ice]
[491187.474759]  ? dev_get_min_mp_channel_count+0xab/0xd0
[491187.474987]  ice_set_channels+0x185/0x3d0 [ice]
[491187.475278]  ethnl_set_channels+0x26f/0x340

Fixes: ee13aa1a2c ("ice: use netif_get_num_default_rss_queues()")
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Alexander Nowlin <alexander.nowlin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-23 12:54:23 -07:00
Filipe Manana
1c37d896b1 btrfs: fix lost error when running device stats on multiple devices fs
Whenever we get an error updating the device stats item for a device in
btrfs_run_dev_stats() we allow the loop to go to the next device, and if
updating the stats item for the next device succeeds, we end up losing
the error we had from the previous device.

Fix this by breaking out of the loop once we get an error and make sure
it's returned to the caller. Since we are in the transaction commit path
(and in the critical section actually), returning the error will result
in a transaction abort.

Fixes: 733f4fbbc1 ("Btrfs: read device stats on mount, write modified ones during commit")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-23 20:12:12 +01:00
Goldwyn Rodrigues
a85b46db14 btrfs: tracepoints: get correct superblock from dentry in event btrfs_sync_file()
If overlay is used on top of btrfs, dentry->d_sb translates to overlay's
super block and fsid assignment will lead to a crash.

Use file_inode(file)->i_sb to always get btrfs_sb.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-23 20:11:12 +01:00
Qu Wenruo
0dcabcb920 btrfs: zlib: handle page aligned compressed size correctly
[BUG]
Since commit 3d74a7556f ("btrfs: zlib: introduce zlib_compress_bio()
helper"), there are some reports about different crashes in zlib
compression path. One of the symptoms is list corruption like the
following:

  list_del corruption. next->prev should be fffffbb340204a08, but was ffff8d6517cb7de0. (next=fffffbb3402d62c8)
  ------------[ cut here ]------------
  kernel BUG at lib/list_debug.c:65!
  Oops: invalid opcode: 0000 [#1] SMP NOPTI
  CPU: 1 UID: 0 PID: 21436 Comm: kworker/u16:7 Not tainted 7.0.0-rc2-jcg+ #1 PREEMPT
  Hardware name: LENOVO 10VGS02P00/3130, BIOS M1XKT57A 02/10/2022
  Workqueue: btrfs-delalloc btrfs_work_helper [btrfs]
  RIP: 0010:__list_del_entry_valid_or_report+0xec/0xf0
  Call Trace:
   <TASK>
   btrfs_alloc_compr_folio+0xae/0xc0 [btrfs]
   zlib_compress_bio+0x39d/0x6a0 [btrfs]
   btrfs_compress_bio+0x2e3/0x3d0 [btrfs]
   compress_file_range+0x2b0/0x660 [btrfs]
   btrfs_work_helper+0xdb/0x3e0 [btrfs]
   process_one_work+0x192/0x3d0
   worker_thread+0x19a/0x310
   kthread+0xdf/0x120
   ret_from_fork+0x22e/0x310
   ret_from_fork_asm+0x1a/0x30
   </TASK>
  ---[ end trace 0000000000000000 ]---

Other symptoms include VM_BUG_ON() during folio_put() but it's rarer.

David Sterba firstly reported this during his CI runs but unfortunately
I'm unable to hit it.

Meanwhile zstd/lzo doesn't seem to have the same problem.

[CAUSE]
During zlib_compress_bio() every time the output buffer is full, we
queue the full folio into the compressed bio, and allocate a new folio
as the output folio.

After the input has finished, we loop through zlib_deflate() with
Z_FINISH to flush all output.

And when that is done, we still need to check if the last folio has any
content, and if so we still need to queue that part into the compressed
bio.

The problem is in the final folio handling, if the final folio is full
(for x86_64 the folio size is 4K), the length to queue is calculated by

  u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out);

But since total_out is 4K aligned, the resulted @cur_len will be 0, then
we hit the bio_add_folio(), which has a quirk that if bio_add_folio()
got an length 0, it will still queue the folio into the bio, but return
false.

In that case we go to out: tag, which calls btrfs_free_compr_folio() to
release @out_folio, which may put the out folio into the btrfs global
pool list.

On the other hand, that @out_folio is already added to the
compressed bio, and will later be released again by
cleanup_compressed_bio(), which results double release.

And if this time we still need to put the folio into the btrfs global
pool list, it will result a list corruption because it's already in the
list.

[FIX]
Instead of offset_inside_folio(), directly use the difference between
strm.total_out and bi_size.
So that if the last folio is completely full, we can still properly
queue the full folio other than queueing zero byte.

Fixes: 3d74a7556f ("btrfs: zlib: introduce zlib_compress_bio() helper")
Reported-by: David Sterba <dsterba@suse.com>
Reported-by: Jean-Christophe Guillain <jean-christophe@guillain.net>
Reported-by: syzbot+3c4d8371d65230f852a2@syzkaller.appspotmail.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=221176
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-23 20:11:07 +01:00
Shin'ichiro Kawasaki
a4376d9a5d btrfs: fix leak of kobject name for sub-group space_info
When create_space_info_sub_group() allocates elements of
space_info->sub_group[], kobject_init_and_add() is called for each
element via btrfs_sysfs_add_space_info_type(). However, when
check_removing_space_info() frees these elements, it does not call
btrfs_sysfs_remove_space_info() on them. As a result, kobject_put() is
not called and the associated kobj->name objects are leaked.

This memory leak is reproduced by running the blktests test case
zbd/009 on kernels built with CONFIG_DEBUG_KMEMLEAK. The kmemleak
feature reports the following error:

unreferenced object 0xffff888112877d40 (size 16):
  comm "mount", pid 1244, jiffies 4294996972
  hex dump (first 16 bytes):
    64 61 74 61 2d 72 65 6c 6f 63 00 c4 c6 a7 cb 7f  data-reloc......
  backtrace (crc 53ffde4d):
    __kmalloc_node_track_caller_noprof+0x619/0x870
    kstrdup+0x42/0xc0
    kobject_set_name_vargs+0x44/0x110
    kobject_init_and_add+0xcf/0x150
    btrfs_sysfs_add_space_info_type+0xfc/0x210 [btrfs]
    create_space_info_sub_group.constprop.0+0xfb/0x1b0 [btrfs]
    create_space_info+0x211/0x320 [btrfs]
    btrfs_init_space_info+0x15a/0x1b0 [btrfs]
    open_ctree+0x33c7/0x4a50 [btrfs]
    btrfs_get_tree.cold+0x9f/0x1ee [btrfs]
    vfs_get_tree+0x87/0x2f0
    vfs_cmd_create+0xbd/0x280
    __do_sys_fsconfig+0x3df/0x990
    do_syscall_64+0x136/0x1540
    entry_SYSCALL_64_after_hwframe+0x76/0x7e

To avoid the leak, call btrfs_sysfs_remove_space_info() instead of
kfree() for the elements.

Fixes: f92ee31e03 ("btrfs: introduce btrfs_space_info sub-group")
Link: https://lore.kernel.org/linux-block/b9488881-f18d-4f47-91a5-3c9bf63955a5@wdc.com/
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-23 20:10:27 +01:00
Filipe Manana
5254d4181a btrfs: fix zero size inode with non-zero size after log replay
When logging that an inode exists, as part of logging a new name or
logging new dir entries for a directory, we always set the generation of
the logged inode item to 0. This is to signal during log replay (in
overwrite_item()), that we should not set the i_size since we only logged
that an inode exists, so the i_size of the inode in the subvolume tree
must be preserved (as when we log new names or that an inode exists, we
don't log extents).

This works fine except when we have already logged an inode in full mode
or it's the first time we are logging an inode created in a past
transaction, that inode has a new i_size of 0 and then we log a new name
for the inode (due to a new hardlink or a rename), in which case we log
an i_size of 0 for the inode and a generation of 0, which causes the log
replay code to not update the inode's i_size to 0 (in overwrite_item()).

An example scenario:

  mkdir /mnt/dir
  xfs_io -f -c "pwrite 0 64K" /mnt/dir/foo

  sync

  xfs_io -c "truncate 0" -c "fsync" /mnt/dir/foo

  ln /mnt/dir/foo /mnt/dir/bar

  xfs_io -c "fsync" /mnt/dir

  <power fail>

After log replay the file remains with a size of 64K. This is because when
we first log the inode, when we fsync file foo, we log its current i_size
of 0, and then when we create a hard link we log again the inode in exists
mode (LOG_INODE_EXISTS) but we set a generation of 0 for the inode item we
add to the log tree, so during log replay overwrite_item() sees that the
generation is 0 and i_size is 0 so we skip updating the inode's i_size
from 64K to 0.

Fix this by making sure at fill_inode_item() we always log the real
generation of the inode if it was logged in the current transaction with
the i_size we logged before. Also if an inode created in a previous
transaction is logged in exists mode only, make sure we log the i_size
stored in the inode item located from the commit root, so that if we log
multiple times that the inode exists we get the correct i_size.

A test case for fstests will follow soon.

Reported-by: Vyacheslav Kovalevsky <slava.kovalevskiy.2014@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/af8c15fa-4e41-4bb2-885c-0bc4e97532a6@gmail.com/
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-23 20:09:33 +01:00
Mark Harmstone
b52fe51f72 btrfs: fix super block offset in error message in btrfs_validate_super()
Fix the superblock offset mismatch error message in
btrfs_validate_super(): we changed it so that it considers all the
superblocks, but the message still assumes we're only looking at the
first one.

The change from %u to %llu is because we're changing from a constant to
a u64.

Fixes: 069ec957c3 ("btrfs: Refactor btrfs_check_super_valid")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-23 20:09:08 +01:00
Yang Wang
3e6dd28a11 drm/amd/pm: disable OD_FAN_CURVE if temp or pwm range invalid for smu v13
Forcibly disable the OD_FAN_CURVE feature when temperature or PWM range is invalid,
otherwise PMFW will reject this configuration on smu v13.0.x

example:
$ sudo cat /sys/bus/pci/devices/<BDF>/gpu_od/fan_ctrl/fan_curve

OD_FAN_CURVE:
0: 0C 0%
1: 0C 0%
2: 0C 0%
3: 0C 0%
4: 0C 0%
OD_RANGE:
FAN_CURVE(hotspot temp): 0C 0C
FAN_CURVE(fan speed): 0% 0%

$ echo "0 50 40" | sudo tee fan_curve

kernel log:
[  756.442527] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]!
[  777.345800] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]!

Closes: https://github.com/ROCm/amdgpu/issues/208
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 470891606c5a97b1d0d937e0aa67a3bed9fcb056)
Cc: stable@vger.kernel.org
2026-03-23 14:49:15 -04:00
Asad Kamal
2f0e491fae drm/amd/pm: Return -EOPNOTSUPP for unsupported OD_MCLK on smu_v13_0_6
When SET_UCLK_MAX capability is absent, return -EOPNOTSUPP from
smu_v13_0_6_emit_clk_levels() for OD_MCLK instead of 0. This makes
unsupported OD_MCLK reporting consistent with other clock types
and allows callers to skip the entry cleanly.

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit d82e0a72d9189e8acd353988e1a57f85ce479e37)
Cc: stable@vger.kernel.org
2026-03-23 14:48:52 -04:00
Asad Kamal
cdbc3b62cf drm/amd/pm: Skip redundant UCLK restore in smu_v13_0_6
Only reapply UCLK soft limits during PP_OD_RESTORE_DEFAULT when the
current max differs from the DPM table max. This avoids redundant
SMC updates and prevents -EINVAL on restore when no change is needed.

Fixes: b7a9003445 ("drm/amd/pm: Allow setting max UCLK on SMU v13.0.6")
Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 17f11bbbc76c8e83c8474ea708316b1e3631d927)
2026-03-23 14:48:45 -04:00
Alex Hung
37c2caa167 drm/amd/display: Fix drm_edid leak in amdgpu_dm
[WHAT]
When a sink is connected, aconnector->drm_edid was overwritten without
freeing the previous allocation, causing a memory leak on resume.

[HOW]
Free the previous drm_edid before updating it.

Reviewed-by: Roman Li <roman.li@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Chuanyu Tseng <chuanyu.tseng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 52024a94e7111366141cfc5d888b2ef011f879e5)
Cc: stable@vger.kernel.org
2026-03-23 14:48:32 -04:00
Eric Huang
14b81abe7b drm/amdgpu: prevent immediate PASID reuse case
PASID resue could cause interrupt issue when process
immediately runs into hw state left by previous
process exited with the same PASID, it's possible that
page faults are still pending in the IH ring buffer when
the process exits and frees up its PASID. To prevent the
case, it uses idr cyclic allocator same as kernel pid's.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 8f1de51f49be692de137c8525106e0fce2d1912d)
Cc: stable@vger.kernel.org
2026-03-23 14:48:06 -04:00
Ruijing Dong
2d300ebfc4 drm/amdgpu: fix strsep() corrupting lockup_timeout on multi-GPU (v3)
amdgpu_device_get_job_timeout_settings() passes a pointer directly
to the global amdgpu_lockup_timeout[] buffer into strsep().
strsep() destructively replaces delimiter characters with '\0'
in-place.

On multi-GPU systems, this function is called once per device.
When a multi-value setting like "0,0,0,-1" is used, the first
GPU's call transforms the global buffer into "0\00\00\0-1". The
second GPU then sees only "0" (terminated at the first '\0'),
parses a single value, hits the single-value fallthrough
(index == 1), and applies timeout=0 to all rings — causing
immediate false job timeouts.

Fix this by copying into a stack-local array before calling
strsep(), so the global module parameter buffer remains intact
across calls. The buffer is AMDGPU_MAX_TIMEOUT_PARAM_LENGTH
(256) bytes, which is safe for the stack.

v2: wrap commit message to 72 columns, add Assisted-by tag.
v3: use stack array with strscpy() instead of kstrdup()/kfree()
    to avoid unnecessary heap allocation (Christian).

This patch was developed with assistance from Claude (claude-opus-4-6).

Assisted-by: Claude:claude-opus-4-6
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Ruijing Dong <ruijing.dong@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 94d79f51efecb74be1d88dde66bdc8bfcca17935)
Cc: stable@vger.kernel.org
2026-03-23 14:47:47 -04:00
Yussuf Khalil
aed3d041ab drm/amd/display: Do not skip unrelated mode changes in DSC validation
Starting with commit 17ce8a6907 ("drm/amd/display: Add dsc pre-validation in
atomic check"), amdgpu resets the CRTC state mode_changed flag to false when
recomputing the DSC configuration results in no timing change for a particular
stream.

However, this is incorrect in scenarios where a change in MST/DSC configuration
happens in the same KMS commit as another (unrelated) mode change. For example,
the integrated panel of a laptop may be configured differently (e.g., HDR
enabled/disabled) depending on whether external screens are attached. In this
case, plugging in external DP-MST screens may result in the mode_changed flag
being dropped incorrectly for the integrated panel if its DSC configuration
did not change during precomputation in pre_validate_dsc().

At this point, however, dm_update_crtc_state() has already created new streams
for CRTCs with DSC-independent mode changes. In turn,
amdgpu_dm_commit_streams() will never release the old stream, resulting in a
memory leak. amdgpu_dm_atomic_commit_tail() will never acquire a reference to
the new stream either, which manifests as a use-after-free when the stream gets
disabled later on:

BUG: KASAN: use-after-free in dc_stream_release+0x25/0x90 [amdgpu]
Write of size 4 at addr ffff88813d836524 by task kworker/9:9/29977

Workqueue: events drm_mode_rmfb_work_fn
Call Trace:
 <TASK>
 dump_stack_lvl+0x6e/0xa0
 print_address_description.constprop.0+0x88/0x320
 ? dc_stream_release+0x25/0x90 [amdgpu]
 print_report+0xfc/0x1ff
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? __virt_addr_valid+0x225/0x4e0
 ? dc_stream_release+0x25/0x90 [amdgpu]
 kasan_report+0xe1/0x180
 ? dc_stream_release+0x25/0x90 [amdgpu]
 kasan_check_range+0x125/0x200
 dc_stream_release+0x25/0x90 [amdgpu]
 dc_state_destruct+0x14d/0x5c0 [amdgpu]
 dc_state_release.part.0+0x4e/0x130 [amdgpu]
 dm_atomic_destroy_state+0x3f/0x70 [amdgpu]
 drm_atomic_state_default_clear+0x8ee/0xf30
 ? drm_mode_object_put.part.0+0xb1/0x130
 __drm_atomic_state_free+0x15c/0x2d0
 atomic_remove_fb+0x67e/0x980

Since there is no reliable way of figuring out whether a CRTC has unrelated
mode changes pending at the time of DSC validation, remember the value of the
mode_changed flag from before the point where a CRTC was marked as potentially
affected by a change in DSC configuration. Reset the mode_changed flag to this
earlier value instead in pre_validate_dsc().

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5004
Fixes: 17ce8a6907 ("drm/amd/display: Add dsc pre-validation in atomic check")
Signed-off-by: Yussuf Khalil <dev@pp3345.net>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit cc7c7121ae082b7b82891baa7280f1ff2608f22b)
2026-03-23 14:38:56 -04:00
Felix Gu
63542bb402 spi: meson-spicc: Fix double-put in remove path
meson_spicc_probe() registers the controller with
devm_spi_register_controller(), so teardown already drops the
controller reference via devm cleanup.

Calling spi_controller_put() again in meson_spicc_remove()
causes a double-put.

Fixes: 8311ee2164 ("spi: meson-spicc: fix memory leak in meson_spicc_remove")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Link: https://patch.msgid.link/20260322-rockchip-v1-1-fac3f0c6dad8@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-23 18:32:05 +00:00
Cezary Rojewski
5a184f1cb4 ASoC: Intel: catpt: Fix the device initialization
The DMA mask shall be coerced before any buffer allocations for the
device are done.  At the same time explain why DMA mask of 31 bits is
used in the first place.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Fixes: 7a10b66a5d ("ASoC: Intel: catpt: Device driver lifecycle")
Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20260320101217.1243688-1-cezary.rojewski@intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-23 17:32:27 +00:00
SeongJae Park
84481e705a mm/damon/stat: monitor all System RAM resources
DAMON_STAT usage document (Documentation/admin-guide/mm/damon/stat.rst)
says it monitors the system's entire physical memory.  But, it is
monitoring only the biggest System RAM resource of the system.  When there
are multiple System RAM resources, this results in monitoring only an
unexpectedly small fraction of the physical memory.  For example, suppose
the system has a 500 GiB System RAM, 10 MiB non-System RAM, and 500 GiB
System RAM resources in order on the physical address space.  DAMON_STAT
will monitor only the first 500 GiB System RAM.  This situation is
particularly common on NUMA systems.

Select a physical address range that covers all System RAM areas of the
system, to fix this issue and make it work as documented.

[sj@kernel.org: return error if monitoring target region is invalid]
  Link: https://lkml.kernel.org/r/20260317053631.87907-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20260316235118.873-1-sj@kernel.org
Fixes: 369c415e60 ("mm/damon: introduce DAMON_STAT module")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[6.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-23 09:35:05 -07:00
Lorenzo Stoakes (Oracle)
631c111150 mm/zswap: add missing kunmap_local()
Commit e2c3b6b21c ("mm: zswap: use SG list decompression APIs from
zsmalloc") updated zswap_decompress() to use the scatterwalk API to copy
data for uncompressed pages.

In doing so, it mapped kernel memory locally for 32-bit kernels using
kmap_local_folio(), however it never unmapped this memory.

This resulted in the linked syzbot report where a BUG_ON() is triggered
due to leaking the kmap slot.

This patch fixes the issue by explicitly unmapping the established kmap.


Also, add flush_dcache_folio() after the kunmap_local() call

I had assumed that a new folio here combined with the flush that is done at
the point of setting the PTE would suffice, but it doesn't seem that's
actually the case, as update_mmu_cache() will in many archtectures only
actually flush entries where a dcache flush was done on a range previously.

I had also wondered whether kunmap_local() might suffice, but it doesn't
seem to be the case.

Some arches do seem to actually dcache flush on unmap, parisc does it if
CONFIG_HIGHMEM is not set by setting ARCH_HAS_FLUSH_ON_KUNMAP and calling
kunmap_flush_on_unmap() from __kunmap_local(), otherwise non-CONFIG_HIGHMEM
callers do nothing here.

Otherwise arch_kmap_local_pre_unmap() is called which does:

* sparc - flush_cache_all()
* arm - if VIVT, __cpuc_flush_dcache_area()
* otherwise - nothing

Also arch_kmap_local_post_unmap() is called which does:

* arm - local_flush_tlb_kernel_page()
* csky - kmap_flush_tlb()
* microblaze, ppc - local_flush_tlb_page()
* mips - local_flush_tlb_one()
* sparc - flush_tlb_all() (again)
* x86 - arch_flush_lazy_mmu_mode()
* otherwise - nothing

But this is only if it's high memory, and doesn't cover all architectures,
so is presumably intended to handle other cache consistency concerns.

In any case, VIPT is problematic here whether low or high memory (in spite
of what the documentation claims, see [0] - 'the kernel did write to a page
that is in the page cache page and / or in high memory'), because dirty
cache lines may exist at the set indexed by the kernel direct mapping,
which won't exist in the set indexed by any subsequent userland mapping,
meaning userland might read stale data from L2 cache.

Even if the documentation is correct and low memory is fine not to be
flushed here, we can't be sure as to whether the memory is low or high
(kmap_local_folio() will be a no-op if low), and this call should be
harmless if it is low.

VIVT would require more work if the memory were shared and already mapped,
but this isn't the case here, and would anyway be handled by the dcache
flush call.

In any case, we definitely need this flush as far as I can tell.

And we should probably consider updating the documentation unless it turns
out there's somehow dcache synchronisation that happens for low
memory/64-bit kernels elsewhere?

[ljs@kernel.org: add flush_dcache_folio() after the kunmap_local() call]
  Link: https://lkml.kernel.org/r/13e09a99-181f-45ac-a18d-057faf94bccb@lucifer.local
Link: https://lkml.kernel.org/r/20260316140122.339697-1-ljs@kernel.org
Link: https://docs.kernel.org/core-api/cachetlb.html [0]
Fixes: e2c3b6b21c ("mm: zswap: use SG list decompression APIs from zsmalloc")
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reported-by: syzbot+fe426bef95363177631d@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69b75e2c.050a0220.12d28.015a.GAE@google.com
Acked-by: Yosry Ahmed <yosry@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Acked-by: Yosry Ahmed <yosry@kernel.org>
Acked-by: Nhat Pham <nphamcs@gmail.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-23 09:35:05 -07:00
Muhammad Usama Anjum
38dfd294e2 mailmap: update email address for Muhammad Usama Anjum
Add updated email address.

Link: https://lkml.kernel.org/r/20260310171757.3970390-1-usama.anjum@arm.com
Signed-off-by: Muhammad Usama Anjum <usama.anjum@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Carlos Bilbao <carlos.bilbao@kernel.org>
Cc: Hans Verkuil <hverkuil@kernel.org>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Martin Kepplinger <martink@posteo.de>
Cc: Shannon Nelson <sln@onemain.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-23 09:35:05 -07:00
Felix Gu
a42c9b8b0c spi: sn-f-ospi: Use devm_mutex_init() to simplify code
Switch to devm_mutex_init() to handle mutex destruction automatically.
This simplifies the error paths in probe() and removes the need for an
explicit mutex_destroy() in remove() callback.

Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260319-sn-f-v1-2-33a6738d2da8@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-23 14:51:59 +00:00
Felix Gu
ef3d549e1d spi: sn-f-ospi: Fix resource leak in f_ospi_probe()
In f_ospi_probe(), when num_cs validation fails, it returns without
calling spi_controller_put() on the SPI controller, which causes a
resource leak.

Use devm_spi_alloc_host() instead of spi_alloc_host() to ensure the
SPI controller is properly freed when probe fails.

Fixes: 1b74dd64c8 ("spi: Add Socionext F_OSPI SPI flash controller driver")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260319-sn-f-v1-1-33a6738d2da8@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-23 14:51:59 +00:00
Michał Winiarski
87997b6c65 drm/xe/pf: Fix use-after-free in migration restore
When an error is returned from xe_sriov_pf_migration_restore_produce(),
the data pointer is not set to NULL, which can trigger use-after-free
in subsequent .write() calls.
Set the pointer to NULL upon error to fix the problem.

Fixes: 1ed30397c0 ("drm/xe/pf: Add support for encap/decap of bitstream to/from packet")
Reported-by: Sebastian Österlund <sebastian.osterlund@intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7230
Reviewed-by: Shuicheng Lin <shuicheng.lin@intel.com>
Link: https://patch.msgid.link/20260217154118.176902-1-michal.winiarski@intel.com
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
(cherry picked from commit 4f53d8c6d23527d734fe3531d08e15cb170a0819)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-23 10:10:50 -04:00
Peter Zijlstra
a3e93cac25 x86/cpu: Add comment clarifying CRn pinning
To avoid future confusion on the purpose and design of the CRn pinning code.

Also note that if the attacker controls page-tables, the CRn bits lose much of
the attraction anyway.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://patch.msgid.link/20260320092521.GG3739106@noisy.programming.kicks-ass.net
2026-03-23 14:25:53 +01:00
Michal Piekos
70f8915ea4 pinctrl: sunxi: fix gpiochip_lock_as_irq() failure when pinmux is unknown
Fixes kernel hang during boot due to inability to set up IRQ on AXP313a.

The issue is caused by gpiochip_lock_as_irq() which is failing when gpio
is in uninitialized state.

Solution is to set pinmux to GPIO INPUT in
sunxi_pinctrl_irq_request_resources() if it wasn't initialized
earlier.

Tested on Orange Pi Zero 3.

Fixes: 01e10d0272 ("pinctrl: sunxi: Implement gpiochip::get_direction()")
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Chen-Yu Tsai <wens@kernel.org>
Signed-off-by: Michal Piekos <michal.piekos@mmpsystems.pl>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-03-23 14:23:09 +01:00
Andre Przywara
42e06688c6 pinctrl: sunxi: pass down flags to pinctrl routines
Recent changes in the Allwinner pinctrl/GPIO IP made us add some quirks,
which the new SoCs (A523 family) need to use. We now have a comfortable
"flags" field on the per-SoC setup side, to tag those quirks we need, but
were translating those flag bits into specific fields for runtime use, in
the init routine.
Now the newest Allwinner GPIO IP adds even more quirks and exceptions,
some of a boolean nature.
To avoid inventing various new boolean flags for the runtime struct
sunxi_pinctrl, let's just directly pass on the flags variable used by the
setup code, so runtime can check for those various quirk bits directly.

Rename the "variant" member to "flags", and directly copy the value from
the setup code into there. Move the variant masking from the init
routine to the functions which actually use the "variant" value.

This mostly paves the way for the new A733 IP generation, which needs
more quirks to be checked at runtime.

Reviewed-by: Chen-Yu Tsai <wens@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Michal Piekos <michal.piekos@mmpsystems.pl>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-03-23 14:23:08 +01:00
Nikunj A Dadhania
3645eb7e39 x86/fred: Fix early boot failures on SEV-ES/SNP guests
FRED-enabled SEV-(ES,SNP) guests fail to boot due to the following issues
in the early boot sequence:

* FRED does not have a #VC exception handler in the dispatch logic

* Early FRED #VC exceptions attempt to use uninitialized per-CPU GHCBs
  instead of boot_ghcb

Add X86_TRAP_VC case to fred_hwexc() with a new exc_vmm_communication()
function that provides the unified entry point FRED requires, dispatching
to existing user/kernel handlers based on privilege level. The function is
already declared via DECLARE_IDTENTRY_VC().

Fix early GHCB access by falling back to boot_ghcb in
__sev_{get,put}_ghcb() when per-CPU GHCBs are not yet initialized.

Fixes: 14619d912b ("x86/fred: FRED entry/exit and dispatch code")
Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: <stable@kernel.org>  # 6.12+
Link: https://patch.msgid.link/20260318075654.1792916-4-nikunj@amd.com
2026-03-23 14:18:18 +01:00
Huiwen He
34420cb92d smb/client: ensure smb2_mapping_table rebuild on cmd changes
The current rule for smb2_mapping_table.c uses `$(call cmd,...)`, which
fails to track command line modifications in the Makefile (e.g., modifying
the command to `perl -d` or `perl -w` for debug will not trigger a rebuild)
and does not generate the required .cmd file for Kbuild.

Fix this by transitioning to the standard `$(call if_changed,...)` macro.
This includes adding the `FORCE` prerequisite and appending the output
file to the `targets` variable so Kbuild can track it properly.

As a result, Kbuild now automatically handles the cleaning of the
generated file, allowing us to safely drop the redundant `clean-files`
assignment.

Fixes: c527e13a7a ("cifs: Autogenerate SMB2 error mapping table")
Signed-off-by: Huiwen He <hehuiwen@kylinos.cn>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-23 08:17:26 -05:00
Borislav Petkov (AMD)
411df123c0 x86/cpu: Remove X86_CR4_FRED from the CR4 pinned bits mask
Commit in Fixes added the FRED CR4 bit to the CR4 pinned bits mask so
that whenever something else modifies CR4, that bit remains set. Which
in itself is a perfectly fine idea.

However, there's an issue when during boot FRED is initialized: first on
the BSP and later on the APs. Thus, there's a window in time when
exceptions cannot be handled.

This becomes particularly nasty when running as SEV-{ES,SNP} or TDX
guests which, when they manage to trigger exceptions during that short
window described above, triple fault due to FRED MSRs not being set up
yet.

See Link tag below for a much more detailed explanation of the
situation.

So, as a result, the commit in that Link URL tried to address this
shortcoming by temporarily disabling CR4 pinning when an AP is not
online yet.

However, that is a problem in itself because in this case, an attack on
the kernel needs to only modify the online bit - a single bit in RW
memory - and then disable CR4 pinning and then disable SM*P, leading to
more and worse things to happen to the system.

So, instead, remove the FRED bit from the CR4 pinning mask, thus
obviating the need to temporarily disable CR4 pinning.

If someone manages to disable FRED when poking at CR4, then
idt_invalidate() would make sure the system would crash'n'burn on the
first exception triggered, which is a much better outcome security-wise.

Fixes: ff45746fbf ("x86/cpu: Add X86_CR4_FRED macro")
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: <stable@kernel.org> # 6.12+
Link: https://lore.kernel.org/r/177385987098.1647592.3381141860481415647.tip-bot2@tip-bot2
2026-03-23 14:12:03 +01:00
Youngjun Park
a8d51efb59 PM: sleep: Drop spurious WARN_ON() from pm_restore_gfp_mask()
Commit 35e4a69b20 ("PM: sleep: Allow pm_restrict_gfp_mask()
stacking") introduced refcount-based GFP mask management that warns
when pm_restore_gfp_mask() is called with saved_gfp_count == 0.

Some hibernation paths call pm_restore_gfp_mask() defensively where
the GFP mask may or may not be restricted depending on the execution
path. For example, the uswsusp interface invokes it in
SNAPSHOT_CREATE_IMAGE, SNAPSHOT_UNFREEZE, and snapshot_release().
Before the stacking change this was a silent no-op; it now triggers
a spurious WARNING.

Remove the WARN_ON() wrapper from the !saved_gfp_count check while
retaining the check itself, so that defensive calls remain harmless
without producing false warnings.

Fixes: 35e4a69b20 ("PM: sleep: Allow pm_restrict_gfp_mask() stacking")
Signed-off-by: Youngjun Park <youngjun.park@lge.com>
[ rjw: Subject tweak ]
Link: https://patch.msgid.link/20260322120528.750178-1-youngjun.park@lge.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-23 13:54:53 +01:00
David McFarland
e02ea3ae8e platform/x86: intel-hid: disable wakeup_mode during hibernation
Add a freeze handler which clears wakeup_mode. This fixes aborted hibernation on
Dell Precision 3880.

  Wakeup event detected during hibernation, rolling back

This system sends power button events during hibernation, even when triggered by
software.

Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218634
Fixes: 0c4cae1bc0 ("PM: hibernate: Avoid missing wakeup events during hibernation")
Signed-off-by: David McFarland <corngood@gmail.com>
Link: https://patch.msgid.link/20260205231629.1336348-1-corngood@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:49:06 +02:00
Matthew Schwartz
8a243d972a platform/x86: asus-armoury: add support for GZ302EA and GZ302EAC
Add TDP data for tablet models GZ302EA and GZ302EAC.

Signed-off-by: Matthew Schwartz <matthew.schwartz@linux.dev>
Reviewed-by: Denis Benato <denis.benato@linux.dev>
Link: https://patch.msgid.link/20260313004939.4103835-1-matthew.schwartz@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:49:05 +02:00
Matthew Schwartz
0198d27432 platform/x86: asus-nb-wmi: add DMI quirk for ASUS ROG Flow Z13-KJP GZ302EAC
The ASUS ROG Flow Z13-KJP GZ302EAC model uses sys_vendor name ASUS
rather than ASUSTeK COMPUTER INC., but it needs the same folio quirk as
the other ROG Flow Z13. To keep things simple, just match on sys_vendor
ASUS since it covers both.

Signed-off-by: Matthew Schwartz <matthew.schwartz@linux.dev>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Denis Benato <denis.benato@linux.dev>
Link: https://patch.msgid.link/20260312212246.1608080-1-matthew.schwartz@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:49:03 +02:00
Alok Tiwari
7ff61be5a4 platform/x86/amd/hsmp: Fix typo in error message
Fix a typo in the HSMP error message where "tmeout" should be "timeout".

Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Link: https://patch.msgid.link/20260310125307.700108-1-alok.a.tiwari@oracle.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:49:02 +02:00
Alok Tiwari
2061f7b042 platform/olpc: olpc-xo175-ec: Fix overflow error message to print inlen
The command length check validates inlen (> 5), but the error message
incorrectly printed resp_len. Print inlen so the log reflects the
actual command length.

Fixes: 0c3d931b3a ("Platform: OLPC: Add XO-1.75 EC driver")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Acked-by: Lubomir Rintel <lkundrak@v3.sk>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260310130138.700687-1-alok.a.tiwari@oracle.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:49:00 +02:00
Nathan Chancellor
5a3955f360 platform/x86: lenovo: wmi-gamezone: Drop gz_chain_head
The gz_chain_head variable has been unused since the driver's initial
addition to the tree. Its use was eliminated between v3 and v4 during
development but due to the reference of gz_chain_head's wait_list
member, the compiler could not warn that it was unused.

After a (tip) commit ("locking/rwsem: Remove the list_head from struct
rw_semaphore"), which removed a reference to the variable passed to
__RWSEM_INITIALIZER(), certain configurations show an unused variable
warning from the Lenovo wmi-gamezone driver:

  drivers/platform/x86/lenovo/wmi-gamezone.c:34:31: warning: 'gz_chain_head' defined but not used [-Wunused-variable]
     34 | static BLOCKING_NOTIFIER_HEAD(gz_chain_head);
        |                               ^~~~~~~~~~~~~
  include/linux/notifier.h:119:39: note: in definition of macro 'BLOCKING_NOTIFIER_HEAD'
    119 |         struct blocking_notifier_head name =                    \
        |                                       ^~~~

Remove the variable to prevent the warning from showing up.

Fixes: 22024ac536 ("platform/x86: Add Lenovo Gamezone WMI Driver")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Link: https://patch.msgid.link/20260313-lenovo-wmi-gamezone-remove-gz_chain_head-v1-1-ce5231f0c6fa@kernel.org
[ij: reorganized the changelog]
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:59 +02:00
Li RongQing
9f11d9b15e platform/x86: ISST: Check HWP support before MSR access
On some systems, HWP can be explicitly disabled in the BIOS settings
When HWP is disabled by firmware, the HWP CPUID bit is not set, and
attempting to read MSR_PM_ENABLE will result in a General Protection
(GP) fault.

  unchecked MSR access error: RDMSR from 0x770 at rIP: 0xffffffffc33db92e (disable_dynamic_sst_features+0xe/0x50 [isst_tpmi_core])
  Call Trace:
   <TASK>
   ? ex_handler_msr+0xf6/0x150
   ? fixup_exception+0x1ad/0x340
   ? gp_try_fixup_and_notify+0x1e/0xb0
   ? exc_general_protection+0xc9/0x390
   ? terminate_walk+0x64/0x100
   ? asm_exc_general_protection+0x22/0x30
   ? disable_dynamic_sst_features+0xe/0x50 [isst_tpmi_core]
   isst_if_def_ioctl+0xece/0x1050 [isst_tpmi_core]
   ? ioctl_has_perm.constprop.42+0xe0/0x130
   isst_if_def_ioctl+0x10d/0x1a0 [isst_if_common]
   __se_sys_ioctl+0x86/0xc0
   do_syscall_64+0x8a/0x100
   entry_SYSCALL_64_after_hwframe+0x78/0xe2
  RIP: 0033:0x7f36eaef54a7

Add a check for X86_FEATURE_HWP before accessing the MSR. If HWP is
not available, return true safely.

Fixes: 12a7d2cb81 ("platform/x86: ISST: Add SST-CP support via TPMI")
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20260303074635.2218-1-lirongqing@baidu.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:57 +02:00
Krishna Chomal
435da77396 platform/x86: hp-wmi: Add support for Omen 16-k0xxx (8A4D)
The HP Omen 16-k0xxx (board ID: 8A4D) has the same WMI interface as
other Victus S boards, but requires additional quirks for correctly
switching thermal profile.

Create a new quirk omen_v1_legacy_thermal_params which allows a board to
use Omen V1 thermal values, but rely on the older legacy
HP_OMEN_EC_THERMAL_PROFILE_OFFSET. Add the DMI board name to
victus_s_thermal_profile_boards[] table and map it to the newly added
quirk.

Testing on board 8A4D confirmed that platform profile is registered
successfully and fan RPMs are readable and controllable.

Tested-by: Qinfeng Wu <qwqgong@gmail.com>
Reported-by: Qinfeng Wu <qwqgong@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221150
Signed-off-by: Krishna Chomal <krishna.chomal108@gmail.com>
Link: https://patch.msgid.link/20260302073525.71037-1-krishna.chomal108@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:56 +02:00
Krishna Chomal
84d29bfd19 platform/x86: hp-wmi: Add support for Omen 16-wf1xxx (8C76)
The HP Omen 16-wf1xxx (board ID: 8C76) has the same WMI interface as
other Victus S boards, but requires quirks for correctly switching
thermal profile (similar to board 8C78).

Add the DMI board name to victus_s_thermal_profile_boards[] table and
map it to omen_v1_thermal_params.

Testing on board 8C76 confirmed that platform profile is registered
successfully and fan RPMs are readable and controllable.

Tested-by: WJ Enderlava <jie7172585@gmail.com>
Reported-by: WJ Enderlava <jie7172585@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221149
Signed-off-by: Krishna Chomal <krishna.chomal108@gmail.com>
Link: https://patch.msgid.link/20260227154106.226809-1-krishna.chomal108@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:54 +02:00
Raed
55b964dfba platform/x86: hp-wmi: Add Omen 16-xf0xxx (8BCA) support
The HP Omen 16-xf0xxx board 8BCA uses the same Victus-S fan and
thermal WMI path as other recently supported Omen/Victus boards,
but it requires Omen v1 thermal profile parameters for correct
platform profile behavior.

Add board 8BCA to victus_s_thermal_profile_boards[] and map it
to omen_v1_thermal_params.

Validated on HP Omen 16-xf0xxx (board 8BCA):
- /sys/firmware/acpi/platform_profile exposes
	low-power/balanced/performance
- fan RPM reporting works (fan1_input/fan2_input)
- manual fan control works through hp-wmi hwmon (pwm1/pwm1_enable)

Signed-off-by: Raed <thisisraed@outlook.com>
Link: https://patch.msgid.link/20260311131338.965249-1-youaretalkingtoraed@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:53 +02:00
Denis Benato
d2723918d5 platform/x86: asus-armoury: add support for G614FP
Add TDP data for laptop model G614FP.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Link: https://patch.msgid.link/20260309183559.433555-3-denis.benato@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:52 +02:00
Denis Benato
e0836f48e1 platform/x86: asus-armoury: add support for GA503QM
Add TDP data for laptop model GA503QM.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Link: https://patch.msgid.link/20260309183559.433555-2-denis.benato@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:50 +02:00
Denis Benato
93702ed64f MAINTAINERS: change email address of Denis Benato
I have been using a linux.dev email since that is hugely better than gmail.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Signed-off-by: Denis Benato <benato.denis96@gmail.com>
Link: https://patch.msgid.link/20260304141102.63732-1-denis.benato@linux.dev
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-23 14:48:29 +02:00
Alberto Garcia
734eba62cd PM: hibernate: Drain trailing zero pages on userspace restore
Commit 005e8dddd4 ("PM: hibernate: don't store zero pages in the
image file") added an optimization to skip zero-filled pages in the
hibernation image. On restore, zero pages are handled internally by
snapshot_write_next() in a loop that processes them without returning
to the caller.

With the userspace restore interface, writing the last non-zero page
to /dev/snapshot is followed by the SNAPSHOT_ATOMIC_RESTORE ioctl. At
this point there are no more calls to snapshot_write_next() so any
trailing zero pages are not processed, snapshot_image_loaded() fails
because handle->cur is smaller than expected, the ioctl returns -EPERM
and the image is not restored.

The in-kernel restore path is not affected by this because the loop in
load_image() in swap.c calls snapshot_write_next() until it returns 0.
It is this final call that drains any trailing zero pages.

Fixed by calling snapshot_write_next() in snapshot_write_finalize(),
giving the kernel the chance to drain any trailing zero pages.

Fixes: 005e8dddd4 ("PM: hibernate: don't store zero pages in the image file")
Signed-off-by: Alberto Garcia <berto@igalia.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Link: https://patch.msgid.link/ef5a7c5e3e3dbd17dcb20efaa0c53a47a23498bb.1773075892.git.berto@igalia.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-23 13:33:06 +01:00
Viresh Kumar
6a28fb8cb2 cpufreq: conservative: Reset requested_freq on limits change
A recently reported issue highlighted that the cached requested_freq
is not guaranteed to stay in sync with policy->cur. If the platform
changes the actual CPU frequency after the governor sets one (e.g.
due to platform-specific frequency scaling) and a re-sync occurs
later, policy->cur may diverge from requested_freq.

This can lead to incorrect behavior in the conservative governor.
For example, the governor may assume the CPU is already running at
the maximum frequency and skip further increases even though there
is still headroom.

Avoid this by resetting the cached requested_freq to policy->cur on
detecting a change in policy limits.

Reported-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Tested-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Link: https://lore.kernel.org/all/20260210115458.3493646-1-zhenglifeng1@huawei.com/
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Cc: All applicable <stable@vger.kernel.org>
Link: https://patch.msgid.link/d846a141a98ac0482f20560fcd7525c0f0ec2f30.1773999467.git.viresh.kumar@linaro.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-23 13:32:57 +01:00
Viresh Kumar
8f13c0c6cb cpufreq: Don't skip cpufreq_frequency_table_cpuinfo()
The commit 6db0f533d3 ("cpufreq: preserve freq_table_sorted
across suspend/hibernate") unintentionally made a change where
cpufreq_frequency_table_cpuinfo() isn't getting called anymore
for old policies getting re-initialized.

This leads to potentially invalid values of policy->max and
policy->cpuinfo_max_freq.

Fix the issue by reverting the original commit and adding the condition
for just the sorting function.

Fixes: 6db0f533d3 ("cpufreq: preserve freq_table_sorted across suspend/hibernate")
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 6.19+ <stable@vger.kernel.org> # 6.19+
Link: https://patch.msgid.link/65ba5c45749267c82e8a87af3dc788b37a0b3f48.1773998611.git.viresh.kumar@linaro.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-23 13:32:57 +01:00
Nikunj A Dadhania
05243d490b x86/cpu: Enable FSGSBASE early in cpu_init_exception_handling()
Move FSGSBASE enablement from identify_cpu() to cpu_init_exception_handling()
to ensure it is enabled before any exceptions can occur on both boot and
secondary CPUs.

== Background ==

Exception entry code (paranoid_entry()) uses ALTERNATIVE patching based on
X86_FEATURE_FSGSBASE to decide whether to use RDGSBASE/WRGSBASE instructions
or the slower RDMSR/SWAPGS sequence for saving/restoring GSBASE.

On boot CPU, ALTERNATIVE patching happens after enabling FSGSBASE in CR4.
When the feature is available, the code is permanently patched to use
RDGSBASE/WRGSBASE, which require CR4.FSGSBASE=1 to execute without triggering

== Boot Sequence ==

Boot CPU (with CR pinning enabled):
  trap_init()
    cpu_init()                   <- Uses unpatched code (RDMSR/SWAPGS)
      x2apic_setup()
  ...
  arch_cpu_finalize_init()
    identify_boot_cpu()
      identify_cpu()
        cr4_set_bits(X86_CR4_FSGSBASE)  # Enables the feature
	# This becomes part of cr4_pinned_bits
    ...
    alternative_instructions()   <- Patches code to use RDGSBASE/WRGSBASE

Secondary CPUs (with CR pinning enabled):
  start_secondary()
    cr4_init()                   <- Code already patched, CR4.FSGSBASE=1
                                    set implicitly via cr4_pinned_bits

    cpu_init()                   <- exceptions work because FSGSBASE is
                                    already enabled

Secondary CPU (with CR pinning disabled):
  start_secondary()
    cr4_init()                   <- Code already patched, CR4.FSGSBASE=0
    cpu_init()
      x2apic_setup()
        rdmsrq(MSR_IA32_APICBASE)  <- Triggers #VC in SNP guests
          exc_vmm_communication()
            paranoid_entry()       <- Uses RDGSBASE with CR4.FSGSBASE=0
                                      (patched code)
    ...
    ap_starting()
      identify_secondary_cpu()
        identify_cpu()
	  cr4_set_bits(X86_CR4_FSGSBASE)  <- Enables the feature, which is
                                             too late

== CR Pinning ==

Currently, for secondary CPUs, CR4.FSGSBASE is set implicitly through
CR-pinning: the boot CPU sets it during identify_cpu(), it becomes part of
cr4_pinned_bits, and cr4_init() applies those pinned bits to secondary CPUs.
This works but creates an undocumented dependency between cr4_init() and the
pinning mechanism.

== Problem ==

Secondary CPUs boot after alternatives have been applied globally. They
execute already-patched paranoid_entry() code that uses RDGSBASE/WRGSBASE
instructions, which require CR4.FSGSBASE=1. Upcoming changes to CR pinning
behavior will break the implicit dependency, causing secondary CPUs to
generate #UD.

This issue manifests itself on AMD SEV-SNP guests, where the rdmsrq() in
x2apic_setup() triggers a #VC exception early during cpu_init(). The #VC
handler (exc_vmm_communication()) executes the patched paranoid_entry() path.
Without CR4.FSGSBASE enabled, RDGSBASE instructions trigger #UD.

== Fix ==

Enable FSGSBASE explicitly in cpu_init_exception_handling() before loading
exception handlers. This makes the dependency explicit and ensures both
boot and secondary CPUs have FSGSBASE enabled before paranoid_entry()
executes.

Fixes: c82965f9e5 ("x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit")
Reported-by: Borislav Petkov <bp@alien8.de>
Suggested-by: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Cc: <stable@kernel.org>
Link: https://patch.msgid.link/20260318075654.1792916-2-nikunj@amd.com
2026-03-23 13:29:50 +01:00
Long Li
c6c56ff975 xfs: remove redundant validation in xlog_recover_attri_commit_pass2
Remove the redundant post-parse validation switch. By the time that
block is reached, xfs_attri_validate() has already guaranteed all name
lengths are non-zero via xfs_attri_validate_namelen(), and
xfs_attri_validate_name_iovec() has already returned -EFSCORRUPTED for
NULL names. For the REMOVE case, attr_value and value_len are
structurally guaranteed to be NULL/zero because the parsing loop only
populates them when value_len != 0. All checks in that switch are
therefore dead code.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-23 11:00:08 +01:00
Long Li
d72f2084e3 xfs: fix ri_total validation in xlog_recover_attri_commit_pass2
The ri_total checks for SET/REPLACE operations are hardcoded to 3,
but xfs_attri_item_size() only emits a value iovec when value_len > 0,
so ri_total is 2 when value_len == 0.

For PPTR_SET/PPTR_REMOVE/PPTR_REPLACE, value_len is validated by
xfs_attri_validate() to be exactly sizeof(struct xfs_parent_rec) and
is never zero, so their hardcoded checks remain correct.

This problem may cause log recovery failures. The following script can be
used to reproduce the problem:

 #!/bin/bash
 mkfs.xfs -f /dev/sda
 mount /dev/sda /mnt/test/
 touch /mnt/test/file
 for i in {1..200}; do
         attr -s "user.attr_$i" -V "value_$i" /mnt/test/file > /dev/null
 done
 echo 1 > /sys/fs/xfs/debug/larp
 echo 1 > /sys/fs/xfs/sda/errortag/larp
 attr -s "user.zero" -V "" /mnt/test/file
 echo 0 > /sys/fs/xfs/sda/errortag/larp
 umount /mnt/test
 mount /dev/sda /mnt/test/  # mount failed

Fix this by deriving the expected count dynamically as "2 + !!value_len"
for SET/REPLACE operations.

Cc: stable@vger.kernel.org # v6.9
Fixes: ad206ae50e ("xfs: check opcode and iovec count match in xlog_recover_attri_commit_pass2")
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-23 11:00:08 +01:00
Long Li
b854e1c4ef xfs: close crash window in attr dabtree inactivation
When inactivating an inode with node-format extended attributes,
xfs_attr3_node_inactive() invalidates all child leaf/node blocks via
xfs_trans_binval(), but intentionally does not remove the corresponding
entries from their parent node blocks.  The implicit assumption is that
xfs_attr_inactive() will truncate the entire attr fork to zero extents
afterwards, so log recovery will never reach the root node and follow
those stale pointers.

However, if a log shutdown occurs after the leaf/node block cancellations
commit but before the attr bmap truncation commits, this assumption
breaks.  Recovery replays the attr bmap intact (the inode still has
attr fork extents), but suppresses replay of all cancelled leaf/node
blocks, maybe leaving them as stale data on disk.  On the next mount,
xlog_recover_process_iunlinks() retries inactivation and attempts to
read the root node via the attr bmap. If the root node was not replayed,
reading the unreplayed root block triggers a metadata verification
failure immediately; if it was replayed, following its child pointers
to unreplayed child blocks triggers the same failure:

 XFS (pmem0): Metadata corruption detected at
 xfs_da3_node_read_verify+0x53/0x220, xfs_da3_node block 0x78
 XFS (pmem0): Unmount and run xfs_repair
 XFS (pmem0): First 128 bytes of corrupted metadata buffer:
 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 XFS (pmem0): metadata I/O error in "xfs_da_read_buf+0x104/0x190" at daddr 0x78 len 8 error 117

Fix this in two places:

In xfs_attr3_node_inactive(), after calling xfs_trans_binval() on a
child block, immediately remove the entry that references it from the
parent node in the same transaction.  This eliminates the window where
the parent holds a pointer to a cancelled block.  Once all children are
removed, the now-empty root node is converted to a leaf block within the
same transaction. This node-to-leaf conversion is necessary for crash
safety. If the system shutdown after the empty node is written to the
log but before the second-phase bmap truncation commits, log recovery
will attempt to verify the root block on disk. xfs_da3_node_verify()
does not permit a node block with count == 0; such a block will fail
verification and trigger a metadata corruption shutdown. on the other
hand, leaf blocks are allowed to have this transient state.

In xfs_attr_inactive(), split the attr fork truncation into two explicit
phases.  First, truncate all extents beyond the root block (the child
extents whose parent references have already been removed above).
Second, invalidate the root block and truncate the attr bmap to zero in
a single transaction.  The two operations in the second phase must be
atomic: as long as the attr bmap has any non-zero length, recovery can
follow it to the root block, so the root block invalidation must commit
together with the bmap-to-zero truncation.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-23 10:47:28 +01:00
Long Li
e65bb55d7f xfs: factor out xfs_attr3_leaf_init
Factor out wrapper xfs_attr3_leaf_init function, which exported for
external use.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Long Li <leo.lilong@huawei.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-23 10:47:28 +01:00
Long Li
ce4e789cf3 xfs: factor out xfs_attr3_node_entry_remove
Factor out wrapper xfs_attr3_node_entry_remove function, which
exported for external use.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Long Li <leo.lilong@huawei.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-23 10:47:28 +01:00
Long Li
e942498385 xfs: only assert new size for datafork during truncate extents
The assertion functions properly because we currently only truncate the
attr to a zero size. Any other new size of the attr is not preempted.
Make this assertion is specific to the datafork, preparing for
subsequent patches to truncate the attribute to a non-zero size.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Long Li <leo.lilong@huawei.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-23 10:47:27 +01:00
Ville Syrjälä
bfa71b7a9d drm/i915: Unlink NV12 planes earlier
unlink_nv12_plane() will clobber parts of the plane state
potentially already set up by plane_atomic_check(), so we
must make sure not to call the two in the wrong order.
The problem happens when a plane previously selected as
a Y plane is now configured as a normal plane by user space.
plane_atomic_check() will first compute the proper plane
state based on the userspace request, and unlink_nv12_plane()
later clears some of the state.

This used to work on account of unlink_nv12_plane() skipping
the state clearing based on the plane visibility. But I removed
that check, thinking it was an impossible situation. Now when
that situation happens unlink_nv12_plane() will just WARN
and proceed to clobber the state.

Rather than reverting to the old way of doing things, I think
it's more clear if we unlink the NV12 planes before we even
compute the new plane state.

Cc: stable@vger.kernel.org
Reported-by: Khaled Almahallawy <khaled.almahallawy@intel.com>
Closes: https://lore.kernel.org/intel-gfx/20260212004852.1920270-1-khaled.almahallawy@intel.com/
Tested-by: Khaled Almahallawy <khaled.almahallawy@intel.com>
Fixes: 6a01df2f1b ("drm/i915: Remove pointless visible check in unlink_nv12_plane()")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patch.msgid.link/20260316163953.12905-2-ville.syrjala@linux.intel.com
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
(cherry picked from commit 017ecd04985573eeeb0745fa2c23896fb22ee0cc)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-23 09:06:43 +02:00
Ville Syrjälä
6ad2a661ff drm/i915: Order OP vs. timeout correctly in __wait_for()
Put the barrier() before the OP so that anything we read out in
OP and check in COND will actually be read out after the timeout
has been evaluated.

Currently the only place where we use OP is __intel_wait_for_register(),
but the use there is precisely susceptible to this reordering, assuming
the ktime_*() stuff itself doesn't act as a sufficient barrier:

__intel_wait_for_register(...)
{
	...
	ret = __wait_for(reg_value = intel_uncore_read_notrace(...),
 			 (reg_value & mask) == value, ...);
	...
}

Cc: stable@vger.kernel.org
Fixes: 1c3c1dc66a ("drm/i915: Add compiler barrier to wait_for")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patch.msgid.link/20260313110740.24620-1-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit a464bace0482aa9a83e9aa7beefbaf44cd58e6cf)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-23 09:06:40 +02:00
Samasth Norway Ananda
08441f10f4 drm/i915/gmbus: fix spurious timeout on 512-byte burst reads
When reading exactly 512 bytes with burst read enabled, the
extra_byte_added path breaks out of the inner do-while without
decrementing len. The outer while(len) then re-enters and gmbus_wait()
times out since all data has been delivered. Decrement len before the
break so the outer loop terminates correctly.

Fixes: d5dc0f43f2 ("drm/i915/gmbus: Enable burst read")
Signed-off-by: Samasth Norway Ananda <samasth.norway.ananda@oracle.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patch.msgid.link/20260316231920.135438-2-samasth.norway.ananda@oracle.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit 4ab0f09ee73fc853d00466682635f67c531f909c)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-23 09:06:36 +02:00
Namjae Jeon
0e55f63dd0 ksmbd: replace hardcoded hdr2_len with offsetof() in smb2_calc_max_out_buf_len()
After this commit (e2b76ab8b5 "ksmbd: add support for read compound"),
response buffer management was changed to use dynamic iov array.
In the new design, smb2_calc_max_out_buf_len() expects the second
argument (hdr2_len) to be the offset of ->Buffer field in the
response structure, not a hardcoded magic number.
Fix the remaining call sites to use the correct offsetof() value.

Cc: stable@vger.kernel.org
Fixes: e2b76ab8b5 ("ksmbd: add support for read compound")
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-22 19:10:22 -05:00
Werner Kasselman
309b44ed68 ksmbd: fix memory leaks and NULL deref in smb2_lock()
smb2_lock() has three error handling issues after list_del() detaches
smb_lock from lock_list at no_check_cl:

1) If vfs_lock_file() returns an unexpected error in the non-UNLOCK
   path, goto out leaks smb_lock and its flock because the out:
   handler only iterates lock_list and rollback_list, neither of
   which contains the detached smb_lock.

2) If vfs_lock_file() returns -ENOENT in the UNLOCK path, goto out
   leaks smb_lock and flock for the same reason.  The error code
   returned to the dispatcher is also stale.

3) In the rollback path, smb_flock_init() can return NULL on
   allocation failure.  The result is dereferenced unconditionally,
   causing a kernel NULL pointer dereference.  Add a NULL check to
   prevent the crash and clean up the bookkeeping; the VFS lock
   itself cannot be rolled back without the allocation and will be
   released at file or connection teardown.

Fix cases 1 and 2 by hoisting the locks_free_lock()/kfree() to before
the if(!rc) check in the UNLOCK branch so all exit paths share one
free site, and by freeing smb_lock and flock before goto out in the
non-UNLOCK branch.  Propagate the correct error code in both cases.
Fix case 3 by wrapping the VFS unlock in an if(rlock) guard and adding
a NULL check for locks_free_lock(rlock) in the shared cleanup.

Found via call-graph analysis using sqry.

Fixes: e2f34481b2 ("cifsd: add server-side procedures for SMB3")
Cc: stable@vger.kernel.org
Suggested-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Werner Kasselman <werner@verivus.com>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-22 17:15:00 -05:00
Werner Kasselman
48623ec358 ksmbd: fix use-after-free and NULL deref in smb_grant_oplock()
smb_grant_oplock() has two issues in the oplock publication sequence:

1) opinfo is linked into ci->m_op_list (via opinfo_add) before
   add_lease_global_list() is called.  If add_lease_global_list()
   fails (kmalloc returns NULL), the error path frees the opinfo
   via __free_opinfo() while it is still linked in ci->m_op_list.
   Concurrent m_op_list readers (opinfo_get_list, or direct iteration
   in smb_break_all_levII_oplock) dereference the freed node.

2) opinfo->o_fp is assigned after add_lease_global_list() publishes
   the opinfo on the global lease list.  A concurrent
   find_same_lease_key() can walk the lease list and dereference
   opinfo->o_fp->f_ci while o_fp is still NULL.

Fix by restructuring the publication sequence to eliminate post-publish
failure:

- Set opinfo->o_fp before any list publication (fixes NULL deref).
- Preallocate lease_table via alloc_lease_table() before opinfo_add()
  so add_lease_global_list() becomes infallible after publication.
- Keep the original m_op_list publication order (opinfo_add before
  lease list) so concurrent opens via same_client_has_lease() and
  opinfo_get_list() still see the in-flight grant.
- Use opinfo_put() instead of __free_opinfo() on err_out so that
  the RCU-deferred free path is used.

This also requires splitting add_lease_global_list() to take a
preallocated lease_table and changing its return type from int to void,
since it can no longer fail.

Fixes: 1dfd062caa ("ksmbd: fix use-after-free by using call_rcu() for oplock_info")
Cc: stable@vger.kernel.org
Signed-off-by: Werner Kasselman <werner@verivus.com>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-22 17:15:00 -05:00
Hyunwoo Kim
9bbb19d21d ksmbd: do not expire session on binding failure
When a multichannel session binding request fails (e.g. wrong password),
the error path unconditionally sets sess->state = SMB2_SESSION_EXPIRED.
However, during binding, sess points to the target session looked up via
ksmbd_session_lookup_slowpath() -- which belongs to another connection's
user. This allows a remote attacker to invalidate any active session by
simply sending a binding request with a wrong password (DoS).

Fix this by skipping session expiration when the failed request was
a binding attempt, since the session does not belong to the current
connection. The reference taken by ksmbd_session_lookup_slowpath() is
still correctly released via ksmbd_user_session_put().

Cc: stable@vger.kernel.org
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-22 17:15:00 -05:00
Linus Torvalds
c369299895 Linux 7.0-rc5 2026-03-22 14:42:17 -07:00
Arnaldo Carvalho de Melo
493ad070cb tools headers: Synchronize linux/build_bug.h with the kernel sources
To pick up the changes in:

  6ffd853b0b ("build_bug.h: correct function parameters names in kernel-doc")

That just add some comments, addressing this perf tools build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/include/linux/build_bug.h include/linux/build_bug.h

Please take a look at tools/include/uapi/README for further info on this
synchronization process.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-22 18:34:39 -03:00
Arnaldo Carvalho de Melo
0a8b2a0857 tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources
To pick the changes in:

  e2ffe85b6d ("KVM: x86: Introduce KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM")

That just rebuilds kvm-stat.c on x86, no change in functionality.

This silences these perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h

Please see tools/include/uapi/README for further details.

Cc: Jim Mattson <jmattson@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-22 18:31:54 -03:00
Arnaldo Carvalho de Melo
3c71ae8ec9 tools headers UAPI: Sync linux/kvm.h with the kernel sources
To pick the changes in:

  da142f3d37 ("KVM: Remove subtle "struct kvm_stats_desc" pseudo-overlay")

That just rebuilds perf, as these patches don't add any new KVM ioctl to
be harvested for the 'perf trace' ioctl syscall argument beautifiers.

This addresses this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h

Please see tools/include/uapi/README for further details.

Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-22 18:31:54 -03:00
Arnaldo Carvalho de Melo
4ddd7588fa tools arch x86: Sync the msr-index.h copy with the kernel sources
To pick up the changes from these csets:

  9073428bb2 ("x86/sev: Allow IBPB-on-Entry feature for SNP guests")

That cause no changes to tooling as it doesn't include a new MSR to be
captured by the tools/perf/trace/beauty/tracepoints/x86_msr.sh script.

Just silences this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h

Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Kim Phillips <kim.phillips@amd.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-22 18:31:54 -03:00
Mikko Perttunen
ec69c9e883 i2c: tegra: Don't mark devices with pins as IRQ safe
I2C devices with associated pinctrl states (DPAUX I2C controllers)
will change pinctrl state during runtime PM. This requires taking
a mutex, so these devices cannot be marked as IRQ safe.

Add PINCTRL as dependency to avoid build errors.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reported-by: Russell King <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/all/E1vsNBv-00000009nfA-27ZK@rmk-PC.armlinux.org.uk/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-03-22 11:37:58 -07:00
Linus Torvalds
d5273fd3ca Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:

 - Fix how linked registers track zero extension of subregisters (Daniel
   Borkmann)

 - Fix unsound scalar fork for OR instructions (Daniel Wade)

 - Fix exception exit lock check for subprogs (Ihor Solodrai)

 - Fix undefined behavior in interpreter for SDIV/SMOD instructions
   (Jenny Guanni Qu)

 - Release module's BTF when module is unloaded (Kumar Kartikeya
   Dwivedi)

 - Fix constant blinding for PROBE_MEM32 instructions (Sachin Kumar)

 - Reset register ID for END instructions to prevent incorrect value
   tracking (Yazhou Tang)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Add a test cases for sync_linked_regs regarding zext propagation
  bpf: Fix sync_linked_regs regarding BPF_ADD_CONST32 zext propagation
  selftests/bpf: Add tests for maybe_fork_scalars() OR vs AND handling
  bpf: Fix unsound scalar forking in maybe_fork_scalars() for BPF_OR
  selftests/bpf: Add tests for sdiv32/smod32 with INT_MIN dividend
  bpf: Fix undefined behavior in interpreter sdiv/smod for INT_MIN
  selftests/bpf: Add tests for bpf_throw lock leak from subprogs
  bpf: Fix exception exit lock checking for subprogs
  bpf: Release module BTF IDR before module unload
  selftests/bpf: Fix pkg-config call on static builds
  bpf: Fix constant blinding for PROBE_MEM32 stores
  selftests/bpf: Add test for BPF_END register ID reset
  bpf: Reset register ID for BPF_END value tracking
2026-03-22 11:16:06 -07:00
Linus Torvalds
ac57fa9faf Merge tag 'trace-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing fixes from Steven Rostedt:

 - Revert "tracing: Remove pid in task_rename tracing output"

   A change was made to remove the pid field from the task_rename event
   because it was thought that it was always done for the current task
   and recording the pid would be redundant. This turned out to be
   incorrect and there are a few corner case where this is not true and
   caused some regressions in tooling.

 - Fix the reading from user space for migration

   The reading of user space uses a seq lock type of logic where it uses
   a per-cpu temporary buffer and disables migration, then enables
   preemption, does the copy from user space, disables preemption,
   enables migration and checks if there was any schedule switches while
   preemption was enabled. If there was a context switch, then it is
   considered that the per-cpu buffer could be corrupted and it tries
   again. There's a protection check that tests if it takes a hundred
   tries, it issues a warning and exits out to prevent a live lock.

   This was triggered because the task was selected by the load balancer
   to be migrated to another CPU, every time preemption is enabled the
   migration task would schedule in try to migrate the task but can't
   because migration is disabled and let it run again. This caused the
   scheduler to schedule out the task every time it enabled preemption
   and made the loop never exit (until the 100 iteration test
   triggered).

   Fix this by enabling and disabling preemption and keeping migration
   enabled if the reading from user space needs to be done again. This
   will let the migration thread migrate the task and the copy from user
   space will likely pass on the next iteration.

 - Fix trace_marker copy option freeing

   The "copy_trace_marker" option allows a tracing instance to get a
   copy of a write to the trace_marker file of the top level instance.
   This is managed by a link list protected by RCU. When an instance is
   removed, a check is made if the option is set, and if so
   synchronized_rcu() is called.

   The problem is that an iteration is made to reset all the flags to
   what they were when the instance was created (to perform clean ups)
   was done before the check of the copy_trace_marker option and that
   option was cleared, so the synchronize_rcu() was never called.

   Move the clearing of all the flags after the check of
   copy_trace_marker to do synchronize_rcu() so that the option is still
   set if it was before and the synchronization is performed.

 - Fix entries setting when validating the persistent ring buffer

   When validating the persistent ring buffer on boot up, the number of
   events per sub-buffer is added to the sub-buffer meta page. The
   validator was updating cpu_buffer->head_page (the first sub-buffer of
   the per-cpu buffer) and not the "head_page" variable that was
   iterating the sub-buffers. This was causing the first sub-buffer to
   be assigned the entries for each sub-buffer and not the sub-buffer
   that was supposed to be updated.

 - Use "hash" value to update the direct callers

   When updating the ftrace direct callers, it assigned a temporary
   callback to all the callback functions of the ftrace ops and not just
   the functions represented by the passed in hash. This causes an
   unnecessary slow down of the functions of the ftrace_ops that is not
   being modified. Only update the functions that are going to be
   modified to call the ftrace loop function so that the update can be
   made on those functions.

* tag 'trace-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  ftrace: Use hash argument for tmp_ops in update_ftrace_direct_mod
  ring-buffer: Fix to update per-subbuf entries of persistent ring buffer
  tracing: Fix trace_marker copy link list updates
  tracing: Fix failure to read user space from system call trace events
  tracing: Revert "tracing: Remove pid in task_rename tracing output"
2026-03-22 11:10:31 -07:00
Linus Torvalds
11ac4ce3f7 Merge tag 'i2c-for-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
Pull i2c fixes from Wolfram Sang:

 - fix broken I2C communication on Armada 3700 with recovery

 - fix device_node reference leak in probe (fsi)

 - fix NULL-deref when serial string is missing (cp2615)

* tag 'i2c-for-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux:
  i2c: pxa: defer reset on Armada 3700 when recovery is used
  i2c: fsi: Fix a potential leak in fsi_i2c_probe()
  i2c: cp2615: fix serial string NULL-deref at probe
2026-03-22 11:05:34 -07:00
Linus Torvalds
8d8bd2a5aa Merge tag 'x86-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:

 - Improve Qemu MCE-injection behavior by only using AMD SMCA MSRs if
   the feature bit is set

 - Fix the relative path of gettimeofday.c inclusion in vclock_gettime.c

 - Fix a boot crash on UV clusters when a socket is marked as
   'deconfigured' which are mapped to the SOCK_EMPTY node ID by
   the UV firmware, while Linux APIs expect NUMA_NO_NODE.

   The difference being (0xffff [unsigned short ~0]) vs [int -1]

* tag 'x86-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/platform/uv: Handle deconfigured sockets
  x86/entry/vdso: Fix path of included gettimeofday.c
  x86/mce/amd: Check SMCA feature bit before accessing SMCA MSRs
2026-03-22 10:54:12 -07:00
Linus Torvalds
ebfd9b7af2 Merge tag 'perf-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:

 - Fix a PMU driver crash on AMD EPYC systems, caused by
   a race condition in x86_pmu_enable()

 - Fix a possible counter-initialization bug in x86_pmu_enable()

 - Fix a counter inheritance bug in inherit_event() and
   __perf_event_read()

 - Fix an Intel PMU driver branch constraints handling bug
   found by UBSAN

 - Fix the Intel PMU driver's new Off-Module Response (OMR)
   support code for Diamond Rapids / Nova lake, to fix a snoop
   information parsing bug

* tag 'perf-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Fix OMR snoop information parsing issues
  perf/x86/intel: Add missing branch counters constraint apply
  perf: Make sure to use pmu_ctx->pmu for groups
  x86/perf: Make sure to program the counter value for stopped events on migration
  perf/x86: Move event pointer setup earlier in x86_pmu_enable()
2026-03-22 10:31:51 -07:00
Linus Torvalds
dea622e183 Merge tag 'objtool-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull objtool fixes from Ingo Molnar:
 "Fix three more livepatching related build environment bugs, and a
  false positive warning with Clang jump tables"

* tag 'objtool-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  objtool: Fix Clang jump table detection
  livepatch/klp-build: Fix inconsistent kernel version
  objtool/klp: fix mkstemp() failure with long paths
  objtool/klp: fix data alignment in __clone_symbol()
2026-03-22 10:17:50 -07:00
Linus Torvalds
d56d4a110f Merge tag 'locking-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking fix from Ingo Molnar:
 "Fix a sparse build error regression in <linux/local_lock_internal.h>
  caused by the locking context-analysis changes"

* tag 'locking-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  include/linux/local_lock_internal.h: Make this header file again compatible with sparse
2026-03-22 09:57:20 -07:00
Linus Torvalds
b5fddfad34 Merge tag 'irq-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq fix from Ingo Molnar:
 "Fix a mailbox channel leak in the riscv-rpmi-sysmsi irqchip driver"

* tag 'irq-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  irqchip/riscv-rpmi-sysmsi: Fix mailbox channel leak in rpmi_sysmsi_probe()
2026-03-22 09:55:58 -07:00
Luca Leonardo Scorcia
4cfdfeb6ac drm/mediatek: dsi: Store driver data before invoking mipi_dsi_host_register
The call to mipi_dsi_host_register triggers a callback to mtk_dsi_bind,
which uses dev_get_drvdata to retrieve the mtk_dsi struct, so this
structure needs to be stored inside the driver data before invoking it.

As drvdata is currently uninitialized it leads to a crash when
registering the DSI DRM encoder right after acquiring
the mode_config.idr_mutex, blocking all subsequent DRM operations.

Fixes the following crash during mediatek-drm probe (tested on Xiaomi
Smart Clock x04g):

Unable to handle kernel NULL pointer dereference at virtual address
 0000000000000040
[...]
Modules linked in: mediatek_drm(+) drm_display_helper cec drm_client_lib
 drm_dma_helper drm_kms_helper panel_simple
[...]
Call trace:
 drm_mode_object_add+0x58/0x98 (P)
 __drm_encoder_init+0x48/0x140
 drm_encoder_init+0x6c/0xa0
 drm_simple_encoder_init+0x20/0x34 [drm_kms_helper]
 mtk_dsi_bind+0x34/0x13c [mediatek_drm]
 component_bind_all+0x120/0x280
 mtk_drm_bind+0x284/0x67c [mediatek_drm]
 try_to_bring_up_aggregate_device+0x23c/0x320
 __component_add+0xa4/0x198
 component_add+0x14/0x20
 mtk_dsi_host_attach+0x78/0x100 [mediatek_drm]
 mipi_dsi_attach+0x2c/0x50
 panel_simple_dsi_probe+0x4c/0x9c [panel_simple]
 mipi_dsi_drv_probe+0x1c/0x28
 really_probe+0xc0/0x3dc
 __driver_probe_device+0x80/0x160
 driver_probe_device+0x40/0x120
 __device_attach_driver+0xbc/0x17c
 bus_for_each_drv+0x88/0xf0
 __device_attach+0x9c/0x1cc
 device_initial_probe+0x54/0x60
 bus_probe_device+0x34/0xa0
 device_add+0x5b0/0x800
 mipi_dsi_device_register_full+0xdc/0x16c
 mipi_dsi_host_register+0xc4/0x17c
 mtk_dsi_probe+0x10c/0x260 [mediatek_drm]
 platform_probe+0x5c/0xa4
 really_probe+0xc0/0x3dc
 __driver_probe_device+0x80/0x160
 driver_probe_device+0x40/0x120
 __driver_attach+0xc8/0x1f8
 bus_for_each_dev+0x7c/0xe0
 driver_attach+0x24/0x30
 bus_add_driver+0x11c/0x240
 driver_register+0x68/0x130
 __platform_register_drivers+0x64/0x160
 mtk_drm_init+0x24/0x1000 [mediatek_drm]
 do_one_initcall+0x60/0x1d0
 do_init_module+0x54/0x240
 load_module+0x1838/0x1dc0
 init_module_from_file+0xd8/0xf0
 __arm64_sys_finit_module+0x1b4/0x428
 invoke_syscall.constprop.0+0x48/0xc8
 do_el0_svc+0x3c/0xb8
 el0_svc+0x34/0xe8
 el0t_64_sync_handler+0xa0/0xe4
 el0t_64_sync+0x198/0x19c
Code: 52800022 941004ab 2a0003f3 37f80040 (29005a80)

Fixes: e4732b590a ("drm/mediatek: dsi: Register DSI host after acquiring clocks and PHY")
Signed-off-by: Luca Leonardo Scorcia <l.scorcia@gmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20260225094047.76780-1-l.scorcia@gmail.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
2026-03-22 14:15:44 +00:00
Song Liu
c7f27a8ab9 workqueue: Fix false positive stall reports
On weakly ordered architectures (e.g., arm64), the lockless check in
wq_watchdog_timer_fn() can observe a reordering between the worklist
insertion and the last_progress_ts update. Specifically, the watchdog
can see a non-empty worklist (from a list_add) while reading a stale
last_progress_ts value, causing a false positive stall report.

This was confirmed by reading pool->last_progress_ts again after holding
pool->lock in wq_watchdog_timer_fn():

  workqueue watchdog: pool 7 false positive detected!
    lockless_ts=4784580465 locked_ts=4785033728
    diff=453263ms worklist_empty=0

To avoid slowing down the hot path (queue_work, etc.), recheck
last_progress_ts with pool->lock held. This will eliminate the false
positive with minimal overhead.

Remove two extra empty lines in wq_watchdog_timer_fn() as we are on it.

Fixes: 82607adcf9 ("workqueue: implement lockup detector")
Cc: stable@vger.kernel.org # v4.5+
Assisted-by: claude-code:claude-opus-4-6
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-21 18:34:59 -10:00
Sergey Senozhatsky
b0377ee804 zram: do not slot_free() written-back slots
slot_free() basically completely resets the slots by clearing all of
its flags and attributes.  While zram_writeback_complete() restores
some of flags back (those that are necessary for async read
decompression) we still lose a lot of slot's metadata.  For example,
slot's ac-time, or ZRAM_INCOMPRESSIBLE.

More importantly, restoring flags/attrs requires extra attention as
some of the flags are directly affecting zram device stats.  And the
original code did not pay that attention.  Namely ZRAM_HUGE slots
handling in zram_writeback_complete().  The call to slot_free() would
decrement ->huge_pages, however when zram_writeback_complete() restored
the slot's ZRAM_HUGE flag, it would not get reflected in an incremented
->huge_pages.  So when the slot would finally get freed, slot_free()
would decrement ->huge_pages again, leading to underflow.

Fix this by open-coding the required memory free and stats updates in
zram_writeback_complete(), rather than calling the destructive
slot_free().  Since we now preserve the ZRAM_HUGE flag on written-back
slots (for the deferred decompression path), we also update slot_free()
to skip decrementing ->huge_pages if ZRAM_WB is set.

Link: https://lkml.kernel.org/r/20260320023143.2372879-1-senozhatsky@chromium.org
Link: https://lkml.kernel.org/r/20260319034912.1894770-1-senozhatsky@chromium.org
Fixes: d38fab605c ("zram: introduce compressed data writeback")
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Richard Chang <richardycc@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-21 17:36:33 -07:00
SeongJae Park
26f775a054 mm/damon/core: avoid use of half-online-committed context
One major usage of damon_call() is online DAMON parameters update.  It is
done by calling damon_commit_ctx() inside the damon_call() callback
function.  damon_commit_ctx() can fail for two reasons: 1) invalid
parameters and 2) internal memory allocation failures.  In case of
failures, the damon_ctx that attempted to be updated (commit destination)
can be partially updated (or, corrupted from a perspective), and therefore
shouldn't be used anymore.  The function only ensures the damon_ctx object
can safely deallocated using damon_destroy_ctx().

The API callers are, however, calling damon_commit_ctx() only after
asserting the parameters are valid, to avoid damon_commit_ctx() fails due
to invalid input parameters.  But it can still theoretically fail if the
internal memory allocation fails.  In the case, DAMON may run with the
partially updated damon_ctx.  This can result in unexpected behaviors
including even NULL pointer dereference in case of damos_commit_dests()
failure [1].  Such allocation failure is arguably too small to fail, so
the real world impact would be rare.  But, given the bad consequence, this
needs to be fixed.

Avoid such partially-committed (maybe-corrupted) damon_ctx use by saving
the damon_commit_ctx() failure on the damon_ctx object.  For this,
introduce damon_ctx->maybe_corrupted field.  damon_commit_ctx() sets it
when it is failed.  kdamond_call() checks if the field is set after each
damon_call_control->fn() is executed.  If it is set, ignore remaining
callback requests and return.  All kdamond_call() callers including
kdamond_fn() also check the maybe_corrupted field right after
kdamond_call() invocations.  If the field is set, break the kdamond_fn()
main loop so that DAMON sill doesn't use the context that might be
corrupted.

[sj@kernel.org: let kdamond_call() with cancel regardless of maybe_corrupted]
  Link: https://lkml.kernel.org/r/20260320031553.2479-1-sj@kernel.org
  Link: https://sashiko.dev/#/patchset/20260319145218.86197-1-sj%40kernel.org
Link: https://lkml.kernel.org/r/20260319145218.86197-1-sj@kernel.org
Link: https://lore.kernel.org/20260319043309.97966-1-sj@kernel.org [1]
Fixes: 3301f1861d ("mm/damon/sysfs: handle commit command using damon_call()")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[6.15+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-21 17:36:33 -07:00
Lorenzo Stoakes (Oracle)
3a206a8649 mm/rmap: clear vma->anon_vma on error
Commit 542eda1a83 ("mm/rmap: improve anon_vma_clone(),
unlink_anon_vmas() comments, add asserts") alters the way errors are
handled, but overlooked one important aspect of clean up.

When a VMA encounters an error state in anon_vma_clone() (that is, on
attempted allocation of anon_vma_chain objects), it cleans up partially
established state in cleanup_partial_anon_vmas(), before returning an
error.

However, this occurs prior to anon_vma->num_active_vmas being incremented,
and it also fails to clear the VMA's vma->anon_vma field, which remains in
place.

This is immediately an inconsistent state, because
anon_vma->num_active_vmas is supposed to track the number of VMAs whose
vma->anon_vma field references that anon_vma, and now that count is
off-by-negative-1 for each VMA for which this error state has occurred.

When VMAs are unlinked from this anon_vma, unlink_anon_vmas() will
eventually underflow anon_vma->num_active_vmas, which will trigger a
warning.

This will always eventually happen, as we unlink anon_vma's at process
teardown.

It could also cause maybe_reuse_anon_vma() to incorrectly permit the reuse
of an anon_vma which has active VMAs attached, which will lead to a
persistently invalid state.

The solution is to clear the VMA's anon_vma field when we clean up partial
state, as the fact we are doing so indicates clearly that the VMA is not
correctly integrated into the anon_vma tree and thus this field is
invalid.

Link: https://lkml.kernel.org/r/20260318122632.63404-1-ljs@kernel.org
Fixes: 542eda1a83 ("mm/rmap: improve anon_vma_clone(), unlink_anon_vmas() comments, add asserts")
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reported-by: Sasha Levin <sashal@kernel.org>
Closes: https://lore.kernel.org/linux-mm/20260302151547.2389070-1-sashal@kernel.org/
Reported-by: Jiakai Xu <jiakaipeanut@gmail.com>
Closes: https://lore.kernel.org/linux-mm/CAFb8wJvRhatRD-9DVmr5v5pixTMPEr3UKjYBJjCd09OfH55CKg@mail.gmail.com/
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Tested-by: Jiakai Xu <jiakaipeanut@gmail.com>
Acked-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Sasha Levin (Microsoft) <sashal@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-21 17:36:33 -07:00
Cheng-Yang Chou
db08b1940f sched_ext: Fix inconsistent NUMA node lookup in scx_select_cpu_dfl()
In the WAKE_SYNC path of scx_select_cpu_dfl(), waker_node was computed
with cpu_to_node(), while node (for prev_cpu) was computed with
scx_cpu_node_if_enabled(). When scx_builtin_idle_per_node is disabled,
idle_cpumask(waker_node) is called with a real node ID even though
per-node idle tracking is disabled, resulting in undefined behavior.

Fix by using scx_cpu_node_if_enabled() for waker_node as well, ensuring
both variables are computed consistently.

Fixes: 48849271e6 ("sched_ext: idle: Per-node idle cpumasks")
Cc: stable@vger.kernel.org # v6.15+
Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-21 14:22:37 -10:00
Linus Torvalds
d723091c8c Merge tag 'driver-core-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core
Pull driver core fixes from Danilo Krummrich:

 - Generalize driver_override in the driver core, providing a common
   sysfs implementation and concurrency-safe accessors for bus
   implementations

 - Do not use driver_override as IRQ name in the hwmon axi-fan driver

 - Remove an unnecessary driver_override check in sh platform_early

 - Migrate the platform bus to use the generic driver_override
   infrastructure, fixing a UAF condition caused by accessing the
   driver_override field without proper locking in the platform_match()
   callback

* tag 'driver-core-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core:
  driver core: platform: use generic driver_override infrastructure
  sh: platform_early: remove pdev->driver_override check
  hwmon: axi-fan: don't use driver_override as IRQ name
  docs: driver-model: document driver_override
  driver core: generalize driver_override in struct device
2026-03-21 16:59:09 -07:00
Jiri Olsa
50b35c9e50 ftrace: Use hash argument for tmp_ops in update_ftrace_direct_mod
The modify logic registers temporary ftrace_ops object (tmp_ops) to trigger
the slow path for all direct callers to be able to safely modify attached
addresses.

At the moment we use ops->func_hash for tmp_ops filter, which represents all
the systems attachments. It's faster to use just the passed hash filter, which
contains only the modified sites and is always a subset of the ops->func_hash.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Menglong Dong <menglong8.dong@gmail.com>
Cc: Song Liu <song@kernel.org>
Link: https://patch.msgid.link/20260312123738.129926-1-jolsa@kernel.org
Fixes: e93672f770 ("ftrace: Add update_ftrace_direct_mod function")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-21 16:51:04 -04:00
Masami Hiramatsu (Google)
f35dbac694 ring-buffer: Fix to update per-subbuf entries of persistent ring buffer
Since the validation loop in rb_meta_validate_events() updates the same
cpu_buffer->head_page->entries, the other subbuf entries are not updated.
Fix to use head_page to update the entries field, since it is the cursor
in this loop.

Cc: stable@vger.kernel.org
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Ian Rogers <irogers@google.com>
Fixes: 5f3b6e839f ("ring-buffer: Validate boot range memory events")
Link: https://patch.msgid.link/177391153882.193994.17158784065013676533.stgit@mhiramat.tok.corp.google.com
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-21 16:47:28 -04:00
Steven Rostedt
07183aac4a tracing: Fix trace_marker copy link list updates
When the "copy_trace_marker" option is enabled for an instance, anything
written into /sys/kernel/tracing/trace_marker is also copied into that
instances buffer. When the option is set, that instance's trace_array
descriptor is added to the marker_copies link list. This list is protected
by RCU, as all iterations uses an RCU protected list traversal.

When the instance is deleted, all the flags that were enabled are cleared.
This also clears the copy_trace_marker flag and removes the trace_array
descriptor from the list.

The issue is after the flags are called, a direct call to
update_marker_trace() is performed to clear the flag. This function
returns true if the state of the flag changed and false otherwise. If it
returns true here, synchronize_rcu() is called to make sure all readers
see that its removed from the list.

But since the flag was already cleared, the state does not change and the
synchronization is never called, leaving a possible UAF bug.

Move the clearing of all flags below the updating of the copy_trace_marker
option which then makes sure the synchronization is performed.

Also use the flag for checking the state in update_marker_trace() instead
of looking at if the list is empty.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20260318185512.1b6c7db4@gandalf.local.home
Fixes: 7b382efd5e ("tracing: Allow the top level trace_marker to write into another instances")
Reported-by: Sasha Levin <sashal@kernel.org>
Closes: https://lore.kernel.org/all/20260225133122.237275-1-sashal@kernel.org/
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-21 16:43:53 -04:00
Steven Rostedt
edca33a562 tracing: Fix failure to read user space from system call trace events
The system call trace events call trace_user_fault_read() to read the user
space part of some system calls. This is done by grabbing a per-cpu
buffer, disabling migration, enabling preemption, calling
copy_from_user(), disabling preemption, enabling migration and checking if
the task was preempted while preemption was enabled. If it was, the buffer
is considered corrupted and it tries again.

There's a safety mechanism that will fail out of this loop if it fails 100
times (with a warning). That warning message was triggered in some
pi_futex stress tests. Enabling the sched_switch trace event and
traceoff_on_warning, showed the problem:

 pi_mutex_hammer-1375    [006] d..21   138.981648: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981651: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981656: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981659: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981664: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981667: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981671: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981675: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981679: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981682: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981687: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981690: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981695: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981698: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981703: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981706: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981711: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981714: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981719: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981722: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981727: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981730: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95
 pi_mutex_hammer-1375    [006] d..21   138.981735: sched_switch: prev_comm=pi_mutex_hammer prev_pid=1375 prev_prio=95 prev_state=R+ ==> next_comm=migration/6 next_pid=47 next_prio=0
     migration/6-47      [006] d..2.   138.981738: sched_switch: prev_comm=migration/6 prev_pid=47 prev_prio=0 prev_state=S ==> next_comm=pi_mutex_hammer next_pid=1375 next_prio=95

What happened was the task 1375 was flagged to be migrated. When
preemption was enabled, the migration thread woke up to migrate that task,
but failed because migration for that task was disabled. This caused the
loop to fail to exit because the task scheduled out while trying to read
user space.

Every time the task enabled preemption the migration thread would schedule
in, try to migrate the task, fail and let the task continue. But because
the loop would only enable preemption with migration disabled, it would
always fail because each time it enabled preemption to read user space,
the migration thread would try to migrate it.

To solve this, when the loop fails to read user space without being
scheduled out, enabled and disable preemption with migration enabled. This
will allow the migration task to successfully migrate the task and the
next loop should succeed to read user space without being scheduled out.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20260316130734.1858a998@gandalf.local.home
Fixes: 64cf7d058a ("tracing: Have trace_marker use per-cpu data to read user space")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-21 16:42:36 -04:00
Xuewen Yan
a6f22e50c7 tracing: Revert "tracing: Remove pid in task_rename tracing output"
This reverts commit e3f6a42272.

The commit says that the tracepoint only deals with the current task,
however the following case is not current task:

comm_write() {
    p = get_proc_task(inode);
    if (!p)
        return -ESRCH;

    if (same_thread_group(current, p))
        set_task_comm(p, buffer);
}
where set_task_comm() calls __set_task_comm() which records
the update of p and not current.

So revert the patch to show pid.

Cc: <mhiramat@kernel.org>
Cc: <mathieu.desnoyers@efficios.com>
Cc: <elver@google.com>
Cc: <kees@kernel.org>
Link: https://patch.msgid.link/20260306075954.4533-1-xuewen.yan@unisoc.com
Fixes: e3f6a42272 ("tracing: Remove pid in task_rename tracing output")
Reported-by: Guohua Yan <guohua.yan@unisoc.com>
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-21 16:41:18 -04:00
Daniel Borkmann
4a04d13576 selftests/bpf: Add a test cases for sync_linked_regs regarding zext propagation
Add multiple test cases for linked register tracking with alu32 ops:

  - Add a test that checks sync_linked_regs() regarding reg->id (the linked
    target register) for BPF_ADD_CONST32 rather than known_reg->id (the
    branch register).

  - Add a test case for linked register tracking that exposes the cross-type
    sync_linked_regs() bug. One register uses alu32 (w7 += 1, BPF_ADD_CONST32)
    and another uses alu64 (r8 += 2, BPF_ADD_CONST64), both linked to the
    same base register.

  - Add a test case that exercises regsafe() path pruning when two execution
    paths reach the same program point with linked registers carrying
    different ADD_CONST flags (BPF_ADD_CONST32 from alu32 vs BPF_ADD_CONST64
    from alu64). This particular test passes with and without the fix since
    the pruning will fail due to different ranges, but it would still be
    useful to carry this one as a regression test for the unreachable div
    by zero.

With the fix applied all the tests pass:

  # LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t verifier_linked_scalars
  [...]
  ./test_progs -t verifier_linked_scalars
  #602/1   verifier_linked_scalars/scalars: find linked scalars:OK
  #602/2   verifier_linked_scalars/sync_linked_regs_preserves_id:OK
  #602/3   verifier_linked_scalars/scalars_neg:OK
  #602/4   verifier_linked_scalars/scalars_neg_sub:OK
  #602/5   verifier_linked_scalars/scalars_neg_alu32_add:OK
  #602/6   verifier_linked_scalars/scalars_neg_alu32_sub:OK
  #602/7   verifier_linked_scalars/scalars_pos:OK
  #602/8   verifier_linked_scalars/scalars_sub_neg_imm:OK
  #602/9   verifier_linked_scalars/scalars_double_add:OK
  #602/10  verifier_linked_scalars/scalars_sync_delta_overflow:OK
  #602/11  verifier_linked_scalars/scalars_sync_delta_overflow_large_range:OK
  #602/12  verifier_linked_scalars/scalars_alu32_big_offset:OK
  #602/13  verifier_linked_scalars/scalars_alu32_basic:OK
  #602/14  verifier_linked_scalars/scalars_alu32_wrap:OK
  #602/15  verifier_linked_scalars/scalars_alu32_zext_linked_reg:OK
  #602/16  verifier_linked_scalars/scalars_alu32_alu64_cross_type:OK
  #602/17  verifier_linked_scalars/scalars_alu32_alu64_regsafe_pruning:OK
  #602/18  verifier_linked_scalars/alu32_negative_offset:OK
  #602/19  verifier_linked_scalars/spurious_precision_marks:OK
  #602     verifier_linked_scalars:OK
  Summary: 1/19 PASSED, 0 SKIPPED, 0 FAILED

Co-developed-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260319211507.213816-2-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:19:40 -07:00
Daniel Borkmann
bc308be380 bpf: Fix sync_linked_regs regarding BPF_ADD_CONST32 zext propagation
Jenny reported that in sync_linked_regs() the BPF_ADD_CONST32 flag is
checked on known_reg (the register narrowed by a conditional branch)
instead of reg (the linked target register created by an alu32 operation).

Example case with reg:

  1. r6 = bpf_get_prandom_u32()
  2. r7 = r6 (linked, same id)
  3. w7 += 5 (alu32 -- r7 gets BPF_ADD_CONST32, zero-extended by CPU)
  4. if w6 < 0xFFFFFFFC goto safe (narrows r6 to [0xFFFFFFFC, 0xFFFFFFFF])
  5. sync_linked_regs() propagates to r7 but does NOT call zext_32_to_64()
  6. Verifier thinks r7 is [0x100000001, 0x100000004] instead of [1, 4]

Since known_reg above does not have BPF_ADD_CONST32 set above, zext_32_to_64()
is never called on alu32-derived linked registers. This causes the verifier
to track incorrect 64-bit bounds, while the CPU correctly zero-extends the
32-bit result.

The code checking known_reg->id was correct however (see scalars_alu32_wrap
selftest case), but the real fix needs to handle both directions - zext
propagation should be done when either register has BPF_ADD_CONST32, since
the linked relationship involves a 32-bit operation regardless of which
side has the flag.

Example case with known_reg (exercised also by scalars_alu32_wrap):

  1. r1 = r0; w1 += 0x100 (alu32 -- r1 gets BPF_ADD_CONST32)
  2. if r1 > 0x80 - known_reg = r1 (has BPF_ADD_CONST32), reg = r0 (doesn't)

Hence, fix it by checking for (reg->id | known_reg->id) & BPF_ADD_CONST32.

Moreover, sync_linked_regs() also has a soundness issue when two linked
registers used different ALU widths: one with BPF_ADD_CONST32 and the
other with BPF_ADD_CONST64. The delta relationship between linked registers
assumes the same arithmetic width though. When one register went through
alu32 (CPU zero-extends the 32-bit result) and the other went through
alu64 (no zero-extension), the propagation produces incorrect bounds.

Example:

  r6 = bpf_get_prandom_u32()     // fully unknown
  if r6 >= 0x100000000 goto out  // constrain r6 to [0, U32_MAX]
  r7 = r6
  w7 += 1                        // alu32: r7.id = N | BPF_ADD_CONST32
  r8 = r6
  r8 += 2                        // alu64: r8.id = N | BPF_ADD_CONST64
  if r7 < 0xFFFFFFFF goto out    // narrows r7 to [0xFFFFFFFF, 0xFFFFFFFF]

At the branch on r7, sync_linked_regs() runs with known_reg=r7
(BPF_ADD_CONST32) and reg=r8 (BPF_ADD_CONST64). The delta path
computes:

  r8 = r7 + (delta_r8 - delta_r7) = 0xFFFFFFFF + (2 - 1) = 0x100000000

Then, because known_reg->id has BPF_ADD_CONST32, zext_32_to_64(r8) is
called, truncating r8 to [0, 0]. But r8 used a 64-bit ALU op -- the
CPU does NOT zero-extend it. The actual CPU value of r8 is
0xFFFFFFFE + 2 = 0x100000000, not 0. The verifier now underestimates
r8's 64-bit bounds, which is a soundness violation.

Fix sync_linked_regs() by skipping propagation when the two registers
have mixed ALU widths (one BPF_ADD_CONST32, the other BPF_ADD_CONST64).

Lastly, fix regsafe() used for path pruning: the existing checks used
"& BPF_ADD_CONST" to test for offset linkage, which treated
BPF_ADD_CONST32 and BPF_ADD_CONST64 as equivalent.

Fixes: 7a433e5193 ("bpf: Support negative offsets, BPF_SUB, and alu32 for linked register tracking")
Reported-by: Jenny Guanni Qu <qguanni@gmail.com>
Co-developed-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260319211507.213816-1-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:19:40 -07:00
Alexei Starovoitov
06880982c6 Merge branch 'bpf-fix-unsound-scalar-forking-for-bpf_or'
Daniel Wade says:

====================
bpf: Fix unsound scalar forking for BPF_OR

maybe_fork_scalars() unconditionally sets the pushed path dst register
to 0 for both BPF_AND and BPF_OR.  For AND this is correct (0 & K == 0),
but for OR it is wrong (0 | K == K, not 0).  This causes the verifier to
track an incorrect value on the pushed path, leading to a verifier/runtime
divergence that allows out-of-bounds map value access.

v4: Use block comment style for multi-line comments in selftests (Amery Hung)
    Add Reviewed-by/Acked-by tags
v3: Use single-line comment style in selftests (Alexei Starovoitov)
v2: Use push_stack(env, env->insn_idx, ...) to re-execute the insn
    on the pushed path (Eduard Zingerman)
====================

Link: https://patch.msgid.link/20260314021521.128361-1-danjwade95@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:14:29 -07:00
Daniel Wade
0ad1734cc5 selftests/bpf: Add tests for maybe_fork_scalars() OR vs AND handling
Add three test cases to verifier_bounds.c to verify that
maybe_fork_scalars() correctly tracks register values for BPF_OR
operations with constant source operands:

1. or_scalar_fork_rejects_oob: After ARSH 63 + OR 8, the pushed
   path should have dst = 8. With value_size = 8, accessing
   map_value + 8 is out of bounds and must be rejected.

2. and_scalar_fork_still_works: Regression test ensuring AND
   forking continues to work. ARSH 63 + AND 4 produces pushed
   dst = 0 and current dst = 4, both within value_size = 8.

3. or_scalar_fork_allows_inbounds: After ARSH 63 + OR 4, the
   pushed path has dst = 4, which is within value_size = 8
   and should be accepted.

These tests exercise the fix in the previous patch, which makes the
pushed path re-execute the ALU instruction so it computes the correct
result for BPF_OR.

Signed-off-by: Daniel Wade <danjwade95@gmail.com>
Reviewed-by: Amery Hung <ameryhung@gmail.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260314021521.128361-3-danjwade95@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:14:28 -07:00
Daniel Wade
c845894ebd bpf: Fix unsound scalar forking in maybe_fork_scalars() for BPF_OR
maybe_fork_scalars() is called for both BPF_AND and BPF_OR when the
source operand is a constant.  When dst has signed range [-1, 0], it
forks the verifier state: the pushed path gets dst = 0, the current
path gets dst = -1.

For BPF_AND this is correct: 0 & K == 0.
For BPF_OR this is wrong:    0 | K == K, not 0.

The pushed path therefore tracks dst as 0 when the runtime value is K,
producing an exploitable verifier/runtime divergence that allows
out-of-bounds map access.

Fix this by passing env->insn_idx (instead of env->insn_idx + 1) to
push_stack(), so the pushed path re-executes the ALU instruction with
dst = 0 and naturally computes the correct result for any opcode.

Fixes: bffacdb80b ("bpf: Recognize special arithmetic shift in the verifier")
Signed-off-by: Daniel Wade <danjwade95@gmail.com>
Reviewed-by: Amery Hung <ameryhung@gmail.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260314021521.128361-2-danjwade95@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:14:28 -07:00
Alexei Starovoitov
1abd3feb36 Merge branch 'bpf-fix-abs-int_min-undefined-behavior-in-interpreter-sdiv-smod'
Jenny Guanni Qu says:

====================
bpf: Fix abs(INT_MIN) undefined behavior in interpreter sdiv/smod

The BPF interpreter's signed 32-bit division and modulo handlers use
abs() on s32 operands, which is undefined for S32_MIN. This causes
the interpreter to compute wrong results, creating a mismatch with
the verifier's range tracking.

For example, INT_MIN / 2 returns 0x40000000 instead of the correct
0xC0000000. The verifier tracks the correct range, so a crafted BPF
program can exploit the mismatch for out-of-bounds map value access
(confirmed by KASAN).

Patch 1 introduces abs_s32() which handles S32_MIN correctly and
replaces all 8 abs((s32)...) call sites. s32 is the only affected
case -- the s64 handlers do not use abs().

Patch 2 adds selftests covering sdiv32 and smod32 with INT_MIN
dividend to prevent regression.

Changes since v4:
  - Renamed __safe_abs32() to abs_s32() and dropped inline keyword
    per Alexei Starovoitov's feedback

Changes since v3:
  - Fixed stray blank line deletion in the file header
  - Improved comment per Yonghong Song's suggestion
  - Added JIT vs interpreter context to selftest commit message

Changes since v2:
  - Simplified to use -(u32)x per Mykyta Yatsenko's suggestion

Changes since v1:
  - Moved helper above kerneldoc comment block to fix build warnings
====================

Link: https://patch.msgid.link/20260311011116.2108005-1-qguanni@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:12:17 -07:00
Jenny Guanni Qu
4ac95c65ef selftests/bpf: Add tests for sdiv32/smod32 with INT_MIN dividend
Add tests to verify that signed 32-bit division and modulo operations
produce correct results when the dividend is INT_MIN (0x80000000).

The bug fixed in the previous commit only affects the BPF interpreter
path. When JIT is enabled (the default on most architectures), the
native CPU division instruction produces the correct result and these
tests pass regardless. With bpf_jit_enable=0, the interpreter is used
and without the previous fix, INT_MIN / 2 incorrectly returns
0x40000000 instead of 0xC0000000 due to abs(S32_MIN) undefined
behavior, causing these tests to fail.

Test cases:
  - SDIV32 INT_MIN / 2 = -1073741824 (imm and reg divisor)
  - SMOD32 INT_MIN % 2 = 0 (positive and negative divisor)

Reviewed-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Jenny Guanni Qu <qguanni@gmail.com>
Link: https://lore.kernel.org/r/20260311011116.2108005-3-qguanni@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:12:17 -07:00
Jenny Guanni Qu
c77b30bd1d bpf: Fix undefined behavior in interpreter sdiv/smod for INT_MIN
The BPF interpreter's signed 32-bit division and modulo handlers use
the kernel abs() macro on s32 operands. The abs() macro documentation
(include/linux/math.h) explicitly states the result is undefined when
the input is the type minimum. When DST contains S32_MIN (0x80000000),
abs((s32)DST) triggers undefined behavior and returns S32_MIN unchanged
on arm64/x86. This value is then sign-extended to u64 as
0xFFFFFFFF80000000, causing do_div() to compute the wrong result.

The verifier's abstract interpretation (scalar32_min_max_sdiv) computes
the mathematically correct result for range tracking, creating a
verifier/interpreter mismatch that can be exploited for out-of-bounds
map value access.

Introduce abs_s32() which handles S32_MIN correctly by casting to u32
before negating, avoiding signed overflow entirely. Replace all 8
abs((s32)...) call sites in the interpreter's sdiv32/smod32 handlers.

s32 is the only affected case -- the s64 division/modulo handlers do
not use abs().

Fixes: ec0e2da95f ("bpf: Support new signed div/mod instructions.")
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Signed-off-by: Jenny Guanni Qu <qguanni@gmail.com>
Link: https://lore.kernel.org/r/20260311011116.2108005-2-qguanni@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 13:12:16 -07:00
Ihor Solodrai
a1e5c46eae selftests/bpf: Add tests for bpf_throw lock leak from subprogs
Add test cases to ensure the verifier correctly rejects bpf_throw from
subprogs when RCU, preempt, or IRQ locks are held:

  * reject_subprog_rcu_lock_throw: subprog acquires bpf_rcu_read_lock and
    then calls bpf_throw
  * reject_subprog_throw_preempt_lock: always-throwing subprog called while
    caller holds bpf_preempt_disable
  * reject_subprog_throw_irq_lock: always-throwing subprog called while
    caller holds bpf_local_irq_save

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260320000809.643798-2-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 12:51:44 -07:00
Ihor Solodrai
6c2128505f bpf: Fix exception exit lock checking for subprogs
process_bpf_exit_full() passes check_lock = !curframe to
check_resource_leak(), which is false in cases when bpf_throw() is
called from a static subprog. This makes check_resource_leak() to skip
validation of active_rcu_locks, active_preempt_locks, and
active_irq_id on exception exits from subprogs.

At runtime bpf_throw() unwinds the stack via ORC without releasing any
user-acquired locks, which may cause various issues as the result.

Fix by setting check_lock = true for exception exits regardless of
curframe, since exceptions bypass all intermediate frame
cleanup. Update the error message prefix to "bpf_throw" for exception
exits to distinguish them from normal BPF_EXIT.

Fix reject_subprog_with_rcu_read_lock test which was previously
passing for the wrong reason. Test program returned directly from the
subprog call without closing the RCU section, so the error was
triggered by the unclosed RCU lock on normal exit, not by
bpf_throw. Update __msg annotations for affected tests to match the
new "bpf_throw" error prefix.

The spin_lock case is not affected because they are already checked [1]
at the call site in do_check_insn() before bpf_throw can run.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/bpf/verifier.c?h=v7.0-rc4#n21098

Assisted-by: Claude:claude-opus-4-6
Fixes: f18b03faba ("bpf: Implement BPF exceptions")
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260320000809.643798-1-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-21 12:51:44 -07:00
Wolfram Sang
2f42e85622 Merge tag 'i2c-host-fixes-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux into i2c/for-current
i2c-fixes for v7.0-rc5

pxa: fix broken I2C communication on Armada 3700 with recovery
fsi: fix device_node reference leak in probe
cp2615: fix NULL-deref when serial string is missing
2026-03-21 19:52:12 +01:00
Linus Torvalds
113ae7b4de Merge tag 'hwmon-for-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
Pull hwmon fixes from Guenter Roeck:

 - max6639: Fix pulses-per-revolution implementation

 - Several PMBus drivers: Add missing error checks

* tag 'hwmon-for-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging:
  hwmon: (max6639) Fix pulses-per-revolution implementation
  hwmon: (pmbus/isl68137) Fix unchecked return value and use sysfs_emit()
  hwmon: (pmbus/ina233) Add error check for pmbus_read_word_data() return value
  hwmon: (pmbus/mp2869) Check pmbus_read_byte_data() before using its return value
  hwmon: (pmbus/mp2975) Add error check for pmbus_read_word_data() return value
  hwmon: (pmbus/hac300s) Add error check for pmbus_read_word_data() return value
2026-03-21 09:09:51 -07:00
Linus Torvalds
55d55b97c7 Merge tag 'bootconfig-fixes-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull bootconfig fixes from Masami Hiramatsu:

 - Check error code of xbc_init_node() in override value path in
   xbc_parse_kv()

 - Fix fd leak in load_xbc_file() on fstat failure

* tag 'bootconfig-fixes-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
  lib/bootconfig: check xbc_init_node() return in override path
2026-03-21 08:46:13 -07:00
Linus Torvalds
8991448e56 Merge tag 'for-7.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
 "Another batch of fixes for problems that have been identified by tools
  analyzing code or by fuzzing. Most of them are short, two patches fix
  the same thing in many places so the diffs are bigger.

   - handle potential NULL pointer errors after attempting to read
     extent and checksum trees

   - prevent ENOSPC when creating many qgroups by ioctls in the same
     transaction

   - encoded write ioctl fixes (with 64K page and 4K block size):
       - fix unexpected bio length
       - do not let compressed bios and pages interfere with page cache

   - compression fixes on setups with 64K page and 4K block size: fix
     folio length assertions (zstd and lzo)

   - remap tree fixes:
       - make sure to hold block group reference while moving it
       - handle early exit when moving block group to unused list

   - handle deleted subvolumes with inconsistent state of deletion
     progress"

* tag 'for-7.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: reject root items with drop_progress and zero drop_level
  btrfs: check block group before marking it unused in balance_remap_chunks()
  btrfs: hold block group reference during entire move_existing_remap()
  btrfs: fix an incorrect ASSERT() condition inside lzo_decompress_bio()
  btrfs: fix an incorrect ASSERT() condition inside zstd_decompress_bio()
  btrfs: do not touch page cache for encoded writes
  btrfs: fix a bug that makes encoded write bio larger than expected
  btrfs: reserve enough transaction items for qgroup ioctls
  btrfs: check for NULL root after calls to btrfs_csum_root()
  btrfs: check for NULL root after calls to btrfs_extent_root()
2026-03-21 08:42:17 -07:00
Zubin Mithra
c3fd16c3b9 virt: tdx-guest: Fix handling of host controlled 'quote' buffer length
Validate host controlled value `quote_buf->out_len` that determines how
many bytes of the quote are copied out to guest userspace. In TDX
environments with remote attestation, quotes are not considered private,
and can be forwarded to an attestation server.

Catch scenarios where the host specifies a response length larger than
the guest's allocation, or otherwise races modifying the response while
the guest consumes it.

This prevents contents beyond the pages allocated for `quote_buf`
(up to TSM_REPORT_OUTBLOB_MAX) from being read out to guest userspace,
and possibly forwarded in attestation requests.

Recall that some deployments want per-container configs-tsm-report
interfaces, so the leak may cross container protection boundaries, not
just local root.

Fixes: f4738f56d1 ("virt: tdx-guest: Add Quote generation support using TSM_REPORTS")
Cc: stable@vger.kernel.org
Signed-off-by: Zubin Mithra <zsm@google.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Kiryl Shutsemau (Meta) <kas@kernel.org>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2026-03-20 21:05:50 -07:00
Jakub Kicinski
bc0151c59e Merge branch 'net-bcmasp-fix-issues-during-driver-unbind'
Justin Chen says:

====================
net: bcmasp: Fix issues during driver unbind

Fix two issues when we unbind the driver. We hit a double free of the
WoL irq and a double disable of the clk.
====================

Link: https://patch.msgid.link/20260319234813.1937315-1-justin.chen@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 19:07:33 -07:00
Justin Chen
27dfe9030a net: bcmasp: fix double disable of clk
Switch to devm_clk_get_optional() so we can manage the clock ourselves.
We dynamically control the clocks depending on the state of the interface
for power savings. The default state is clock disabled, so unbinding the
driver causes a double disable.

Fixes: 490cb41200 ("net: bcmasp: Add support for ASP2.0 Ethernet controller")
Signed-off-by: Justin Chen <justin.chen@broadcom.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20260319234813.1937315-3-justin.chen@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 19:07:28 -07:00
Justin Chen
cbfa5be2bf net: bcmasp: fix double free of WoL irq
We do not need to free wol_irq since it was instantiated with
devm_request_irq(). So devres will free for us.

Fixes: a2f0751206 ("net: bcmasp: Add support for WoL magic packet")
Signed-off-by: Justin Chen <justin.chen@broadcom.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20260319234813.1937315-2-justin.chen@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 19:07:28 -07:00
Jakub Kicinski
7a4fc5ca79 Merge branch 'rtnetlink-add-missing-attributes-in-if_nlmsg_size'
Sabrina Dubroca says:

====================
rtnetlink: add missing attributes in if_nlmsg_size

Once again we have some attributes added by rtnl_fill_ifinfo() that
aren't counted in if_nlmsg_size().
====================

Link: https://patch.msgid.link/cover.1773919462.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 19:05:15 -07:00
Sabrina Dubroca
ee00a12593 rtnetlink: count IFLA_INFO_SLAVE_KIND in if_nlmsg_size
rtnl_link_get_slave_info_data_size counts IFLA_INFO_SLAVE_DATA, but
rtnl_link_slave_info_fill adds both IFLA_INFO_SLAVE_DATA and
IFLA_INFO_SLAVE_KIND.

Fixes: ba7d49b1f0 ("rtnetlink: provide api for getting and setting slave info")
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/049843b532e23cde7ddba263c0bbe35ba6f0d26d.1773919462.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 19:05:12 -07:00
Sabrina Dubroca
52501989c7 rtnetlink: count IFLA_PARENT_DEV_{NAME,BUS_NAME} in if_nlmsg_size
Commit 00e77ed8e6 ("rtnetlink: add IFLA_PARENT_[DEV|DEV_BUS]_NAME")
added those attributes to rtnl_fill_ifinfo, but forgot to extend
if_nlmsg_size.

Fixes: 00e77ed8e6 ("rtnetlink: add IFLA_PARENT_[DEV|DEV_BUS]_NAME")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/0b849da95562af45487080528d60f578636aba5c.1773919462.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 19:05:11 -07:00
Qi Tang
24dd586bb4 net/smc: fix double-free of smc_spd_priv when tee() duplicates splice pipe buffer
smc_rx_splice() allocates one smc_spd_priv per pipe_buffer and stores
the pointer in pipe_buffer.private.  The pipe_buf_operations for these
buffers used .get = generic_pipe_buf_get, which only increments the page
reference count when tee(2) duplicates a pipe buffer.  The smc_spd_priv
pointer itself was not handled, so after tee() both the original and the
cloned pipe_buffer share the same smc_spd_priv *.

When both pipes are subsequently released, smc_rx_pipe_buf_release() is
called twice against the same object:

  1st call: kfree(priv)  sock_put(sk)  smc_rx_update_cons()  [correct]
  2nd call: kfree(priv)  sock_put(sk)  smc_rx_update_cons()  [UAF]

KASAN reports a slab-use-after-free in smc_rx_pipe_buf_release(), which
then escalates to a NULL-pointer dereference and kernel panic via
smc_rx_update_consumer() when it chases the freed priv->smc pointer:

  BUG: KASAN: slab-use-after-free in smc_rx_pipe_buf_release+0x78/0x2a0
  Read of size 8 at addr ffff888004a45740 by task smc_splice_tee_/74
  Call Trace:
   <TASK>
   dump_stack_lvl+0x53/0x70
   print_report+0xce/0x650
   kasan_report+0xc6/0x100
   smc_rx_pipe_buf_release+0x78/0x2a0
   free_pipe_info+0xd4/0x130
   pipe_release+0x142/0x160
   __fput+0x1c6/0x490
   __x64_sys_close+0x4f/0x90
   do_syscall_64+0xa6/0x1a0
   entry_SYSCALL_64_after_hwframe+0x77/0x7f
   </TASK>

  BUG: kernel NULL pointer dereference, address: 0000000000000020
  RIP: 0010:smc_rx_update_consumer+0x8d/0x350
  Call Trace:
   <TASK>
   smc_rx_pipe_buf_release+0x121/0x2a0
   free_pipe_info+0xd4/0x130
   pipe_release+0x142/0x160
   __fput+0x1c6/0x490
   __x64_sys_close+0x4f/0x90
   do_syscall_64+0xa6/0x1a0
   entry_SYSCALL_64_after_hwframe+0x77/0x7f
   </TASK>
  Kernel panic - not syncing: Fatal exception

Beyond the memory-safety problem, duplicating an SMC splice buffer is
semantically questionable: smc_rx_update_cons() would advance the
consumer cursor twice for the same data, corrupting receive-window
accounting.  A refcount on smc_spd_priv could fix the double-free, but
the cursor-accounting issue would still need to be addressed separately.

The .get callback is invoked by both tee(2) and splice_pipe_to_pipe()
for partial transfers; both will now return -EFAULT.  Users who need
to duplicate SMC socket data must use a copy-based read path.

Fixes: 9014db202c ("smc: add support for splice()")
Signed-off-by: Qi Tang <tpluszz77@gmail.com>
Link: https://patch.msgid.link/20260318064847.23341-1-tpluszz77@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 18:59:30 -07:00
546b68ac89 openvswitch: validate MPLS set/set_masked payload length
validate_set() accepted OVS_KEY_ATTR_MPLS as variable-sized payload for
SET/SET_MASKED actions. In action handling, OVS expects fixed-size
MPLS key data (struct ovs_key_mpls).

Use the already normalized key_len (masked case included) and reject
non-matching MPLS action key sizes.

Reject invalid MPLS action payload lengths early.

Fixes: fbdcdd78da ("Change in Openvswitch to support MPLS label depth of 3 in ingress direction")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Tested-by: Ao Zhou <n05ec@lzu.edu.cn>
Co-developed-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Yang Yang <n05ec@lzu.edu.cn>
Reviewed-by: Ilya Maximets <i.maximets@ovn.org>
Link: https://patch.msgid.link/20260319080228.3423307-1-n05ec@lzu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 18:37:31 -07:00
6931d21f87 openvswitch: defer tunnel netdev_put to RCU release
ovs_netdev_tunnel_destroy() may run after NETDEV_UNREGISTER already
detached the device. Dropping the netdev reference in destroy can race
with concurrent readers that still observe vport->dev.

Do not release vport->dev in ovs_netdev_tunnel_destroy(). Instead, let
vport_netdev_free() drop the reference from the RCU callback, matching
the non-tunnel destroy path and avoiding additional synchronization
under RTNL.

Fixes: a9020fde67 ("openvswitch: Move tunnel destroy function to oppenvswitch module.")
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Tested-by: Ao Zhou <n05ec@lzu.edu.cn>
Co-developed-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Suggested-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Yang Yang <n05ec@lzu.edu.cn>
Reviewed-by: Ilya Maximets <i.maximets@ovn.org>
Link: https://patch.msgid.link/20260319074241.3405262-1-n05ec@lzu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 18:36:29 -07:00
Jakub Kicinski
e069034bd6 Merge branch 'net-macb-fix-two-lock-warnings-when-wol-is-used'
Kevin Hao says:

====================
net: macb: Fix two lock warnings when WOL is used

This patch series addresses two lock warnings that occur when using WOL as a
wakeup source on my AMD ZynqMP board.
====================

Link: https://patch.msgid.link/20260318-macb-irq-v2-0-f1179768ab24@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 18:25:03 -07:00
Kevin Hao
baa35a698c net: macb: Protect access to net_device::ip_ptr with RCU lock
Access to net_device::ip_ptr and its associated members must be
protected by an RCU lock. Since we are modifying this piece of code,
let's also move it to execute only when WAKE_ARP is enabled.

To minimize the duration of the RCU lock, a local variable is used to
temporarily store the IP address. This change resolves the following
RCU check warning:
  WARNING: suspicious RCU usage
  7.0.0-rc3-next-20260310-yocto-standard+ #122 Not tainted
  -----------------------------
  drivers/net/ethernet/cadence/macb_main.c:5944 suspicious rcu_dereference_check() usage!

  other info that might help us debug this:

  rcu_scheduler_active = 2, debug_locks = 1
  5 locks held by rtcwake/518:
   #0: ffff000803ab1408 (sb_writers#5){.+.+}-{0:0}, at: vfs_write+0xf8/0x368
   #1: ffff0008090bf088 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0xbc/0x1c8
   #2: ffff00080098d588 (kn->active#70){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xcc/0x1c8
   #3: ffff800081c84888 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0x1ec/0x290
   #4: ffff0008009ba0f8 (&dev->mutex){....}-{4:4}, at: device_suspend+0x118/0x4f0

  stack backtrace:
  CPU: 3 UID: 0 PID: 518 Comm: rtcwake Not tainted 7.0.0-rc3-next-20260310-yocto-standard+ #122 PREEMPT
  Hardware name: ZynqMP ZCU102 Rev1.1 (DT)
  Call trace:
   show_stack+0x24/0x38 (C)
   __dump_stack+0x28/0x38
   dump_stack_lvl+0x64/0x88
   dump_stack+0x18/0x24
   lockdep_rcu_suspicious+0x134/0x1d8
   macb_suspend+0xd8/0x4c0
   device_suspend+0x218/0x4f0
   dpm_suspend+0x244/0x3a0
   dpm_suspend_start+0x50/0x78
   suspend_devices_and_enter+0xec/0x560
   pm_suspend+0x194/0x290
   state_store+0x110/0x158
   kobj_attr_store+0x1c/0x30
   sysfs_kf_write+0xa8/0xd0
   kernfs_fop_write_iter+0x11c/0x1c8
   vfs_write+0x248/0x368
   ksys_write+0x7c/0xf8
   __arm64_sys_write+0x28/0x40
   invoke_syscall+0x4c/0xe8
   el0_svc_common+0x98/0xf0
   do_el0_svc+0x28/0x40
   el0_svc+0x54/0x1e0
   el0t_64_sync_handler+0x84/0x130
   el0t_64_sync+0x198/0x1a0

Fixes: 0cb8de39a7 ("net: macb: Add ARP support to WOL")
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Cc: stable@vger.kernel.org
Reviewed-by: Théo Lebrun <theo.lebrun@bootlin.com>
Link: https://patch.msgid.link/20260318-macb-irq-v2-2-f1179768ab24@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 18:25:01 -07:00
Kevin Hao
317e49358e net: macb: Move devm_{free,request}_irq() out of spin lock area
The devm_free_irq() and devm_request_irq() functions should not be
executed in an atomic context.

During device suspend, all userspace processes and most kernel threads
are frozen. Additionally, we flush all tx/rx status, disable all macb
interrupts, and halt rx operations. Therefore, it is safe to split the
region protected by bp->lock into two independent sections, allowing
devm_free_irq() and devm_request_irq() to run in a non-atomic context.
This modification resolves the following lockdep warning:
  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:591
  in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 501, name: rtcwake
  preempt_count: 1, expected: 0
  RCU nest depth: 1, expected: 0
  7 locks held by rtcwake/501:
   #0: ffff0008038c3408 (sb_writers#5){.+.+}-{0:0}, at: vfs_write+0xf8/0x368
   #1: ffff0008049a5e88 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0xbc/0x1c8
   #2: ffff00080098d588 (kn->active#70){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xcc/0x1c8
   #3: ffff800081c84888 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0x1ec/0x290
   #4: ffff0008009ba0f8 (&dev->mutex){....}-{4:4}, at: device_suspend+0x118/0x4f0
   #5: ffff800081d00458 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire+0x4/0x48
   #6: ffff0008031fb9e0 (&bp->lock){-.-.}-{3:3}, at: macb_suspend+0x144/0x558
  irq event stamp: 8682
  hardirqs last  enabled at (8681): [<ffff8000813c7d7c>] _raw_spin_unlock_irqrestore+0x44/0x88
  hardirqs last disabled at (8682): [<ffff8000813c7b58>] _raw_spin_lock_irqsave+0x38/0x98
  softirqs last  enabled at (7322): [<ffff8000800f1b4c>] handle_softirqs+0x52c/0x588
  softirqs last disabled at (7317): [<ffff800080010310>] __do_softirq+0x20/0x2c
  CPU: 1 UID: 0 PID: 501 Comm: rtcwake Not tainted 7.0.0-rc3-next-20260310-yocto-standard+ #125 PREEMPT
  Hardware name: ZynqMP ZCU102 Rev1.1 (DT)
  Call trace:
   show_stack+0x24/0x38 (C)
   __dump_stack+0x28/0x38
   dump_stack_lvl+0x64/0x88
   dump_stack+0x18/0x24
   __might_resched+0x200/0x218
   __might_sleep+0x38/0x98
   __mutex_lock_common+0x7c/0x1378
   mutex_lock_nested+0x38/0x50
   free_irq+0x68/0x2b0
   devm_irq_release+0x24/0x38
   devres_release+0x40/0x80
   devm_free_irq+0x48/0x88
   macb_suspend+0x298/0x558
   device_suspend+0x218/0x4f0
   dpm_suspend+0x244/0x3a0
   dpm_suspend_start+0x50/0x78
   suspend_devices_and_enter+0xec/0x560
   pm_suspend+0x194/0x290
   state_store+0x110/0x158
   kobj_attr_store+0x1c/0x30
   sysfs_kf_write+0xa8/0xd0
   kernfs_fop_write_iter+0x11c/0x1c8
   vfs_write+0x248/0x368
   ksys_write+0x7c/0xf8
   __arm64_sys_write+0x28/0x40
   invoke_syscall+0x4c/0xe8
   el0_svc_common+0x98/0xf0
   do_el0_svc+0x28/0x40
   el0_svc+0x54/0x1e0
   el0t_64_sync_handler+0x84/0x130
   el0t_64_sync+0x198/0x1a0

Fixes: 558e35ccfe ("net: macb: WoL support for GEM type of Ethernet controller")
Cc: stable@vger.kernel.org
Reviewed-by: Théo Lebrun <theo.lebrun@bootlin.com>
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Link: https://patch.msgid.link/20260318-macb-irq-v2-1-f1179768ab24@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-20 18:25:01 -07:00
Linus Torvalds
a0c8317773 Merge tag 'drm-fixes-2026-03-21' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "Regular weekly pull request, from sunny San Diego. Usual suspects in
  xe/i915/amdgpu with small fixes all over, then some minor fixes across
  a few other drivers. It's probably a bit on the heavy side, but most
  of the fix seem well contained,

  core:
   - drm_dev_unplug UAF fix

  pagemap:
   - lock handling fix

  xe:
   - A number of teardown fixes
   - Skip over non-leaf PTE for PRL generation
   - Fix an uninitialized variable
   - Fix a missing runtime PM reference

  i915/display:
   - Fix #15771: Screen corruption and stuttering on P14s w/ 3K display
   - Fix for PSR entry setup frames count on rejected commit
   - Fix OOPS if firmware is not loaded and suspend is attempted
   - Fix unlikely NULL deref due to DC6 on probe

  amdgpu:
   - Fix gamma 2.2 colorop TFs
   - BO list fix
   - LTO fix
   - DC FP fix
   - DisplayID handling fix
   - DCN 2.01 fix
   - MMHUB boundary fixes
   - ISP fix
   - TLB fence fix
   - Hainan pm fix

  radeon:
   - Hainan pm fix

  vmwgfx:
   - memory leak fix
   - doc warning fix

  imagination:
   - deadlock fix
   - interrupt handling fixes

  dw-hdmi-qp:
   - multi channel audio fix"

* tag 'drm-fixes-2026-03-21' of https://gitlab.freedesktop.org/drm/kernel: (40 commits)
  drm/xe: Fix missing runtime PM reference in ccs_mode_store
  drm/xe: Open-code GGTT MMIO access protection
  drm/xe/lrc: Fix uninitialized new_ts when capturing context timestamp
  drm/xe/oa: Allow reading after disabling OA stream
  drm/xe: Skip over non leaf pte for PRL generation
  drm/xe/guc: Ensure CT state transitions via STOP before DISABLED
  drm/xe: Trigger queue cleanup if not in wedged mode 2
  drm/xe: Forcefully tear down exec queues in GuC submit fini
  drm/xe: Always kill exec queues in xe_guc_submit_pause_abort
  drm/xe/guc: Fail immediately on GuC load error
  drm/i915/gt: Check set_default_submission() before deferencing
  drm/radeon: apply state adjust rules to some additional HAINAN vairants
  drm/amdgpu: apply state adjust rules to some additional HAINAN vairants
  drm/amdgpu: rework how we handle TLB fences
  drm/bridge: dw-hdmi-qp: fix multi-channel audio output
  drm: Fix use-after-free on framebuffers and property blobs when calling drm_dev_unplug
  drm/amdgpu: Fix ISP segfault issue in kernel v7.0
  drm/amdgpu/gmc9.0: add bounds checking for cid
  drm/amdgpu/mmhub4.2.0: add bounds checking for cid
  drm/amdgpu/mmhub4.1.0: add bounds checking for cid
  ...
2026-03-20 18:21:27 -07:00
Guenter Roeck
e7bae9a7a5 hwmon: (max6639) Fix pulses-per-revolution implementation
The valid range for the pulses-per-revolution devicetree property is
1..4. The current code checks for a range of 1..5. Fix it.

Declare the variable used to retrieve pulses per revolution from
devicetree as u32 (unsigned) to match the of_property_read_u32() API.

The current code uses a postfix decrement when writing the pulses per
resolution into the chip. This has no effect since the value is evaluated
before it is decremented. Fix it by decrementing before evaluating the
value.

Fixes: 7506ebcd66 ("hwmon: (max6639) : Configure based on DT property")
Cc: Naresh Solanki <naresh.solanki@9elements.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-20 16:50:58 -07:00
Linus Torvalds
42bddab056 Merge tag 'execve-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull execve fixes from Kees Cook:

 - binfmt_elf_fdpic: fix AUXV size calculation (Andrei Vagin)

 - fs/tests: exec: Remove bad test vector

* tag 'execve-v7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  fs/tests: exec: Remove bad test vector
  binfmt_elf_fdpic: fix AUXV size calculation for ELF_HWCAP3 and ELF_HWCAP4
2026-03-20 11:59:35 -07:00
Linus Torvalds
d46d5c8383 Merge tag 'tty-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
Pull tty/serial fixes from Greg KH:
 "Here are some small tty/vt and serial driver fixes for 7.0-rc5.
  Included in here are:

   - 8250 driver fixes for reported problems

   - serial core lockup fix

   - uartlite driver bugfix

   - vt save/restore bugfix

  All of these have been in linux-next for over a week with no reported
  problems"

* tag 'tty-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty:
  vt: save/restore unicode screen buffer for alternate screen
  serial: 8250_dw: Ensure BUSY is deasserted
  serial: 8250: Add late synchronize_irq() to shutdown to handle DW UART BUSY
  serial: 8250_dw: Rework IIR_NO_INT handling to stop interrupt storm
  serial: 8250_dw: Rework dw8250_handle_irq() locking and IIR handling
  serial: 8250: Add serial8250_handle_irq_locked()
  serial: 8250_dw: Avoid unnecessary LCR writes
  serial: 8250: Protect LCR write in shutdown
  serial: 8250_pci: add support for the AX99100
  serial: core: fix infinite loop in handle_tx() for PORT_UNKNOWN
  serial: uartlite: fix PM runtime usage count underflow on probe
  serial: 8250: always disable IRQ during THRE test
  serial: 8250: Fix TX deadlock when using DMA
2026-03-20 11:52:32 -07:00
Nathan Chancellor
7a618ca9b9 init/Kconfig: Require a release version of clang-22 for CC_HAS_COUNTED_BY_PTR
Commit 150a04d817 ("compiler_types.h: Attributes: Add __counted_by_ptr
macro") used Clang 22.0.0 as a minimum supported version for
__counted_by_ptr, which made sense while 22.0.0 was the version of
LLVM's main branch to allow developers to easily test and develop uses
of __counted_by_ptr in their code. However, __counted_by_ptr requires a
change [1] merged towards the end of the 22 development cycle to avoid
errors when applied to void pointers.

  In file included from fs/xfs/xfs_attr_inactive.c:18:
  fs/xfs/libxfs/xfs_attr.h:59:2: error: 'counted_by' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type
     59 |         void                    *buffer __counted_by_ptr(bufsize);
        |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This is disruptive for deployed prerelease clang-22 builds (such as
Android LLVM) or when bisecting between llvmorg-21-init and the fix.

Require a released version of clang-22 (i.e., 21.1.0 or newer) to
enabled __counted_by_ptr to ensure all fixes needed for proper support
are present.

Fixes: 150a04d817 ("compiler_types.h: Attributes: Add __counted_by_ptr macro")
Link: f29955a594 [1]
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://patch.msgid.link/20260318-counted_by_ptr-release-clang-22-v1-1-e017da246df0@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
2026-03-20 11:51:48 -07:00
Kyle Meyer
1f6aa5bbf1 x86/platform/uv: Handle deconfigured sockets
When a socket is deconfigured, it's mapped to SOCK_EMPTY (0xffff). This causes
a panic while allocating UV hub info structures.

Fix this by using NUMA_NO_NODE, allowing UV hub info structures to be
allocated on valid nodes.

Fixes: 8a50c58519 ("x86/platform/uv: UV support for sub-NUMA clustering")
Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/ab2BmGL0ehVkkjKk@hpe.com
2026-03-20 19:01:03 +01:00
Kees Cook
1c7bbaeed1 coccinelle: kmalloc_obj: Remove default GFP_KERNEL arg
Remove any GFP_KERNEL arguments found in the new kmalloc_obj-family
helpers. This captures the script used in commit 189f164e57 ("Convert
remaining multi-line kmalloc_obj/flex GFP_KERNEL uses").

Link: https://patch.msgid.link/20260320175113.work.016-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
2026-03-20 10:52:52 -07:00
Linus Torvalds
c612261bed Merge tag 'io_uring-7.0-20260320' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:

 - A bit of a work-around for AF_UNIX recv multishot, as the in-kernel
   implementation doesn't properly signal EOF. We'll likely rework this
   one going forward, but the fix is sufficient for now

 - Two fixes for incrementally consumed buffers, for non-pollable files
   and for 0 byte reads

* tag 'io_uring-7.0-20260320' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/kbuf: propagate BUF_MORE through early buffer commit path
  io_uring/kbuf: fix missing BUF_MORE for incremental buffers at EOF
  io_uring/poll: fix multishot recv missing EOF on wakeup race
2026-03-20 09:58:56 -07:00
Vladimir Oltean
63f8b60151 x86/entry/vdso: Fix path of included gettimeofday.c
Commit in Fixes forgot to convert one include path to be relative to the
kernel source directory after adding latter to flags-y.

Fix it.

  [ bp: Rewrite commit message. ]

Fixes: 693c819fed ("x86/entry/vdso: Refactor the vdso build")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20260307174406.1808981-1-vladimir.oltean@nxp.com
2026-03-20 17:56:52 +01:00
Linus Torvalds
9f582e3971 Merge tag 'spi-fix-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
Pull spi fixes from Mark Brown:
 "There's a couple of core fixes here from Johan, fixing a race
  condition and an error handling path, plus a bunch of driver specific
  fixups.

  The Qualcomm issues could be nasty if you ran into them, especially
  the DMA ordering one"

* tag 'spi-fix-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi:
  spi: geni-qcom: Check DMA interrupts early in ISR
  spi: fix statistics allocation
  spi: fix use-after-free on controller registration failure
  spi: geni-qcom: Fix CPHA and CPOL mode change detection
  spi: axiado: Fix double-free in ax_spi_probe()
  spi: amlogic-spisg: Fix memory leak in aml_spisg_probe()
  spi: amlogic: spifc-a4: Remove redundant clock cleanup
2026-03-20 09:54:40 -07:00
Linus Torvalds
007fe23215 Merge tag 'regulator-fix-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
Pull regulator fix from Mark Brown:
 "Just one fix here from Hugo Villeneuve, the documentation for some of
  the regulator DT properties had been cut'n'pasted so that if anyone
  actually read it they'd be informed that those properties had
  completely incorrect meanings"

* tag 'regulator-fix-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator:
  regulator: dt-bindings: fix typos in regulator-uv-* descriptions
2026-03-20 09:52:45 -07:00
Linus Torvalds
c715f13bb3 Merge tag 'pmdomain-v7.0-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm
Pull pmdomain fixes from Ulf Hansson:

 - bcm: increase ASB control timeout for bcm2835

 - mediatek: fix power domain count

* tag 'pmdomain-v7.0-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm:
  pmdomain: bcm: bcm2835-power: Increase ASB control timeout
  pmdomain: mediatek: Fix power domain count
2026-03-20 09:46:15 -07:00
Linus Torvalds
d07252736a Merge tag 'mmc-v7.0-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc
Pull MMC fixes from Ulf Hansson:

 - sdhci: Fix timing selection for 1-bit bus width

 - sdhci-pci-gli: Fix DMA write corruption for GL9750

* tag 'mmc-v7.0-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc:
  mmc: sdhci: fix timing selection for 1-bit bus width
  mmc: sdhci-pci-gli: fix GL9750 DMA write corruption
2026-03-20 09:40:25 -07:00
Linus Torvalds
f374ff79f4 Merge tag 'ata-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
Pull ata fixes from Niklas Cassel:

 - ADATA SU680 SSDs are causing command timeouts when LPM is enabled.
   Enable the ATA_QUIRK_NOLPM quirk to prevent LPM from being enabled
   on these devices (Damien)

 - When receiving a REPORT SUPPORTED OPERATION CODES command with an
   invalid REPORTING OPTIONS format, sense data should have the field
   pointer set to byte 2 (the location of the REPORTING OPTIONS field)
   instead of incorrectly pointing to byte 1 (Damien)

* tag 'ata-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux:
  ata: libata-scsi: report correct sense field pointer in ata_scsiop_maint_in()
  ata: libata-core: disable LPM on ADATA SU680 SSD
2026-03-20 09:38:12 -07:00
Linus Torvalds
6ac513185c Merge tag 'mtd/fixes-for-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux
Pull MTD fixes from Miquel Raynal:

 - In SPI NOR, there was an issue with the RDCR capability, leading to
   several platforms no longer capable of using it for wrong reasons
   (the follow-up commit renames the helper to avoid future confusion)

 - NAND controller drivers needed to be improved to fix some timings, a
   locking schenario and avoid certain operations during panic writes

 - The Spear600 DT binding conversion was done partially, leading to
   several warnings which have individually been fixed

 - Tudor gets replaced by Takahiro for the SPI NOR maintainance

 - Plus two more misc fixes

* tag 'mtd/fixes-for-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux:
  mtd: rawnand: pl353: make sure optimal timings are applied
  mtd: spi-nor: Rename spi_nor_spimem_check_op()
  mtd: spi-nor: Fix RDCR controller capability core check
  mtd: rawnand: brcmnand: skip DMA during panic write
  mtd: rawnand: serialize lock/unlock against other NAND operations
  dt-bindings: mtd: st,spear600-smi: Fix example
  dt-bindings: mtd: st,spear600-smi: #address/size-cells is mandatory
  dt-bindings: mtd: st,spear600-smi: Fix description
  mtd: rawnand: cadence: Fix error check for dma_alloc_coherent() in cadence_nand_init()
  mtd: Avoid boot crash in RedBoot partition table parser
  MAINTAINERS: add Takahiro Kuwano as SPI NOR reviewer
  MAINTAINERS: remove Tudor Ambarus as SPI NOR maintainer
2026-03-20 09:34:32 -07:00
Linus Torvalds
47e231cbd3 Merge tag 'iommu-fixes-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux
Pull iommu fixes from Joerg Roedel:
 "Intel VT-d:
   - Abort all pending requests on dev_tlb_inv timeout to avoid
     hardlockup
   - Limit IOPF handling to PRI-capable device to avoid SVA attach
     failure

  AMD-Vi:
   - Make sure identity domain is not used when SNP is active

  Core fixes:
   - Handle mapping IOVA 0x0 correctly
   - Fix crash in SVA code
   - Kernel-doc fix in IO-PGTable code"

* tag 'iommu-fixes-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux:
  iommu/amd: Block identity domain when SNP enabled
  iommu/sva: Fix crash in iommu_sva_unbind_device()
  iommu/io-pgtable: fix all kernel-doc warnings in io-pgtable.h
  iommu: Fix mapping check for 0x0 to avoid re-mapping it
  iommu/vt-d: Only handle IOPF for SVA when PRI is supported
  iommu/vt-d: Fix intel iommu iotlb sync hardlockup and retry
2026-03-20 09:29:03 -07:00
Linus Torvalds
165160265e Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Will Deacon:
 "There's a small crop of fixes for the MPAM resctrl driver, a fix for
  SCS/PAC patching with the AMDGPU driver and a page-table fix for
  realms running with 52-bit physical addresses:

   - Fix DWARF parsing for SCS/PAC patching to work with very large
     modules (such as the amdgpu driver)

   - Fixes to the mpam resctrl driver

   - Fix broken handling of 52-bit physical addresses when sharing
     memory from within a realm"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: realm: Fix PTE_NS_SHARED for 52bit PA support
  arm_mpam: Force __iomem casts
  arm_mpam: Disable preemption when making accesses to fake MSC in kunit test
  arm_mpam: Fix null pointer dereference when restoring bandwidth counters
  arm64/scs: Fix handling of advance_loc4
2026-03-20 09:23:01 -07:00
Linus Torvalds
c3d13784d5 Merge tag 'hyperv-fixes-signed-20260319' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull Hyper-V fixes from Wei Liu:

 - Fix ARM64 MSHV support (Anirudh Rayabharam)

 - Fix MSHV driver memory handling issues (Stanislav Kinsburskii)

 - Update maintainers for Hyper-V DRM driver (Saurabh Sengar)

 - Misc clean up in MSHV crashdump code (Ard Biesheuvel, Uros Bizjak)

 - Minor improvements to MSHV code (Mukesh R, Wei Liu)

 - Revert not yet released MSHV scrub partition hypercall (Wei Liu)

* tag 'hyperv-fixes-signed-20260319' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  mshv: Fix error handling in mshv_region_pin
  MAINTAINERS: Update maintainers for Hyper-V DRM driver
  mshv: Fix use-after-free in mshv_map_user_memory error path
  mshv: pass struct mshv_user_mem_region by reference
  x86/hyperv: Use any general-purpose register when saving %cr2 and %cr8
  x86/hyperv: Use current_stack_pointer to avoid asm() in hv_hvcrash_ctxt_save()
  x86/hyperv: Save segment registers directly to memory in hv_hvcrash_ctxt_save()
  x86/hyperv: Use __naked attribute to fix stackless C function
  Revert "mshv: expose the scrub partition hypercall"
  mshv: add arm64 support for doorbell & intercept SINTs
  mshv: refactor synic init and cleanup
  x86/hyperv: print out reserved vectors in hexadecimal
2026-03-20 09:18:22 -07:00
Dave Airlie
a6e77320ba Merge tag 'drm-xe-fixes-2026-03-19' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
- A number of teardown fixes (Daniele, Matt Brost, Zhanjun, Ashutosh)
- Skip over non-leaf PTE for PRL generation  (Brian)
- Fix an unitialized variable (Umesh)
- Fix a missing runtime PM reference (Sanjay)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/abxj4_dBHYBiSvDG@fedora
2026-03-21 02:17:59 +10:00
Linus Torvalds
7006433ca2 Merge tag 'v7.0-rc4-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6
Pull smb client fixes from Steve French:

 - Fix reporting of i_blocks

 - Fix Kerberos mounts with different usernames to same server

 - Trivial comment cleanup

* tag 'v7.0-rc4-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6:
  smb: client: fix generic/694 due to wrong ->i_blocks
  cifs: smb1: fix comment typo
  smb: client: fix krb5 mount with username option
2026-03-20 09:07:29 -07:00
Linus Torvalds
f268964788 Merge tag 'v7.0-rc4-ksmbd-server-fixes' of git://git.samba.org/ksmbd
Pull smb server fixes from Steve French:

 - Three use after free fixes (in close, in compounded ops, and in tree
   disconnect)

 - Multichannel fix

 - return proper volume identifier (superblock uuid if available) in
   FS_OBJECT_ID queries

* tag 'v7.0-rc4-ksmbd-server-fixes' of git://git.samba.org/ksmbd:
  ksmbd: fix use-after-free in durable v2 replay of active file handles
  ksmbd: fix use-after-free of share_conf in compound request
  ksmbd: use volume UUID in FS_OBJECT_ID_INFORMATION
  ksmbd: unset conn->binding on failed binding request
  ksmbd: fix share_conf UAF in tree_conn disconnect
2026-03-20 09:03:37 -07:00
Dave Airlie
a15130d588 Merge tag 'amd-drm-fixes-7.0-2026-03-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.0-2026-03-19:

amdgpu:
- Fix gamma 2.2 colorop TFs
- BO list fix
- LTO fix
- DC FP fix
- DisplayID handling fix
- DCN 2.01 fix
- MMHUB boundary fixes
- ISP fix
- TLB fence fix
- Hainan pm fix

radeon:
- Hainan pm fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260319131013.36639-1-alexander.deucher@amd.com
2026-03-21 01:58:36 +10:00
Dave Airlie
437eccb1a8 Merge tag 'drm-misc-fixes-2026-03-19' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
A doc warning fix and a memory leak fix for vmwgfx, a deadlock fix and
interrupt handling fixes for imagination, a locking fix for
pagemap_until, a UAF fix for drm_dev_unplug, and a multi-channel audio
handling fix for dw-hdmi-qp.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patch.msgid.link/20260319-lush-righteous-malamute-e7bb98@houat
2026-03-21 01:52:36 +10:00
Dave Airlie
d551d2e876 Merge tag 'drm-intel-fixes-2026-03-19' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-fixes
- Fix #15771: Screen corruption and stuttering on P14s w/ 3K display
- Fix for PSR entry setup frames count on rejected commit
- Fix OOPS if firmware is not loaded and suspend is attempted
- Fix unlikely NULL deref due to DC6 on probe

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patch.msgid.link/abujAnD-lRDCVSKg@jlahtine-mobl
2026-03-21 01:43:59 +10:00
Mike Rapoport (Microsoft)
217c0a5c17 x86/efi: efi_unmap_boot_services: fix calculation of ranges_to_free size
ranges_to_free array should have enough room to store the entire EFI
memmap plus an extra element for NULL entry.
The calculation of this array size wrongly adds 1 to the overall size
instead of adding 1 to the number of elements.

Add parentheses to properly size the array.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Fixes: a4b0bf6a40 ("x86/efi: defer freeing of boot services memory")
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2026-03-20 15:31:10 +01:00
Joanne Koong
76f9377cd2 writeback: don't block sync for filesystems with no data integrity guarantees
Add a SB_I_NO_DATA_INTEGRITY superblock flag for filesystems that cannot
guarantee data persistence on sync (eg fuse). For superblocks with this
flag set, sync kicks off writeback of dirty inodes but does not wait
for the flusher threads to complete the writeback.

This replaces the per-inode AS_NO_DATA_INTEGRITY mapping flag added in
commit f9a49aa302 ("fs/writeback: skip AS_NO_DATA_INTEGRITY mappings
in wait_sb_inodes()"). The flag belongs at the superblock level because
data integrity is a filesystem-wide property, not a per-inode one.
Having this flag at the superblock level also allows us to skip having
to iterate every dirty inode in wait_sb_inodes() only to skip each inode
individually.

Prior to this commit, mappings with no data integrity guarantees skipped
waiting on writeback completion but still waited on the flusher threads
to finish initiating the writeback. Waiting on the flusher threads is
unnecessary. This commit kicks off writeback but does not wait on the
flusher threads. This change properly addresses a recent report [1] for
a suspend-to-RAM hang seen on fuse-overlayfs that was caused by waiting
on the flusher threads to finish:

Workqueue: pm_fs_sync pm_fs_sync_work_fn
Call Trace:
 <TASK>
 __schedule+0x457/0x1720
 schedule+0x27/0xd0
 wb_wait_for_completion+0x97/0xe0
 sync_inodes_sb+0xf8/0x2e0
 __iterate_supers+0xdc/0x160
 ksys_sync+0x43/0xb0
 pm_fs_sync_work_fn+0x17/0xa0
 process_one_work+0x193/0x350
 worker_thread+0x1a1/0x310
 kthread+0xfc/0x240
 ret_from_fork+0x243/0x280
 ret_from_fork_asm+0x1a/0x30
 </TASK>

On fuse this is problematic because there are paths that may cause the
flusher thread to block (eg if systemd freezes the user session cgroups
first, which freezes the fuse daemon, before invoking the kernel
suspend. The kernel suspend triggers ->write_node() which on fuse issues
a synchronous setattr request, which cannot be processed since the
daemon is frozen. Or if the daemon is buggy and cannot properly complete
writeback, initiating writeback on a dirty folio already under writeback
leads to writeback_get_folio() -> folio_prepare_writeback() ->
unconditional wait on writeback to finish, which will cause a hang).
This commit restores fuse to its prior behavior before tmp folios were
removed, where sync was essentially a no-op.

[1] https://lore.kernel.org/linux-fsdevel/CAJnrk1a-asuvfrbKXbEwwDSctvemF+6zfhdnuzO65Pt8HsFSRw@mail.gmail.com/T/#m632c4648e9cafc4239299887109ebd880ac6c5c1

Fixes: 0c58a97f91 ("fuse: remove tmp folio for writebacks and internal rb tree")
Reported-by: John <therealgraysky@proton.me>
Cc: stable@vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://patch.msgid.link/20260320005145.2483161-2-joannelkoong@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-20 14:18:56 +01:00
Hasun Park
2594196f4e ASoC: amd: acp: add ASUS HN7306EA quirk for legacy SDW machine
Add a DMI quirk entry for ASUS HN7306EA in the ACP SoundWire legacy
machine driver.

Set driver_data to ASOC_SDW_ACP_DMIC for this board so the
platform-specific DMIC quirk path is selected.

Signed-off-by: Hasun Park <hasunpark@gmail.com>
Link: https://patch.msgid.link/20260319163321.30326-1-hasunpark@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-20 12:53:35 +00:00
Cássio Gabriel
215e5fe758 ASoC: SOF: topology: reject invalid vendor array size in token parser
sof_parse_token_sets() accepts array->size values that can be invalid
for a vendor tuple array header. In particular, a zero size does not
advance the parser state and can lead to non-progress parsing on
malformed topology data.

Validate array->size against the minimum header size and reject values
smaller than sizeof(*array) before parsing. This preserves behavior for
valid topologies and hardens malformed-input handling.

Signed-off-by: Cássio Gabriel <cassiogabrielcontato@gmail.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://patch.msgid.link/20260319-sof-topology-array-size-fix-v1-1-f9191b16b1b7@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-20 12:52:41 +00:00
Juergen Gross
1613462be6 xen/privcmd: add boot control for restricted usage in domU
When running in an unprivileged domU under Xen, the privcmd driver
is restricted to allow only hypercalls against a target domain, for
which the current domU is acting as a device model.

Add a boot parameter "unrestricted" to allow all hypercalls (the
hypervisor will still refuse destructive hypercalls affecting other
guests).

Make this new parameter effective only in case the domU wasn't started
using secure boot, as otherwise hypercalls targeting the domU itself
might result in violating the secure boot functionality.

This is achieved by adding another lockdown reason, which can be
tested to not being set when applying the "unrestricted" option.

This is part of XSA-482

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch
2026-03-20 12:06:01 +01:00
Leon Romanovsky
f5ebf241c4 mm/hmm: Indicate that HMM requires DMA coherency
HMM is fundamentally about allowing a sophisticated device to perform DMA
directly to a process’s memory while the CPU accesses that same memory at
the same time. It is similar to SVA but does not rely on IOMMU support.
Because the entire model depends on concurrent access to shared memory, it
fails as a uAPI if SWIOTLB substitutes the memory or if the CPU caches are
not coherent with DMA.

Until now, there has been no reliable way to report this, and various
approximations have been used:

int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map,
                      size_t nr_entries, size_t dma_entry_size)
{
<...>
        /*
         * The HMM API violates our normal DMA buffer ownership rules and can't
         * transfer buffer ownership.  The dma_addressing_limited() check is a
         * best approximation to ensure no swiotlb buffering happens.
         */
        dma_need_sync = !dev->dma_skip_sync;
        if (dma_need_sync || dma_addressing_limited(dev))
                return -EOPNOTSUPP;

So let's mark mapped buffers with DMA_ATTR_REQUIRE_COHERENT attribute
to prevent silent data corruption if someone tries to use hmm in a system
with swiotlb or incoherent DMA

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-8-1dde90a7f08b@nvidia.com
2026-03-20 12:05:56 +01:00
Leon Romanovsky
d9d43a3f5c RDMA/umem: Tell DMA mapping that UMEM requires coherency
The RDMA subsystem exposes DMA regions through the verbs interface, which
assumes a coherent system. Use the DMA_ATTR_REQUIRE_COHERENCE attribute
to ensure coherency and avoid taking the SWIOTLB path.

The RDMA verbs programming model resembles HMM and assumes concurrent DMA
and CPU access to userspace memory. The hardware and programming model
support "one-sided" operations initiated remotely without any local CPU
involvement or notification. These include ATOMIC compare/swap, READ, and
WRITE. A remote CPU can use these operations to traverse data structures,
manipulate locks, and perform similar tasks without the host CPU’s
awareness. If SWIOTLB substitutes memory or DMA is not cache coherent,
these use cases break entirely.

In-kernel RDMA is fine with incoherent mappings because kernel users do
not rely on one-sided operations in ways that would expose these issues.

A given region may also be exported multiple times, which can trigger
warnings about cacheline overlaps. These warnings are suppressed when the
new attribute is used.

infiniband rocep8s0f0: mlx5_ib_reg_user_mr:1592:(pid 5812): start 0x2b28c000, iova 0x2b28c000, length 0x1000, access_flags 0x1
infiniband rocep8s0f0: mlx5_ib_reg_user_mr:1592:(pid 5812): start 0x2b28c001, iova 0x2b28c001, length 0xfff, access_flags 0x1
 ------------[ cut here ]------------
 DMA-API: mlx5_core 0000:08:00.0: cacheline tracking EEXIST, overlapping mappings aren't supported
 WARNING: kernel/dma/debug.c:620 at add_dma_entry+0x1bb/0x280, CPU#6: ibv_rc_pingpong/5812
 Modules linked in: veth xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat nf_nat xt_addrtype br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_fwctl zram zsmalloc mlx5_ib fuse rpcrdma rdma_ucm ib_uverbs ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_core ib_core
 CPU: 6 UID: 2733 PID: 5812 Comm: ibv_rc_pingpong Tainted: G        W           6.19.0+ #129 PREEMPT
 Tainted: [W]=WARN
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:add_dma_entry+0x1be/0x280
 Code: 8b 7b 10 48 85 ff 0f 84 c3 00 00 00 48 8b 6f 50 48 85 ed 75 03 48 8b 2f e8 ff 8e 6a 00 48 89 c6 48 8d 3d 55 ef 2d 01 48 89 ea <67> 48 0f b9 3a 48 85 db 74 1a 48 c7 c7 b0 00 2b 82 e8 9c 25 fd ff
 RSP: 0018:ff11000138717978 EFLAGS: 00010286
 RAX: ffffffffa02d7831 RBX: ff1100010246de00 RCX: 0000000000000000
 RDX: ff110001036fac30 RSI: ffffffffa02d7831 RDI: ffffffff82678650
 RBP: ff110001036fac30 R08: ff11000110dcb4a0 R09: ff11000110dcb478
 R10: 0000000000000000 R11: ffffffff824b30a8 R12: 0000000000000000
 R13: 00000000ffffffef R14: 0000000000000202 R15: ff1100010246de00
 FS:  00007f59b411c740(0000) GS:ff110008dcc99000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007ffe538f7000 CR3: 000000010e066005 CR4: 0000000000373eb0
 Call Trace:
  <TASK>
  debug_dma_map_sg+0x1b4/0x390
  __dma_map_sg_attrs+0x6d/0x1a0
  dma_map_sgtable+0x19/0x30
  ib_umem_get+0x254/0x380 [ib_uverbs]
  mlx5_ib_reg_user_mr+0x68/0x2a0 [mlx5_ib]
  ib_uverbs_reg_mr+0x17f/0x2a0 [ib_uverbs]
  ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xc2/0x130 [ib_uverbs]
  ib_uverbs_cmd_verbs+0xa0b/0xae0 [ib_uverbs]
  ? ib_uverbs_handler_UVERBS_METHOD_QUERY_PORT_SPEED+0xe0/0xe0 [ib_uverbs]
  ? mmap_region+0x7a/0xb0
  ? do_mmap+0x3b8/0x5c0
  ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs]
  __x64_sys_ioctl+0x14f/0x8b0
  ? ksys_mmap_pgoff+0xc5/0x190
  do_syscall_64+0x8c/0xbf0
  entry_SYSCALL_64_after_hwframe+0x4b/0x53
 RIP: 0033:0x7f59b430aeed
 Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00
 RSP: 002b:00007ffe538f9430 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
 RAX: ffffffffffffffda RBX: 00007ffe538f94c0 RCX: 00007f59b430aeed
 RDX: 00007ffe538f94e0 RSI: 00000000c0181b01 RDI: 0000000000000003
 RBP: 00007ffe538f9480 R08: 0000000000000028 R09: 00007ffe538f9684
 R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffe538f9684
 R13: 000000000000000c R14: 000000002b28d170 R15: 000000000000000c
  </TASK>
 ---[ end trace 0000000000000000 ]---

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-7-1dde90a7f08b@nvidia.com
2026-03-20 12:05:56 +01:00
Leon Romanovsky
636e6572e8 iommu/dma: add support for DMA_ATTR_REQUIRE_COHERENT attribute
Add support for the DMA_ATTR_REQUIRE_COHERENT attribute to the exported
functions. This attribute indicates that the SWIOTLB path must not be
used and that no sync operations should be performed.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-6-1dde90a7f08b@nvidia.com
2026-03-20 12:05:56 +01:00
Leon Romanovsky
2536617f20 dma-direct: prevent SWIOTLB path when DMA_ATTR_REQUIRE_COHERENT is set
DMA_ATTR_REQUIRE_COHERENT indicates that SWIOTLB must not be used.
Ensure the SWIOTLB path is declined whenever the DMA direct path is
selected.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-5-1dde90a7f08b@nvidia.com
2026-03-20 12:05:56 +01:00
Juergen Gross
453b8fb68f xen/privcmd: restrict usage in unprivileged domU
The Xen privcmd driver allows to issue arbitrary hypercalls from
user space processes. This is normally no problem, as access is
usually limited to root and the hypervisor will deny any hypercalls
affecting other domains.

In case the guest is booted using secure boot, however, the privcmd
driver would be enabling a root user process to modify e.g. kernel
memory contents, thus breaking the secure boot feature.

The only known case where an unprivileged domU is really needing to
use the privcmd driver is the case when it is acting as the device
model for another guest. In this case all hypercalls issued via the
privcmd driver will target that other guest.

Fortunately the privcmd driver can already be locked down to allow
only hypercalls targeting a specific domain, but this mode can be
activated from user land only today.

The target domain can be obtained from Xenstore, so when not running
in dom0 restrict the privcmd driver to that target domain from the
beginning, resolving the potential problem of breaking secure boot.

This is XSA-482

Reported-by: Teddy Astie <teddy.astie@vates.tech>
Fixes: 1c5de1939c ("xen: add privcmd driver")
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- defer reading from Xenstore if Xenstore isn't ready yet (Jan Beulich)
- wait in open() if target domain isn't known yet
- issue message in case no target domain found (Jan Beulich)
2026-03-20 12:05:41 +01:00
Leon Romanovsky
e6a58fa255 dma-mapping: Introduce DMA require coherency attribute
The mapping buffers which carry this attribute require DMA coherent system.
This means that they can't take SWIOTLB path, can perform CPU cache overlap
and doesn't perform cache flushing.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-4-1dde90a7f08b@nvidia.com
2026-03-20 12:05:36 +01:00
Leon Romanovsky
9bb0a4d6a4 dma-mapping: Clarify valid conditions for CPU cache line overlap
Rename the DMA_ATTR_CPU_CACHE_CLEAN attribute to better reflect that it
is debugging aid to inform DMA core code that CPU cache line overlaps are
allowed, and refine the documentation describing its use.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-3-1dde90a7f08b@nvidia.com
2026-03-20 11:33:24 +01:00
Leon Romanovsky
6f45b1604c dma-mapping: handle DMA_ATTR_CPU_CACHE_CLEAN in trace output
Tracing prints decoded DMA attribute flags, but it does not yet
include the recently added DMA_ATTR_CPU_CACHE_CLEAN. Add support
for decoding and displaying this attribute in the trace output.

Fixes: 61868dc55a ("dma-mapping: add DMA_ATTR_CPU_CACHE_CLEAN")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-2-1dde90a7f08b@nvidia.com
2026-03-20 11:33:24 +01:00
Leon Romanovsky
eca58535b1 dma-debug: Allow multiple invocations of overlapping entries
Repeated DMA mappings with DMA_ATTR_CPU_CACHE_CLEAN trigger the
following splat. This prevents using the attribute in cases where a DMA
region is shared and reused more than seven times.

 ------------[ cut here ]------------
 DMA-API: exceeded 7 overlapping mappings of cacheline 0x000000000438c440
 WARNING: kernel/dma/debug.c:467 at add_dma_entry+0x219/0x280, CPU#4: ibv_rc_pingpong/1644
 Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat nf_nat xt_addrtype br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_fwctl zram zsmalloc mlx5_ib fuse rpcrdma rdma_ucm ib_uverbs ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_core ib_core
 CPU: 4 UID: 2733 PID: 1644 Comm: ibv_rc_pingpong Not tainted 6.19.0+ #129 PREEMPT
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:add_dma_entry+0x221/0x280
 Code: c0 0f 84 f2 fe ff ff 83 e8 01 89 05 6d 99 11 01 e9 e4 fe ff ff 0f 8e 1f ff ff ff 48 8d 3d 07 ef 2d 01 be 07 00 00 00 48 89 e2 <67> 48 0f b9 3a e9 06 ff ff ff 48 c7 c7 98 05 2b 82 c6 05 72 92 28
 RSP: 0018:ff1100010e657970 EFLAGS: 00010002
 RAX: 0000000000000007 RBX: ff1100010234eb00 RCX: 0000000000000000
 RDX: ff1100010e657970 RSI: 0000000000000007 RDI: ffffffff82678660
 RBP: 000000000438c440 R08: 0000000000000228 R09: 0000000000000000
 R10: 00000000000001be R11: 000000000000089d R12: 0000000000000800
 R13: 00000000ffffffef R14: 0000000000000202 R15: ff1100010234eb00
 FS:  00007fb15f3f6740(0000) GS:ff110008dcc19000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007fb15f32d3a0 CR3: 0000000116f59001 CR4: 0000000000373eb0
 Call Trace:
  <TASK>
  debug_dma_map_sg+0x1b4/0x390
  __dma_map_sg_attrs+0x6d/0x1a0
  dma_map_sgtable+0x19/0x30
  ib_umem_get+0x284/0x3b0 [ib_uverbs]
  mlx5_ib_reg_user_mr+0x68/0x2a0 [mlx5_ib]
  ib_uverbs_reg_mr+0x17f/0x2a0 [ib_uverbs]
  ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xc2/0x130 [ib_uverbs]
  ib_uverbs_cmd_verbs+0xa0b/0xae0 [ib_uverbs]
  ? ib_uverbs_handler_UVERBS_METHOD_QUERY_PORT_SPEED+0xe0/0xe0 [ib_uverbs]
  ? mmap_region+0x7a/0xb0
  ? do_mmap+0x3b8/0x5c0
  ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs]
  __x64_sys_ioctl+0x14f/0x8b0
  ? ksys_mmap_pgoff+0xc5/0x190
  do_syscall_64+0x8c/0xbf0
  entry_SYSCALL_64_after_hwframe+0x4b/0x53
 RIP: 0033:0x7fb15f5e4eed
 Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00
 RSP: 002b:00007ffe09a5c540 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
 RAX: ffffffffffffffda RBX: 00007ffe09a5c5d0 RCX: 00007fb15f5e4eed
 RDX: 00007ffe09a5c5f0 RSI: 00000000c0181b01 RDI: 0000000000000003
 RBP: 00007ffe09a5c590 R08: 0000000000000028 R09: 00007ffe09a5c794
 R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffe09a5c794
 R13: 000000000000000c R14: 0000000025a49170 R15: 000000000000000c
  </TASK>
 ---[ end trace 0000000000000000 ]---

Fixes: 61868dc55a ("dma-mapping: add DMA_ATTR_CPU_CACHE_CLEAN")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-1-1dde90a7f08b@nvidia.com
2026-03-20 11:33:23 +01:00
Damien Le Moal
e6d7eba23b ata: libata-scsi: report correct sense field pointer in ata_scsiop_maint_in()
Commit 4ab7bb9763 ("ata: libata-scsi: Refactor ata_scsiop_maint_in()")
modified ata_scsiop_maint_in() to directly call
ata_scsi_set_invalid_field() to set the field pointer of the sense data
of a failed MAINTENANCE IN command. However, in the case of an invalid
command format, the sense data field incorrectly indicates byte 1 of
the CDB. Fix this to indicate byte 2 of the command.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Fixes: 4ab7bb9763 ("ata: libata-scsi: Refactor ata_scsiop_maint_in()")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2026-03-20 09:59:30 +01:00
Pedro Demarchi Gomes
fc3bbf34e6 drm/shmem-helper: Fix huge page mapping in fault handler
When running ./tools/testing/selftests/mm/split_huge_page_test multiple
times with /sys/kernel/mm/transparent_hugepage/shmem_enabled and
/sys/kernel/mm/transparent_hugepage/enabled set as always the following BUG
occurs:

[  232.728858] ------------[ cut here ]------------
[  232.729458] kernel BUG at mm/memory.c:2276!
[  232.729726] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  232.730217] CPU: 19 UID: 60578 PID: 1497 Comm: llvmpipe-9 Not tainted 7.0.0-rc1mm-new+ #19 PREEMPT(lazy)
[  232.730855] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-9.fc43 06/10/2025
[  232.731360] RIP: 0010:walk_to_pmd+0x29e/0x3c0
[  232.731569] Code: d8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 89 ea 48 89 de 4c 89 f7 e8 ae 85 ff ff 85 c0 0f 84 1f fe ff ff 31 db eb d0 <0f> 0b 48 89 ea 48 89 de 4c 89 f7 e8 92 8b ff ff 85 c0 75 e8 48 b8
[  232.732614] RSP: 0000:ffff8881aa6ff9a8 EFLAGS: 00010282
[  232.732991] RAX: 8000000142e002e7 RBX: ffff8881433cae10 RCX: dffffc0000000000
[  232.733362] RDX: 0000000000000000 RSI: 00007fb47840b000 RDI: 8000000142e002e7
[  232.733801] RBP: 00007fb47840b000 R08: 0000000000000000 R09: 1ffff110354dff46
[  232.734168] R10: fffffbfff0cb921d R11: 00000000910da5ce R12: 1ffffffff0c1fcdd
[  232.734459] R13: 1ffffffff0c23f36 R14: ffff888171628040 R15: 0000000000000000
[  232.734861] FS:  00007fb4907f86c0(0000) GS:ffff888791f2c000(0000) knlGS:0000000000000000
[  232.735265] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  232.735548] CR2: 00007fb47840be00 CR3: 000000015e6dc000 CR4: 00000000000006f0
[  232.736031] Call Trace:
[  232.736273]  <TASK>
[  232.736500]  get_locked_pte+0x1f/0xa0
[  232.736878]  insert_pfn+0x9f/0x350
[  232.737190]  ? __pfx_pat_pagerange_is_ram+0x10/0x10
[  232.737614]  ? __pfx_insert_pfn+0x10/0x10
[  232.737990]  ? __pfx_css_rstat_updated+0x10/0x10
[  232.738281]  ? __pfx_pfn_modify_allowed+0x10/0x10
[  232.738552]  ? lookup_memtype+0x62/0x180
[  232.738761]  vmf_insert_pfn_prot+0x14b/0x340
[  232.739012]  ? __pfx_vmf_insert_pfn_prot+0x10/0x10
[  232.739247]  ? __pfx___might_resched+0x10/0x10
[  232.739475]  drm_gem_shmem_fault.cold+0x18/0x39
[  232.739677]  ? rcu_read_unlock+0x20/0x70
[  232.739882]  __do_fault+0x251/0x7b0
[  232.740028]  do_fault+0x6e1/0xc00
[  232.740167]  ? __lock_acquire+0x590/0xc40
[  232.740335]  handle_pte_fault+0x439/0x760
[  232.740498]  ? mtree_range_walk+0x252/0xae0
[  232.740669]  ? __pfx_handle_pte_fault+0x10/0x10
[  232.740899]  __handle_mm_fault+0xa02/0xf30
[  232.741066]  ? __pfx___handle_mm_fault+0x10/0x10
[  232.741255]  ? find_vma+0xa1/0x120
[  232.741403]  handle_mm_fault+0x2bf/0x8f0
[  232.741564]  do_user_addr_fault+0x2d3/0xed0
[  232.741736]  ? trace_page_fault_user+0x1bf/0x240
[  232.741969]  exc_page_fault+0x87/0x120
[  232.742124]  asm_exc_page_fault+0x26/0x30
[  232.742288] RIP: 0033:0x7fb4d73ed546
[  232.742441] Code: 66 41 0f 6f fb 66 44 0f 6d dc 66 44 0f 6f c6 66 41 0f 6d f1 66 0f 6c fc 66 45 0f 6c c1 66 44 0f 6f c9 66 0f 6d ca 66 0f db f0 <66> 0f df 04 08 66 44 0f 6c ca 66 45 0f db c2 66 44 0f df 10 66 44
[  232.743193] RSP: 002b:00007fb4907f68a0 EFLAGS: 00010206
[  232.743565] RAX: 00007fb47840aa00 RBX: 00007fb4d73ec070 RCX: 0000000000001400
[  232.743871] RDX: 0000000000002800 RSI: 0000000000003c00 RDI: 0000000000000001
[  232.744150] RBP: 0000000000000004 R08: 0000000000001400 R09: 00007fb4d73ec060
[  232.744433] R10: 000055f0261a4288 R11: 00007fb4c013da40 R12: 0000000000000008
[  232.744712] R13: 0000000000000000 R14: 4332322132212110 R15: 0000000000000004
[  232.746616]  </TASK>
[  232.746711] Modules linked in: nft_nat nft_masq veth bridge stp llc snd_seq_dummy snd_hrtimer snd_seq snd_seq_device snd_timer snd soundcore overlay rfkill nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables qrtr ppdev 9pnet_virtio 9pnet parport_pc i2c_piix4 netfs pcspkr parport i2c_smbus joydev sunrpc vfat fat loop dm_multipath nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport zram lz4hc_compress vmw_vmci lz4_compress vsock e1000 bochs serio_raw ata_generic pata_acpi scsi_dh_rdac scsi_dh_emc scsi_dh_alua i2c_dev fuse qemu_fw_cfg
[  232.749308] ---[ end trace 0000000000000000 ]---
[  232.749507] RIP: 0010:walk_to_pmd+0x29e/0x3c0
[  232.749692] Code: d8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 89 ea 48 89 de 4c 89 f7 e8 ae 85 ff ff 85 c0 0f 84 1f fe ff ff 31 db eb d0 <0f> 0b 48 89 ea 48 89 de 4c 89 f7 e8 92 8b ff ff 85 c0 75 e8 48 b8
[  232.750428] RSP: 0000:ffff8881aa6ff9a8 EFLAGS: 00010282
[  232.750645] RAX: 8000000142e002e7 RBX: ffff8881433cae10 RCX: dffffc0000000000
[  232.750954] RDX: 0000000000000000 RSI: 00007fb47840b000 RDI: 8000000142e002e7
[  232.751232] RBP: 00007fb47840b000 R08: 0000000000000000 R09: 1ffff110354dff46
[  232.751514] R10: fffffbfff0cb921d R11: 00000000910da5ce R12: 1ffffffff0c1fcdd
[  232.751837] R13: 1ffffffff0c23f36 R14: ffff888171628040 R15: 0000000000000000
[  232.752124] FS:  00007fb4907f86c0(0000) GS:ffff888791f2c000(0000) knlGS:0000000000000000
[  232.752441] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  232.752674] CR2: 00007fb47840be00 CR3: 000000015e6dc000 CR4: 00000000000006f0
[  232.752983] Kernel panic - not syncing: Fatal exception
[  232.753510] Kernel Offset: disabled
[  232.754643] ---[ end Kernel panic - not syncing: Fatal exception ]---

This happens when two concurrent page faults occur within the same PMD range.
One fault installs a PMD mapping through vmf_insert_pfn_pmd(), while the other
attempts to install a PTE mapping via vmf_insert_pfn(). The bug is
triggered because a pmd_trans_huge is not expected when walking the page
table inside vmf_insert_pfn.

Avoid this race by adding a huge_fault callback to drm_gem_shmem_vm_ops so that
PMD-sized mappings are handled through the appropriate huge page fault path.

Fixes: 211b9a39f2 ("drm/shmem-helper: Map huge pages in fault handler")
Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patch.msgid.link/20260319015224.46896-1-pedrodemargomes@gmail.com
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
2026-03-20 09:15:39 +01:00
Thinh Nguyen
01f784fc9d scsi: target: file: Use kzalloc_flex for aio_cmd
The target_core_file doesn't initialize the aio_cmd->iocb for the
ki_write_stream. When a write command fd_execute_rw_aio() is executed,
we may get a bogus ki_write_stream value, causing unintended write
failure status when checking iocb->ki_write_stream > max_write_streams
in the block device.

Let's just use kzalloc_flex when allocating the aio_cmd and let
ki_write_stream=0 to fix this issue.

Fixes: 732f25a289 ("fs: add a write stream field to the kiocb")
Fixes: c27683da64 ("block: expose write streams for block device nodes")
Cc: stable@vger.kernel.org
Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://patch.msgid.link/f1a2f81c62f043e31f80bb92d5f29893400c8ee2.1773450782.git.Thinh.Nguyen@synopsys.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-19 22:07:39 -04:00
Yihang Li
d71afa9deb scsi: scsi_transport_sas: Fix the maximum channel scanning issue
After commit 37c4e72b06 ("scsi: Fix sas_user_scan() to handle wildcard
and multi-channel scans"), if the device supports multiple channels (0 to
shost->max_channel), user_scan() invokes updated sas_user_scan() to perform
the scan behavior for a specific transfer.  However, when the user
specifies shost->max_channel, it will return -EINVAL, which is not
expected.

Fix and support specifying the scan shost->max_channel for scanning.

Fixes: 37c4e72b06 ("scsi: Fix sas_user_scan() to handle wildcard and multi-channel scans")
Signed-off-by: Yihang Li <liyihang9@huawei.com>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Link: https://patch.msgid.link/20260317063147.2182562-1-liyihang9@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-19 21:57:22 -04:00
Josef Bacik
1333eee56c scsi: target: tcm_loop: Drain commands in target_reset handler
tcm_loop_target_reset() violates the SCSI EH contract: it returns SUCCESS
without draining any in-flight commands.  The SCSI EH documentation
(scsi_eh.rst) requires that when a reset handler returns SUCCESS the driver
has made lower layers "forget about timed out scmds" and is ready for new
commands.  Every other SCSI LLD (virtio_scsi, mpt3sas, ipr, scsi_debug,
mpi3mr) enforces this by draining or completing outstanding commands before
returning SUCCESS.

Because tcm_loop_target_reset() doesn't drain, the SCSI EH reuses in-flight
scsi_cmnd structures for recovery commands (e.g. TUR) while the target core
still has async completion work queued for the old se_cmd.  The memset in
queuecommand zeroes se_lun and lun_ref_active, causing
transport_lun_remove_cmd() to skip its percpu_ref_put().  The leaked LUN
reference prevents transport_clear_lun_ref() from completing, hanging
configfs LUN unlink forever in D-state:

  INFO: task rm:264 blocked for more than 122 seconds.
  rm              D    0   264    258 0x00004000
  Call Trace:
   __schedule+0x3d0/0x8e0
   schedule+0x36/0xf0
   transport_clear_lun_ref+0x78/0x90 [target_core_mod]
   core_tpg_remove_lun+0x28/0xb0 [target_core_mod]
   target_fabric_port_unlink+0x50/0x60 [target_core_mod]
   configfs_unlink+0x156/0x1f0 [configfs]
   vfs_unlink+0x109/0x290
   do_unlinkat+0x1d5/0x2d0

Fix this by making tcm_loop_target_reset() actually drain commands:

 1. Issue TMR_LUN_RESET via tcm_loop_issue_tmr() to drain all commands that
    the target core knows about (those not yet CMD_T_COMPLETE).

 2. Use blk_mq_tagset_busy_iter() to iterate all started requests and
    flush_work() on each se_cmd — this drains any deferred completion work
    for commands that already had CMD_T_COMPLETE set before the TMR (which
    the TMR skips via __target_check_io_state()).  This is the same pattern
    used by mpi3mr, scsi_debug, and libsas to drain outstanding commands
    during reset.

Fixes: e0eb5d38b7 ("scsi: target: tcm_loop: Use block cmd allocator for se_cmds")
Cc: stable@vger.kernel.org
Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Link: https://patch.msgid.link/27011aa34c8f6b1b94d2e3cf5655b6d037f53428.1773706803.git.josef@toxicpanda.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-19 21:54:51 -04:00
Tyllis Xu
61d099ac4a scsi: ibmvfc: Fix OOB access in ibmvfc_discover_targets_done()
A malicious or compromised VIO server can return a num_written value in the
discover targets MAD response that exceeds max_targets. This value is
stored directly in vhost->num_targets without validation, and is then used
as the loop bound in ibmvfc_alloc_targets() to index into disc_buf[], which
is only allocated for max_targets entries. Indices at or beyond max_targets
access kernel memory outside the DMA-coherent allocation.  The
out-of-bounds data is subsequently embedded in Implicit Logout and PLOGI
MADs that are sent back to the VIO server, leaking kernel memory.

Fix by clamping num_written to max_targets before storing it.

Fixes: 072b91f9c6 ("[SCSI] ibmvfc: IBM Power Virtual Fibre Channel Adapter Client Driver")
Reported-by: Yuhao Jiang <danisjiang@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Tyllis Xu <LivelyCarpet87@gmail.com>
Reviewed-by: Dave Marquardt <davemarq@linux.ibm.com>
Acked-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Link: https://patch.msgid.link/20260314170151.548614-1-LivelyCarpet87@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-19 21:42:35 -04:00
Greg Kroah-Hartman
7a9f448d44 scsi: ses: Handle positive SCSI error from ses_recv_diag()
ses_recv_diag() can return a positive value, which also means that an
error happened, so do not only test for negative values.

Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: stable <stable@kernel.org>
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/2026022301-bony-overstock-a07f@gregkh
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-19 21:33:28 -04:00
Toke Høiland-Jørgensen
7c770dadfd net: openvswitch: Avoid releasing netdev before teardown completes
The patch cited in the Fixes tag below changed the teardown code for
OVS ports to no longer unconditionally take the RTNL. After this change,
the netdev_destroy() callback can proceed immediately to the call_rcu()
invocation if the IFF_OVS_DATAPATH flag is already cleared on the
netdev.

The ovs_netdev_detach_dev() function clears the flag before completing
the unregistration, and if it gets preempted after clearing the flag (as
can happen on an -rt kernel), netdev_destroy() can complete and the
device can be freed before the unregistration completes. This leads to a
splat like:

[  998.393867] Oops: general protection fault, probably for non-canonical address 0xff00000001000239: 0000 [#1] SMP PTI
[  998.393877] CPU: 42 UID: 0 PID: 55177 Comm: ip Kdump: loaded Not tainted 6.12.0-211.1.1.el10_2.x86_64+rt #1 PREEMPT_RT
[  998.393886] Hardware name: Dell Inc. PowerEdge R740/0JMK61, BIOS 2.24.0 03/27/2025
[  998.393889] RIP: 0010:dev_set_promiscuity+0x8d/0xa0
[  998.393901] Code: 00 00 75 d8 48 8b 53 08 48 83 ba b0 02 00 00 00 75 ca 48 83 c4 08 5b c3 cc cc cc cc 48 83 bf 48 09 00 00 00 75 91 48 8b 47 08 <48> 83 b8 b0 02 00 00 00 74 97 eb 81 0f 1f 80 00 00 00 00 90 90 90
[  998.393906] RSP: 0018:ffffce5864a5f6a0 EFLAGS: 00010246
[  998.393912] RAX: ff00000000ffff89 RBX: ffff894d0adf5a05 RCX: 0000000000000000
[  998.393917] RDX: 0000000000000000 RSI: 00000000ffffffff RDI: ffff894d0adf5a05
[  998.393921] RBP: ffff894d19252000 R08: ffff894d19252000 R09: 0000000000000000
[  998.393924] R10: ffff894d19252000 R11: ffff894d192521b8 R12: 0000000000000006
[  998.393927] R13: ffffce5864a5f738 R14: 00000000ffffffe2 R15: 0000000000000000
[  998.393931] FS:  00007fad61971800(0000) GS:ffff894cc0140000(0000) knlGS:0000000000000000
[  998.393936] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  998.393940] CR2: 000055df0a2a6e40 CR3: 000000011c7fe003 CR4: 00000000007726f0
[  998.393944] PKRU: 55555554
[  998.393946] Call Trace:
[  998.393949]  <TASK>
[  998.393952]  ? show_trace_log_lvl+0x1b0/0x2f0
[  998.393961]  ? show_trace_log_lvl+0x1b0/0x2f0
[  998.393975]  ? dp_device_event+0x41/0x80 [openvswitch]
[  998.394009]  ? __die_body.cold+0x8/0x12
[  998.394016]  ? die_addr+0x3c/0x60
[  998.394027]  ? exc_general_protection+0x16d/0x390
[  998.394042]  ? asm_exc_general_protection+0x26/0x30
[  998.394058]  ? dev_set_promiscuity+0x8d/0xa0
[  998.394066]  ? ovs_netdev_detach_dev+0x3a/0x80 [openvswitch]
[  998.394092]  dp_device_event+0x41/0x80 [openvswitch]
[  998.394102]  notifier_call_chain+0x5a/0xd0
[  998.394106]  unregister_netdevice_many_notify+0x51b/0xa60
[  998.394110]  rtnl_dellink+0x169/0x3e0
[  998.394121]  ? rt_mutex_slowlock.constprop.0+0x95/0xd0
[  998.394125]  rtnetlink_rcv_msg+0x142/0x3f0
[  998.394128]  ? avc_has_perm_noaudit+0x69/0xf0
[  998.394130]  ? __pfx_rtnetlink_rcv_msg+0x10/0x10
[  998.394132]  netlink_rcv_skb+0x50/0x100
[  998.394138]  netlink_unicast+0x292/0x3f0
[  998.394141]  netlink_sendmsg+0x21b/0x470
[  998.394145]  ____sys_sendmsg+0x39d/0x3d0
[  998.394149]  ___sys_sendmsg+0x9a/0xe0
[  998.394156]  __sys_sendmsg+0x7a/0xd0
[  998.394160]  do_syscall_64+0x7f/0x170
[  998.394162]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  998.394165] RIP: 0033:0x7fad61bf4724
[  998.394188] Code: 89 02 b8 ff ff ff ff eb bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 80 3d c5 e9 0c 00 00 74 13 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89
[  998.394189] RSP: 002b:00007ffd7e2f7cb8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e
[  998.394191] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fad61bf4724
[  998.394193] RDX: 0000000000000000 RSI: 00007ffd7e2f7d20 RDI: 0000000000000003
[  998.394194] RBP: 00007ffd7e2f7d90 R08: 0000000000000010 R09: 000000000000003f
[  998.394195] R10: 000055df11558010 R11: 0000000000000202 R12: 00007ffd7e2f8380
[  998.394196] R13: 0000000069b233d7 R14: 000055df0a256040 R15: 0000000000000000
[  998.394200]  </TASK>

To fix this, reorder the operations in ovs_netdev_detach_dev() to only
clear the flag after completing the other operations, and introduce an
smp_wmb() to make the ordering requirement explicit. The smp_wmb() is
paired with a full smp_mb() in netdev_destroy() to make sure the
call_rcu() invocation does not happen before the unregister operations
are visible.

Reported-by: Minxi Hou <mhou@redhat.com>
Tested-by: Minxi Hou <mhou@redhat.com>
Fixes: 5498227676 ("net: openvswitch: Avoid needlessly taking the RTNL on vport destroy")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20260318155554.1133405-1-toke@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-19 17:15:37 -07:00
Jakub Kicinski
4527025d44 nfc: nci: fix circular locking dependency in nci_close_device
nci_close_device() flushes rx_wq and tx_wq while holding req_lock.
This causes a circular locking dependency because nci_rx_work()
running on rx_wq can end up taking req_lock too:

  nci_rx_work -> nci_rx_data_packet -> nci_data_exchange_complete
    -> __sk_destruct -> rawsock_destruct -> nfc_deactivate_target
    -> nci_deactivate_target -> nci_request -> mutex_lock(&ndev->req_lock)

Move the flush of rx_wq after req_lock has been released.
This should safe (I think) because NCI_UP has already been cleared
and the transport is closed, so the work will see it and return
-ENETDOWN.

NIPA has been hitting this running the nci selftest with a debug
kernel on roughly 4% of the runs.

Fixes: 6a2968aaf5 ("NFC: basic NCI protocol implementation")
Reviewed-by: Ian Ray <ian.ray@gehealthcare.com>
Link: https://patch.msgid.link/20260317193334.988609-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-19 16:56:18 -07:00
Jakub Kicinski
57ce3b2e9c Merge tag 'for-net-2026-03-19' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
Luiz Augusto von Dentz says:

====================
bluetooth pull request for net:

 - hci_ll: Fix firmware leak on error path
 - hci_sync: annotate data-races around hdev->req_status
 - L2CAP: Fix null-ptr-deref on l2cap_sock_ready_cb
 - L2CAP: Validate PDU length before reading SDU length in l2cap_ecred_data_rcv()
 - L2CAP: Fix regressions caused by reusing ident
 - L2CAP: Fix stack-out-of-bounds read in l2cap_ecred_conn_req
 - MGMT: Fix dangling pointer on mgmt_add_adv_patterns_monitor_complete
 - SCO: Fix use-after-free in sco_recv_frame() due to missing sock_hold

* tag 'for-net-2026-03-19' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth:
  Bluetooth: L2CAP: Fix regressions caused by reusing ident
  Bluetooth: L2CAP: Fix null-ptr-deref on l2cap_sock_ready_cb
  Bluetooth: hci_ll: Fix firmware leak on error path
  Bluetooth: hci_sync: annotate data-races around hdev->req_status
  Bluetooth: MGMT: Fix dangling pointer on mgmt_add_adv_patterns_monitor_complete
  Bluetooth: SCO: Fix use-after-free in sco_recv_frame() due to missing sock_hold
  Bluetooth: L2CAP: Validate PDU length before reading SDU length in l2cap_ecred_data_rcv()
  Bluetooth: L2CAP: Fix stack-out-of-bounds read in l2cap_ecred_conn_req
====================

Link: https://patch.msgid.link/20260319190455.135302-1-luiz.dentz@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-19 16:49:38 -07:00
Linus Torvalds
0e4f8f1a3d Merge tag 'parisc-for-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
Pull parisc fix from Helge Deller:
 "Fix for the cacheflush() syscall which had D/I caches mixed up"

* tag 'parisc-for-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: Flush correct cache in cacheflush() syscall
2026-03-19 16:28:41 -07:00
Linus Torvalds
9b70771216 Merge tag 'pci-v7.0-fixes-4' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
Pull pci fixes from Bjorn Helgaas:

 - Create pwrctrl devices only for DT nodes below a PCI controller that
   describe PCI devices and are related to a power supply; this prevents
   waiting indefinitely for pwrctrl drivers that will never probe
   (Manivannan Sadhasivam)

 - Restore endpoint BAR mapping on subrange setup failure to make
   selftest reliable (Koichiro Den)

* tag 'pci-v7.0-fixes-4' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci:
  PCI: endpoint: pci-epf-test: Roll back BAR mapping when subrange setup fails
  PCI/pwrctrl: Create pwrctrl devices only for PCI device nodes
  PCI/pwrctrl: Ensure that remote endpoint node parent has supply requirement
2026-03-19 16:13:51 -07:00
Gabor Juhos
78a6ee14f8 i2c: pxa: defer reset on Armada 3700 when recovery is used
The I2C communication is completely broken on the Armada 3700 platform
since commit 0b01392c18 ("i2c: pxa: move to generic GPIO recovery").

For example, on the Methode uDPU board, probing of the two onboard
temperature sensors fails ...

  [    7.271713] i2c i2c-0: using pinctrl states for GPIO recovery
  [    7.277503] i2c i2c-0:  PXA I2C adapter
  [    7.282199] i2c i2c-1: using pinctrl states for GPIO recovery
  [    7.288241] i2c i2c-1:  PXA I2C adapter
  [    7.292947] sfp sfp-eth1: Host maximum power 3.0W
  [    7.299614] sfp sfp-eth0: Host maximum power 3.0W
  [    7.308178] lm75 1-0048: supply vs not found, using dummy regulator
  [   32.489631] lm75 1-0048: probe with driver lm75 failed with error -121
  [   32.496833] lm75 1-0049: supply vs not found, using dummy regulator
  [   82.890614] lm75 1-0049: probe with driver lm75 failed with error -121

... and accessing the plugged-in SFP modules also does not work:

  [  511.298537] sfp sfp-eth1: please wait, module slow to respond
  [  536.488530] sfp sfp-eth0: please wait, module slow to respond
  ...
  [ 1065.688536] sfp sfp-eth1: failed to read EEPROM: -EREMOTEIO
  [ 1090.888532] sfp sfp-eth0: failed to read EEPROM: -EREMOTEIO

After a discussion [1], there was an attempt to fix the problem by
reverting the offending change by commit 7b211c7671 ("Revert "i2c:
pxa: move to generic GPIO recovery""), but that only helped to fix
the issue in the 6.1.y stable tree. The reason behind the partial succes
is that there was another change in commit 20cb3fce4d ("i2c: Set i2c
pinctrl recovery info from it's device pinctrl") in the 6.3-rc1 cycle
which broke things further.

The cause of the problem is the same in case of both offending commits
mentioned above. Namely, the I2C core code changes the pinctrl state to
GPIO while running the recovery initialization code. Although the PXA
specific initialization also does this, but the key difference is that
it happens before the controller is getting enabled in i2c_pxa_reset(),
whereas in the case of the generic initialization it happens after that.

Change the code to reset the controller only before the first transfer
instead of before registering the controller. This ensures that the
controller is not enabled at the time when the generic recovery code
performs the pinctrl state changes, thus avoids the problem described
above.

As the result this change restores the original behaviour, which in
turn makes the I2C communication to work again as it can be seen from
the following log:

  [    7.363250] i2c i2c-0: using pinctrl states for GPIO recovery
  [    7.369041] i2c i2c-0:  PXA I2C adapter
  [    7.373673] i2c i2c-1: using pinctrl states for GPIO recovery
  [    7.379742] i2c i2c-1:  PXA I2C adapter
  [    7.384506] sfp sfp-eth1: Host maximum power 3.0W
  [    7.393013] sfp sfp-eth0: Host maximum power 3.0W
  [    7.399266] lm75 1-0048: supply vs not found, using dummy regulator
  [    7.407257] hwmon hwmon0: temp1_input not attached to any thermal zone
  [    7.413863] lm75 1-0048: hwmon0: sensor 'tmp75c'
  [    7.418746] lm75 1-0049: supply vs not found, using dummy regulator
  [    7.426371] hwmon hwmon1: temp1_input not attached to any thermal zone
  [    7.432972] lm75 1-0049: hwmon1: sensor 'tmp75c'
  [    7.755092] sfp sfp-eth1: module MENTECHOPTO      POS22-LDCC-KR    rev 1.0  sn MNC208U90009     dc 200828
  [    7.764997] mvneta d0040000.ethernet eth1: unsupported SFP module: no common interface modes
  [    7.785362] sfp sfp-eth0: module Mikrotik         S-RJ01           rev 1.0  sn 61B103C55C58     dc 201022
  [    7.803426] hwmon hwmon2: temp1_input not attached to any thermal zone

Link: https://lore.kernel.org/r/20230926160255.330417-1-robert.marko@sartura.hr #1

Cc: stable@vger.kernel.org # 6.3+
Fixes: 20cb3fce4d ("i2c: Set i2c pinctrl recovery info from it's device pinctrl")
Signed-off-by: Gabor Juhos <j4g8y7@gmail.com>
Tested-by: Robert Marko <robert.marko@sartura.hr>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20260226-i2c-pxa-fix-i2c-communication-v4-1-797a091dae87@gmail.com
2026-03-19 23:54:03 +01:00
Mohammad Heib
cbcb3cfcdc ionic: fix persistent MAC address override on PF
The use of IONIC_CMD_LIF_SETATTR in the MAC address update path causes
the ionic firmware to update the LIF's identity in its persistent state.
Since the firmware state is maintained across host warm boots and driver
reloads, any MAC change on the Physical Function (PF) becomes "sticky.

This is problematic because it causes ethtool -P to report the
user-configured MAC as the permanent factory address, which breaks
system management tools that rely on a stable hardware identity.

While Virtual Functions (VFs) need this hardware-level programming to
properly handle MAC assignments in guest environments, the PF should
maintain standard transient behavior. This patch gates the
ionic_program_mac call using is_virtfn so that PF MAC changes remain
local to the netdev filters and do not overwrite the firmware's
permanent identity block.

Fixes: 19058be7c4 ("ionic: VF initial random MAC address if no assigned mac")
Signed-off-by: Mohammad Heib <mheib@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Link: https://patch.msgid.link/20260317170806.35390-1-mheib@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-19 15:45:30 -07:00
Christophe JAILLET
be627abcc0 i2c: fsi: Fix a potential leak in fsi_i2c_probe()
In the commit in Fixes:, when the code has been updated to use an explicit
for loop, instead of for_each_available_child_of_node(), the assumption
that a reference to a device_node structure would be released at each
iteration has been broken.

Now, an explicit of_node_put() is needed to release the reference.

Fixes: 095561f476 ("i2c: fsi: Create busses for all ports")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: <stable@vger.kernel.org> # v5.3+
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/fd805c39f8de51edf303856103d782138a1633c8.1772382022.git.christophe.jaillet@wanadoo.fr
2026-03-19 23:45:06 +01:00
Johan Hovold
aa79f996eb i2c: cp2615: fix serial string NULL-deref at probe
The cp2615 driver uses the USB device serial string as the i2c adapter
name but does not make sure that the string exists.

Verify that the device has a serial number before accessing it to avoid
triggering a NULL-pointer dereference (e.g. with malicious devices).

Fixes: 4a7695429e ("i2c: cp2615: add i2c driver for Silicon Labs' CP2615 Digital Audio Bridge")
Cc: stable@vger.kernel.org	# 5.13
Cc: Bence Csókás <bence98@sch.bme.hu>
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Bence Csókás <bence98@sch.bme.hu>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20260309075016.25612-1-johan@kernel.org
2026-03-19 23:22:25 +01:00
Cui Chao
be5c5280cf cxl: Adjust the startup priority of cxl_pmem to be higher than that of cxl_acpi
During the cxl_acpi probe process, it checks whether the cxl_nvb device
and driver have been attached. Currently, the startup priority of the
cxl_pmem driver is lower than that of the cxl_acpi driver. At this point,
the cxl_nvb driver has not yet been registered on the cxl_bus, causing
the attachment check to fail. This results in a failure to add the root
nvdimm bridge, leading to a cxl_acpi probe failure and ultimately
affecting the subsequent loading of cxl drivers. As a consequence, only
one mem device object exists on the cxl_bus, while the cxl_port device
objects and decoder device objects are missing.

The solution is to raise the startup priority of cxl_pmem to be higher
than that of cxl_acpi, ensuring that the cxl_pmem driver is registered
before the aforementioned attachment check occurs.

Co-developed-by: Wang Yinfeng <wangyinfeng@phytium.com.cn>
Signed-off-by: Wang Yinfeng <wangyinfeng@phytium.com.cn>
Signed-off-by: Cui Chao <cuichao1753@phytium.com.cn>
Fixes: e7e222ad73 ("cxl: Move devm_cxl_add_nvdimm_bridge() to cxl_pmem.ko")
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260319074535.1709250-1-cuichao1753@phytium.com.cn
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-03-19 15:12:40 -07:00
Jens Axboe
418eab7a6f io_uring/kbuf: propagate BUF_MORE through early buffer commit path
When io_should_commit() returns true (eg for non-pollable files), buffer
commit happens at buffer selection time and sel->buf_list is set to
NULL. When __io_put_kbufs() generates CQE flags at completion time, it
calls __io_put_kbuf_ring() which finds a NULL buffer_list and hence
cannot determine whether the buffer was consumed or not. This means that
IORING_CQE_F_BUF_MORE is never set for non-pollable input with
incrementally consumed buffers.

Likewise for io_buffers_select(), which always commits upfront and
discards the return value of io_kbuf_commit().

Add REQ_F_BUF_MORE to store the result of io_kbuf_commit() during early
commit. Then __io_put_kbuf_ring() can check this flag and set
IORING_F_BUF_MORE accordingy.

Reported-by: Martin Michaelis <code@mgjm.de>
Cc: stable@vger.kernel.org
Fixes: ae98dbf43d ("io_uring/kbuf: add support for incremental buffer consumption")
Link: https://github.com/axboe/liburing/issues/1553
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-19 15:09:48 -06:00
Jens Axboe
3ecd3e0314 io_uring/kbuf: fix missing BUF_MORE for incremental buffers at EOF
For a zero length transfer, io_kbuf_inc_commit() is called with !len.
Since we never enter the while loop to consume the buffers,
io_kbuf_inc_commit() ends up returning true, consuming the buffer. But
if no data was consumed, by definition it cannot have consumed the
buffer. Return false for that case.

Reported-by: Martin Michaelis <code@mgjm.de>
Cc: stable@vger.kernel.org
Fixes: ae98dbf43d ("io_uring/kbuf: add support for incremental buffer consumption")
Link: https://github.com/axboe/liburing/issues/1553
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-19 15:09:40 -06:00
Mickaël Salaün
a54142d9ff selftests/landlock: Test tsync interruption and cancellation paths
Add tsync_interrupt test to exercise the signal interruption path in
landlock_restrict_sibling_threads().  When a signal interrupts
wait_for_completion_interruptible() while the calling thread waits for
sibling threads to finish credential preparation, the kernel:

1. Sets ERESTARTNOINTR to request a transparent syscall restart.
2. Calls cancel_tsync_works() to opportunistically dequeue task works
   that have not started running yet.
3. Breaks out of the preparation loop, then unblocks remaining
   task works via complete_all() and waits for them to finish.
4. Returns the error, causing abort_creds() in the syscall handler.

Specifically, cancel_tsync_works() in its entirety, the ERESTARTNOINTR
error branch in landlock_restrict_sibling_threads(), and the
abort_creds() error branch in the landlock_restrict_self() syscall
handler are timing-dependent and not exercised by the existing tsync
tests, making code coverage measurements non-deterministic.

The test spawns a signaler thread that rapidly sends SIGUSR1 to the
calling thread while it performs landlock_restrict_self() with
LANDLOCK_RESTRICT_SELF_TSYNC.  Since ERESTARTNOINTR causes a
transparent restart, userspace always sees the syscall succeed.

This is a best-effort coverage test: the interruption path is exercised
when the signal lands during the preparation wait, which depends on
thread scheduling.  The test creates enough idle sibling threads (200)
to ensure multiple serialized waves of credential preparation even on
machines with many cores (e.g., 64), widening the window for the
signaler.  Deterministic coverage would require wrapping the wait call
with ALLOW_ERROR_INJECTION() and using CONFIG_FAIL_FUNCTION.

Test coverage for security/landlock was 90.2% of 2105 lines according to
LLVM 21, and it is now 91.1% of 2105 lines with this new test.

Cc: Günther Noack <gnoack@google.com>
Cc: Justin Suess <utilityemal77@gmail.com>
Cc: Tingmao Wang <m@maowtm.org>
Cc: Yihan Ding <dingyihan@uniontech.com>
Link: https://lore.kernel.org/r/20260310190416.1913908-1-mic@digikod.net
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-19 20:57:39 +01:00
Paulo Alcantara
23b5df09c2 smb: client: fix generic/694 due to wrong ->i_blocks
When updating ->i_size, make sure to always update ->i_blocks as well
until we query new allocation size from the server.

generic/694 was failing because smb3_simple_falloc() was missing the
update of ->i_blocks after calling cifs_setsize().  So, fix this by
updating ->i_blocks directly in cifs_setsize(), so all places that
call it doesn't need to worry about updating ->i_blocks later.

Reported-by: Shyam Prasad N <sprasad@microsoft.com>
Closes: https://lore.kernel.org/r/CANT5p=rqgRwaADB=b_PhJkqXjtfq3SFv41SSTXSVEHnuh871pA@mail.gmail.com
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Cc: David Howells <dhowells@redhat.com>
Cc: linux-cifs@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-19 13:56:25 -05:00
Luca Leonardo Scorcia
8f9f64c8f9 pinctrl: mediatek: common: Fix probe failure for devices without EINT
Some pinctrl devices like mt6397 or mt6392 don't support EINT at all, but
the mtk_eint_init function is always called and returns -ENODEV, which
then bubbles up and causes probe failure.

To address this only call mtk_eint_init if EINT pins are present.

Tested on Xiaomi Mi Smart Clock x04g (mt6392).

Fixes: e46df235b4 ("pinctrl: mediatek: refactor EINT related code for all MediaTek pinctrl can fit")
Signed-off-by: Luca Leonardo Scorcia <l.scorcia@gmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-03-19 19:52:22 +01:00
Luiz Augusto von Dentz
761fb8ec87 Bluetooth: L2CAP: Fix regressions caused by reusing ident
This attempt to fix regressions caused by reusing ident which apparently
is not handled well on certain stacks causing the stack to not respond to
requests, so instead of simple returning the first unallocated id this
stores the last used tx_ident and then attempt to use the next until all
available ids are exausted and then cycle starting over to 1.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221120
Link: https://bugzilla.kernel.org/show_bug.cgi?id=221177
Fixes: 6c3ea155e5 ("Bluetooth: L2CAP: Fix not tracking outstanding TX ident")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Christian Eggers <ceggers@arri.de>
2026-03-19 14:44:25 -04:00
Helen Koike
b6552e0503 Bluetooth: L2CAP: Fix null-ptr-deref on l2cap_sock_ready_cb
Before using sk pointer, check if it is null.

Fix the following:

 KASAN: null-ptr-deref in range [0x0000000000000260-0x0000000000000267]
 CPU: 0 UID: 0 PID: 5985 Comm: kworker/0:5 Not tainted 7.0.0-rc4-00029-ga989fde763f4 #1 PREEMPT(full)
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-9.fc43 06/10/2025
 Workqueue: events l2cap_info_timeout
 RIP: 0010:kasan_byte_accessible+0x12/0x30
 Code: 79 ff ff ff 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 40 d6 48 c1 ef 03 48 b8 00 00 00 00 00 fc ff df <0f> b6 04 07 3c 08 0f 92 c0 c3 cc cce
 veth0_macvtap: entered promiscuous mode
 RSP: 0018:ffffc90006e0f808 EFLAGS: 00010202
 RAX: dffffc0000000000 RBX: ffffffff89746018 RCX: 0000000080000001
 RDX: 0000000000000000 RSI: ffffffff89746018 RDI: 000000000000004c
 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
 R10: dffffc0000000000 R11: ffffffff8aae3e70 R12: 0000000000000000
 R13: 0000000000000260 R14: 0000000000000260 R15: 0000000000000001
 FS:  0000000000000000(0000) GS:ffff8880983c2000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00005582615a5008 CR3: 000000007007e000 CR4: 0000000000752ef0
 PKRU: 55555554
 Call Trace:
  <TASK>
  __kasan_check_byte+0x12/0x40
  lock_acquire+0x79/0x2e0
  lock_sock_nested+0x48/0x100
  ? l2cap_sock_ready_cb+0x46/0x160
  l2cap_sock_ready_cb+0x46/0x160
  l2cap_conn_start+0x779/0xff0
  ? __pfx_l2cap_conn_start+0x10/0x10
  ? l2cap_info_timeout+0x60/0xa0
  ? __pfx___mutex_lock+0x10/0x10
  l2cap_info_timeout+0x68/0xa0
  ? process_scheduled_works+0xa8d/0x18c0
  process_scheduled_works+0xb6e/0x18c0
  ? __pfx_process_scheduled_works+0x10/0x10
  ? assign_work+0x3d5/0x5e0
  worker_thread+0xa53/0xfc0
  kthread+0x388/0x470
  ? __pfx_worker_thread+0x10/0x10
  ? __pfx_kthread+0x10/0x10
  ret_from_fork+0x51e/0xb90
  ? __pfx_ret_from_fork+0x10/0x10
 veth1_macvtap: entered promiscuous mode
  ? __switch_to+0xc7d/0x1450
  ? __pfx_kthread+0x10/0x10
  ret_from_fork_asm+0x1a/0x30
  </TASK>
 Modules linked in:
 ---[ end trace 0000000000000000 ]---
 batman_adv: batadv0: Interface activated: batadv_slave_0
 batman_adv: batadv0: Interface activated: batadv_slave_1
 netdevsim netdevsim7 netdevsim0: set [1, 0] type 2 family 0 port 6081 - 0
 netdevsim netdevsim7 netdevsim1: set [1, 0] type 2 family 0 port 6081 - 0
 netdevsim netdevsim7 netdevsim2: set [1, 0] type 2 family 0 port 6081 - 0
 netdevsim netdevsim7 netdevsim3: set [1, 0] type 2 family 0 port 6081 - 0
 RIP: 0010:kasan_byte_accessible+0x12/0x30
 Code: 79 ff ff ff 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 40 d6 48 c1 ef 03 48 b8 00 00 00 00 00 fc ff df <0f> b6 04 07 3c 08 0f 92 c0 c3 cc cce
 ieee80211 phy39: Selected rate control algorithm 'minstrel_ht'
 RSP: 0018:ffffc90006e0f808 EFLAGS: 00010202
 RAX: dffffc0000000000 RBX: ffffffff89746018 RCX: 0000000080000001
 RDX: 0000000000000000 RSI: ffffffff89746018 RDI: 000000000000004c
 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
 R10: dffffc0000000000 R11: ffffffff8aae3e70 R12: 0000000000000000
 R13: 0000000000000260 R14: 0000000000000260 R15: 0000000000000001
 FS:  0000000000000000(0000) GS:ffff8880983c2000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f7e16139e9c CR3: 000000000e74e000 CR4: 0000000000752ef0
 PKRU: 55555554
 Kernel panic - not syncing: Fatal exception

Fixes: 54a59aa2b5 ("Bluetooth: Add l2cap_chan->ops->ready()")
Signed-off-by: Helen Koike <koike@igalia.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-19 14:44:04 -04:00
Anas Iqbal
31148a7be7 Bluetooth: hci_ll: Fix firmware leak on error path
Smatch reports:

drivers/bluetooth/hci_ll.c:587 download_firmware() warn:
'fw' from request_firmware() not released on lines: 544.

In download_firmware(), if request_firmware() succeeds but the returned
firmware content is invalid (no data or zero size), the function returns
without releasing the firmware, resulting in a resource leak.

Fix this by calling release_firmware() before returning when
request_firmware() succeeded but the firmware content is invalid.

Fixes: 371805522f ("bluetooth: hci_uart: add LL protocol serdev driver support")
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Anas Iqbal <mohd.abd.6602@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-19 14:43:43 -04:00
Cen Zhang
b6807cfc19 Bluetooth: hci_sync: annotate data-races around hdev->req_status
__hci_cmd_sync_sk() sets hdev->req_status under hdev->req_lock:

    hdev->req_status = HCI_REQ_PEND;

However, several other functions read or write hdev->req_status without
holding any lock:

  - hci_send_cmd_sync() reads req_status in hci_cmd_work (workqueue)
  - hci_cmd_sync_complete() reads/writes from HCI event completion
  - hci_cmd_sync_cancel() / hci_cmd_sync_cancel_sync() read/write
  - hci_abort_conn() reads in connection abort path

Since __hci_cmd_sync_sk() runs on hdev->req_workqueue while
hci_send_cmd_sync() runs on hdev->workqueue, these are different
workqueues that can execute concurrently on different CPUs. The plain
C accesses constitute a data race.

Add READ_ONCE()/WRITE_ONCE() annotations on all concurrent accesses
to hdev->req_status to prevent potential compiler optimizations that
could affect correctness (e.g., load fusing in the wait_event
condition or store reordering).

Signed-off-by: Cen Zhang <zzzccc427@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-19 14:43:20 -04:00
Luiz Augusto von Dentz
5f5fa4cd35 Bluetooth: MGMT: Fix dangling pointer on mgmt_add_adv_patterns_monitor_complete
This fixes the condition checking so mgmt_pending_valid is executed
whenever status != -ECANCELED otherwise calling mgmt_pending_free(cmd)
would kfree(cmd) without unlinking it from the list first, leaving a
dangling pointer. Any subsequent list traversal (e.g.,
mgmt_pending_foreach during __mgmt_power_off, or another
mgmt_pending_valid call) would dereference freed memory.

Link: https://lore.kernel.org/linux-bluetooth/20260315132013.75ab40c5@kernel.org/T/#m1418f9c82eeff8510c1beaa21cf53af20db96c06
Fixes: 302a1f674c ("Bluetooth: MGMT: Fix possible UAFs")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
2026-03-19 14:42:57 -04:00
Hyunwoo Kim
598dbba991 Bluetooth: SCO: Fix use-after-free in sco_recv_frame() due to missing sock_hold
sco_recv_frame() reads conn->sk under sco_conn_lock() but immediately
releases the lock without holding a reference to the socket. A concurrent
close() can free the socket between the lock release and the subsequent
sk->sk_state access, resulting in a use-after-free.

Other functions in the same file (sco_sock_timeout(), sco_conn_del())
correctly use sco_sock_hold() to safely hold a reference under the lock.

Fix by using sco_sock_hold() to take a reference before releasing the
lock, and adding sock_put() on all exit paths.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-19 14:42:35 -04:00
Hyunwoo Kim
c65bd945d1 Bluetooth: L2CAP: Validate PDU length before reading SDU length in l2cap_ecred_data_rcv()
l2cap_ecred_data_rcv() reads the SDU length field from skb->data using
get_unaligned_le16() without first verifying that skb contains at least
L2CAP_SDULEN_SIZE (2) bytes. When skb->len is less than 2, this reads
past the valid data in the skb.

The ERTM reassembly path correctly calls pskb_may_pull() before reading
the SDU length (l2cap_reassemble_sdu, L2CAP_SAR_START case). Apply the
same validation to the Enhanced Credit Based Flow Control data path.

Fixes: aac23bf636 ("Bluetooth: Implement LE L2CAP reassembly")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-19 14:42:12 -04:00
Minseo Park
9d87cb2219 Bluetooth: L2CAP: Fix stack-out-of-bounds read in l2cap_ecred_conn_req
Syzbot reported a KASAN stack-out-of-bounds read in l2cap_build_cmd()
that is triggered by a malformed Enhanced Credit Based Connection Request.

The vulnerability stems from l2cap_ecred_conn_req(). The function allocates
a local stack buffer (`pdu`) designed to hold a maximum of 5 Source Channel
IDs (SCIDs), totaling 18 bytes. When an attacker sends a request with more
than 5 SCIDs, the function calculates `rsp_len` based on this unvalidated
`cmd_len` before checking if the number of SCIDs exceeds
L2CAP_ECRED_MAX_CID.

If the SCID count is too high, the function correctly jumps to the
`response` label to reject the packet, but `rsp_len` retains the
attacker's oversized value. Consequently, l2cap_send_cmd() is instructed
to read past the end of the 18-byte `pdu` buffer, triggering a
KASAN panic.

Fix this by moving the assignment of `rsp_len` to after the `num_scid`
boundary check. If the packet is rejected, `rsp_len` will safely
remain 0, and the error response will only read the 8-byte base header
from the stack.

Fixes: c28d2bff70 ("Bluetooth: L2CAP: Fix result of L2CAP_ECRED_CONN_RSP when MTU is too short")
Reported-by: syzbot+b7f3e7d9a596bf6a63e3@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=b7f3e7d9a596bf6a63e3
Tested-by: syzbot+b7f3e7d9a596bf6a63e3@syzkaller.appspotmail.com
Signed-off-by: Minseo Park <jacob.park.9436@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-19 14:38:07 -04:00
Linus Torvalds
a1d9d8e833 Merge tag 'net-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
 "Including fixes from wireless, Bluetooth and netfilter.

  Nothing too exciting here, mostly fixes for corner cases.

  Current release - fix to a fix:

   - bonding: prevent potential infinite loop in bond_header_parse()

  Current release - new code bugs:

   - wifi: mac80211: check tdls flag in ieee80211_tdls_oper

  Previous releases - regressions:

   - af_unix: give up GC if MSG_PEEK intervened

   - netfilter: conntrack: add missing netlink policy validations

   - NFC: nxp-nci: allow GPIOs to sleep"

* tag 'net-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (78 commits)
  MPTCP: fix lock class name family in pm_nl_create_listen_socket
  icmp: fix NULL pointer dereference in icmp_tag_validation()
  net: dsa: bcm_sf2: fix missing clk_disable_unprepare() in error paths
  net: shaper: protect from late creation of hierarchy
  net: shaper: protect late read accesses to the hierarchy
  net: mvpp2: guard flow control update with global_tx_fc in buffer switching
  nfnetlink_osf: validate individual option lengths in fingerprints
  netfilter: nf_tables: release flowtable after rcu grace period on error
  netfilter: bpf: defer hook memory release until rcu readers are done
  net: bonding: fix NULL deref in bond_debug_rlb_hash_show
  udp_tunnel: fix NULL deref caused by udp_sock_create6 when CONFIG_IPV6=n
  net/mlx5e: Fix race condition during IPSec ESN update
  net/mlx5e: Prevent concurrent access to IPSec ASO context
  net/mlx5: qos: Restrict RTNL area to avoid a lock cycle
  ipv6: add NULL checks for idev in SRv6 paths
  NFC: nxp-nci: allow GPIOs to sleep
  net: macb: fix uninitialized rx_fs_lock
  net: macb: fix use-after-free access to PTP clock
  netdevsim: drop PSP ext ref on forward failure
  wifi: mac80211: always free skb on ieee80211_tx_prepare_skb() failure
  ...
2026-03-19 11:25:40 -07:00
Sanjay Yadav
65d046b2d8 drm/xe: Fix missing runtime PM reference in ccs_mode_store
ccs_mode_store() calls xe_gt_reset() which internally invokes
xe_pm_runtime_get_noresume(). That function requires the caller
to already hold an outer runtime PM reference and warns if none
is held:

  [46.891177] xe 0000:03:00.0: [drm] Missing outer runtime PM protection
  [46.891178] WARNING: drivers/gpu/drm/xe/xe_pm.c:885 at
  xe_pm_runtime_get_noresume+0x8b/0xc0

Fix this by protecting xe_gt_reset() with the scope-based
guard(xe_pm_runtime)(xe), which is the preferred form when
the reference lifetime matches a single scope.

v2:
- Use scope-based guard(xe_pm_runtime)(xe) (Shuicheng)
- Update commit message accordingly

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7593
Fixes: 480b358e7d ("drm/xe: Do not wake device during a GT reset")
Cc: <stable@vger.kernel.org> # v6.19+
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shuicheng Lin <shuicheng.lin@intel.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Reviewed-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patch.msgid.link/20260313071608.3459480-2-sanjay.kumar.yadav@intel.com
(cherry picked from commit 7937ea733f79b3f25e802a0c8360bf7423856f36)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 18:05:04 +01:00
Li Xiasong
7ab4a7c5d9 MPTCP: fix lock class name family in pm_nl_create_listen_socket
In mptcp_pm_nl_create_listen_socket(), use entry->addr.family
instead of sk->sk_family for lock class setup. The 'sk' parameter
is a netlink socket, not the MPTCP subflow socket being created.

Fixes: cee4034a3d ("mptcp: fix lockdep false positive in mptcp_pm_nl_create_listen_socket()")
Signed-off-by: Li Xiasong <lixiasong1@huawei.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260319112159.3118874-1-lixiasong1@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-19 09:37:48 -07:00
Weiming Shi
614aefe56a icmp: fix NULL pointer dereference in icmp_tag_validation()
icmp_tag_validation() unconditionally dereferences the result of
rcu_dereference(inet_protos[proto]) without checking for NULL.
The inet_protos[] array is sparse -- only about 15 of 256 protocol
numbers have registered handlers. When ip_no_pmtu_disc is set to 3
(hardened PMTU mode) and the kernel receives an ICMP Fragmentation
Needed error with a quoted inner IP header containing an unregistered
protocol number, the NULL dereference causes a kernel panic in
softirq context.

 Oops: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017]
 RIP: 0010:icmp_unreach (net/ipv4/icmp.c:1085 net/ipv4/icmp.c:1143)
 Call Trace:
  <IRQ>
  icmp_rcv (net/ipv4/icmp.c:1527)
  ip_protocol_deliver_rcu (net/ipv4/ip_input.c:207)
  ip_local_deliver_finish (net/ipv4/ip_input.c:242)
  ip_local_deliver (net/ipv4/ip_input.c:262)
  ip_rcv (net/ipv4/ip_input.c:573)
  __netif_receive_skb_one_core (net/core/dev.c:6164)
  process_backlog (net/core/dev.c:6628)
  handle_softirqs (kernel/softirq.c:561)
  </IRQ>

Add a NULL check before accessing icmp_strict_tag_validation. If the
protocol has no registered handler, return false since it cannot
perform strict tag validation.

Fixes: 8ed1dc44d3 ("ipv4: introduce hardened ip_no_pmtu_disc mode")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260318130558.1050247-4-bestswngs@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-19 09:27:36 -07:00
Anas Iqbal
b487318496 net: dsa: bcm_sf2: fix missing clk_disable_unprepare() in error paths
Smatch reports:
drivers/net/dsa/bcm_sf2.c:997 bcm_sf2_sw_resume() warn:
'priv->clk' from clk_prepare_enable() not released on lines: 983,990.

The clock enabled by clk_prepare_enable() in bcm_sf2_sw_resume()
is not released if bcm_sf2_sw_rst() or bcm_sf2_cfp_resume() fails.

Add the missing clk_disable_unprepare() calls in the error paths
to properly release the clock resource.

Fixes: e9ec5c3bd2 ("net: dsa: bcm_sf2: request and handle clocks")
Reviewed-by: Jonas Gorski <jonas.gorski@gmail.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Anas Iqbal <mohd.abd.6602@gmail.com>
Link: https://patch.msgid.link/20260318084212.1287-1-mohd.abd.6602@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-19 09:26:40 -07:00
Marc Kleine-Budde
cce598ffc6 Merge patch series "can: fix can-gw Out-of-Bounds Heap R/W and isotp UAF"
Marc Kleine-Budde <mkl@pengutronix.de> says:

This series is by Ali Norouzi and Oliver Hartkopp fixing a can-gw
Out-of-Bounds Heap R/W and can-isotp UAF.

Link: https://patch.msgid.link/20260319-fix-can-gw-and-can-isotp-v2-0-c45d52c6d2d8@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-19 17:16:03 +01:00
Oliver Hartkopp
424e95d621 can: isotp: fix tx.buf use-after-free in isotp_sendmsg()
isotp_sendmsg() uses only cmpxchg() on so->tx.state to serialize access
to so->tx.buf. isotp_release() waits for ISOTP_IDLE via
wait_event_interruptible() and then calls kfree(so->tx.buf).

If a signal interrupts the wait_event_interruptible() inside close()
while tx.state is ISOTP_SENDING, the loop exits early and release
proceeds to force ISOTP_SHUTDOWN and continues to kfree(so->tx.buf)
while sendmsg may still be reading so->tx.buf for the final CAN frame
in isotp_fill_dataframe().

The so->tx.buf can be allocated once when the standard tx.buf length needs
to be extended. Move the kfree() of this potentially extended tx.buf to
sk_destruct time when either isotp_sendmsg() and isotp_release() are done.

Fixes: 96d1c81e6a ("can: isotp: add module parameter for maximum pdu size")
Cc: stable@vger.kernel.org
Reported-by: Ali Norouzi <ali.norouzi@keysight.com>
Co-developed-by: Ali Norouzi <ali.norouzi@keysight.com>
Signed-off-by: Ali Norouzi <ali.norouzi@keysight.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://patch.msgid.link/20260319-fix-can-gw-and-can-isotp-v2-2-c45d52c6d2d8@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-19 17:16:02 +01:00
Ali Norouzi
b9c310d727 can: gw: fix OOB heap access in cgw_csum_crc8_rel()
cgw_csum_crc8_rel() correctly computes bounds-safe indices via calc_idx():

    int from = calc_idx(crc8->from_idx, cf->len);
    int to   = calc_idx(crc8->to_idx,   cf->len);
    int res  = calc_idx(crc8->result_idx, cf->len);

    if (from < 0 || to < 0 || res < 0)
        return;

However, the loop and the result write then use the raw s8 fields directly
instead of the computed variables:

    for (i = crc8->from_idx; ...)        /* BUG: raw negative index */
    cf->data[crc8->result_idx] = ...;    /* BUG: raw negative index */

With from_idx = to_idx = result_idx = -64 on a 64-byte CAN FD frame,
calc_idx(-64, 64) = 0 so the guard passes, but the loop iterates with
i = -64, reading cf->data[-64], and the write goes to cf->data[-64].
This write might end up to 56 (7.0-rc) or 40 (<= 6.19) bytes before the
start of the canfd_frame on the heap.

The companion function cgw_csum_xor_rel() uses `from`/`to`/`res`
correctly throughout; fix cgw_csum_crc8_rel() to match.

Confirmed with KASAN on linux-7.0-rc2:
  BUG: KASAN: slab-out-of-bounds in cgw_csum_crc8_rel+0x515/0x5b0
  Read of size 1 at addr ffff8880076619c8 by task poc_cgw_oob/62

To configure the can-gw crc8 checksums CAP_NET_ADMIN is needed.

Fixes: 456a8a646b ("can: gw: add support for CAN FD frames")
Cc: stable@vger.kernel.org
Reported-by: Ali Norouzi <ali.norouzi@keysight.com>
Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Ali Norouzi <ali.norouzi@keysight.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://patch.msgid.link/20260319-fix-can-gw-and-can-isotp-v2-1-c45d52c6d2d8@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-19 17:16:02 +01:00
Matthew Brost
01f2557aa6 drm/xe: Open-code GGTT MMIO access protection
GGTT MMIO access is currently protected by hotplug (drm_dev_enter),
which works correctly when the driver loads successfully and is later
unbound or unloaded. However, if driver load fails, this protection is
insufficient because drm_dev_unplug() is never called.

Additionally, devm release functions cannot guarantee that all BOs with
GGTT mappings are destroyed before the GGTT MMIO region is removed, as
some BOs may be freed asynchronously by worker threads.

To address this, introduce an open-coded flag, protected by the GGTT
lock, that guards GGTT MMIO access. The flag is cleared during the
dev_fini_ggtt devm release function to ensure MMIO access is disabled
once teardown begins.

Cc: stable@vger.kernel.org
Fixes: 919bb54e98 ("drm/xe: Fix missing runtime outer protection for ggtt_remove_node")
Reviewed-by: Zhanjun Dong <zhanjun.dong@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260310225039.1320161-8-zhanjun.dong@intel.com
(cherry picked from commit 4f3a998a173b4325c2efd90bdadc6ccd3ad9a431)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 17:13:54 +01:00
Linus Torvalds
e9825d1c79 Merge tag 'pm-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki:
 "These fix an idle loop issue exposed by recent changes and a race
  condition related to device removal in the runtime PM core code:

   - Consolidate the handling of two special cases in the idle loop that
     occur when only one CPU idle state is present (Rafael Wysocki)

   - Fix a race condition related to device removal in the runtime PM
     core code that may cause a stale device object pointer to be
     dereferenced (Bart Van Assche)"

* tag 'pm-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  PM: runtime: Fix a race condition related to device removal
  sched: idle: Consolidate the handling of two special cases
2026-03-19 08:45:34 -07:00
Amelie Delaunay
c8cfeb4b9d pinctrl: stm32: fix HDP driver dependency on GPIO_GENERIC
The HDP driver uses the generic GPIO chip API, but this configuration
may not be enabled.
Ensure it is enabled by selecting the appropriate option.

Fixes: 4bcff9c05b ("pinctrl: stm32: use new generic GPIO chip API")
Signed-off-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-03-19 16:44:45 +01:00
Linus Torvalds
d107dc8c9c Merge tag 'acpi-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI support fixes from Rafael Wysocki:
 "These fix an MFD child automatic modprobe issue introduced recently,
  an ACPI processor driver issue introduced by a previous fix and an
  ACPICA issue causing confusing messages regarding _DSM arguments to be
  printed:

   - Update the format of the last argument of _DSM to avoid printing
     confusing error messages in some cases (Saket Dumbre)

   - Fix MFD child automatic modprobe issue by removing a stale check
     from acpi_companion_match() (Pratap Nirujogi)

   - Prevent possible use-after-free in acpi_processor_errata_piix4()
     from occurring by rearranging the code to print debug messages
     while holding references to relevant device objects (Rafael
     Wysocki)"

* tag 'acpi-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  ACPI: bus: Fix MFD child automatic modprobe issue
  ACPI: processor: Fix previous acpi_processor_errata_piix4() fix
  ACPICA: Update the format of Arg3 of _DSM
2026-03-19 08:42:59 -07:00
Paolo Abeni
e7577a06ae Merge tag 'nf-26-03-19' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Florian Westphal says:

====================
netfilter: updates for net

The following patchset contains Netfilter fixes for *net*:

1) Fix UaF when netfilter bpf link goes away while nfnetlink dumps
   current hook list, we have to wait until rcu readers are gone.

2) Fix UaF when flowtable fails to register all devices, similar
   bug as 1). From Pablo Neira Ayuso.

3) nfnetlink_osf fails to properly validate option length fields.
   From Weiming Shi.

netfilter pull request nf-26-03-19

* tag 'nf-26-03-19' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  nfnetlink_osf: validate individual option lengths in fingerprints
  netfilter: nf_tables: release flowtable after rcu grace period on error
  netfilter: bpf: defer hook memory release until rcu readers are done
====================

Link: https://patch.msgid.link/20260319093834.19933-1-fw@strlen.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-19 15:39:33 +01:00
Rafael J. Wysocki
5cbcd6c074 Merge branches 'acpica' and 'acpi-bus'
Merge an ACPICA fix and a core ACPI support code fix for 7.0-rc5:

 - Update the format of the last argument of _DSM to avoid printing
   confusing error messages in some cases (Saket Dumbre)

 - Fix MFD child automatic modprobe issue by removing a stale check
   from acpi_companion_match() (Pratap Nirujogi)

* acpica:
  ACPICA: Update the format of Arg3 of _DSM

* acpi-bus:
  ACPI: bus: Fix MFD child automatic modprobe issue
2026-03-19 14:57:06 +01:00
Oliver Hartkopp
46eee1661a can: statistics: add missing atomic access in hot path
Commit 80b5f90158 ("can: statistics: use atomic access in hot path")
fixed a KCSAN issue in can_receive() but missed to convert the 'matches'
variable used in can_rcv_filter().

Fixes: 80b5f90158 ("can: statistics: use atomic access in hot path")
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://patch.msgid.link/20260318173413.28235-1-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-19 14:53:19 +01:00
Rafael J. Wysocki
9633370653 Merge branch 'pm-runtime'
Merge a fix for a race condition related to device removal (Bart Van
Assche) for 7.0-rc5.

* pm-runtime:
  PM: runtime: Fix a race condition related to device removal
2026-03-19 14:49:44 +01:00
Wenyuan Li
7a57354756 can: mcp251x: add error handling for power enable in open and resume
Add missing error handling for mcp251x_power_enable() calls in both
mcp251x_open() and mcp251x_can_resume() functions.

In mcp251x_open(), if power enable fails, jump to error path to close
candev without attempting to disable power again.

In mcp251x_can_resume(), properly check return values of power enable calls
for both power and transceiver regulators. If any fails, return the error
code to the PM framework and log the failure.

This ensures the driver properly handles power control failures and
maintains correct device state.

Signed-off-by: Wenyuan Li <2063309626@qq.com>
Link: https://patch.msgid.link/tencent_F3EFC5D7738AC548857B91657715E2D3AA06@qq.com
[mkl: fix patch description]
[mkl: mcp251x_can_resume(): replace goto by return]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-19 14:25:04 +01:00
Marc Kleine-Budde
cadf601923 can: netlink: can_changelink(): add missing error handling to call can_ctrlmode_changelink()
In commit e1a5cd9d66 ("can: netlink: add can_ctrlmode_changelink()") the
CAN Control Mode (IFLA_CAN_CTRLMODE) handling was factored out into the
can_ctrlmode_changelink() function. But the call to
can_ctrlmode_changelink() is missing the error handling.

Add the missing error handling and propagation to the call
can_ctrlmode_changelink().

Cc: stable@vger.kernel.org
Fixes: e1a5cd9d66 ("can: netlink: add can_ctrlmode_changelink()")
Link: https://patch.msgid.link/20260310-can_ctrlmode_changelink-add-error-handling-v1-1-0daf63d85922@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-19 14:25:04 +01:00
Umesh Nerlige Ramappa
e6e3ea52bf drm/xe/lrc: Fix uninitialized new_ts when capturing context timestamp
Getting engine specific CTX TIMESTAMP register can fail. In that case,
if the context is active, new_ts is uninitialized. Fix that case by
initializing new_ts to the last value that was sampled in SW -
lrc->ctx_timestamp.

Flagged by static analysis.

v2: Fix new_ts initialization (Ashutosh)

Fixes: bb63e7257e ("drm/xe: Avoid toggling schedule state to check LRC timestamp in TDR")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patch.msgid.link/20260312125308.3126607-2-umesh.nerlige.ramappa@intel.com
(cherry picked from commit 466e75d48038af252187855058a7a9312db9d2f8)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:23:04 +01:00
Ashutosh Dixit
9be6fd9fbd drm/xe/oa: Allow reading after disabling OA stream
Some OA data might be present in the OA buffer when OA stream is
disabled. Allow UMD's to retrieve this data, so that all data till the
point when OA stream is disabled can be retrieved.

v2: Update tail pointer after disable (Umesh)

Fixes: efb315d0a0 ("drm/xe/oa/uapi: Read file_operation")
Cc: stable@vger.kernel.org
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa<umesh.nerlige.ramappa@intel.com>
Link: https://patch.msgid.link/20260313053630.3176100-1-ashutosh.dixit@intel.com
(cherry picked from commit 4ff57c5e8dbba23b5457be12f9709d5c016da16e)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:22:58 +01:00
Brian Nguyen
38b8dcde23 drm/xe: Skip over non leaf pte for PRL generation
The check using xe_child->base.children was insufficient in determining
if a pte was a leaf node. So explicitly skip over every non-leaf pt and
conditionally abort if there is a scenario where a non-leaf pt is
interleaved between leaf pt, which results in the page walker skipping
over some leaf pt.

Note that the behavior being targeted for abort is
PD[0] = 2M PTE
PD[1] = PT -> 512 4K PTEs
PD[2] = 2M PTE

results in abort, page walker won't descend PD[1].

With new abort, ensuring valid PRL before handling a second abort.

v2:
 - Revert to previous assert.
 - Revised non-leaf handling for interleaf child pt and leaf pte.
 - Update comments to specifications. (Stuart)
 - Remove unnecessary XE_PTE_PS64. (Matthew B)

v3:
 - Modify secondary abort to only check non-leaf PTEs. (Matthew B)

Fixes: b912138df2 ("drm/xe: Create page reclaim list on unbind")
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Cc: Stuart Summers <stuart.summers@intel.com>
Link: https://patch.msgid.link/20260305171546.67691-6-brian3.nguyen@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 1d123587525db86cc8f0d2beb35d9e33ca3ade83)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:22:53 +01:00
Zhanjun Dong
7838dd8367 drm/xe/guc: Ensure CT state transitions via STOP before DISABLED
The GuC CT state transition requires moving to the STOP state before
entering the DISABLED state. Update the driver teardown sequence to make
the proper state machine transitions.

Fixes: ee4b32220a ("drm/xe/guc: Add devm release action to safely tear down CT")
Cc: stable@vger.kernel.org
Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260310225039.1320161-6-zhanjun.dong@intel.com
(cherry picked from commit dace8cb0032f57ea67c87b3b92ad73c89dd2db44)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:22:39 +01:00
Matthew Brost
e0f82655df drm/xe: Trigger queue cleanup if not in wedged mode 2
The intent of wedging a device is to allow queues to continue running
only in wedged mode 2. In other modes, queues should initiate cleanup
and signal all remaining fences. Fix xe_guc_submit_wedge to correctly
clean up queues when wedge mode != 2.

Fixes: 7dbe8af13c ("drm/xe: Wedge the entire device")
Cc: stable@vger.kernel.org
Reviewed-by: Zhanjun Dong <zhanjun.dong@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260310225039.1320161-4-zhanjun.dong@intel.com
(cherry picked from commit e25ba41c8227c5393c16e4aab398076014bd345f)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:22:33 +01:00
Matthew Brost
fb3738693c drm/xe: Forcefully tear down exec queues in GuC submit fini
In GuC submit fini, forcefully tear down any exec queues by disabling
CTs, stopping the scheduler (which cleans up lost G2H), killing all
remaining queues, and resuming scheduling to allow any remaining cleanup
actions to complete and signal any remaining fences.

Split guc_submit_fini into device related and software only part. Using
device-managed and drm-managed action guarantees the correct ordering of
cleanup.

Fixes: dd08ebf6c3 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: stable@vger.kernel.org
Reviewed-by: Zhanjun Dong <zhanjun.dong@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260310225039.1320161-3-zhanjun.dong@intel.com
(cherry picked from commit a6ab444a111a59924bd9d0c1e0613a75a0a40b89)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:22:28 +01:00
Matthew Brost
26c638d560 drm/xe: Always kill exec queues in xe_guc_submit_pause_abort
xe_guc_submit_pause_abort is intended to be called after something
disastrous occurs (e.g., VF migration fails, device wedging, or driver
unload) and should immediately trigger the teardown of remaining
submission state. With that, kill any remaining queues in this function.

Fixes: 7c4b7e34c8 ("drm/xe/vf: Abort VF post migration recovery on failure")
Cc: stable@vger.kernel.org
Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260310225039.1320161-2-zhanjun.dong@intel.com
(cherry picked from commit 78f3bf00be4f15daead02ba32d4737129419c902)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:22:22 +01:00
Daniele Ceraolo Spurio
9b72283ec9 drm/xe/guc: Fail immediately on GuC load error
By using the same variable for both the return of poll_timeout_us and
the return of the polled function guc_wait_ucode, the return value of
the latter is overwritten and lost after exiting the polling loop. Since
guc_wait_ucode returns -1 on GuC load failure, we lose that information
and always continue as if the GuC had been loaded correctly.

This is fixed by simply using 2 separate variables.

Fixes: a4916b4da4 ("drm/xe/guc: Refactor GuC load to use poll_timeout_us()")
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Link: https://patch.msgid.link/20260303001732.2540493-2-daniele.ceraolospurio@intel.com
(cherry picked from commit c85ec5c5753a46b5c2aea1292536487be9470ffe)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2026-03-19 14:22:17 +01:00
Baojun Xu
a437601a0a ASoC: tas2781: Add null check for calibration data
For avoid null pointer problem if no calibration data exist.

Fixes: 55137f5a68 ("ASoC: tas2781: Put three different calibrated data solution into the same data structure")

Signed-off-by: Baojun Xu <baojun.xu@ti.com>
Link: https://patch.msgid.link/20260319090747.2090-1-baojun.xu@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-19 12:52:55 +00:00
Jakub Kicinski
d75ec7e8ba net: shaper: protect from late creation of hierarchy
We look up a netdev during prep of Netlink ops (pre- callbacks)
and take a ref to it. Then later in the body of the callback
we take its lock or RCU which are the actual protections.

The netdev may get unregistered in between the time we take
the ref and the time we lock it. We may allocate the hierarchy
after flush has already run, which would lead to a leak.

Take the instance lock in pre- already, this saves us from the race
and removes the need for dedicated lock/unlock callbacks completely.
After all, if there's any chance of write happening concurrently
with the flush - we're back to leaking the hierarchy.

We may take the lock for devices which don't support shapers but
we're only dealing with SET operations here, not taking the lock
would be optimizing for an error case.

Fixes: 93954b40f6 ("net-shapers: implement NL set and delete operations")
Link: https://lore.kernel.org/20260309173450.538026-1-p@1g4.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20260317161014.779569-2-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-19 13:47:15 +01:00
Jakub Kicinski
0f9ea7141f net: shaper: protect late read accesses to the hierarchy
We look up a netdev during prep of Netlink ops (pre- callbacks)
and take a ref to it. Then later in the body of the callback
we take its lock or RCU which are the actual protections.

This is not proper, a conversion from a ref to a locked netdev
must include a liveness check (a check if the netdev hasn't been
unregistered already). Fix the read cases (those under RCU).
Writes needs a separate change to protect from creating the
hierarchy after flush has already run.

Fixes: 4b623f9f0f ("net-shapers: implement NL get operation")
Reported-by: Paul Moses <p@1g4.org>
Link: https://lore.kernel.org/20260309173450.538026-1-p@1g4.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20260317161014.779569-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-19 13:47:15 +01:00
Suzuki K Poulose
8c6e9b60f5 arm64: realm: Fix PTE_NS_SHARED for 52bit PA support
With LPA/LPA2, the top bits of the PFN (Bits[51:48]) end up in the lower bits
of the PTE. So, simply creating a mask of the "top IPA bit" doesn't work well
for these configurations to set the "top" bit at the output of Stage1
translation.

Fix this by using the __phys_to_pte_val() to do the right thing for all
configurations.

Tested using, kvmtool, placing the memory at a higher address (-m <size>@<Addr>).

 e.g:
 # lkvm run --realm -c 4 -m 512M@@128T -k Image --console serial

 sh-5.0# dmesg | grep "LPA2\|RSI"
[    0.000000] RME: Using RSI version 1.0
[    0.000000] CPU features: detected: 52-bit Virtual Addressing (LPA2)
[    0.777354] CPU features: detected: 52-bit Virtual Addressing for KVM (LPA2)

Fixes: 3993069549 ("arm64: realm: Query IPA size from the RMM")
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steven Price <steven.price@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-19 12:46:05 +00:00
David Howells
7e57523490 netfs: Fix read abandonment during retry
Under certain circumstances, all the remaining subrequests from a read
request will get abandoned during retry.  The abandonment process expects
the 'subreq' variable to be set to the place to start abandonment from, but
it doesn't always have a useful value (it will be uninitialised on the
first pass through the loop and it may point to a deleted subrequest on
later passes).

Fix the first jump to "abandon:" to set subreq to the start of the first
subrequest expected to need retry (which, in this abandonment case, turned
out unexpectedly to no longer have NEED_RETRY set).

Also clear the subreq pointer after discarding superfluous retryable
subrequests to cause an oops if we do try to access it.

Fixes: ee4cdf7ba8 ("netfs: Speed up buffered reading")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/3775287.1773848338@warthog.procyon.org.uk
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
cc: Paulo Alcantara <pc@manguebit.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-19 11:20:21 +01:00
Jori Koolstra
8fb6857f2f vfs: fix docstring of hash_name()
The docstring of hash_name() is falsely reporting that it returns the
component length, whereas it returns a pointer to the terminating '/'
or NUL character in the pathname being resolved.

Signed-off-by: Jori Koolstra <jkoolstra@xs4all.nl>
Link: https://patch.msgid.link/20260318203953.5770-1-jkoolstra@xs4all.nl
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-19 11:18:01 +01:00
Muhammad Hammad Ijaz
8a63baadf0 net: mvpp2: guard flow control update with global_tx_fc in buffer switching
mvpp2_bm_switch_buffers() unconditionally calls
mvpp2_bm_pool_update_priv_fc() when switching between per-cpu and
shared buffer pool modes. This function programs CM3 flow control
registers via mvpp2_cm3_read()/mvpp2_cm3_write(), which dereference
priv->cm3_base without any NULL check.

When the CM3 SRAM resource is not present in the device tree (the
third reg entry added by commit 60523583b0 ("dts: marvell: add CM3
SRAM memory to cp11x ethernet device tree")), priv->cm3_base remains
NULL and priv->global_tx_fc is false. Any operation that triggers
mvpp2_bm_switch_buffers(), for example an MTU change that crosses
the jumbo frame threshold, will crash:

  Unable to handle kernel NULL pointer dereference at
  virtual address 0000000000000000
  Mem abort info:
    ESR = 0x0000000096000006
    EC = 0x25: DABT (current EL), IL = 32 bits
  pc : readl+0x0/0x18
  lr : mvpp2_cm3_read.isra.0+0x14/0x20
  Call trace:
   readl+0x0/0x18
   mvpp2_bm_pool_update_fc+0x40/0x12c
   mvpp2_bm_pool_update_priv_fc+0x94/0xd8
   mvpp2_bm_switch_buffers.isra.0+0x80/0x1c0
   mvpp2_change_mtu+0x140/0x380
   __dev_set_mtu+0x1c/0x38
   dev_set_mtu_ext+0x78/0x118
   dev_set_mtu+0x48/0xa8
   dev_ifsioc+0x21c/0x43c
   dev_ioctl+0x2d8/0x42c
   sock_ioctl+0x314/0x378

Every other flow control call site in the driver already guards
hardware access with either priv->global_tx_fc or port->tx_fc.
mvpp2_bm_switch_buffers() is the only place that omits this check.

Add the missing priv->global_tx_fc guard to both the disable and
re-enable calls in mvpp2_bm_switch_buffers(), consistent with the
rest of the driver.

Fixes: 3a616b92a9 ("net: mvpp2: Add TX flow control support for jumbo frames")
Signed-off-by: Muhammad Hammad Ijaz <mhijaz@amazon.com>
Reviewed-by: Gunnar Kudrjavets <gunnarku@amazon.com>
Link: https://patch.msgid.link/20260316193157.65748-1-mhijaz@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-19 10:31:19 +01:00
Weiming Shi
dbdfaae960 nfnetlink_osf: validate individual option lengths in fingerprints
nfnl_osf_add_callback() validates opt_num bounds and string
NUL-termination but does not check individual option length fields.
A zero-length option causes nf_osf_match_one() to enter the option
matching loop even when foptsize sums to zero, which matches packets
with no TCP options where ctx->optp is NULL:

 Oops: general protection fault
 KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
 RIP: 0010:nf_osf_match_one (net/netfilter/nfnetlink_osf.c:98)
 Call Trace:
  nf_osf_match (net/netfilter/nfnetlink_osf.c:227)
  xt_osf_match_packet (net/netfilter/xt_osf.c:32)
  ipt_do_table (net/ipv4/netfilter/ip_tables.c:293)
  nf_hook_slow (net/netfilter/core.c:623)
  ip_local_deliver (net/ipv4/ip_input.c:262)
  ip_rcv (net/ipv4/ip_input.c:573)

Additionally, an MSS option (kind=2) with length < 4 causes
out-of-bounds reads when nf_osf_match_one() unconditionally accesses
optp[2] and optp[3] for MSS value extraction.  While RFC 9293
section 3.2 specifies that the MSS option is always exactly 4
bytes (Kind=2, Length=4), the check uses "< 4" rather than
"!= 4" because lengths greater than 4 do not cause memory
safety issues -- the buffer is guaranteed to be at least
foptsize bytes by the ctx->optsize == foptsize check.

Reject fingerprints where any option has zero length, or where an MSS
option has length less than 4, at add time rather than trusting these
values in the packet matching hot path.

Fixes: 11eeef41d5 ("netfilter: passive OS fingerprint xtables match")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-19 10:27:07 +01:00
Pablo Neira Ayuso
d73f4b53aa netfilter: nf_tables: release flowtable after rcu grace period on error
Call synchronize_rcu() after unregistering the hooks from error path,
since a hook that already refers to this flowtable can be already
registered, exposing this flowtable to packet path and nfnetlink_hook
control plane.

This error path is rare, it should only happen by reaching the maximum
number hooks or by failing to set up to hardware offload, just call
synchronize_rcu().

There is a check for already used device hooks by different flowtable
that could result in EEXIST at this late stage. The hook parser can be
updated to perform this check earlier to this error path really becomes
rarely exercised.

Uncovered by KASAN reported as use-after-free from nfnetlink_hook path
when dumping hooks.

Fixes: 3b49e2e94e ("netfilter: nf_tables: add flow table netlink frontend")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-19 10:26:31 +01:00
Florian Westphal
24f90fa399 netfilter: bpf: defer hook memory release until rcu readers are done
Yiming Qian reports UaF when concurrent process is dumping hooks via
nfnetlink_hooks:

BUG: KASAN: slab-use-after-free in nfnl_hook_dump_one.isra.0+0xe71/0x10f0
Read of size 8 at addr ffff888003edbf88 by task poc/79
Call Trace:
 <TASK>
 nfnl_hook_dump_one.isra.0+0xe71/0x10f0
 netlink_dump+0x554/0x12b0
 nfnl_hook_get+0x176/0x230
 [..]

Defer release until after concurrent readers have completed.

Reported-by: Yiming Qian <yimingqian591@gmail.com>
Fixes: 84601d6ee6 ("bpf: add bpf_link support for BPF_NETFILTER programs")
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-19 10:26:31 +01:00
Jakub Kicinski
7c46bd845d Merge tag 'wireless-2026-03-18' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:

====================
Just a few updates:
 - cfg80211:
   - guarantee pmsr work is cancelled
 - mac80211:
   - reject TDLS operations on non-TDLS stations
   - fix crash in AP_VLAN bandwidth change
   - fix leak or double-free on some TX preparation
     failures
   - remove keys needed for beacons _after_ stopping
     those
   - fix debugfs static branch race
   - avoid underflow in inactive time
   - fix another NULL dereference in mesh on invalid
     frames
 - ti/wlcore: avoid infinite realloc loop

* tag 'wireless-2026-03-18' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
  wifi: mac80211: always free skb on ieee80211_tx_prepare_skb() failure
  wifi: wlcore: Return -ENOMEM instead of -EAGAIN if there is not enough headroom
  wifi: mac80211: fix NULL deref in mesh_matches_local()
  wifi: mac80211: check tdls flag in ieee80211_tdls_oper
  wifi: cfg80211: cancel pmsr_free_wk in cfg80211_pmsr_wdev_down
  wifi: mac80211: Fix static_branch_dec() underflow for aql_disable.
  mac80211: fix crash in ieee80211_chan_bw_change for AP_VLAN stations
  wifi: mac80211: use jiffies_delta_to_msecs() for sta_info inactive times
  wifi: mac80211: remove keys after disabling beaconing
  wifi: mac80211_hwsim: fully initialise PMSR capabilities
====================

Link: https://patch.msgid.link/20260318172515.381148-3-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 19:25:41 -07:00
Xiang Mei
605b52497b net: bonding: fix NULL deref in bond_debug_rlb_hash_show
rlb_clear_slave intentionally keeps RLB hash-table entries on
the rx_hashtbl_used_head list with slave set to NULL when no
replacement slave is available. However, bond_debug_rlb_hash_show
visites client_info->slave without checking if it's NULL.

Other used-list iterators in bond_alb.c already handle this NULL-slave
state safely:

- rlb_update_client returns early on !client_info->slave
- rlb_req_update_slave_clients, rlb_clear_slave, and rlb_rebalance
compare slave values before visiting
- lb_req_update_subnet_clients continues if slave is NULL

The following NULL deref crash can be trigger in
bond_debug_rlb_hash_show:

[    1.289791] BUG: kernel NULL pointer dereference, address: 0000000000000000
[    1.292058] RIP: 0010:bond_debug_rlb_hash_show (drivers/net/bonding/bond_debugfs.c:41)
[    1.293101] RSP: 0018:ffffc900004a7d00 EFLAGS: 00010286
[    1.293333] RAX: 0000000000000000 RBX: ffff888102b48200 RCX: ffff888102b48204
[    1.293631] RDX: ffff888102b48200 RSI: ffffffff839daad5 RDI: ffff888102815078
[    1.293924] RBP: ffff888102815078 R08: ffff888102b4820e R09: 0000000000000000
[    1.294267] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888100f929c0
[    1.294564] R13: ffff888100f92a00 R14: 0000000000000001 R15: ffffc900004a7ed8
[    1.294864] FS:  0000000001395380(0000) GS:ffff888196e75000(0000) knlGS:0000000000000000
[    1.295239] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    1.295480] CR2: 0000000000000000 CR3: 0000000102adc004 CR4: 0000000000772ef0
[    1.295897] Call Trace:
[    1.296134]  seq_read_iter (fs/seq_file.c:231)
[    1.296341]  seq_read (fs/seq_file.c:164)
[    1.296493]  full_proxy_read (fs/debugfs/file.c:378 (discriminator 1))
[    1.296658]  vfs_read (fs/read_write.c:572)
[    1.296981]  ksys_read (fs/read_write.c:717)
[    1.297132]  do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1))
[    1.297325]  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)

Add a NULL check and print "(none)" for entries with no assigned slave.

Fixes: caafa84251 ("bonding: add the debugfs interface to see RLB hash table")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Link: https://patch.msgid.link/20260317005034.1888794-1-xmei5@asu.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 18:05:40 -07:00
Xiang Mei
b3a6df291f udp_tunnel: fix NULL deref caused by udp_sock_create6 when CONFIG_IPV6=n
When CONFIG_IPV6 is disabled, the udp_sock_create6() function returns 0
(success) without actually creating a socket. Callers such as
fou_create() then proceed to dereference the uninitialized socket
pointer, resulting in a NULL pointer dereference.

The captured NULL deref crash:
  BUG: kernel NULL pointer dereference, address: 0000000000000018
  RIP: 0010:fou_nl_add_doit (net/ipv4/fou_core.c:590 net/ipv4/fou_core.c:764)
  [...]
  Call Trace:
    <TASK>
    genl_family_rcv_msg_doit.constprop.0 (net/netlink/genetlink.c:1114)
    genl_rcv_msg (net/netlink/genetlink.c:1194 net/netlink/genetlink.c:1209)
    [...]
    netlink_rcv_skb (net/netlink/af_netlink.c:2550)
    genl_rcv (net/netlink/genetlink.c:1219)
    netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
    netlink_sendmsg (net/netlink/af_netlink.c:1894)
    __sock_sendmsg (net/socket.c:727 (discriminator 1) net/socket.c:742 (discriminator 1))
    __sys_sendto (./include/linux/file.h:62 (discriminator 1) ./include/linux/file.h:83 (discriminator 1) net/socket.c:2183 (discriminator 1))
    __x64_sys_sendto (net/socket.c:2213 (discriminator 1) net/socket.c:2209 (discriminator 1) net/socket.c:2209 (discriminator 1))
    do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1))
    entry_SYSCALL_64_after_hwframe (net/arch/x86/entry/entry_64.S:130)

This patch makes udp_sock_create6 return -EPFNOSUPPORT instead, so
callers correctly take their error paths. There is only one caller of
the vulnerable function and only privileged users can trigger it.

Fixes: fd384412e1 ("udp_tunnel: Seperate ipv6 functions into its own file.")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Link: https://patch.msgid.link/20260317010241.1893893-1-xmei5@asu.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 18:00:07 -07:00
Jakub Kicinski
6d43a9f6a1 Merge branch 'mlx5-misc-fixes-2026-03-16'
Tariq Toukan says:

====================
mlx5 misc fixes 2026-03-16

This patchset provides misc bug fixes from the team to the mlx5
core and Eth drivers.
====================

Link: https://patch.msgid.link/20260316094603.6999-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:54:59 -07:00
Jianbo Liu
beb6e2e597 net/mlx5e: Fix race condition during IPSec ESN update
In IPSec full offload mode, the device reports an ESN (Extended
Sequence Number) wrap event to the driver. The driver validates this
event by querying the IPSec ASO and checking that the esn_event_arm
field is 0x0, which indicates an event has occurred. After handling
the event, the driver must re-arm the context by setting esn_event_arm
back to 0x1.

A race condition exists in this handling path. After validating the
event, the driver calls mlx5_accel_esp_modify_xfrm() to update the
kernel's xfrm state. This function temporarily releases and
re-acquires the xfrm state lock.

So, need to acknowledge the event first by setting esn_event_arm to
0x1. This prevents the driver from reprocessing the same ESN update if
the hardware sends events for other reason. Since the next ESN update
only occurs after nearly 2^31 packets are received, there's no risk of
missing an update, as it will happen long after this handling has
finished.

Processing the event twice causes the ESN high-order bits (esn_msb) to
be incremented incorrectly. The driver then programs the hardware with
this invalid ESN state, which leads to anti-replay failures and a
complete halt of IPSec traffic.

Fix this by re-arming the ESN event immediately after it is validated,
before calling mlx5_accel_esp_modify_xfrm(). This ensures that any
spurious, duplicate events are correctly ignored, closing the race
window.

Fixes: fef0667893 ("net/mlx5e: Fix ESN update kernel panic")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260316094603.6999-4-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:54:53 -07:00
Jianbo Liu
99b36850d8 net/mlx5e: Prevent concurrent access to IPSec ASO context
The query or updating IPSec offload object is through Access ASO WQE.
The driver uses a single mlx5e_ipsec_aso struct for each PF, which
contains a shared DMA-mapped context for all ASO operations.

A race condition exists because the ASO spinlock is released before
the hardware has finished processing WQE. If a second operation is
initiated immediately after, it overwrites the shared context in the
DMA area.

When the first operation's completion is processed later, it reads
this corrupted context, leading to unexpected behavior and incorrect
results.

This commit fixes the race by introducing a private context within
each IPSec offload object. The shared ASO context is now copied to
this private context while the ASO spinlock is held. Subsequent
processing uses this saved, per-object context, ensuring its integrity
is maintained.

Fixes: 1ed78fc033 ("net/mlx5e: Update IPsec soft and hard limits")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260316094603.6999-3-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:54:53 -07:00
Cosmin Ratiu
b7e3a5d9c0 net/mlx5: qos: Restrict RTNL area to avoid a lock cycle
A lock dependency cycle exists where:
1. mlx5_ib_roce_init -> mlx5_core_uplink_netdev_event_replay ->
mlx5_blocking_notifier_call_chain (takes notifier_rwsem) ->
mlx5e_mdev_notifier_event -> mlx5_netdev_notifier_register ->
register_netdevice_notifier_dev_net (takes rtnl)
=> notifier_rwsem -> rtnl

2. mlx5e_probe -> _mlx5e_probe ->
mlx5_core_uplink_netdev_set (takes uplink_netdev_lock) ->
mlx5_blocking_notifier_call_chain (takes notifier_rwsem)
=> uplink_netdev_lock -> notifier_rwsem

3: devlink_nl_rate_set_doit -> devlink_nl_rate_set ->
mlx5_esw_devlink_rate_leaf_tx_max_set -> esw_qos_devlink_rate_to_mbps ->
mlx5_esw_qos_max_link_speed_get (takes rtnl) ->
mlx5_esw_qos_lag_link_speed_get_locked ->
mlx5_uplink_netdev_get (takes uplink_netdev_lock)
=> rtnl -> uplink_netdev_lock
=> BOOM! (lock cycle)

Fix that by restricting the rtnl-protected section to just the necessary
part, the call to netdev_master_upper_dev_get and speed querying, so
that the last lock dependency is avoided and the cycle doesn't close.
This is safe because mlx5_uplink_netdev_get uses netdev_hold to keep the
uplink netdev alive while its master device is queried.

Use this opportunity to rename the ambiguously-named "hold_rtnl_lock"
argument to "take_rtnl" and remove the "_locked" suffix from
mlx5_esw_qos_lag_link_speed_get_locked.

Fixes: 6b4be64fd9 ("net/mlx5e: Harden uplink netdev access against device unbind")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260316094603.6999-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:54:53 -07:00
Jakub Kicinski
d0f9eca219 Merge tag 'batadv-net-pullrequest-20260317' of https://git.open-mesh.org/linux-merge
Simon Wunderlich says:

====================
Here is a batman-adv bugfix:

- avoid OGM aggregation when skb tailroom is insufficient, by Yang Yang

* tag 'batadv-net-pullrequest-20260317' of https://git.open-mesh.org/linux-merge:
  batman-adv: avoid OGM aggregation when skb tailroom is insufficient
====================

Link: https://patch.msgid.link/20260317160002.1869478-1-sw@simonwunderlich.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:41:00 -07:00
Jakub Kicinski
cf2ce96c71 Merge branch '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2026-03-17 (igc, iavf, libie)

Kohei Enju adds use of helper function to add missing update of
skb->tail when padding is needed for igc.

Zdenek Bouska clears stale XSK timestamps when taking down Tx rings on
igc.

Petr Oros changes handling of iavf VLAN filter handling when an added
VLAN is also on the delete list to which can race and cause the VLAN
filter to not be added.

Michal frees cmd_buf for libie firmware logging to stop memory leaks.

* '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  libie: prevent memleak in fwlog code
  iavf: fix VLAN filter lost on add/delete race
  igc: fix page fault in XDP TX timestamps handling
  igc: fix missing update of skb->tail in igc_xmit_frame()
====================

Link: https://patch.msgid.link/20260317211906.115505-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:38:15 -07:00
Kumar Kartikeya Dwivedi
146bd2a87a bpf: Release module BTF IDR before module unload
Gregory reported in [0] that the global_map_resize test when run in
repeatedly ends up failing during program load. This stems from the fact
that BTF reference has not dropped to zero after the previous run's
module is unloaded, and the older module's BTF is still discoverable and
visible. Later, in libbpf, load_module_btfs() will find the ID for this
stale BTF, open its fd, and then it will be used during program load
where later steps taking module reference using btf_try_get_module()
fail since the underlying module for the BTF is gone.

Logically, once a module is unloaded, it's associated BTF artifacts
should become hidden. The BTF object inside the kernel may still remain
alive as long its reference counts are alive, but it should no longer be
discoverable.

To fix this, let us call btf_free_id() from the MODULE_STATE_GOING case
for the module unload to free the BTF associated IDR entry, and disable
its discovery once module unload returns to user space. If a race
happens during unload, the outcome is non-deterministic anyway. However,
user space should be able to rely on the guarantee that once it has
synchronously established a successful module unload, no more stale
artifacts associated with this module can be obtained subsequently.

Note that we must be careful to not invoke btf_free_id() in btf_put()
when btf_is_module() is true now. There could be a window where the
module unload drops a non-terminal reference, frees the IDR, but the
same ID gets reused and the second unconditional btf_free_id() ends up
releasing an unrelated entry.

To avoid a special case for btf_is_module() case, set btf->id to zero to
make btf_free_id() idempotent, such that we can unconditionally invoke it
from btf_put(), and also from the MODULE_STATE_GOING case. Since zero is
an invalid IDR, the idr_remove() should be a noop.

Note that we can be sure that by the time we reach final btf_put() for
btf_is_module() case, the btf_free_id() is already done, since the
module itself holds the BTF reference, and it will call this function
for the BTF before dropping its own reference.

  [0]: https://lore.kernel.org/bpf/cover.1773170190.git.grbell@redhat.com

Fixes: 36e68442d1 ("bpf: Load and verify kernel module BTFs")
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Reported-by: Gregory Bell <grbell@redhat.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260312205307.1346991-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-18 17:26:40 -07:00
Minhong He
0641379352 ipv6: add NULL checks for idev in SRv6 paths
__in6_dev_get() can return NULL when the device has no IPv6 configuration
(e.g. MTU < IPV6_MIN_MTU or after NETDEV_UNREGISTER).

Add NULL checks for idev returned by __in6_dev_get() in both
seg6_hmac_validate_skb() and ipv6_srh_rcv() to prevent potential NULL
pointer dereferences.

Fixes: 1ababeba4a ("ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header)")
Fixes: bf355b8d2c ("ipv6: sr: add core files for SR HMAC support")
Signed-off-by: Minhong He <heminhong@kylinos.cn>
Reviewed-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Link: https://patch.msgid.link/20260316073301.106643-1-heminhong@kylinos.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:23:43 -07:00
Ian Ray
55dc632ab2 NFC: nxp-nci: allow GPIOs to sleep
Allow the firmware and enable GPIOs to sleep.

This fixes a `WARN_ON' and allows the driver to operate GPIOs which are
connected to I2C GPIO expanders.

-- >8 --
kernel: WARNING: CPU: 3 PID: 2636 at drivers/gpio/gpiolib.c:3880 gpiod_set_value+0x88/0x98
-- >8 --

Fixes: 43201767b4 ("NFC: nxp-nci: Convert to use GPIO descriptor")
Cc: stable@vger.kernel.org
Signed-off-by: Ian Ray <ian.ray@gehealthcare.com>
Link: https://patch.msgid.link/20260317085337.146545-1-ian.ray@gehealthcare.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:20:51 -07:00
Fedor Pchelkin
34b11cc56e net: macb: fix uninitialized rx_fs_lock
If hardware doesn't support RX Flow Filters, rx_fs_lock spinlock is not
initialized leading to the following assertion splat triggerable via
set_rxnfc callback.

INFO: trying to register non-static key.
The code is fine but needs lockdep annotation, or maybe
you didn't initialize this object before use?
turning off the locking correctness validator.
CPU: 1 PID: 949 Comm: syz.0.6 Not tainted 6.1.164+ #113
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x8d/0xba lib/dump_stack.c:106
 assign_lock_key kernel/locking/lockdep.c:974 [inline]
 register_lock_class+0x141b/0x17f0 kernel/locking/lockdep.c:1287
 __lock_acquire+0x74f/0x6c40 kernel/locking/lockdep.c:4928
 lock_acquire kernel/locking/lockdep.c:5662 [inline]
 lock_acquire+0x190/0x4b0 kernel/locking/lockdep.c:5627
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0x33/0x50 kernel/locking/spinlock.c:162
 gem_del_flow_filter drivers/net/ethernet/cadence/macb_main.c:3562 [inline]
 gem_set_rxnfc+0x533/0xac0 drivers/net/ethernet/cadence/macb_main.c:3667
 ethtool_set_rxnfc+0x18c/0x280 net/ethtool/ioctl.c:961
 __dev_ethtool net/ethtool/ioctl.c:2956 [inline]
 dev_ethtool+0x229c/0x6290 net/ethtool/ioctl.c:3095
 dev_ioctl+0x637/0x1070 net/core/dev_ioctl.c:510
 sock_do_ioctl+0x20d/0x2c0 net/socket.c:1215
 sock_ioctl+0x577/0x6d0 net/socket.c:1320
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:870 [inline]
 __se_sys_ioctl fs/ioctl.c:856 [inline]
 __x64_sys_ioctl+0x18c/0x210 fs/ioctl.c:856
 do_syscall_x64 arch/x86/entry/common.c:46 [inline]
 do_syscall_64+0x35/0x80 arch/x86/entry/common.c:76
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8

A more straightforward solution would be to always initialize rx_fs_lock,
just like rx_fs_list.  However, in this case the driver set_rxnfc callback
would return with a rather confusing error code, e.g. -EINVAL.  So deny
set_rxnfc attempts directly if the RX filtering feature is not supported
by hardware.

Fixes: ae8223de3d ("net: macb: Added support for RX filtering")
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Link: https://patch.msgid.link/20260316103826.74506-2-pchelkin@ispras.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:18:53 -07:00
Fedor Pchelkin
8da13e6d63 net: macb: fix use-after-free access to PTP clock
PTP clock is registered on every opening of the interface and destroyed on
every closing.  However it may be accessed via get_ts_info ethtool call
which is possible while the interface is just present in the kernel.

BUG: KASAN: use-after-free in ptp_clock_index+0x47/0x50 drivers/ptp/ptp_clock.c:426
Read of size 4 at addr ffff8880194345cc by task syz.0.6/948

CPU: 1 PID: 948 Comm: syz.0.6 Not tainted 6.1.164+ #109
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x8d/0xba lib/dump_stack.c:106
 print_address_description mm/kasan/report.c:316 [inline]
 print_report+0x17f/0x496 mm/kasan/report.c:420
 kasan_report+0xd9/0x180 mm/kasan/report.c:524
 ptp_clock_index+0x47/0x50 drivers/ptp/ptp_clock.c:426
 gem_get_ts_info+0x138/0x1e0 drivers/net/ethernet/cadence/macb_main.c:3349
 macb_get_ts_info+0x68/0xb0 drivers/net/ethernet/cadence/macb_main.c:3371
 __ethtool_get_ts_info+0x17c/0x260 net/ethtool/common.c:558
 ethtool_get_ts_info net/ethtool/ioctl.c:2367 [inline]
 __dev_ethtool net/ethtool/ioctl.c:3017 [inline]
 dev_ethtool+0x2b05/0x6290 net/ethtool/ioctl.c:3095
 dev_ioctl+0x637/0x1070 net/core/dev_ioctl.c:510
 sock_do_ioctl+0x20d/0x2c0 net/socket.c:1215
 sock_ioctl+0x577/0x6d0 net/socket.c:1320
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:870 [inline]
 __se_sys_ioctl fs/ioctl.c:856 [inline]
 __x64_sys_ioctl+0x18c/0x210 fs/ioctl.c:856
 do_syscall_x64 arch/x86/entry/common.c:46 [inline]
 do_syscall_64+0x35/0x80 arch/x86/entry/common.c:76
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8
 </TASK>

Allocated by task 457:
 kmalloc include/linux/slab.h:563 [inline]
 kzalloc include/linux/slab.h:699 [inline]
 ptp_clock_register+0x144/0x10e0 drivers/ptp/ptp_clock.c:235
 gem_ptp_init+0x46f/0x930 drivers/net/ethernet/cadence/macb_ptp.c:375
 macb_open+0x901/0xd10 drivers/net/ethernet/cadence/macb_main.c:2920
 __dev_open+0x2ce/0x500 net/core/dev.c:1501
 __dev_change_flags+0x56a/0x740 net/core/dev.c:8651
 dev_change_flags+0x92/0x170 net/core/dev.c:8722
 do_setlink+0xaf8/0x3a80 net/core/rtnetlink.c:2833
 __rtnl_newlink+0xbf4/0x1940 net/core/rtnetlink.c:3608
 rtnl_newlink+0x63/0xa0 net/core/rtnetlink.c:3655
 rtnetlink_rcv_msg+0x3c6/0xed0 net/core/rtnetlink.c:6150
 netlink_rcv_skb+0x15d/0x430 net/netlink/af_netlink.c:2511
 netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline]
 netlink_unicast+0x6d7/0xa30 net/netlink/af_netlink.c:1344
 netlink_sendmsg+0x97e/0xeb0 net/netlink/af_netlink.c:1872
 sock_sendmsg_nosec net/socket.c:718 [inline]
 __sock_sendmsg+0x14b/0x180 net/socket.c:730
 __sys_sendto+0x320/0x3b0 net/socket.c:2152
 __do_sys_sendto net/socket.c:2164 [inline]
 __se_sys_sendto net/socket.c:2160 [inline]
 __x64_sys_sendto+0xdc/0x1b0 net/socket.c:2160
 do_syscall_x64 arch/x86/entry/common.c:46 [inline]
 do_syscall_64+0x35/0x80 arch/x86/entry/common.c:76
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8

Freed by task 938:
 kasan_slab_free include/linux/kasan.h:177 [inline]
 slab_free_hook mm/slub.c:1729 [inline]
 slab_free_freelist_hook mm/slub.c:1755 [inline]
 slab_free mm/slub.c:3687 [inline]
 __kmem_cache_free+0xbc/0x320 mm/slub.c:3700
 device_release+0xa0/0x240 drivers/base/core.c:2507
 kobject_cleanup lib/kobject.c:681 [inline]
 kobject_release lib/kobject.c:712 [inline]
 kref_put include/linux/kref.h:65 [inline]
 kobject_put+0x1cd/0x350 lib/kobject.c:729
 put_device+0x1b/0x30 drivers/base/core.c:3805
 ptp_clock_unregister+0x171/0x270 drivers/ptp/ptp_clock.c:391
 gem_ptp_remove+0x4e/0x1f0 drivers/net/ethernet/cadence/macb_ptp.c:404
 macb_close+0x1c8/0x270 drivers/net/ethernet/cadence/macb_main.c:2966
 __dev_close_many+0x1b9/0x310 net/core/dev.c:1585
 __dev_close net/core/dev.c:1597 [inline]
 __dev_change_flags+0x2bb/0x740 net/core/dev.c:8649
 dev_change_flags+0x92/0x170 net/core/dev.c:8722
 dev_ifsioc+0x151/0xe00 net/core/dev_ioctl.c:326
 dev_ioctl+0x33e/0x1070 net/core/dev_ioctl.c:572
 sock_do_ioctl+0x20d/0x2c0 net/socket.c:1215
 sock_ioctl+0x577/0x6d0 net/socket.c:1320
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:870 [inline]
 __se_sys_ioctl fs/ioctl.c:856 [inline]
 __x64_sys_ioctl+0x18c/0x210 fs/ioctl.c:856
 do_syscall_x64 arch/x86/entry/common.c:46 [inline]
 do_syscall_64+0x35/0x80 arch/x86/entry/common.c:76
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8

Set the PTP clock pointer to NULL after unregistering.

Fixes: c2594d804d ("macb: Common code to enable ptp support for MACB/GEM")
Cc: stable@vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Link: https://patch.msgid.link/20260316103826.74506-1-pchelkin@ispras.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:18:47 -07:00
Wesley Atwell
7d9351435e netdevsim: drop PSP ext ref on forward failure
nsim_do_psp() takes an extra reference to the PSP skb extension so the
extension survives __dev_forward_skb(). That forward path scrubs the skb
and drops attached skb extensions before nsim_psp_handle_ext() can
reattach the PSP metadata.

If __dev_forward_skb() fails in nsim_forward_skb(), the function returns
before nsim_psp_handle_ext() can attach that extension to the skb, leaving
the extra reference leaked.

Drop the saved PSP extension reference before returning from the
forward-failure path. Guard the put because plain or non-decapsulated
traffic can also fail forwarding without ever taking the extra PSP
reference.

Fixes: f857478d62 ("netdevsim: a basic test PSP implementation")
Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
Reviewed-by: Daniel Zahka <daniel.zahka@gmail.com>
Link: https://patch.msgid.link/20260317061431.1482716-1-atwellwea@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-18 17:13:34 -07:00
Josh Law
3b2c2ab4ce tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
If fstat() fails after open() succeeds, the function returns without
closing the file descriptor. Also preserve errno across close(), since
close() may overwrite it before the error is returned.

Link: https://lore.kernel.org/all/20260318155847.78065-3-objecting@objecting.org/

Fixes: 950313ebf7 ("tools: bootconfig: Add bootconfig command")
Signed-off-by: Josh Law <objecting@objecting.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-19 08:43:06 +09:00
Josh Law
bb288d7d86 lib/bootconfig: check xbc_init_node() return in override path
The ':=' override path in xbc_parse_kv() calls xbc_init_node() to
re-initialize an existing value node but does not check the return
value. If xbc_init_node() fails (data offset out of range), parsing
silently continues with stale node data.

Add the missing error check to match the xbc_add_node() call path
which already checks for failure.

In practice, a bootconfig using ':=' to override a value near the
32KB data limit could silently retain the old value, meaning a
security-relevant boot parameter override (e.g., a trace filter or
debug setting) would not take effect as intended.

Link: https://lore.kernel.org/all/20260318155847.78065-2-objecting@objecting.org/

Fixes: e5efaeb8a8 ("bootconfig: Support mixing a value and subkeys under a key")
Signed-off-by: Josh Law <objecting@objecting.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-19 08:43:05 +09:00
Linus Torvalds
1863b4055b Merge tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull crypto library fixes from Eric Biggers:

 - Disable the "padlock" SHA-1 and SHA-256 driver on Zhaoxin
   processors, since it does not compute hash values correctly

 - Make a generated file be removed by 'make clean'

 - Fix excessive stack usage in some of the arm64 AES code

* tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  lib/crypto: powerpc: Add powerpc/aesp8-ppc.S to clean-files
  crypto: padlock-sha - Disable for Zhaoxin processor
  crypto: arm64/aes-neonbs - Move key expansion off the stack
2026-03-18 15:50:29 -07:00
William Roche
201bc182ad x86/mce/amd: Check SMCA feature bit before accessing SMCA MSRs
People do effort to inject MCEs into guests in order to simulate/test
handling of hardware errors. The real use case behind it is testing the
handling of SIGBUS which the memory failure code sends to the process.

If that process is QEMU, instead of killing the whole guest, the MCE can
be injected into the guest kernel so that latter can attempt proper
handling and kill the user *process*  in the guest, instead, which
caused the MCE. The assumption being here that the whole injection flow
can supply enough information that the guest kernel can pinpoint the
right process. But that's a different topic...

Regardless of virtualization or not, access to SMCA-specific registers
like MCA_DESTAT should only be done after having checked the smca
feature bit. And there are AMD machines like Bulldozer (the one before
Zen1) which do support deferred errors but are not SMCA machines.

Therefore, properly check the feature bit before accessing related MSRs.

  [ bp: Rewrite commit message. ]

Fixes: 7cb735d7c0 ("x86/mce: Unify AMD DFR handler with MCA Polling")
Signed-off-by: William Roche <william.roche@oracle.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20260218163025.1316501-1-william.roche@oracle.com
2026-03-18 23:02:16 +01:00
Linus Torvalds
8a30aeb0d1 Merge tag 'nfsd-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd fixes from Chuck Lever:

 - Fix cache_request leak in cache_release()

 - Fix heap overflow in the NFSv4.0 LOCK replay cache

 - Hold net reference for the lifetime of /proc/fs/nfs/exports fd

 - Defer sub-object cleanup in export "put" callbacks

* tag 'nfsd-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux:
  nfsd: fix heap overflow in NFSv4.0 LOCK replay cache
  sunrpc: fix cache_request leak in cache_release
  NFSD: Hold net reference for the lifetime of /proc/fs/nfs/exports fd
  NFSD: Defer sub-object cleanup in export put callbacks
2026-03-18 14:27:11 -07:00
Sanman Pradhan
86259558e4 hwmon: (pmbus/isl68137) Fix unchecked return value and use sysfs_emit()
isl68137_avs_enable_show_page() uses the return value of
pmbus_read_byte_data() without checking for errors. If the I2C transaction
fails, a negative error code is passed through bitwise operations,
producing incorrect output.

Add an error check to propagate the return value if it is negative.
Additionally, modernize the callback by replacing sprintf()
with sysfs_emit().

Fixes: 038a9c3d1e ("hwmon: (pmbus/isl68137) Add driver for Intersil ISL68137 PWM Controller")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260318193952.47908-2-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-18 13:38:55 -07:00
Kees Cook
c4192754e8 fs/tests: exec: Remove bad test vector
Drop an unusable test in the bprm stack limits.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/all/a3e9b1c2-40c1-45df-9fa2-14ee6a7b3fe2@roeck-us.net
Fixes: 60371f43e5 ("exec: Add KUnit test for bprm_stack_limits()")
Signed-off-by: Kees Cook <kees@kernel.org>
2026-03-18 11:41:53 -07:00
Arnaldo Carvalho de Melo
f8e478e3af Merge remote-tracking branch 'torvalds/master' into perf-tools
To pick up some extra files that need to be sync'ed with the kernel
sources to try and reduce the number of PRs.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-18 15:30:08 -03:00
Maíra Canal
b826d2c0b0 pmdomain: bcm: bcm2835-power: Increase ASB control timeout
The bcm2835_asb_control() function uses a tight polling loop to wait
for the ASB bridge to acknowledge a request. During intensive workloads,
this handshake intermittently fails for V3D's master ASB on BCM2711,
resulting in "Failed to disable ASB master for v3d" errors during
runtime PM suspend. As a consequence, the failed power-off leaves V3D in
a broken state, leading to bus faults or system hangs on later accesses.

As the timeout is insufficient in some scenarios, increase the polling
timeout from 1us to 5us, which is still negligible in the context of a
power domain transition. Also, replace the open-coded ktime_get_ns()/
cpu_relax() polling loop with readl_poll_timeout_atomic().

Cc: stable@vger.kernel.org
Fixes: 670c672608 ("soc: bcm: bcm2835-pm: Add support for power domains under a new binding.")
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Stefan Wahren <wahrenst@gmx.net>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-03-18 19:15:34 +01:00
Olivier Sobrie
b9465b04de mtd: rawnand: pl353: make sure optimal timings are applied
Timings of the nand are adjusted by pl35x_nfc_setup_interface() but
actually applied by the pl35x_nand_select_target() function.
If there is only one nand chip, the pl35x_nand_select_target() will only
apply the timings once since the test at its beginning will always be true
after the first call to this function. As a result, the hardware will
keep using the default timings set at boot to detect the nand chip, not
the optimal ones.

With this patch, we program directly the new timings when
pl35x_nfc_setup_interface() is called.

Fixes: 08d8c62164 ("mtd: rawnand: pl353: Add support for the ARM PL353 SMC NAND controller")
Signed-off-by: Olivier Sobrie <olivier@sobrie.be>
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-18 18:08:25 +01:00
Miquel Raynal
16dec014db mtd: spi-nor: Rename spi_nor_spimem_check_op()
This helper really is just a little helper for internal purposes, and is
I/O operation oriented, despite its name. It has already been misused
in commit 5008c3ec3f ("mtd: spi-nor: core: Check read CR support"), so
rename it to clarify its purpose: it is only useful for reads and page
programs.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-18 18:08:18 +01:00
Miquel Raynal
ac512cd351 mtd: spi-nor: Fix RDCR controller capability core check
Commit 5008c3ec3f ("mtd: spi-nor: core: Check read CR support") adds a
controller check to make sure the core will not use CR reads on
controllers not supporting them. The approach is valid but the fix is
incorrect. Unfortunately, the author could not catch it, because the
expected behavior was met. The patch indeed drops the RDCR capability,
but it does it for all controllers!

The issue comes from the use of spi_nor_spimem_check_op() which is an
internal helper dedicated to check read/write operations only, despite
its generic name.

This helper looks for the biggest number of address bytes that can be
used for a page operation and tries 4 then 3. It then calls the usual
spi-mem helpers to do the checks. These will always fail because there
is now an inconsistency: the address cycles are forced to 4 (then 3)
bytes, but the bus width during the address cycles rightfully remains
0. There is a non-zero address length but a zero address bus width,
which is an invalid combination.

The correct check in this case is to directly call spi_mem_supports_op()
which doesn't messes up with the operation content.

Fixes: 5008c3ec3f ("mtd: spi-nor: core: Check read CR support")
Cc: stable@vger.kernel.org
Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Acked-by: Takahiro Kuwano <takahiro.kuwano@infineon.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-18 18:08:12 +01:00
Jiucheng Xu
c23df30915 erofs: add GFP_NOIO in the bio completion if needed
The bio completion path in the process context (e.g. dm-verity)
will directly call into decompression rather than trigger another
workqueue context for minimal scheduling latencies, which can
then call vm_map_ram() with GFP_KERNEL.

Due to insufficient memory, vm_map_ram() may generate memory
swapping I/O, which can cause submit_bio_wait to deadlock
in some scenarios.

Trimmed down the call stack, as follows:

f2fs_submit_read_io
  submit_bio                      //bio_list is initialized.
    mmc_blk_mq_recovery
      z_erofs_endio
        vm_map_ram
          __pte_alloc_kernel
            __alloc_pages_direct_reclaim
              shrink_folio_list
                __swap_writepage
                  submit_bio_wait  //bio_list is non-NULL, hang!!!

Use memalloc_noio_{save,restore}() to wrap up this path.

Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2026-03-19 00:24:21 +08:00
Stanislav Kinsburskii
c0e296f257 mshv: Fix error handling in mshv_region_pin
The current error handling has two issues:

First, pin_user_pages_fast() can return a short pin count (less than
requested but greater than zero) when it cannot pin all requested pages.
This is treated as success, leading to partially pinned regions being
used, which causes memory corruption.

Second, when an error occurs mid-loop, already pinned pages from the
current batch are not properly accounted for before calling
mshv_region_invalidate_pages(), causing a page reference leak.

Treat short pins as errors and fix partial batch accounting before
cleanup.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-18 16:18:49 +00:00
Arnd Bergmann
591721223b ALSA: asihpi: avoid write overflow check warning
clang-22 rightfully warns that the memcpy() in adapter_prepare() copies
between different structures, crossing the boundary of nested
structures inside it:

In file included from sound/pci/asihpi/hpimsgx.c:13:
In file included from include/linux/string.h:386:
include/linux/fortify-string.h:569:4: error: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror,-Wattribute-warning]
  569 |                         __write_overflow_field(p_size_field, size);

The two structures seem to refer to the same layout, despite the
separate definitions, so the code is in fact correct.

Avoid the warning by copying the two inner structures separately.
I see the same pattern happens in other functions in the same file,
so there is a chance that this may come back in the future, but
this instance is the only one that I saw in practice, hitting it
multiple times per day in randconfig build.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://patch.msgid.link/20260318124016.3488566-1-arnd@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-18 17:01:17 +01:00
Davidlohr Bueso
9a6a209132 cxl/mbox: Use proper endpoint validity check upon sanitize
Fuzzying CXL triggered:

BUG: KASAN: null-ptr-deref in cxl_num_decoders_committed+0x3e/0x80 drivers/cxl/core/port.c:49
Read of size 4 at addr 0000000000000642 by task syz.0.97/2282

CPU: 2 UID: 0 PID: 2282 Comm: syz.0.97 Not tainted 7.0.0-rc1-gebd11be59f74-dirty #494 PREEMPT(full)
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120
 kasan_report+0xe0/0x110 mm/kasan/report.c:595
 cxl_num_decoders_committed+0x3e/0x80 drivers/cxl/core/port.c:49
 cxl_mem_sanitize+0x141/0x170 drivers/cxl/core/mbox.c:1304
 security_sanitize_store+0xb0/0x120 drivers/cxl/core/memdev.c:173
 dev_attr_store+0x46/0x70 drivers/base/core.c:2437
 sysfs_kf_write+0x95/0xb0 fs/sysfs/file.c:142
 kernfs_fop_write_iter+0x276/0x330 fs/kernfs/file.c:352
 new_sync_write fs/read_write.c:595 [inline]
 vfs_write+0x5df/0xaa0 fs/read_write.c:688
 ksys_write+0x103/0x1f0 fs/read_write.c:740
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0x111/0x680 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f60a584ba79
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f60a42a7038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007f60a5ab5fa0 RCX: 00007f60a584ba79
RDX: 0000000000000002 RSI: 00002000000001c0 RDI: 0000000000000003
RBP: 00007f60a58a49df R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f60a5ab6038 R14: 00007f60a5ab5fa0 R15: 00007ffe58fad8b8
 </TASK>

This goes away using the correct check instead of abusing cxlmd->endpoint,
which is unusable (ENXIO) until the driver has probed. During that window
the memdev sysfs attributes are already visible, as soon as device_add()
completes.

Fixes: 29317f8dc6 ("cxl/mem: Introduce cxl_memdev_attach for CXL-dependent operation")
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Link: https://patch.msgid.link/20260301221739.1726722-1-dave@stgolabs.net
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-03-18 08:49:29 -07:00
Linus Torvalds
04a9f17669 Merge tag 'soc-fixes-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc
Pull SoC fixes from Arnd Bergmann:
 "The firmware drivers for ARM SCMI, FF-A and the Tee subsystem, as
  well as the reset controller and cache controller subsystem all see
  small bugfixes for reference ounting errors, ABI correctness, and
  NULL pointer dereferences.

  Similarly, there are multiple reference counting fixes in drivers/soc/
  for vendor specific drivers (rockchips, microchip), while the
  freescale drivers get a fix for a race condition and error handling.

  The devicetree fixes for Rockchips and NXP got held up, so for
  the moment there is only Renesas fixing problesm with SD card
  initialization, a boot hang on one board and incorrect descriptions
  for interrupts and clock registers on some SoCs. The Microchip
  polarfire gets a dts fix for a boot time warning.

  A defconfig fix avoids a warning about a conflicting assignment"

* tag 'soc-fixes-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc: (21 commits)
  ARM: multi_v7_defconfig: Drop duplicate CONFIG_TI_PRUSS=m
  firmware: arm_scmi: Spelling s/mulit/multi/, s/currenly/currently/
  firmware: arm_scmi: Fix NULL dereference on notify error path
  firmware: arm_scpi: Fix device_node reference leak in probe path
  firmware: arm_ffa: Remove vm_id argument in ffa_rxtx_unmap()
  arm64: dts: renesas: r8a78000: Fix out-of-range SPI interrupt numbers
  arm64: dts: renesas: rzg3s-smarc-som: Set bypass for Versa3 PLL2
  arm64: dts: renesas: r9a09g087: Fix CPG register region sizes
  arm64: dts: renesas: r9a09g077: Fix CPG register region sizes
  arm64: dts: renesas: r9a09g057: Remove wdt{0,2,3} nodes
  arm64: dts: renesas: rzv2-evk-cn15-sd: Add ramp delay for SD0 regulator
  arm64: dts: renesas: rzt2h-n2h-evk: Add ramp delay for SD0 card regulator
  tee: shm: Remove refcounting of kernel pages
  reset: rzg2l-usbphy-ctrl: Check pwrrdy is valid before using it
  soc: fsl: cpm1: qmc: Fix error check for devm_ioremap_resource() in qmc_qe_init_resources()
  soc: fsl: qbman: fix race condition in qman_destroy_fq
  soc: rockchip: grf: Add missing of_node_put() when returning
  cache: ax45mp: Fix device node reference leak in ax45mp_cache_init()
  cache: starfive: fix device node leak in starlink_cache_init()
  riscv: dts: microchip: add can resets to mpfs
  ...
2026-03-18 08:28:54 -07:00
Linus Torvalds
c5cb126c48 Merge tag 'v7.0-p3' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto fix from Herbert Xu:

 - Remove duplicate snp_leak_pages call in ccp

* tag 'v7.0-p3' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: ccp - Fix leaking the same page twice
2026-03-18 08:11:46 -07:00
Linus Torvalds
efa0adb504 Merge tag 'loongarch-fixes-7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
Pull LoongArch fixes from Huacai Chen:

 - only use SC.Q when supported by the assembler to fix a build failure

 - fix calling smp_processor_id() in preemptible code

 - make a BPF helper arch_protect_bpf_trampoline() return 0 to fix a
   kernel memory access failure

 - fix a typo issue in kvm_vm_init_features()

* tag 'loongarch-fixes-7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson:
  LoongArch: KVM: Fix typo issue in kvm_vm_init_features()
  LoongArch: BPF: Make arch_protect_bpf_trampoline() return 0
  LoongArch: No need to flush icache if text copy failed
  LoongArch: Check return values for set_memory_{rw,rox}
  LoongArch: Give more information if kmem access failed
  LoongArch: Fix calling smp_processor_id() in preemptible code
  LoongArch: Only use SC.Q when supported by the assembler
2026-03-18 08:06:30 -07:00
Mark Brown
5e4ed0320b ASoC: fix usage of playback_only and capture_only
Shengjiu Wang <shengjiu.wang@nxp.com> says:

Check value of is_playback_only and is_capture_only in
graph_util_parse_link_direction() and initialize playback_only and
capture_only in imx-card.c
2026-03-18 13:26:52 +00:00
Shengjiu Wang
ca67bd564e ASoC: fsl: imx-card: initialize playback_only and capture_only
Fix uninitialized variable playback_only and capture_only because
graph_util_parse_link_direction() may not write them.

Fixes: 1877c3e793 ("ASoC: imx-card: Add playback_only or capture_only support")
Suggested-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Link: https://patch.msgid.link/20260318102850.2794029-3-shengjiu.wang@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-18 13:26:50 +00:00
Shengjiu Wang
0e9fc79132 ASoC: simple-card-utils: Check value of is_playback_only and is_capture_only
The audio-graph-card2 gets the value of 'playback-only' and
'capture_only' property in below sequence, if there is 'playback_only' or
'capture_only' property in port_cpu and port_codec nodes, but no these
properties in ep_cpu and ep_codec nodes, the value of playback_only and
capture_only will be flushed to zero in the end.

graph_util_parse_link_direction(lnk,            &playback_only, &capture_only);
graph_util_parse_link_direction(ports_cpu,      &playback_only, &capture_only);
graph_util_parse_link_direction(ports_codec,    &playback_only, &capture_only);
graph_util_parse_link_direction(port_cpu,       &playback_only, &capture_only);
graph_util_parse_link_direction(port_codec,     &playback_only, &capture_only);
graph_util_parse_link_direction(ep_cpu,         &playback_only, &capture_only);
graph_util_parse_link_direction(ep_codec,       &playback_only, &capture_only);

So check the value of is_playback_only and is_capture_only in
graph_util_parse_link_direction() function, if they are true, then rewrite
the values, and no need to check the np variable as
of_property_read_bool() will ignore if it was NULL.

Fixes: 3cc393d223 ("ASoC: simple-card-utils: Fix pointer check in graph_util_parse_link_direction")
Fixes: 22a507d768 ("ASoC: simple-card-utils: Check device node before overwrite direction")
Suggested-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Link: https://patch.msgid.link/20260318102850.2794029-2-shengjiu.wang@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-18 13:26:49 +00:00
Arnd Bergmann
df3ef89d7e Merge tag 'scmi-fixes-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux into arm/fixes
Arm SCMI fixes for v7.0

Few fixes to:
1. Address a NULL dereference in the SCMI notify error path by ensurin
   __scmi_event_handler_get_ops() consistently returns an ERR_PTR on
   failure, as expected by callers.
2. Fix a device_node reference leak in the SCPI probe path by introducing
   scope-based cleanup for acquired DT nodes.
3. Correct minor spelling errors.

* tag 'scmi-fixes-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux:
  firmware: arm_scmi: Spelling s/mulit/multi/, s/currenly/currently/
  firmware: arm_scmi: Fix NULL dereference on notify error path
  firmware: arm_scpi: Fix device_node reference leak in probe path

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2026-03-18 14:06:34 +01:00
Arnd Bergmann
dfc80d650b Merge tag 'ffa-fix-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux into arm/fixes
Arm FF-A fix for v7.0

Fix removing the vm_id argument from ffa_rxtx_unmap(), as the FF-A
specification mandates this field be zero in all contexts except a
non-secure physical FF-A instance, where the ID is inherently 0.

* tag 'ffa-fix-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux:
  firmware: arm_ffa: Remove vm_id argument in ffa_rxtx_unmap()

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2026-03-18 14:05:31 +01:00
Arnd Bergmann
b3315ba042 Merge tag 'tee-fix-for-v7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/jenswi/linux-tee into arm/fixes
TEE shared memory update for 7.0

Remove refcounting of kernel pages in register_shm_helper() to support
slab allocations.

* tag 'tee-fix-for-v7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/jenswi/linux-tee:
  tee: shm: Remove refcounting of kernel pages

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2026-03-18 13:29:06 +01:00
Ming Qian
e8d97c270c media: verisilicon: Fix kernel panic due to __initconst misuse
Fix a kernel panic when probing the driver as a module:

  Unable to handle kernel paging request at virtual address
  ffffd9c18eb05000
  of_find_matching_node_and_match+0x5c/0x1a0
  hantro_probe+0x2f4/0x7d0 [hantro_vpu]

The imx8mq_vpu_shared_resources array is referenced by variant
structures through their shared_devices field. When built as a
module, __initconst causes this data to be freed after module
init, but it's later accessed during probe, causing a page fault.

The imx8mq_vpu_shared_resources is referenced from non-init code,
so keeping __initconst or __initconst_or_module here is wrong.

Drop the __initconst annotation and let it live in the normal .rodata
section.

A bug of __initconst called from regular non-init probe code
leading to bugs during probe deferrals or during unbind-bind cycles.

Reported-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Closes: https://lore.kernel.org/all/68ef934f-baa0-4bf6-93d8-834bbc441e66@kernel.org/
Reported-by: Franz Schnyder <franz.schnyder@toradex.com>
Closes: https://lore.kernel.org/all/n3qmcb62tepxltoskpf7ws6yiirc2so62ia23b42rj3wlmpl67@rvkbuirx7kkp/
Fixes: e0203ddf9a ("media: verisilicon: Avoid G2 bus error while decoding H.264 and HEVC")
Suggested-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Suggested-by: Marco Felsch <m.felsch@pengutronix.de>
Reviewed-by: Marco Felsch <m.felsch@pengutronix.de>
Signed-off-by: Ming Qian <ming.qian@oss.nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Cc: stable@kernel.org
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
2026-03-18 13:17:25 +01:00
Daniel Lezcano
8306a78a1c ALSA: usb-audio: qcom: Fix the license marking
The Copyright for Qualcomm changed its format and replaces the old
Qualcomm Innovative Center by Qualcomm Technology Inc.

Signed-off-by: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
Link: https://patch.msgid.link/20260317180943.3062085-1-daniel.lezcano@oss.qualcomm.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-18 12:37:05 +01:00
Frank Zhang
b8bee48e38 ALSA:usb:qcom: add AUXILIARY_BUS to Kconfig dependencies
The build can fail with:

ERROR: modpost: "__auxiliary_driver_register"
[sound/usb/qcom/snd-usb-audio-qmi.ko] undefined!
ERROR: modpost: "auxiliary_driver_unregister"
[sound/usb/qcom/snd-usb-audio-qmi.ko] undefined!

Select AUXILIARY_BUS when SND_USB_AUDIO_QMI is enabled.

Signed-off-by: Frank Zhang <rmxpzlb@gmail.com>
Link: https://patch.msgid.link/20260317102527.556248-1-rmxpzlb@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-18 12:36:39 +01:00
Pratap Nirujogi
e7648ffecb ACPI: bus: Fix MFD child automatic modprobe issue
MFD child devices sharing parent's ACPI Companion fails to probe as
acpi_companion_match() returns incompatible ACPI Companion handle for
binding with the check for pnp.type.backlight added recently. Remove this
pnp.type.backlight check in acpi_companion_match() to fix the automatic
modprobe issue.

Fixes: 7a7a7ed5f8bdb ("ACPI: scan: Register platform devices for backlight device objects")
Signed-off-by: Pratap Nirujogi <pratap.nirujogi@amd.com>
Link: https://patch.msgid.link/20260318034842.1216536-1-pratap.nirujogi@amd.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-18 11:57:22 +01:00
Rafael J. Wysocki
bf504b229c ACPI: processor: Fix previous acpi_processor_errata_piix4() fix
After commi f132e089fe ("ACPI: processor: Fix NULL-pointer dereference
in acpi_processor_errata_piix4()"), device pointers may be dereferenced
after dropping references to the device objects pointed to by them,
which may cause a use-after-free to occur.

Moreover, debug messages about enabling the errata may be printed
if the errata flags corresponding to them are unset.

Address all of these issues by moving message printing to the points
in the code where the errata flags are set.

Fixes: f132e089fe ("ACPI: processor: Fix NULL-pointer dereference in acpi_processor_errata_piix4()")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/linux-acpi/938e2206-def5-4b7a-9b2c-d1fd37681d8a@roeck-us.net/
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/5975693.DvuYhMxLoT@rafael.j.wysocki
2026-03-18 11:53:07 +01:00
Arnd Bergmann
c03b7dec3c media: rkvdec: reduce stack usage in rkvdec_init_v4l2_vp9_count_tbl()
The deeply nested loop in rkvdec_init_v4l2_vp9_count_tbl() needs a lot
of registers, so when the clang register allocator runs out, it ends up
spilling countless temporaries to the stack:

drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c:966:12: error: stack frame size (1472) exceeds limit (1280) in 'rkvdec_vp9_start' [-Werror,-Wframe-larger-than]

Marking this function as noinline_for_stack keeps it out of
rkvdec_vp9_start(), giving the compiler more room for optimization.

The resulting code is good enough that both the total stack usage
and the loop get enough better to stay under the warning limit,
though it's still slow, and would need a larger rework if this
function ends up being called in a fast path.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
2026-03-18 11:21:31 +01:00
Arnd Bergmann
446c6a25a4 media: rkvdec: reduce excessive stack usage in assemble_hw_pps()
The rkvdec_pps had a large set of bitfields, all of which
as misaligned. This causes clang-21 and likely other versions to
produce absolutely awful object code and a warning about very
large stack usage, on targets without unaligned access:

drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c:966:12: error: stack frame size (1472) exceeds limit (1280) in 'rkvdec_vp9_start' [-Werror,-Wframe-larger-than]

Part of the problem here is how all the bitfield accesses are
inlined into a function that already has large structures on
the stack.

Mark set_field_order_cnt() as noinline_for_stack, and split out
the following accesses in assemble_hw_pps() into another noinline
function, both of which now using around 800 bytes of stack in the
same configuration.

There is clearly still something wrong with clang here, but
splitting it into multiple functions reduces the risk of stack
overflow.

Fixes: fde2490757 ("media: rkvdec: Add H264 support for the VDPU383 variant")
Link: https://godbolt.org/z/acP1eKeq9
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
2026-03-18 11:21:31 +01:00
Detlev Casanova
daa87ca426 media: rkvdec: Improve handling missing short/long term RPS
The values of ext_sps_st_rps and ext_sps_lt_rps in struct rkvdec_hevc_run
are not initialized when the respective controls are not set by userspace.

When this is the case, set them to NULL so the rkvdec_hevc_run_preamble
function that parses controls does not access garbage data which leads to
a panic on unaccessible memory.

Fixes: c9a59dc2ac ("media: rkvdec: Add HEVC support for the VDPU381 variant")
Reported-by: Christian Hewitt <christianshewitt@gmail.com>
Suggested-by: Jonas Karlman <jonas@kwiboo.se>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Tested-by: Christian Hewitt <christianshewitt@gmail.com>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
2026-03-18 11:21:31 +01:00
Yuchan Nam
bef4f4a88b media: mc, v4l2: serialize REINIT and REQBUFS with req_queue_mutex
MEDIA_REQUEST_IOC_REINIT can run concurrently with VIDIOC_REQBUFS(0)
queue teardown paths. This can race request object cleanup against vb2
queue cancellation and lead to use-after-free reports.

We already serialize request queueing against STREAMON/OFF with
req_queue_mutex. Extend that serialization to REQBUFS, and also take
the same mutex in media_request_ioctl_reinit() so REINIT is in the
same exclusion domain.

This keeps request cleanup and queue cancellation from running in
parallel for request-capable devices.

Fixes: 6093d3002e ("media: vb2: keep a reference to the request until dqbuf")
Cc: stable@vger.kernel.org
Signed-off-by: Yuchan Nam <entropy1110@gmail.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
2026-03-18 11:21:31 +01:00
Michael Riesch
ac62a20035 media: synopsys: csi2rx: add missing kconfig dependency
Fix "ERROR: modpost: "phy_mipi_dphy_get_default_config_for_hsclk"
[drivers/media/platform/synopsys/dw-mipi-csi2rx.ko] undefined!" by
selecting GENERIC_PHY_MIPI_DPHY in the Kconfig entry.

Fixes: 355a110040 ("media: synopsys: add driver for the designware mipi csi-2 receiver")
Cc: stable@kernel.org
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602130253.BZnVd4jh-lkp@intel.com/
Signed-off-by: Michael Riesch <michael.riesch@collabora.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
2026-03-18 11:21:31 +01:00
Michael Riesch
9232fa0592 media: synopsys: csi2rx: fix out-of-bounds check for formats array
The out-of-bounds check for the format array is off by one. Fix the
check.

Fixes: 355a110040 ("media: synopsys: add driver for the designware mipi csi-2 receiver")
Cc: stable@kernel.org
Suggested-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Michael Riesch <michael.riesch@collabora.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
2026-03-18 11:21:30 +01:00
Shiraz Saleem
e37afcb56a RDMA/irdma: Harden depth calculation functions
An issue was exposed where OS can pass in U32_MAX for SQ/RQ/SRQ size.
This can cause integer overflow and truncation of SQ/RQ/SRQ depth
returning a success when it should have failed.

Harden the functions to do all depth calculations and boundary
checking in u64 sizes.

Fixes: 563e1feb5f ("RDMA/irdma: Add SRQ support")
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Tatyana Nikolova
7221f581ee RDMA/irdma: Return EINVAL for invalid arp index error
When rdma_connect() fails due to an invalid arp index, user space rdma core
reports ENOMEM which is confusing. Modify irdma_make_cm_node() to return the
correct error code.

Fixes: 146b9756f1 ("RDMA/irdma: Add connection manager")
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Anil Samal
6f52370970 RDMA/irdma: Fix deadlock during netdev reset with active connections
Resolve deadlock that occurs when user executes netdev reset while RDMA
applications (e.g., rping) are active. The netdev reset causes ice
driver to remove irdma auxiliary driver, triggering device_delete and
subsequent client removal. During client removal, uverbs_client waits
for QP reference count to reach zero while cma_client holds the final
reference, creating circular dependency and indefinite wait in iWARP
mode. Skip QP reference count wait during device reset to prevent
deadlock.

Fixes: c8f304d75f ("RDMA/irdma: Prevent QP use after free")
Signed-off-by: Anil Samal <anil.samal@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Tatyana Nikolova
c45c6ebd69 RDMA/irdma: Remove reset check from irdma_modify_qp_to_err()
During reset, irdma_modify_qp() to error should be called to disconnect
the QP. Without this fix, if not preceded by irdma_modify_qp() to error, the
API call irdma_destroy_qp() gets stuck waiting for the QP refcount to go
to zero, because the cm_node associated with this QP isn't disconnected.

Fixes: 915cc7ac0f ("RDMA/irdma: Add miscellaneous utility definitions")
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Ivan Barrera
b415399c9a RDMA/irdma: Clean up unnecessary dereference of event->cm_node
The cm_node is available and the usage of cm_node and event->cm_node
seems arbitrary. Clean up unnecessary dereference of event->cm_node.

Fixes: 146b9756f1 ("RDMA/irdma: Add connection manager")
Signed-off-by: Ivan Barrera <ivan.d.barrera@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Tatyana Nikolova
5e8f023973 RDMA/irdma: Remove a NOP wait_event() in irdma_modify_qp_roce()
Remove a NOP wait_event() in irdma_modify_qp_roce() which is relevant
for iWARP and likely a copy and paste artifact for RoCEv2. The wait event
is for sending a reset on a TCP connection, after the reset has been
requested in irdma_modify_qp(), which occurs only in iWarp mode.

Fixes: b48c24c2d7 ("RDMA/irdma: Implement device supported verb APIs")
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Tatyana Nikolova
8c1f19a222 RDMA/irdma: Update ibqp state to error if QP is already in error state
In irdma_modify_qp() update ibqp state to error if the irdma QP is already
in error state, otherwise the ibqp state which is visible to the consumer
app remains stale.

Fixes: b48c24c2d7 ("RDMA/irdma: Implement device supported verb APIs")
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Jacob Moroni
11a95521fb RDMA/irdma: Initialize free_qp completion before using it
In irdma_create_qp, if ib_copy_to_udata fails, it will call
irdma_destroy_qp to clean up which will attempt to wait on
the free_qp completion, which is not initialized yet. Fix this
by initializing the completion before the ib_copy_to_udata call.

Fixes: b48c24c2d7 ("RDMA/irdma: Implement device supported verb APIs")
Signed-off-by: Jacob Moroni <jmoroni@google.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-18 06:20:53 -04:00
Rahul Bukte
0162ab3220 drm/i915/gt: Check set_default_submission() before deferencing
When the i915 driver firmware binaries are not present, the
set_default_submission pointer is not set. This pointer is
dereferenced during suspend anyways.

Add a check to make sure it is set before dereferencing.

[   23.289926] PM: suspend entry (deep)
[   23.293558] Filesystems sync: 0.000 seconds
[   23.298010] Freezing user space processes
[   23.302771] Freezing user space processes completed (elapsed 0.000 seconds)
[   23.309766] OOM killer disabled.
[   23.313027] Freezing remaining freezable tasks
[   23.318540] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
[   23.342038] serial 00:05: disabled
[   23.345719] serial 00:02: disabled
[   23.349342] serial 00:01: disabled
[   23.353782] sd 0:0:0:0: [sda] Synchronizing SCSI cache
[   23.358993] sd 1:0:0:0: [sdb] Synchronizing SCSI cache
[   23.361635] ata1.00: Entering standby power mode
[   23.368863] ata2.00: Entering standby power mode
[   23.445187] BUG: kernel NULL pointer dereference, address: 0000000000000000
[   23.452194] #PF: supervisor instruction fetch in kernel mode
[   23.457896] #PF: error_code(0x0010) - not-present page
[   23.463065] PGD 0 P4D 0
[   23.465640] Oops: Oops: 0010 [#1] SMP NOPTI
[   23.469869] CPU: 8 UID: 0 PID: 211 Comm: kworker/u48:18 Tainted: G S      W           6.19.0-rc4-00020-gf0b9d8eb98df #10 PREEMPT(voluntary)
[   23.482512] Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN
[   23.496511] Workqueue: async async_run_entry_fn
[   23.501087] RIP: 0010:0x0
[   23.503755] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
[   23.510324] RSP: 0018:ffffb4a60065fca8 EFLAGS: 00010246
[   23.515592] RAX: 0000000000000000 RBX: ffff9f428290e000 RCX: 000000000000000f
[   23.522765] RDX: 0000000000000000 RSI: 0000000000000282 RDI: ffff9f428290e000
[   23.529937] RBP: ffff9f4282907070 R08: ffff9f4281130428 R09: 00000000ffffffff
[   23.537111] R10: 0000000000000000 R11: 0000000000000001 R12: ffff9f42829070f8
[   23.544284] R13: ffff9f4282906028 R14: ffff9f4282900000 R15: ffff9f4282906b68
[   23.551457] FS:  0000000000000000(0000) GS:ffff9f466b2cf000(0000) knlGS:0000000000000000
[   23.559588] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   23.565365] CR2: ffffffffffffffd6 CR3: 000000031c230001 CR4: 0000000000f70ef0
[   23.572539] PKRU: 55555554
[   23.575281] Call Trace:
[   23.577770]  <TASK>
[   23.579905]  intel_engines_reset_default_submission+0x42/0x60
[   23.585695]  __intel_gt_unset_wedged+0x191/0x200
[   23.590360]  intel_gt_unset_wedged+0x20/0x40
[   23.594675]  gt_sanitize+0x15e/0x170
[   23.598290]  i915_gem_suspend_late+0x6b/0x180
[   23.602692]  i915_drm_suspend_late+0x35/0xf0
[   23.607008]  ? __pfx_pci_pm_suspend_late+0x10/0x10
[   23.611843]  dpm_run_callback+0x78/0x1c0
[   23.615817]  device_suspend_late+0xde/0x2e0
[   23.620037]  async_suspend_late+0x18/0x30
[   23.624082]  async_run_entry_fn+0x25/0xa0
[   23.628129]  process_one_work+0x15b/0x380
[   23.632182]  worker_thread+0x2a5/0x3c0
[   23.635973]  ? __pfx_worker_thread+0x10/0x10
[   23.640279]  kthread+0xf6/0x1f0
[   23.643464]  ? __pfx_kthread+0x10/0x10
[   23.647263]  ? __pfx_kthread+0x10/0x10
[   23.651045]  ret_from_fork+0x131/0x190
[   23.654837]  ? __pfx_kthread+0x10/0x10
[   23.658634]  ret_from_fork_asm+0x1a/0x30
[   23.662597]  </TASK>
[   23.664826] Modules linked in:
[   23.667914] CR2: 0000000000000000
[   23.671271] ------------[ cut here ]------------

Signed-off-by: Rahul Bukte <rahul.bukte@sony.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Link: https://patch.msgid.link/20260203044839.1555147-1-suraj.kandpal@intel.com
(cherry picked from commit daa199abc3d3d1740c9e3a2c3e9216ae5b447cad)
Fixes: ff44ad51eb ("drm/i915: Move engine->submit_request selection to a vfunc")
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-18 11:53:20 +02:00
Joanne Koong
bd71fb3fea iomap: fix invalid folio access when i_blkbits differs from I/O granularity
Commit aa35dd5cbc ("iomap: fix invalid folio access after
folio_end_read()") partially addressed invalid folio access for folios
without an ifs attached, but it did not handle the case where
1 << inode->i_blkbits matches the folio size but is different from the
granularity used for the IO, which means IO can be submitted for less
than the full folio for the !ifs case.

In this case, the condition:

  if (*bytes_submitted == folio_len)
    ctx->cur_folio = NULL;

in iomap_read_folio_iter() will not invalidate ctx->cur_folio, and
iomap_read_end() will still be called on the folio even though the IO
helper owns it and will finish the read on it.

Fix this by unconditionally invalidating ctx->cur_folio for the !ifs
case.

Reported-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Tested-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/linux-fsdevel/b3dfe271-4e3d-4922-b618-e73731242bca@wdc.com/
Fixes: b2f35ac414 ("iomap: add caller-provided callbacks for read and readahead")
Cc: stable@vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://patch.msgid.link/20260317203935.830549-1-joannelkoong@gmail.com
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-18 10:42:08 +01:00
Bill Wendling
e5966096d0 xfs: annotate struct xfs_attr_list_context with __counted_by_ptr
Add the `__counted_by_ptr` attribute to the `buffer` field of `struct
xfs_attr_list_context`. This field is used to point to a buffer of
size `bufsize`.

The `buffer` field is assigned in:
1. `xfs_ioc_attr_list` in `fs/xfs/xfs_handle.c`
2. `xfs_xattr_list` in `fs/xfs/xfs_xattr.c`
3. `xfs_getparents` in `fs/xfs/xfs_handle.c` (implicitly initialized to NULL)

In `xfs_ioc_attr_list`, `buffer` was assigned before `bufsize`. Reorder
them to ensure `bufsize` is set before `buffer` is assigned, although
no access happens between them.

In `xfs_xattr_list`, `buffer` was assigned before `bufsize`. Reorder
them to ensure `bufsize` is set before `buffer` is assigned.

In `xfs_getparents`, `buffer` is NULL (from zero initialization) and
remains NULL. `bufsize` is set to a non-zero value, but since `buffer`
is NULL, no access occurs.

In all cases, the pointer `buffer` is not accessed before `bufsize` is set.

This patch was generated by CodeMender and reviewed by Bill Wendling.
Tested by running xfstests.

Signed-off-by: Bill Wendling <morbo@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-18 09:54:39 +01:00
Christoph Hellwig
0c98524ab2 xfs: cleanup buftarg handling in XFS_IOC_VERIFY_MEDIA
The newly added XFS_IOC_VERIFY_MEDIA is a bit unusual in how it handles
buftarg fields.  Update it to be more in line with other XFS code:

 - use btp->bt_dev instead of btp->bt_bdev->bd_dev to retrieve the device
   number for tracing
 - use btp->bt_logical_sectorsize instead of
   bdev_logical_block_size(btp->bt_bdev) to retrieve the logical sector
   size
 - compare the buftarg and not the bdev to see if there is a separate
   log buftarg

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-18 09:52:33 +01:00
hongao
268378b6ad xfs: scrub: unlock dquot before early return in quota scrub
xchk_quota_item can return early after calling xchk_fblock_process_error.
When that helper returns false, the function returned immediately without
dropping dq->q_qlock, which can leave the dquot lock held and risk lock
leaks or deadlocks in later quota operations.

Fix this by unlocking dq->q_qlock before the early return.

Signed-off-by: hongao <hongao@uniontech.com>
Fixes: 7d1f0e167a ("xfs: check the ondisk space mapping behind a dquot")
Cc: <stable@vger.kernel.org> # v6.8
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-18 09:44:46 +01:00
Yuto Ohnuki
7cac609473 xfs: refactor xfsaild_push loop into helper
Factor the loop body of xfsaild_push() into a separate
xfsaild_process_logitem() helper to improve readability.

This is a pure code movement with no functional change.

Signed-off-by: Yuto Ohnuki <ytohnuki@amazon.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-18 09:40:31 +01:00
Yuto Ohnuki
394d70b86f xfs: save ailp before dropping the AIL lock in push callbacks
In xfs_inode_item_push() and xfs_qm_dquot_logitem_push(), the AIL lock
is dropped to perform buffer IO. Once the cluster buffer no longer
protects the log item from reclaim, the log item may be freed by
background reclaim or the dquot shrinker. The subsequent spin_lock()
call dereferences lip->li_ailp, which is a use-after-free.

Fix this by saving the ailp pointer in a local variable while the AIL
lock is held and the log item is guaranteed to be valid.

Reported-by: syzbot+652af2b3c5569c4ab63c@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=652af2b3c5569c4ab63c
Fixes: 90c60e1640 ("xfs: xfs_iflush() is no longer necessary")
Cc: stable@vger.kernel.org # v5.9
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Yuto Ohnuki <ytohnuki@amazon.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-18 09:40:31 +01:00
Yuto Ohnuki
79ef34ec05 xfs: avoid dereferencing log items after push callbacks
After xfsaild_push_item() calls iop_push(), the log item may have been
freed if the AIL lock was dropped during the push. Background inode
reclaim or the dquot shrinker can free the log item while the AIL lock
is not held, and the tracepoints in the switch statement dereference
the log item after iop_push() returns.

Fix this by capturing the log item type, flags, and LSN before calling
xfsaild_push_item(), and introducing a new xfs_ail_push_class trace
event class that takes these pre-captured values and the ailp pointer
instead of the log item pointer.

Reported-by: syzbot+652af2b3c5569c4ab63c@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=652af2b3c5569c4ab63c
Fixes: 90c60e1640 ("xfs: xfs_iflush() is no longer necessary")
Cc: stable@vger.kernel.org # v5.9
Signed-off-by: Yuto Ohnuki <ytohnuki@amazon.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-18 09:40:31 +01:00
Yuto Ohnuki
4f24a767e3 xfs: stop reclaim before pushing AIL during unmount
The unmount sequence in xfs_unmount_flush_inodes() pushed the AIL while
background reclaim and inodegc are still running. This is broken
independently of any use-after-free issues - background reclaim and
inodegc should not be running while the AIL is being pushed during
unmount, as inodegc can dirty and insert inodes into the AIL during the
flush, and background reclaim can race to abort and free dirty inodes.

Reorder xfs_unmount_flush_inodes() to stop inodegc and cancel background
reclaim before pushing the AIL. Stop inodegc before cancelling
m_reclaim_work because the inodegc worker can re-queue m_reclaim_work
via xfs_inodegc_set_reclaimable.

Reported-by: syzbot+652af2b3c5569c4ab63c@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=652af2b3c5569c4ab63c
Fixes: 90c60e1640 ("xfs: xfs_iflush() is no longer necessary")
Cc: stable@vger.kernel.org # v5.9
Signed-off-by: Yuto Ohnuki <ytohnuki@amazon.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-18 09:40:31 +01:00
Felix Fietkau
d5ad6ab61c wifi: mac80211: always free skb on ieee80211_tx_prepare_skb() failure
ieee80211_tx_prepare_skb() has three error paths, but only two of them
free the skb. The first error path (ieee80211_tx_prepare() returning
TX_DROP) does not free it, while invoke_tx_handlers() failure and the
fragmentation check both do.

Add kfree_skb() to the first error path so all three are consistent,
and remove the now-redundant frees in callers (ath9k, mt76,
mac80211_hwsim) to avoid double-free.

Document the skb ownership guarantee in the function's kdoc.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/20260314065455.2462900-1-nbd@nbd.name
Fixes: 06be6b149f ("mac80211: add ieee80211_tx_prepare_skb() helper function")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-18 09:09:58 +01:00
Guenter Roeck
deb353d9bb wifi: wlcore: Return -ENOMEM instead of -EAGAIN if there is not enough headroom
Since upstream commit e75665dd09 ("wifi: wlcore: ensure skb headroom
before skb_push"), wl1271_tx_allocate() and with it
wl1271_prepare_tx_frame() returns -EAGAIN if pskb_expand_head() fails.
However, in wlcore_tx_work_locked(), a return value of -EAGAIN from
wl1271_prepare_tx_frame() is interpreted as the aggregation buffer being
full. This causes the code to flush the buffer, put the skb back at the
head of the queue, and immediately retry the same skb in a tight while
loop.

Because wlcore_tx_work_locked() holds wl->mutex, and the retry happens
immediately with GFP_ATOMIC, this will result in an infinite loop and a
CPU soft lockup. Return -ENOMEM instead so the packet is dropped and
the loop terminates.

The problem was found by an experimental code review agent based on
gemini-3.1-pro while reviewing backports into v6.18.y.

Assisted-by: Gemini:gemini-3.1-pro
Fixes: e75665dd09 ("wifi: wlcore: ensure skb headroom before skb_push")
Cc: Peter Astrand <astrand@lysator.liu.se>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://patch.msgid.link/20260318064636.3065925-1-linux@roeck-us.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-18 09:05:06 +01:00
Xiang Mei
c73bb9a2d3 wifi: mac80211: fix NULL deref in mesh_matches_local()
mesh_matches_local() unconditionally dereferences ie->mesh_config to
compare mesh configuration parameters. When called from
mesh_rx_csa_frame(), the parsed action-frame elements may not contain a
Mesh Configuration IE, leaving ie->mesh_config NULL and triggering a
kernel NULL pointer dereference.

The other two callers are already safe:
  - ieee80211_mesh_rx_bcn_presp() checks !elems->mesh_config before
    calling mesh_matches_local()
  - mesh_plink_get_event() is only reached through
    mesh_process_plink_frame(), which checks !elems->mesh_config, too

mesh_rx_csa_frame() is the only caller that passes raw parsed elements
to mesh_matches_local() without guarding mesh_config. An adjacent
attacker can exploit this by sending a crafted CSA action frame that
includes a valid Mesh ID IE but omits the Mesh Configuration IE,
crashing the kernel.

The captured crash log:

Oops: general protection fault, probably for non-canonical address ...
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
Workqueue: events_unbound cfg80211_wiphy_work
[...]
Call Trace:
 <TASK>
 ? __pfx_mesh_matches_local (net/mac80211/mesh.c:65)
 ieee80211_mesh_rx_queued_mgmt (net/mac80211/mesh.c:1686)
 [...]
 ieee80211_iface_work (net/mac80211/iface.c:1754 net/mac80211/iface.c:1802)
 [...]
 cfg80211_wiphy_work (net/wireless/core.c:426)
 process_one_work (net/kernel/workqueue.c:3280)
 ? assign_work (net/kernel/workqueue.c:1219)
 worker_thread (net/kernel/workqueue.c:3352)
 ? __pfx_worker_thread (net/kernel/workqueue.c:3385)
 kthread (net/kernel/kthread.c:436)
 [...]
 ret_from_fork_asm (net/arch/x86/entry/entry_64.S:255)
 </TASK>

This patch adds a NULL check for ie->mesh_config at the top of
mesh_matches_local() to return false early when the Mesh Configuration
IE is absent.

Fixes: 2e3c873682 ("mac80211: support functions for mesh")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Link: https://patch.msgid.link/20260318034244.2595020-1-xmei5@asu.edu
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-18 09:01:16 +01:00
Geoffrey D. Bennett
8780f561f6 ALSA: usb-audio: Exclude Scarlett 2i2 1st Gen from SKIP_IFACE_SETUP
The Focusrite Scarlett 2i2 1st Gen (1235:8006) produces
distorted/silent audio when QUIRK_FLAG_SKIP_IFACE_SETUP is active, as
that flag causes the feedback format to be detected as 17.15 instead
of 16.16.

Add a DEVICE_FLG entry for this device before the Focusrite VENDOR_FLG
entry so that it gets no quirk flags, overriding the vendor-wide
SKIP_IFACE_SETUP. This device doesn't have the internal mixer, Air, or
Safe modes that the quirk was designed to protect.

Fixes: 38c322068a ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP")
Reported-by: pairomaniac [https://github.com/geoffreybennett/linux-fcp/issues/54]
Tested-by: pairomaniac [https://github.com/geoffreybennett/linux-fcp/issues/54]
Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://patch.msgid.link/abmsTjKmQMKbhYtK@m.b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-18 08:20:43 +01:00
Hyunwoo Kim
b425e4d0eb ksmbd: fix use-after-free in durable v2 replay of active file handles
parse_durable_handle_context() unconditionally assigns dh_info->fp->conn
to the current connection when handling a DURABLE_REQ_V2 context with
SMB2_FLAGS_REPLAY_OPERATION. ksmbd_lookup_fd_cguid() does not filter by
fp->conn, so it returns file handles that are already actively connected.
The unconditional overwrite replaces fp->conn, and when the overwriting
connection is subsequently freed, __ksmbd_close_fd() dereferences the
stale fp->conn via spin_lock(&fp->conn->llist_lock), causing a
use-after-free.

KASAN report:

[    7.349357] ==================================================================
[    7.349607] BUG: KASAN: slab-use-after-free in _raw_spin_lock+0x75/0xe0
[    7.349811] Write of size 4 at addr ffff8881056ac18c by task kworker/1:2/108
[    7.350010]
[    7.350064] CPU: 1 UID: 0 PID: 108 Comm: kworker/1:2 Not tainted 7.0.0-rc3+ #58 PREEMPTLAZY
[    7.350068] Hardware name: QEMU Ubuntu 24.04 PC v2 (i440FX + PIIX, arch_caps fix, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[    7.350070] Workqueue: ksmbd-io handle_ksmbd_work
[    7.350083] Call Trace:
[    7.350087]  <TASK>
[    7.350087]  dump_stack_lvl+0x64/0x80
[    7.350094]  print_report+0xce/0x660
[    7.350100]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[    7.350101]  ? __pfx___mod_timer+0x10/0x10
[    7.350106]  ? _raw_spin_lock+0x75/0xe0
[    7.350108]  kasan_report+0xce/0x100
[    7.350109]  ? _raw_spin_lock+0x75/0xe0
[    7.350114]  kasan_check_range+0x105/0x1b0
[    7.350116]  _raw_spin_lock+0x75/0xe0
[    7.350118]  ? __pfx__raw_spin_lock+0x10/0x10
[    7.350119]  ? __call_rcu_common.constprop.0+0x25e/0x780
[    7.350125]  ? close_id_del_oplock+0x2cc/0x4e0
[    7.350128]  __ksmbd_close_fd+0x27f/0xaf0
[    7.350131]  ksmbd_close_fd+0x135/0x1b0
[    7.350133]  smb2_close+0xb19/0x15b0
[    7.350142]  ? __pfx_smb2_close+0x10/0x10
[    7.350143]  ? xas_load+0x18/0x270
[    7.350146]  ? _raw_spin_lock+0x84/0xe0
[    7.350148]  ? __pfx__raw_spin_lock+0x10/0x10
[    7.350150]  ? _raw_spin_unlock+0xe/0x30
[    7.350151]  ? ksmbd_smb2_check_message+0xeb2/0x24c0
[    7.350153]  ? ksmbd_tree_conn_lookup+0xcd/0xf0
[    7.350154]  handle_ksmbd_work+0x40f/0x1080
[    7.350156]  process_one_work+0x5fa/0xef0
[    7.350162]  ? assign_work+0x122/0x3e0
[    7.350163]  worker_thread+0x54b/0xf70
[    7.350165]  ? __pfx_worker_thread+0x10/0x10
[    7.350166]  kthread+0x346/0x470
[    7.350170]  ? recalc_sigpending+0x19b/0x230
[    7.350176]  ? __pfx_kthread+0x10/0x10
[    7.350178]  ret_from_fork+0x4fb/0x6c0
[    7.350183]  ? __pfx_ret_from_fork+0x10/0x10
[    7.350185]  ? __switch_to+0x36c/0xbe0
[    7.350188]  ? __pfx_kthread+0x10/0x10
[    7.350190]  ret_from_fork_asm+0x1a/0x30
[    7.350197]  </TASK>
[    7.350197]
[    7.355160] Allocated by task 123:
[    7.355261]  kasan_save_stack+0x33/0x60
[    7.355373]  kasan_save_track+0x14/0x30
[    7.355484]  __kasan_kmalloc+0x8f/0xa0
[    7.355593]  ksmbd_conn_alloc+0x44/0x6d0
[    7.355711]  ksmbd_kthread_fn+0x243/0xd70
[    7.355839]  kthread+0x346/0x470
[    7.355942]  ret_from_fork+0x4fb/0x6c0
[    7.356051]  ret_from_fork_asm+0x1a/0x30
[    7.356164]
[    7.356214] Freed by task 134:
[    7.356305]  kasan_save_stack+0x33/0x60
[    7.356416]  kasan_save_track+0x14/0x30
[    7.356527]  kasan_save_free_info+0x3b/0x60
[    7.356646]  __kasan_slab_free+0x43/0x70
[    7.356761]  kfree+0x1ca/0x430
[    7.356862]  ksmbd_tcp_disconnect+0x59/0xe0
[    7.356993]  ksmbd_conn_handler_loop+0x77e/0xd40
[    7.357138]  kthread+0x346/0x470
[    7.357240]  ret_from_fork+0x4fb/0x6c0
[    7.357350]  ret_from_fork_asm+0x1a/0x30
[    7.357463]
[    7.357513] The buggy address belongs to the object at ffff8881056ac000
[    7.357513]  which belongs to the cache kmalloc-1k of size 1024
[    7.357857] The buggy address is located 396 bytes inside of
[    7.357857]  freed 1024-byte region [ffff8881056ac000, ffff8881056ac400)

Fix by removing the unconditional fp->conn assignment and rejecting the
replay when fp->conn is non-NULL. This is consistent with
ksmbd_lookup_durable_fd(), which also rejects file handles with a
non-NULL fp->conn. For disconnected file handles (fp->conn == NULL),
ksmbd_reopen_durable_fd() handles setting fp->conn.

Fixes: c8efcc7861 ("ksmbd: add support for durable handles v1/v2")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-17 21:45:29 -05:00
Hyunwoo Kim
c33615f995 ksmbd: fix use-after-free of share_conf in compound request
smb2_get_ksmbd_tcon() reuses work->tcon in compound requests without
validating tcon->t_state. ksmbd_tree_conn_lookup() checks t_state ==
TREE_CONNECTED on the initial lookup path, but the compound reuse path
bypasses this check entirely.

If a prior command in the compound (SMB2_TREE_DISCONNECT) sets t_state
to TREE_DISCONNECTED and frees share_conf via ksmbd_share_config_put(),
subsequent commands dereference the freed share_conf through
work->tcon->share_conf.

KASAN report:

[    4.144653] ==================================================================
[    4.145059] BUG: KASAN: slab-use-after-free in smb2_write+0xc74/0xe70
[    4.145415] Read of size 4 at addr ffff88810430c194 by task kworker/1:1/44
[    4.145772]
[    4.145867] CPU: 1 UID: 0 PID: 44 Comm: kworker/1:1 Not tainted 7.0.0-rc3+ #60 PREEMPTLAZY
[    4.145871] Hardware name: QEMU Ubuntu 24.04 PC v2 (i440FX + PIIX, arch_caps fix, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[    4.145875] Workqueue: ksmbd-io handle_ksmbd_work
[    4.145888] Call Trace:
[    4.145892]  <TASK>
[    4.145894]  dump_stack_lvl+0x64/0x80
[    4.145910]  print_report+0xce/0x660
[    4.145919]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[    4.145928]  ? smb2_write+0xc74/0xe70
[    4.145931]  kasan_report+0xce/0x100
[    4.145934]  ? smb2_write+0xc74/0xe70
[    4.145937]  smb2_write+0xc74/0xe70
[    4.145939]  ? __pfx_smb2_write+0x10/0x10
[    4.145942]  ? _raw_spin_unlock+0xe/0x30
[    4.145945]  ? ksmbd_smb2_check_message+0xeb2/0x24c0
[    4.145948]  ? smb2_tree_disconnect+0x31c/0x480
[    4.145951]  handle_ksmbd_work+0x40f/0x1080
[    4.145953]  process_one_work+0x5fa/0xef0
[    4.145962]  ? assign_work+0x122/0x3e0
[    4.145964]  worker_thread+0x54b/0xf70
[    4.145967]  ? __pfx_worker_thread+0x10/0x10
[    4.145970]  kthread+0x346/0x470
[    4.145976]  ? recalc_sigpending+0x19b/0x230
[    4.145980]  ? __pfx_kthread+0x10/0x10
[    4.145984]  ret_from_fork+0x4fb/0x6c0
[    4.145992]  ? __pfx_ret_from_fork+0x10/0x10
[    4.145995]  ? __switch_to+0x36c/0xbe0
[    4.145999]  ? __pfx_kthread+0x10/0x10
[    4.146003]  ret_from_fork_asm+0x1a/0x30
[    4.146013]  </TASK>
[    4.146014]
[    4.149858] Allocated by task 44:
[    4.149953]  kasan_save_stack+0x33/0x60
[    4.150061]  kasan_save_track+0x14/0x30
[    4.150169]  __kasan_kmalloc+0x8f/0xa0
[    4.150274]  ksmbd_share_config_get+0x1dd/0xdd0
[    4.150401]  ksmbd_tree_conn_connect+0x7e/0x600
[    4.150529]  smb2_tree_connect+0x2e6/0x1000
[    4.150645]  handle_ksmbd_work+0x40f/0x1080
[    4.150761]  process_one_work+0x5fa/0xef0
[    4.150873]  worker_thread+0x54b/0xf70
[    4.150978]  kthread+0x346/0x470
[    4.151071]  ret_from_fork+0x4fb/0x6c0
[    4.151176]  ret_from_fork_asm+0x1a/0x30
[    4.151286]
[    4.151332] Freed by task 44:
[    4.151418]  kasan_save_stack+0x33/0x60
[    4.151526]  kasan_save_track+0x14/0x30
[    4.151634]  kasan_save_free_info+0x3b/0x60
[    4.151751]  __kasan_slab_free+0x43/0x70
[    4.151861]  kfree+0x1ca/0x430
[    4.151952]  __ksmbd_tree_conn_disconnect+0xc8/0x190
[    4.152088]  smb2_tree_disconnect+0x1cd/0x480
[    4.152211]  handle_ksmbd_work+0x40f/0x1080
[    4.152326]  process_one_work+0x5fa/0xef0
[    4.152438]  worker_thread+0x54b/0xf70
[    4.152545]  kthread+0x346/0x470
[    4.152638]  ret_from_fork+0x4fb/0x6c0
[    4.152743]  ret_from_fork_asm+0x1a/0x30
[    4.152853]
[    4.152900] The buggy address belongs to the object at ffff88810430c180
[    4.152900]  which belongs to the cache kmalloc-96 of size 96
[    4.153226] The buggy address is located 20 bytes inside of
[    4.153226]  freed 96-byte region [ffff88810430c180, ffff88810430c1e0)
[    4.153549]
[    4.153596] The buggy address belongs to the physical page:
[    4.153750] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0xffff88810430ce80 pfn:0x10430c
[    4.154000] flags: 0x100000000000200(workingset|node=0|zone=2)
[    4.154160] page_type: f5(slab)
[    4.154251] raw: 0100000000000200 ffff888100041280 ffff888100040110 ffff888100040110
[    4.154461] raw: ffff88810430ce80 0000000800200009 00000000f5000000 0000000000000000
[    4.154668] page dumped because: kasan: bad access detected
[    4.154820]
[    4.154866] Memory state around the buggy address:
[    4.155002]  ffff88810430c080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[    4.155196]  ffff88810430c100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[    4.155391] >ffff88810430c180: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
[    4.155587]                          ^
[    4.155693]  ffff88810430c200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[    4.155891]  ffff88810430c280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[    4.156087] ==================================================================

Add the same t_state validation to the compound reuse path, consistent
with ksmbd_tree_conn_lookup().

Fixes: 5005bcb421 ("ksmbd: validate session id and tree id in the compound request")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-17 21:45:29 -05:00
Namjae Jeon
3a64125730 ksmbd: use volume UUID in FS_OBJECT_ID_INFORMATION
Use sb->s_uuid for a proper volume identifier as the primary choice.
For filesystems that do not provide a UUID, fall back to stfs.f_fsid
obtained from vfs_statfs().

Cc: stable@vger.kernel.org
Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-17 21:45:29 -05:00
Namjae Jeon
282343cf8a ksmbd: unset conn->binding on failed binding request
When a multichannel SMB2_SESSION_SETUP request with
SMB2_SESSION_REQ_FLAG_BINDING fails ksmbd sets conn->binding = true
but never clears it on the error path. This leaves the connection in
a binding state where all subsequent ksmbd_session_lookup_all() calls
fall back to the global sessions table. This fix it by clearing
conn->binding = false in the error path.

Cc: stable@vger.kernel.org
Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-17 21:45:29 -05:00
Nicholas Carlini
5258572aa5 ksmbd: fix share_conf UAF in tree_conn disconnect
__ksmbd_tree_conn_disconnect() drops the share_conf reference before
checking tree_conn->refcount. When someone uses SMB3 multichannel and
binds two connections to one session, a SESSION_LOGOFF on connection A
calls ksmbd_conn_wait_idle(conn) which only drains connection A's
request counter, not connection B's. This means there's a race condition:
requests already dispatched on connection B hold tree_conn references via
work->tcon. The disconnect path frees share_conf while those requests
are still walking work->tcon->share_conf, causing a use-after-free.

This fix combines the share_conf put with the tree_conn free so it
only happens when the last reference is dropped.

Fixes: b39a1833cc ("ksmbd: fix use-after-free in ksmbd_tree_connect_put under concurrency")
Signed-off-by: Nicholas Carlini <nicholas@carlini.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-17 21:45:29 -05:00
Junrui Luo
64dcbde7f8 bnxt_en: fix OOB access in DBG_BUF_PRODUCER async event handler
The ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER handler in
bnxt_async_event_process() uses a firmware-supplied 'type' field
directly as an index into bp->bs_trace[] without bounds validation.

The 'type' field is a 16-bit value extracted from DMA-mapped completion
ring memory that the NIC writes directly to host RAM. A malicious or
compromised NIC can supply any value from 0 to 65535, causing an
out-of-bounds access into kernel heap memory.

The bnxt_bs_trace_check_wrap() call then dereferences bs_trace->magic_byte
and writes to bs_trace->last_offset and bs_trace->wrapped, leading to
kernel memory corruption or a crash.

Fix by adding a bounds check and defining BNXT_TRACE_MAX as
DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE + 1 to cover all currently
defined firmware trace types (0x0 through 0xc).

Fixes: 84fcd9449f ("bnxt_en: Manage the FW trace context memory")
Reported-by: Yuhao Jiang <danisjiang@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Junrui Luo <moonafterrain@outlook.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/SYBPR01MB7881A253A1C9775D277F30E9AF42A@SYBPR01MB7881.ausprd01.prod.outlook.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-17 15:57:57 -07:00
Sanman Pradhan
32f59301b9 hwmon: (pmbus/ina233) Add error check for pmbus_read_word_data() return value
ina233_read_word_data() uses the return value of pmbus_read_word_data()
directly in a DIV_ROUND_CLOSEST() computation without first checking for
errors. If the underlying I2C transaction fails, a negative error code is
used in the arithmetic, producing a garbage sensor value instead of
propagating the error.

Add the missing error check before using the return value.

Fixes: b64b6cb163 ("hwmon: Add driver for TI INA233 Current and Power Monitor")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260317174553.385567-1-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-17 15:21:36 -07:00
Sanman Pradhan
c6f45ed26b hwmon: (pmbus/mp2869) Check pmbus_read_byte_data() before using its return value
In mp2869_read_byte_data() and mp2869_read_word_data(), the return value
of pmbus_read_byte_data() for PMBUS_STATUS_MFR_SPECIFIC is used directly
inside FIELD_GET() macro arguments without error checking. If the I2C
transaction fails, a negative error code is passed to FIELD_GET() and
FIELD_PREP(), silently corrupting the status register bits being
constructed.

Extract the nested pmbus_read_byte_data() calls into a separate variable
and check for errors before use. This also eliminates a redundant duplicate
read of the same register in the PMBUS_STATUS_TEMPERATURE case.

Fixes: a3a2923aaf ("hwmon: add MP2869,MP29608,MP29612 and MP29816 series driver")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260317173308.382545-4-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-17 15:20:55 -07:00
Sanman Pradhan
19d4b9c8a1 hwmon: (pmbus/mp2975) Add error check for pmbus_read_word_data() return value
mp2973_read_word_data() XORs the return value of pmbus_read_word_data()
with PB_STATUS_POWER_GOOD_N without first checking for errors. If the I2C
transaction fails, a negative error code is XORed with the constant,
producing a corrupted value that is returned as valid status data instead
of propagating the error.

Add the missing error check before modifying the return value.

Fixes: acda945afb ("hwmon: (pmbus/mp2975) Fix PGOOD in READ_STATUS_WORD")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260317173308.382545-3-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-17 15:20:24 -07:00
Linus Torvalds
a989fde763 Merge tag 'libnvdimm-fixes-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm fix from Ira Weiny:

 - Fix old potential use after free bug

* tag 'libnvdimm-fixes-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  nvdimm/bus: Fix potential use after free in asynchronous initialization
2026-03-17 15:19:58 -07:00
Sanman Pradhan
c925fccc4f hwmon: (pmbus/hac300s) Add error check for pmbus_read_word_data() return value
hac300s_read_word_data() passes the return value of pmbus_read_word_data()
directly to FIELD_GET() without checking for errors. If the I2C transaction
fails, a negative error code is sign-extended and passed to FIELD_GET(),
which silently produces garbage data instead of propagating the error.

Add the missing error check before using the return value in
the FIELD_GET() macro.

Fixes: 669cf162f7 ("hwmon: Add support for HiTRON HAC300S PSU")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260317173308.382545-2-sanman.pradhan@hpe.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-17 15:19:45 -07:00
Alex Deucher
86650ee224 drm/radeon: apply state adjust rules to some additional HAINAN vairants
They need a similar workaround.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1839
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 87327658c848f56eac166cb382b57b83bf06c5ac)
Cc: stable@vger.kernel.org
2026-03-17 18:04:15 -04:00
Alex Deucher
9787f7da18 drm/amdgpu: apply state adjust rules to some additional HAINAN vairants
They need a similar workaround.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1839
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 0de31d92a173d3d94f28051b0b80a6c98913aed4)
Cc: stable@vger.kernel.org
2026-03-17 18:04:03 -04:00
Alex Deucher
e9f58ff991 drm/amdgpu: rework how we handle TLB fences
Add a new VM flag to indicate whether or not we need
a TLB fence.  Userqs (KFD or KGD) require a TLB fence.
A TLB fence is not strictly required for kernel queues,
but it shouldn't hurt.  That said, enabling this
unconditionally should be fine, but it seems to tickle
some issues in KIQ/MES.  Only enable them for KFD,
or when KGD userq queues are enabled (currently via module
parameter).

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4798
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4749
Fixes: f3854e04b7 ("drm/amdgpu: attach tlb fence to the PTs update")
Cc: Christian König <christian.koenig@amd.com>
Cc: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 69c5fbd2b93b5ced77c6e79afe83371bca84c788)
Cc: stable@vger.kernel.org
2026-03-17 18:03:09 -04:00
Linus Torvalds
9e22d8e18f Merge tag 'linux_kselftest-kunit-fixes-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
Pull kunit fix from Shuah Khan:

 - Add documentation for --list_suites feature

* tag 'linux_kselftest-kunit-fixes-7.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest:
  kunit: Add documentation of --list_suites
2026-03-17 15:00:53 -07:00
Michal Swiatkowski
6850deb611 libie: prevent memleak in fwlog code
All cmd_buf buffers are allocated and need to be freed after usage.
Add an error unwinding path that properly frees these buffers.

The memory leak happens whenever fwlog configuration is changed. For
example:

$echo 256K > /sys/kernel/debug/ixgbe/0000\:32\:00.0/fwlog/log_size

Fixes: 96a9a9341c ("ice: configure FW logging")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-17 14:12:36 -07:00
Linus Torvalds
f0caa1d49c Merge tag 'hid-for-linus-2026031701' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid
Pull HID fixes from Jiri Kosina:

 - various fixes dealing with (intentionally) broken devices in HID
   core, logitech-hidpp and multitouch drivers (Lee Jones)

 - fix for OOB in wacom driver (Benoît Sevens)

 - fix for potentialy HID-bpf-induced buffer overflow in () (Benjamin
   Tissoires)

 - various other small fixes and device ID / quirk additions

* tag 'hid-for-linus-2026031701' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid:
  HID: multitouch: Check to ensure report responses match the request
  HID: logitech-hidpp: Prevent use-after-free on force feedback initialisation failure
  HID: bpf: prevent buffer overflow in hid_hw_request
  selftests/hid: fix compilation when bpf_wq and hid_device are not exported
  HID: core: Mitigate potential OOB by removing bogus memset()
  HID: intel-thc-hid: Set HID_PHYS with PCI BDF
  HID: appletb-kbd: add .resume method in PM
  HID: logitech-hidpp: Enable MX Master 4 over bluetooth
  HID: input: Add HID_BATTERY_QUIRK_DYNAMIC for Elan touchscreens
  HID: input: Drop Asus UX550* touchscreen ignore battery quirks
  HID: asus: add xg mobile 2022 external hardware support
  HID: wacom: fix out-of-bounds read in wacom_intuos_bt_irq
2026-03-17 13:55:51 -07:00
Petr Oros
fc9c69be59 iavf: fix VLAN filter lost on add/delete race
When iavf_add_vlan() finds an existing filter in IAVF_VLAN_REMOVE
state, it transitions the filter to IAVF_VLAN_ACTIVE assuming the
pending delete can simply be cancelled. However, there is no guarantee
that iavf_del_vlans() has not already processed the delete AQ request
and removed the filter from the PF. In that case the filter remains in
the driver's list as IAVF_VLAN_ACTIVE but is no longer programmed on
the NIC. Since iavf_add_vlans() only picks up filters in
IAVF_VLAN_ADD state, the filter is never re-added, and spoof checking
drops all traffic for that VLAN.

  CPU0                       CPU1                     Workqueue
  ----                       ----                     ---------
  iavf_del_vlan(vlan 100)
    f->state = REMOVE
    schedule AQ_DEL_VLAN
                             iavf_add_vlan(vlan 100)
                               f->state = ACTIVE
                                                      iavf_del_vlans()
                                                        f is ACTIVE, skip
                                                      iavf_add_vlans()
                                                        f is ACTIVE, skip

  Filter is ACTIVE in driver but absent from NIC.

Transition to IAVF_VLAN_ADD instead and schedule
IAVF_FLAG_AQ_ADD_VLAN_FILTER so iavf_add_vlans() re-programs the
filter.  A duplicate add is idempotent on the PF.

Fixes: 0c0da0e951 ("iavf: refactor VLAN filter states")
Signed-off-by: Petr Oros <poros@redhat.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-17 13:48:02 -07:00
Zdenek Bouska
45b33e805b igc: fix page fault in XDP TX timestamps handling
If an XDP application that requested TX timestamping is shutting down
while the link of the interface in use is still up the following kernel
splat is reported:

[  883.803618] [   T1554] BUG: unable to handle page fault for address: ffffcfb6200fd008
...
[  883.803650] [   T1554] Call Trace:
[  883.803652] [   T1554]  <TASK>
[  883.803654] [   T1554]  igc_ptp_tx_tstamp_event+0xdf/0x160 [igc]
[  883.803660] [   T1554]  igc_tsync_interrupt+0x2d5/0x300 [igc]
...

During shutdown of the TX ring the xsk_meta pointers are left behind, so
that the IRQ handler is trying to touch them.

This issue is now being fixed by cleaning up the stale xsk meta data on
TX shutdown. TX timestamps on other queues remain unaffected.

Fixes: 15fd021bc4 ("igc: Add Tx hardware timestamp request for AF_XDP zero-copy packet")
Signed-off-by: Zdenek Bouska <zdenek.bouska@siemens.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Florian Bezdeka <florian.bezdeka@siemens.com>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-17 13:28:55 -07:00
Kohei Enju
0ffba24665 igc: fix missing update of skb->tail in igc_xmit_frame()
igc_xmit_frame() misses updating skb->tail when the packet size is
shorter than the minimum one.
Use skb_put_padto() in alignment with other Intel Ethernet drivers.

Fixes: 0507ef8a03 ("igc: Add transmit and receive fastpath and interrupt handlers")
Signed-off-by: Kohei Enju <kohei@enjuk.jp>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-17 13:28:55 -07:00
Leo Yan
563d39928d perf kvm stat: Fix relative paths for including headers
Add an extra "../" to the relative paths so that the uAPI headers
provided by tools can be found correctly.

Fixes: a724a8fce5 ("perf kvm stat: Fix build error")
Reported-by: Namhyung Kim <namhyung@kernel.org>
Suggested-by: Ian Rogers <irogers@google.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Leo Yan <leo.yan@arm.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-17 17:16:45 -03:00
Thomas Richter
72a8b9c060 perf parse-events: Fix big-endian 'overwrite' by writing correct union member
The "Read backward ring buffer" test crashes on big-endian (e.g. s390x)
due to a NULL dereference when the backward mmap path isn't enabled.

Reproducer:
  # ./perf test -F 'Read backward ring buffer'
  Segmentation fault (core dumped)
  # uname -m
  s390x
  #

Root cause:
get_config_terms() stores into evsel_config_term::val.val (u64) while later
code reads boolean fields such as evsel_config_term::val.overwrite.
On big-endian the 1-byte boolean is left-aligned, so writing
evsel_config_term::val.val = 1 is read back as
evsel_config_term::val.overwrite = 0,
leaving backward mmap disabled and a NULL map being used.

Store values in the union member that matches the term type, e.g.:
  /* for OVERWRITE */
  new_term->val.overwrite = 1;  /* not new_term->val.val = 1 */
to fix this. Improve add_config_term() and add two more parameters for
string and value. Function add_config_term() now creates a complete node
element of type evsel_config_term and handles all evsel_config_term::val
union members.

Impact:
Enables backward mmap on big-endian and prevents the crash.
No change on little-endian.

Output after:
 # ./perf test -Fv 44
 --- start ---
 Using CPUID IBM,9175,705,ME1,3.8,002f
 mmap size 1052672B
 mmap size 8192B
 ---- end ----
 44: Read backward ring buffer                         : Ok
 #

Fixes: 159ca97cd9 ("perf parse-events: Refactor get_config_terms() to remove macros")
Reviewed-by: James Clark <james.clark@linaro.org>
Reviewed-by: Jan Polensky <japo@linux.ibm.com>
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-17 17:16:09 -03:00
Ian Rogers
8dd1d9a335 perf metricgroup: Fix metricgroup__has_metric_or_groups()
Use metricgroup__for_each_metric() rather than
pmu_metrics_table__for_each_metric() that combines the
default metric table with, a potentially empty, CPUID table.

Fixes: cee275edcd ("perf metricgroup: Don't early exit if no CPUID table exists")
Reviewed-by: Leo Yan <leo.yan@arm.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Leo Yan <leo.yan@arm.com>
Cc: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-17 17:16:05 -03:00
Jens Axboe
a68ed2df72 io_uring/poll: fix multishot recv missing EOF on wakeup race
When a socket send and shutdown() happen back-to-back, both fire
wake-ups before the receiver's task_work has a chance to run. The first
wake gets poll ownership (poll_refs=1), and the second bumps it to 2.
When io_poll_check_events() runs, it calls io_poll_issue() which does a
recv that reads the data and returns IOU_RETRY. The loop then drains all
accumulated refs (atomic_sub_return(2) -> 0) and exits, even though only
the first event was consumed. Since the shutdown is a persistent state
change, no further wakeups will happen, and the multishot recv can hang
forever.

Check specifically for HUP in the poll loop, and ensure that another
loop is done to check for status if more than a single poll activation
is pending. This ensures we don't lose the shutdown event.

Cc: stable@vger.kernel.org
Fixes: dbc2564cfe ("io_uring: let fast poll support multishot")
Reported-by: Francis Brosseau <francis@malagauche.com>
Link: https://github.com/axboe/liburing/issues/1549
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-17 13:54:08 -06:00
Saket Dumbre
ab93d7eee9 ACPICA: Update the format of Arg3 of _DSM
To get rid of type incompatibility warnings in Linux.

Fixes: 81f92cff6d ("ACPICA: ACPI_TYPE_ANY does not include the package type")
Link: https://github.com/acpica/acpica/commit/4fb74872dcec
Signed-off-by: Saket Dumbre <saket.dumbre@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/12856643.O9o76ZdvQC@rafael.j.wysocki
2026-03-17 20:51:13 +01:00
Danilo Krummrich
2b38efc05b driver core: platform: use generic driver_override infrastructure
When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 3d713e0e38 ("driver core: platform: add device binding path 'driver_override'")
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260303115720.48783-5-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-17 20:30:57 +01:00
Danilo Krummrich
c5f60e3f07 sh: platform_early: remove pdev->driver_override check
In commit 507fd01d53 ("drivers: move the early platform device support to
arch/sh") platform_match() was copied over to the sh platform_early
code, accidentally including the driver_override check.

This check does not make sense for platform_early, as sysfs is not even
available in first place at this point in the boot process, hence remove
the check.

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Fixes: 507fd01d53 ("drivers: move the early platform device support to arch/sh")
Link: https://lore.kernel.org/all/DH4M3DJ4P58T.1BGVAVXN71Z09@kernel.org/
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-17 20:30:57 +01:00
Danilo Krummrich
813bbc4d33 hwmon: axi-fan: don't use driver_override as IRQ name
Do not use driver_override as IRQ name, as it is not guaranteed to point
to a valid string; use NULL instead (which makes the devm IRQ helpers
use dev_name()).

Fixes: 8412b410fa ("hwmon: Support ADI Fan Control IP")
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260303115720.48783-4-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-17 20:30:57 +01:00
Danilo Krummrich
bcd085d5c7 docs: driver-model: document driver_override
Now that we support driver_override as a driver-core feature through
struct device and struct bus_type, add some documentation in the context
of how a device / driver binding is established.

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260303115720.48783-3-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-17 20:30:57 +01:00
Danilo Krummrich
cb3d1049f4 driver core: generalize driver_override in struct device
Currently, there are 12 busses (including platform and PCI) that
duplicate the driver_override logic for their individual devices.

All of them seem to be prone to the bug described in [1].

While this could be solved for every bus individually using a separate
lock, solving this in the driver-core generically results in less (and
cleaner) changes overall.

Thus, move driver_override to struct device, provide corresponding
accessors for busses and handle locking with a separate lock internally.

In particular, add device_set_driver_override(),
device_has_driver_override(), device_match_driver_override() and
generalize the sysfs store() and show() callbacks via a driver_override
feature flag in struct bus_type.

Until all busses have migrated, keep driver_set_override() in place.

Note that we can't use the device lock for the reasons described in [2].

Link: https://bugzilla.kernel.org/show_bug.cgi?id=220789 [1]
Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [2]
Tested-by: Gui-Dong Han <hanguidong02@gmail.com>
Co-developed-by: Gui-Dong Han <hanguidong02@gmail.com>
Signed-off-by: Gui-Dong Han <hanguidong02@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260303115720.48783-2-dakr@kernel.org
[ Use dev->bus instead of sp->bus for consistency; fix commit message to
  refer to the struct bus_type's driver_override feature flag. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-17 20:30:23 +01:00
Ethan Tidmore
0f2055db7b RDMA/efa: Fix possible deadlock
In the error path for efa_com_alloc_comp_ctx() the semaphore assigned to
&aq->avail_cmds is not released.

Detected by Smatch:
drivers/infiniband/hw/efa/efa_com.c:662 efa_com_cmd_exec() warn:
inconsistent returns '&aq->avail_cmds'

Add release for &aq->avail_cmds in efa_com_alloc_comp_ctx() error path.

Fixes: ef3b06742c ("RDMA/efa: Fix use of completion ctx after free")
Signed-off-by: Ethan Tidmore <ethantidmore06@gmail.com>
Link: https://patch.msgid.link/20260314045730.1143862-1-ethantidmore06@gmail.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-17 15:01:05 -04:00
Chuck Lever
f28599f396 RDMA/rw: Fix MR pool exhaustion in bvec RDMA READ path
When IOVA-based DMA mapping is unavailable (e.g., IOMMU
passthrough mode), rdma_rw_ctx_init_bvec() falls back to
checking rdma_rw_io_needs_mr() with the raw bvec count.
Unlike the scatterlist path in rdma_rw_ctx_init(), which
passes a post-DMA-mapping entry count that reflects
coalescing of physically contiguous pages, the bvec path
passes the pre-mapping page count. This overstates the
number of DMA entries, causing every multi-bvec RDMA READ
to consume an MR from the QP's pool.

Under NFS WRITE workloads the server performs RDMA READs
to pull data from the client. With the inflated MR demand,
the pool is rapidly exhausted, ib_mr_pool_get() returns
NULL, and rdma_rw_init_one_mr() returns -EAGAIN. svcrdma
treats this as a DMA mapping failure, closes the connection,
and the client reconnects -- producing a cycle of 71% RPC
retransmissions and ~100 reconnections per test run. RDMA
WRITEs (NFS READ direction) are unaffected because
DMA_TO_DEVICE never triggers the max_sgl_rd check.

Remove the rdma_rw_io_needs_mr() gate from the bvec path
entirely, so that bvec RDMA operations always use the
map_wrs path (direct WR posting without MR allocation).
The bvec caller has no post-DMA-coalescing segment count
available -- xdr_buf and svc_rqst hold pages as individual
pointers, and physical contiguity is discovered only during
DMA mapping -- so the raw page count cannot serve as a
reliable input to rdma_rw_io_needs_mr(). iWARP devices,
which require MRs unconditionally, are handled by an
earlier check in rdma_rw_ctx_init_bvec() and are unaffected.

Fixes: bea28ac14c ("RDMA/core: add MR support for bvec-based RDMA operations")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260313194201.5818-3-cel@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-17 15:00:56 -04:00
Chuck Lever
00da250c21 RDMA/rw: Fall back to direct SGE on MR pool exhaustion
When IOMMU passthrough mode is active, ib_dma_map_sgtable_attrs()
produces no coalescing: each scatterlist page maps 1:1 to a DMA
entry, so sgt.nents equals the raw page count. A 1 MB transfer
yields 256 DMA entries. If that count exceeds the device's
max_sgl_rd threshold (an optimization hint from mlx5 firmware),
rdma_rw_io_needs_mr() steers the operation into the MR
registration path. Each such operation consumes one or more MRs
from a pool sized at max_rdma_ctxs -- roughly one MR per
concurrent context. Under write-intensive workloads that issue
many concurrent RDMA READs, the pool is rapidly exhausted,
ib_mr_pool_get() returns NULL, and rdma_rw_init_one_mr() returns
-EAGAIN. Upper layer protocols treat this as a fatal DMA mapping
failure and tear down the connection.

The max_sgl_rd check is a performance optimization, not a
correctness requirement: the device can handle large SGE counts
via direct posting, just less efficiently than with MR
registration. When the MR pool cannot satisfy a request, falling
back to the direct SGE (map_wrs) path avoids the connection
reset while preserving the MR optimization for the common case
where pool resources are available.

Add a fallback in rdma_rw_ctx_init() so that -EAGAIN from
rdma_rw_init_mr_wrs() triggers direct SGE posting instead of
propagating the error. iWARP devices, which mandate MR
registration for RDMA READs, and force_mr debug mode continue
to treat -EAGAIN as terminal.

Fixes: 00bd1439f4 ("RDMA/rw: Support threshold for registration vs scattering to local pages")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260313194201.5818-2-cel@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-17 15:00:38 -04:00
Zhang Heng
1f182ec9d7 ASoC: amd: yc: Add DMI quirk for Thin A15 B7VF
Add a DMI quirk for the Thin A15 B7VF fixing the issue where
the internal microphone was not detected.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=220833
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260316080218.2931304-1-zhangheng@kylinos.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-17 17:38:35 +00:00
Jonas Karlman
cffcb42c57 drm/bridge: dw-hdmi-qp: fix multi-channel audio output
Channel Allocation (PB4) and Level Shift Information (PB5) are
configured with values from PB1 and PB2 due to the wrong offset
being used. This results in missing audio channels or incorrect
speaker placement when playing multi-channel audio.

Use the correct offset to fix multi-channel audio output.

Fixes: fd0141d1a8 ("drm/bridge: synopsys: Add audio support for dw-hdmi-qp")
Reported-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Reviewed-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Link: https://patch.msgid.link/20260228112822.4056354-1-christianshewitt@gmail.com
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
2026-03-17 18:15:16 +01:00
Praveen Talari
8c89a077ca spi: geni-qcom: Check DMA interrupts early in ISR
The current interrupt handler only checks the GENI main IRQ status
(m_irq) before deciding to return IRQ_NONE. This can lead to spurious
IRQ_NONE returns when DMA interrupts are pending but m_irq is zero.

Move the DMA TX/RX status register reads to the beginning of the ISR,
right after reading m_irq. Update the early return condition to check
all three status registers (m_irq, dma_tx_status, dma_rx_status) before
returning IRQ_NONE.

Signed-off-by: Praveen Talari <praveen.talari@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Link: https://patch.msgid.link/20260313-spi-geni-qcom-fix-dma-irq-handling-v1-1-0bd122589e02@oss.qualcomm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-17 17:07:47 +00:00
Maarten Lankhorst
6bee098b91 drm: Fix use-after-free on framebuffers and property blobs when calling drm_dev_unplug
When trying to do a rather aggressive test of igt's "xe_module_load
--r reload" with a full desktop environment and game running I noticed
a few OOPSes when dereferencing freed pointers, related to
framebuffers and property blobs after the compositor exits.

Solve this by guarding the freeing in drm_file with drm_dev_enter/exit,
and immediately put the references from struct drm_file objects during
drm_dev_unplug().

Related warnings for framebuffers on the subtest:
[  739.713076] ------------[ cut here ]------------
               WARN_ON(!list_empty(&dev->mode_config.fb_list))
[  739.713079] WARNING: drivers/gpu/drm/drm_mode_config.c:584 at drm_mode_config_cleanup+0x30b/0x320 [drm], CPU#12: xe_module_load/13145
....
[  739.713328] Call Trace:
[  739.713330]  <TASK>
[  739.713335]  ? intel_pmdemand_destroy_state+0x11/0x20 [xe]
[  739.713574]  ? intel_atomic_global_obj_cleanup+0xe4/0x1a0 [xe]
[  739.713794]  intel_display_driver_remove_noirq+0x51/0xb0 [xe]
[  739.714041]  xe_display_fini_early+0x33/0x50 [xe]
[  739.714284]  devm_action_release+0xf/0x20
[  739.714294]  devres_release_all+0xad/0xf0
[  739.714301]  device_unbind_cleanup+0x12/0xa0
[  739.714305]  device_release_driver_internal+0x1b7/0x210
[  739.714311]  device_driver_detach+0x14/0x20
[  739.714315]  unbind_store+0xa6/0xb0
[  739.714319]  drv_attr_store+0x21/0x30
[  739.714322]  sysfs_kf_write+0x48/0x60
[  739.714328]  kernfs_fop_write_iter+0x16b/0x240
[  739.714333]  vfs_write+0x266/0x520
[  739.714341]  ksys_write+0x72/0xe0
[  739.714345]  __x64_sys_write+0x19/0x20
[  739.714347]  x64_sys_call+0xa15/0xa30
[  739.714355]  do_syscall_64+0xd8/0xab0
[  739.714361]  entry_SYSCALL_64_after_hwframe+0x4b/0x53

and

[  739.714459] ------------[ cut here ]------------
[  739.714461] xe 0000:67:00.0: [drm] drm_WARN_ON(!list_empty(&fb->filp_head))
[  739.714464] WARNING: drivers/gpu/drm/drm_framebuffer.c:833 at drm_framebuffer_free+0x6c/0x90 [drm], CPU#12: xe_module_load/13145
[  739.714715] RIP: 0010:drm_framebuffer_free+0x7a/0x90 [drm]
...
[  739.714869] Call Trace:
[  739.714871]  <TASK>
[  739.714876]  drm_mode_config_cleanup+0x26a/0x320 [drm]
[  739.714998]  ? __drm_printfn_seq_file+0x20/0x20 [drm]
[  739.715115]  ? drm_mode_config_cleanup+0x207/0x320 [drm]
[  739.715235]  intel_display_driver_remove_noirq+0x51/0xb0 [xe]
[  739.715576]  xe_display_fini_early+0x33/0x50 [xe]
[  739.715821]  devm_action_release+0xf/0x20
[  739.715828]  devres_release_all+0xad/0xf0
[  739.715843]  device_unbind_cleanup+0x12/0xa0
[  739.715850]  device_release_driver_internal+0x1b7/0x210
[  739.715856]  device_driver_detach+0x14/0x20
[  739.715860]  unbind_store+0xa6/0xb0
[  739.715865]  drv_attr_store+0x21/0x30
[  739.715868]  sysfs_kf_write+0x48/0x60
[  739.715873]  kernfs_fop_write_iter+0x16b/0x240
[  739.715878]  vfs_write+0x266/0x520
[  739.715886]  ksys_write+0x72/0xe0
[  739.715890]  __x64_sys_write+0x19/0x20
[  739.715893]  x64_sys_call+0xa15/0xa30
[  739.715900]  do_syscall_64+0xd8/0xab0
[  739.715905]  entry_SYSCALL_64_after_hwframe+0x4b/0x53

and then finally file close blows up:

[  743.186530] Oops: general protection fault, probably for non-canonical address 0xdead000000000122: 0000 [#1] SMP
[  743.186535] CPU: 3 UID: 1000 PID: 3453 Comm: kwin_wayland Tainted: G        W           7.0.0-rc1-valkyria+ #110 PREEMPT_{RT,(lazy)}
[  743.186537] Tainted: [W]=WARN
[  743.186538] Hardware name: Gigabyte Technology Co., Ltd. X299 AORUS Gaming 3/X299 AORUS Gaming 3-CF, BIOS F8n 12/06/2021
[  743.186539] RIP: 0010:drm_framebuffer_cleanup+0x55/0xc0 [drm]
[  743.186588] Code: d8 72 73 0f b6 42 05 ff c3 39 c3 72 e8 49 8d bd 50 07 00 00 31 f6 e8 3a 80 d3 e1 49 8b 44 24 10 49 8d 7c 24 08 49 8b 54 24 08 <48> 3b 38 0f 85 95 7f 02 00 48 3b 7a 08 0f 85 8b 7f 02 00 48 89 42
[  743.186589] RSP: 0018:ffffc900085e3cf8 EFLAGS: 00010202
[  743.186591] RAX: dead000000000122 RBX: 0000000000000001 RCX: ffffffff8217ed03
[  743.186592] RDX: dead000000000100 RSI: 0000000000000000 RDI: ffff88814675ba08
[  743.186593] RBP: ffffc900085e3d10 R08: 0000000000000000 R09: 0000000000000000
[  743.186593] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88814675ba00
[  743.186594] R13: ffff88810d778000 R14: ffff888119f6dca0 R15: ffff88810c660bb0
[  743.186595] FS:  00007ff377d21280(0000) GS:ffff888cec3f8000(0000) knlGS:0000000000000000
[  743.186596] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  743.186596] CR2: 000055690b55e000 CR3: 0000000113586003 CR4: 00000000003706f0
[  743.186597] Call Trace:
[  743.186598]  <TASK>
[  743.186603]  intel_user_framebuffer_destroy+0x12/0x90 [xe]
[  743.186722]  drm_framebuffer_free+0x3a/0x90 [drm]
[  743.186750]  ? trace_hardirqs_on+0x5f/0x120
[  743.186754]  drm_mode_object_put+0x51/0x70 [drm]
[  743.186786]  drm_fb_release+0x105/0x190 [drm]
[  743.186812]  ? rt_mutex_slowunlock+0x3aa/0x410
[  743.186817]  ? rt_spin_lock+0xea/0x1b0
[  743.186819]  drm_file_free+0x1e0/0x2c0 [drm]
[  743.186843]  drm_release_noglobal+0x91/0xf0 [drm]
[  743.186865]  __fput+0x100/0x2e0
[  743.186869]  fput_close_sync+0x40/0xa0
[  743.186870]  __x64_sys_close+0x3e/0x80
[  743.186873]  x64_sys_call+0xa07/0xa30
[  743.186879]  do_syscall_64+0xd8/0xab0
[  743.186881]  entry_SYSCALL_64_after_hwframe+0x4b/0x53
[  743.186882] RIP: 0033:0x7ff37e567732
[  743.186884] Code: 08 0f 85 a1 38 ff ff 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 <c3> 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 55 bf 01 00
[  743.186885] RSP: 002b:00007ffc818169a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
[  743.186886] RAX: ffffffffffffffda RBX: 00007ffc81816a30 RCX: 00007ff37e567732
[  743.186887] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000012
[  743.186888] RBP: 00007ffc818169d0 R08: 0000000000000000 R09: 0000000000000000
[  743.186889] R10: 0000000000000000 R11: 0000000000000246 R12: 000055d60a7996e0
[  743.186889] R13: 00007ffc81816a90 R14: 00007ffc81816a90 R15: 000055d60a782a30
[  743.186892]  </TASK>
[  743.186893] Modules linked in: rfcomm snd_hrtimer xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp xt_addrtype nft_compat x_tables nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables overlay cfg80211 bnep mtd_intel_dg snd_hda_codec_intelhdmi mtd snd_hda_codec_hdmi nls_utf8 mxm_wmi intel_wmi_thunderbolt gigabyte_wmi wmi_bmof xe drm_gpuvm drm_gpusvm_helper i2c_algo_bit drm_buddy drm_ttm_helper ttm video drm_suballoc_helper gpu_sched drm_client_lib drm_exec drm_display_helper cec drm_kunit_helpers drm_kms_helper kunit x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_alc882 snd_hda_codec_realtek_lib snd_hda_codec_generic snd_hda_intel snd_soc_avs snd_soc_hda_codec snd_hda_ext_core snd_hda_codec snd_hwdep snd_hda_core snd_intel_dspcfg snd_soc_core snd_compress ac97_bus snd_pcm snd_seq snd_seq_device snd_timer i2c_i801 i2c_mux snd i2c_smbus btusb btrtl btbcm btmtk btintel bluetooth ecdh_generic rfkill ecc mei_me mei ioatdma dca wmi nfsd drm i2c_dev fuse nfnetlink
[  743.186938] ---[ end trace 0000000000000000 ]---

And for property blobs:

void drm_mode_config_cleanup(struct drm_device *dev)
{
...
	list_for_each_entry_safe(blob, bt, &dev->mode_config.property_blob_list,
				 head_global) {
		drm_property_blob_put(blob);
	}

Resulting in:

[  371.072940] BUG: unable to handle page fault for address: 000001ffffffffff
[  371.072944] #PF: supervisor read access in kernel mode
[  371.072945] #PF: error_code(0x0000) - not-present page
[  371.072947] PGD 0 P4D 0
[  371.072950] Oops: Oops: 0000 [#1] SMP
[  371.072953] CPU: 0 UID: 1000 PID: 3693 Comm: kwin_wayland Not tainted 7.0.0-rc1-valkyria+ #111 PREEMPT_{RT,(lazy)}
[  371.072956] Hardware name: Gigabyte Technology Co., Ltd. X299 AORUS Gaming 3/X299 AORUS Gaming 3-CF, BIOS F8n 12/06/2021
[  371.072957] RIP: 0010:drm_property_destroy_user_blobs+0x3b/0x90 [drm]
[  371.073019] Code: 00 00 48 83 ec 10 48 8b 86 30 01 00 00 48 39 c3 74 59 48 89 c2 48 8d 48 c8 48 8b 00 4c 8d 60 c8 eb 04 4c 8d 60 c8 48 8b 71 40 <48> 39 16 0f 85 39 32 01 00 48 3b 50 08 0f 85 2f 32 01 00 48 89 70
[  371.073021] RSP: 0018:ffffc90006a73de8 EFLAGS: 00010293
[  371.073022] RAX: 000001ffffffffff RBX: ffff888118a1a930 RCX: ffff8881b92355c0
[  371.073024] RDX: ffff8881b92355f8 RSI: 000001ffffffffff RDI: ffff888118be4000
[  371.073025] RBP: ffffc90006a73e08 R08: ffff8881009b7300 R09: ffff888cecc5b000
[  371.073026] R10: ffffc90006a73e90 R11: 0000000000000002 R12: 000001ffffffffc7
[  371.073027] R13: ffff888118a1a980 R14: ffff88810b366d20 R15: ffff888118a1a970
[  371.073028] FS:  00007f1faccbb280(0000) GS:ffff888cec2db000(0000) knlGS:0000000000000000
[  371.073029] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  371.073030] CR2: 000001ffffffffff CR3: 000000010655c001 CR4: 00000000003706f0
[  371.073031] Call Trace:
[  371.073033]  <TASK>
[  371.073036]  drm_file_free+0x1df/0x2a0 [drm]
[  371.073077]  drm_release_noglobal+0x7a/0xe0 [drm]
[  371.073113]  __fput+0xe2/0x2b0
[  371.073118]  fput_close_sync+0x40/0xa0
[  371.073119]  __x64_sys_close+0x3e/0x80
[  371.073122]  x64_sys_call+0xa07/0xa30
[  371.073126]  do_syscall_64+0xc0/0x840
[  371.073130]  entry_SYSCALL_64_after_hwframe+0x4b/0x53
[  371.073132] RIP: 0033:0x7f1fb3501732
[  371.073133] Code: 08 0f 85 a1 38 ff ff 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 <c3> 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 55 bf 01 00
[  371.073135] RSP: 002b:00007ffe8e6f0278 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
[  371.073136] RAX: ffffffffffffffda RBX: 00007ffe8e6f0300 RCX: 00007f1fb3501732
[  371.073137] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000012
[  371.073138] RBP: 00007ffe8e6f02a0 R08: 0000000000000000 R09: 0000000000000000
[  371.073139] R10: 0000000000000000 R11: 0000000000000246 R12: 00005585ba46eea0
[  371.073140] R13: 00007ffe8e6f0360 R14: 00007ffe8e6f0360 R15: 00005585ba458a30
[  371.073143]  </TASK>
[  371.073144] Modules linked in: rfcomm snd_hrtimer xt_addrtype xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp nft_compat x_tables nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables overlay cfg80211 bnep snd_hda_codec_intelhdmi snd_hda_codec_hdmi mtd_intel_dg mtd nls_utf8 wmi_bmof mxm_wmi gigabyte_wmi intel_wmi_thunderbolt xe drm_gpuvm drm_gpusvm_helper i2c_algo_bit drm_buddy drm_ttm_helper ttm video drm_suballoc_helper gpu_sched drm_client_lib drm_exec drm_display_helper cec drm_kunit_helpers drm_kms_helper kunit x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_alc882 snd_hda_codec_realtek_lib snd_hda_codec_generic snd_hda_intel snd_soc_avs snd_soc_hda_codec snd_hda_ext_core snd_hda_codec snd_hwdep snd_hda_core snd_intel_dspcfg snd_soc_core snd_compress ac97_bus snd_pcm snd_seq snd_seq_device snd_timer i2c_i801 btusb i2c_mux i2c_smbus btrtl snd btbcm btmtk btintel bluetooth ecdh_generic rfkill ecc mei_me mei ioatdma dca wmi nfsd drm i2c_dev fuse nfnetlink
[  371.073198] CR2: 000001ffffffffff
[  371.073199] ---[ end trace 0000000000000000 ]---

Add a guard around file close, and ensure the warnings from drm_mode_config
do not trigger. Fix those by allowing an open reference to the file descriptor
and cleaning up the file linked list entry in drm_mode_config_cleanup().

Cc: <stable@vger.kernel.org> # v4.18+
Fixes: bee330f3d6 ("drm: Use srcu to protect drm_device.unplugged")
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20260313151728.14990-4-dev@lankhorst.se
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
2026-03-17 17:49:12 +01:00
Eric Biggers
d5b66179b0 lib/crypto: powerpc: Add powerpc/aesp8-ppc.S to clean-files
Make the generated file powerpc/aesp8-ppc.S be removed by 'make clean'.

Fixes: 7cf2082e74 ("lib/crypto: powerpc/aes: Migrate POWER8 optimized code into library")
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260317044925.104184-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-17 09:27:11 -07:00
Pratap Nirujogi
3fc4648b53 drm/amdgpu: Fix ISP segfault issue in kernel v7.0
Add NULL pointer checks for dev->type before accessing
dev->type->name in ISP genpd add/remove functions to
prevent kernel crashes.

This regression was introduced in v7.0 as the wakeup sources
are registered using physical device instead of ACPI device.
This led to adding wakeup source device as the first child of
AMDGPU device without initializing dev-type variable, and
resulted in segfault when accessed it in the amdgpu isp driver.

Fixes: 057edc58aa ("ACPI: PM: Register wakeup sources under physical devices")
Suggested-by: Bin Du <Bin.Du@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Pratap Nirujogi <pratap.nirujogi@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit c51632d1ed7ac5aed2d40dbc0718d75342c12c6a)
2026-03-17 12:19:29 -04:00
Alex Deucher
f39e127027 drm/amdgpu/gmc9.0: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Cc: Benjamin Cheng <benjamin.cheng@amd.com>
Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit e14d468304832bcc4a082d95849bc0a41b18ddea)
Cc: stable@vger.kernel.org
2026-03-17 12:19:23 -04:00
Alex Deucher
9c52f49545 drm/amdgpu/mmhub4.2.0: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit dea5f235baf3786bfd4fd920b03c19285fdc3d9f)
Cc: stable@vger.kernel.org
2026-03-17 12:19:17 -04:00
Alex Deucher
3cdd405831 drm/amdgpu/mmhub4.1.0: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 04f063d85090f5dd0c671010ce88ee49d9dcc8ed)
Cc: stable@vger.kernel.org
2026-03-17 12:19:11 -04:00
Alex Deucher
cdb82ecbec drm/amdgpu/mmhub3.0: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit f14f27bbe2a3ed7af32d5f6eaf3f417139f45253)
Cc: stable@vger.kernel.org
2026-03-17 12:19:04 -04:00
Alex Deucher
e5e6d67b1c drm/amdgpu/mmhub3.0.2: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 1441f52c7f6ae6553664aa9e3e4562f6fc2fe8ea)
Cc: stable@vger.kernel.org
2026-03-17 12:18:58 -04:00
Alex Deucher
5d4e88bcfe drm/amdgpu/mmhub3.0.1: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 5f76083183363c4528a4aaa593f5d38c28fe7d7b)
Cc: stable@vger.kernel.org
2026-03-17 12:18:52 -04:00
Alex Deucher
a54403a534 drm/amdgpu/mmhub2.3: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 89cd90375c19fb45138990b70e9f4ba4806f05c4)
Cc: stable@vger.kernel.org
2026-03-17 12:18:46 -04:00
Alex Deucher
0b26edac4a drm/amdgpu/mmhub2.0: add bounds checking for cid
The value should never exceed the array size as those
are the only values the hardware is expected to return,
but add checks anyway.

Reviewed-by: Benjamin Cheng <benjamin.cheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit e064cef4b53552602bb6ac90399c18f662f3cacd)
Cc: stable@vger.kernel.org
2026-03-17 12:18:34 -04:00
Andy Nguyen
39f44f54af drm/amd: fix dcn 2.01 check
The ASICREV_IS_BEIGE_GOBY_P check always took precedence, because it includes all chip revisions upto NV_UNKNOWN.

Fixes: 54b822b3ea ("drm/amd/display: Use dce_version instead of chip_id")
Signed-off-by: Andy Nguyen <theofficialflow1996@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 9c7be0efa6f0daa949a5f3e3fdf9ea090b0713cb)
2026-03-17 12:15:57 -04:00
Srinivasan Shanmugam
2323b01965 drm/amd/display: Fix DisplayID not-found handling in parse_edid_displayid_vrr()
parse_edid_displayid_vrr() searches the EDID extension blocks for a
DisplayID extension before parsing the dynamic video timing range.

The code previously checked whether edid_ext was NULL after the search
loop. However, edid_ext is assigned during each iteration of the loop,
so it will never be NULL once the loop has executed. If no DisplayID
extension is found, edid_ext ends up pointing to the last extension
block, and the NULL check does not correctly detect the failure case.

Instead, check whether the loop completed without finding a matching
DisplayID block by testing "i == edid->extensions". This ensures the
function exits early when no DisplayID extension is present and avoids
parsing an unrelated EDID extension block.

Also simplify the EDID validation check using "!edid ||
!edid->extensions".

Fixes the below:
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:13079 parse_edid_displayid_vrr() warn: variable dereferenced before check 'edid_ext' (see line 13075)

Fixes: a638b837d0 ("drm/amd/display: Fix refresh rate range for some panel")
Cc: Roman Li <roman.li@amd.com>
Cc: Alex Hung <alex.hung@amd.com>
Cc: Jerry Zuo <jerry.zuo@amd.com>
Cc: Sun peng Li <sunpeng.li@amd.com>
Cc: Tom Chung <chiahsuan.chung@amd.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 91c7e6342e98c846b259c57273436fdea4c043f2)
2026-03-17 12:15:49 -04:00
Xi Ruoyao
ebe82c6e75 drm/amd/display: Wrap dcn32_override_min_req_memclk() in DC_FP_{START, END}
[Why]
The dcn32_override_min_req_memclk function is in dcn32_fpu.c, which is
compiled with CC_FLAGS_FPU into FP instructions.  So when we call it we
must use DC_FP_{START,END} to save and restore the FP context, and
prepare the FP unit on architectures like LoongArch where the FP unit
isn't always on.

Reported-by: LiarOnce <liaronce@hotmail.com>
Fixes: ee7be8f3de ("drm/amd/display: Limit DCN32 8 channel or less parts to DPM1 for FPO")
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 25bb1d54ba3983c064361033a8ec15474fece37e)
Cc: stable@vger.kernel.org
2026-03-17 12:12:11 -04:00
Calvin Owens
1071815989 drm/amd/display: Fix uninitialized variable use which breaks full LTO
Commit e1b385726f ("drm/amd/display: Add additional checks for PSP
footer size") introduced a use of an uninitialized stack variable
in dm_dmub_sw_init() (region_params.bss_data_size).

Interestingly, this seems to cause no issue on normal kernels. But when
full LTO is enabled, it causes the compiler to "optimize" out huge
swaths of amdgpu initialization code, and the driver is unusable:

    amdgpu 0000:03:00.0: [drm] Loading DMUB firmware via PSP: version=0x07002F00
    amdgpu 0000:03:00.0: sw_init of IP block <dm> failed 5
    amdgpu 0000:03:00.0: amdgpu_device_ip_init failed
    amdgpu 0000:03:00.0: Fatal error during GPU init

It surprises me that neither gcc nor clang emit a warning about this: I
only found it by bisecting the LTO breakage.

Fix by using the bss_data_size field from fw_meta_info_params, as was
presumably intended.

Fixes: e1b385726f ("drm/amd/display: Add additional checks for PSP footer size")
Signed-off-by: Calvin Owens <calvin@wbinvd.org>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit b7f1402f6ad24cc6b9a01fa09ebd1c6559d787d0)
2026-03-17 12:11:49 -04:00
Jesse.Zhang
6270b1a5da drm/amdgpu: Limit BO list entry count to prevent resource exhaustion
Userspace can pass an arbitrary number of BO list entries via the
bo_number field. Although the previous multiplication overflow check
prevents out-of-bounds allocation, a large number of entries could still
cause excessive memory allocation (up to potentially gigabytes) and
unnecessarily long list processing times.

Introduce a hard limit of 128k entries per BO list, which is more than
sufficient for any realistic use case (e.g., a single list containing all
buffers in a large scene). This prevents memory exhaustion attacks and
ensures predictable performance.

Return -EINVAL if the requested entry count exceeds the limit

Reviewed-by: Christian König <christian.koenig@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jesse Zhang <jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 688b87d39e0aa8135105b40dc167d74b5ada5332)
Cc: stable@vger.kernel.org
2026-03-17 12:10:16 -04:00
Alex Hung
b49814033c drm/amd/display: Fix gamma 2.2 colorop TFs
Use GAMMA22 for degamma/blend and GAMMA22_INV for shaper so
curves match the color pipeline.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5016
Tested-by: Xaver Hugl <xaver.hugl@kde.org>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit d8f9f42effd767ffa7bbcd7e05fbd6b20737e468)
2026-03-17 12:08:46 -04:00
Christian Brauner
c465f5591a selftests/mount_setattr: increase tmpfs size for idmapped mount tests
The mount_setattr_idmapped fixture mounts a 2 MB tmpfs at /mnt and then
creates a 2 GB sparse ext4 image at /mnt/C/ext4.img. While ftruncate()
succeeds (sparse file), mkfs.ext4 needs to write actual metadata blocks
(inode tables, journal, bitmaps) which easily exceeds the 2 MB tmpfs
limit, causing ENOSPC and failing the fixture setup for all
mount_setattr_idmapped tests.

This was introduced by commit d37d4720c3 ("selftests/mount_settattr:
ensure that ext4 filesystem can be created") which increased the image
size from 2 MB to 2 GB but didn't adjust the tmpfs size.

Bump the tmpfs size to 256 MB which is sufficient for the ext4 metadata.

Fixes: d37d4720c3 ("selftests/mount_settattr: ensure that ext4 filesystem can be created")
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-17 16:59:45 +01:00
Hugo Villeneuve
4221f30e3e regulator: dt-bindings: fix typos in regulator-uv-* descriptions
Remove word "over".

Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
Link: https://patch.msgid.link/20260317152357.3473584-1-hugo@hugovil.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-17 15:47:48 +00:00
Koichiro Den
21647677ba PCI: endpoint: pci-epf-test: Roll back BAR mapping when subrange setup fails
When the BAR subrange mapping test on DWC-based platforms fails due to
insufficient free inbound iATU regions, pci_epf_test_bar_subrange_setup()
returns an error (-ENOSPC) but does not restore the original BAR mapping.
This causes subsequent test runs to become confusing, since the failure may
leave room for the next subrange mapping test to pass.

Fix this by restoring the original BAR mapping when preparation of the
subrange mapping fails, so that no side effect remains regardless of the
test success or failure.

Fixes: 6c5e610142 ("PCI: endpoint: pci-epf-test: Add BAR subrange mapping test support")
Reported-by: Christian Bruel <christian.bruel@foss.st.com>
Closes: https://lore.kernel.org/linux-pci/b2b03ebe-9482-4a13-b22f-7b44da096eed@foss.st.com/
Signed-off-by: Koichiro Den <den@valinux.co.jp>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Christian Bruel <christian.bruel@foss.st.com>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://patch.msgid.link/20260316140225.1481658-1-den@valinux.co.jp
2026-03-17 10:26:42 -05:00
Jonathan Cavitt
67253b28a6 drm/pagemap_util: Ensure proper cache lock management on free
For the sake of consistency, ensure that the cache lock is always
unlocked after drm_pagemap_cache_fini. Spinlocks typically disable
preemption and if the code-path missing the unlock is hit, preemption
will remain disabled even if the lock is subsequently freed.

Fixes static analysis issue.

v2:
- Use requested code flow (Maarten)

v3:
- Clear cache->dpagemap (Matt Brost, Maarten)

v4:
- Reword commit message (Thomas)

Fixes: 77f14f2f2d ("drm/pagemap: Add a drm_pagemap cache and shrinker")
Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20260316151555.7553-2-jonathan.cavitt@intel.com
2026-03-17 15:39:07 +01:00
Vee Satayamas
f200b2f9a8 ASoC: amd: yc: Add DMI quirk for ASUS EXPERTBOOK BM1403CDA
Add a DMI quirk for the Asus Expertbook BM1403CDA to resolve the issue of the
internal microphone not being detected.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221236
Signed-off-by: Vee Satayamas <vsatayamas@gmail.com>
Reviewed-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260315142511.66029-2-vsatayamas@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-17 14:28:54 +00:00
Alessio Belle
74ef7844dd drm/imagination: Disable interrupts before suspending the GPU
This is an additional safety layer to ensure no accesses to the GPU
registers can be made while it is powered off.

While we can disable IRQ generation from GPU, META firmware, MIPS
firmware and for safety events, we cannot do the same for the RISC-V
firmware.
To keep a unified approach, once the firmware has completed its power
off sequence, disable IRQs for the while GPU at the kernel level
instead.

Signed-off-by: Alessio Belle <alessio.belle@imgtec.com>
Reviewed-by: Matt Coster <matt.coster@imgtec.com>
Link: https://patch.msgid.link/20260310-drain-irqs-before-suspend-v1-2-bf4f9ed68e75@imgtec.com
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
2026-03-17 14:27:42 +00:00
Alessio Belle
2d7f05cddf drm/imagination: Synchronize interrupts before suspending the GPU
The runtime PM suspend callback doesn't know whether the IRQ handler is
in progress on a different CPU core and doesn't wait for it to finish.

Depending on timing, the IRQ handler could be running while the GPU is
suspended, leading to kernel crashes when trying to access GPU
registers. See example signature below.

In a power off sequence initiated by the runtime PM suspend callback,
wait for any IRQ handlers in progress on other CPU cores to finish, by
calling synchronize_irq().

At the same time, remove the runtime PM resume/put calls in the threaded
IRQ handler. On top of not being the right approach to begin with, and
being at the wrong place as they should have wrapped all GPU register
accesses, the driver would hit a deadlock between synchronize_irq()
being called from a runtime PM suspend callback, holding the device
power lock, and the resume callback requiring the same.

Example crash signature on a TI AM68 SK platform:

  [  337.241218] SError Interrupt on CPU0, code 0x00000000bf000000 -- SError
  [  337.241239] CPU: 0 UID: 0 PID: 112 Comm: irq/234-gpu Tainted: G   M                6.17.7-B2C-00005-g9c7bbe4ea16c #2 PREEMPT
  [  337.241246] Tainted: [M]=MACHINE_CHECK
  [  337.241249] Hardware name: Texas Instruments AM68 SK (DT)
  [  337.241252] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
  [  337.241256] pc : pvr_riscv_irq_pending+0xc/0x24
  [  337.241277] lr : pvr_device_irq_thread_handler+0x64/0x310
  [  337.241282] sp : ffff800085b0bd30
  [  337.241284] x29: ffff800085b0bd50 x28: ffff0008070d9eab x27: ffff800083a5ce10
  [  337.241291] x26: ffff000806e48f80 x25: ffff0008070d9eac x24: 0000000000000000
  [  337.241296] x23: ffff0008068e9bf0 x22: ffff0008068e9bd0 x21: ffff800085b0bd30
  [  337.241301] x20: ffff0008070d9e00 x19: ffff0008068e9000 x18: 0000000000000001
  [  337.241305] x17: 637365645f656c70 x16: 0000000000000000 x15: ffff000b7df9ff40
  [  337.241310] x14: 0000a585fe3c0d0e x13: 000000999704f060 x12: 000000000002771a
  [  337.241314] x11: 00000000000000c0 x10: 0000000000000af0 x9 : ffff800085b0bd00
  [  337.241318] x8 : ffff0008071175d0 x7 : 000000000000b955 x6 : 0000000000000003
  [  337.241323] x5 : 0000000000000000 x4 : 0000000000000002 x3 : 0000000000000000
  [  337.241327] x2 : ffff800080e39d20 x1 : ffff800080e3fc48 x0 : 0000000000000000
  [  337.241333] Kernel panic - not syncing: Asynchronous SError Interrupt
  [  337.241337] CPU: 0 UID: 0 PID: 112 Comm: irq/234-gpu Tainted: G   M                6.17.7-B2C-00005-g9c7bbe4ea16c #2 PREEMPT
  [  337.241342] Tainted: [M]=MACHINE_CHECK
  [  337.241343] Hardware name: Texas Instruments AM68 SK (DT)
  [  337.241345] Call trace:
  [  337.241348]  show_stack+0x18/0x24 (C)
  [  337.241357]  dump_stack_lvl+0x60/0x80
  [  337.241364]  dump_stack+0x18/0x24
  [  337.241368]  vpanic+0x124/0x2ec
  [  337.241373]  abort+0x0/0x4
  [  337.241377]  add_taint+0x0/0xbc
  [  337.241384]  arm64_serror_panic+0x70/0x80
  [  337.241389]  do_serror+0x3c/0x74
  [  337.241392]  el1h_64_error_handler+0x30/0x48
  [  337.241400]  el1h_64_error+0x6c/0x70
  [  337.241404]  pvr_riscv_irq_pending+0xc/0x24 (P)
  [  337.241410]  irq_thread_fn+0x2c/0xb0
  [  337.241416]  irq_thread+0x170/0x334
  [  337.241421]  kthread+0x12c/0x210
  [  337.241428]  ret_from_fork+0x10/0x20
  [  337.241434] SMP: stopping secondary CPUs
  [  337.241451] Kernel Offset: disabled
  [  337.241453] CPU features: 0x040000,02002800,20002001,0400421b
  [  337.241456] Memory Limit: none
  [  337.457921] ---[ end Kernel panic - not syncing: Asynchronous SError Interrupt ]---

Fixes: cc1aeedb98 ("drm/imagination: Implement firmware infrastructure and META FW support")
Fixes: 96822d38ff ("drm/imagination: Handle Rogue safety event IRQs")
Cc: stable@vger.kernel.org # see patch description, needs adjustments for < 6.16
Signed-off-by: Alessio Belle <alessio.belle@imgtec.com>
Reviewed-by: Matt Coster <matt.coster@imgtec.com>
Link: https://patch.msgid.link/20260310-drain-irqs-before-suspend-v1-1-bf4f9ed68e75@imgtec.com
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
2026-03-17 14:27:42 +00:00
Alessio Belle
a55c2a5c8d drm/imagination: Fix deadlock in soft reset sequence
The soft reset sequence is currently executed from the threaded IRQ
handler, hence it cannot call disable_irq() which internally waits
for IRQ handlers, i.e. itself, to complete.

Use disable_irq_nosync() during a soft reset instead.

Fixes: cc1aeedb98 ("drm/imagination: Implement firmware infrastructure and META FW support")
Cc: stable@vger.kernel.org
Signed-off-by: Alessio Belle <alessio.belle@imgtec.com>
Reviewed-by: Matt Coster <matt.coster@imgtec.com>
Link: https://patch.msgid.link/20260309-fix-soft-reset-v1-1-121113be554f@imgtec.com
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
2026-03-17 14:27:04 +00:00
Christoph Hellwig
f8b8820a4a fs: clear I_DIRTY_TIME in sync_lazytime
For file systems implementing ->sync_lazytime, I_DIRTY_TIME fails to get
cleared in sync_lazytime, and might cause additional calls to
sync_lazytime during inode deactivation.  Use the same pattern as in
__mark_inode_dirty to clear the flag under the inode lock.

Fixes: 5cf06ea56e ("fs: add a ->sync_lazytime method")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260317134409.1691317-1-hch@lst.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-17 15:12:15 +01:00
Zenghui Yu (Huawei)
0496acc42f KVM: arm64: Fix the descriptor address in __kvm_at_swap_desc()
Using "(u64 __user *)hva + offset" to get the virtual addresses of S1/S2
descriptors looks really wrong, if offset is not zero. What we want to get
for swapping is hva + offset, not hva + offset*8. ;-)

Fix it.

Fixes: f6927b41d5 ("KVM: arm64: Add helper for swapping guest descriptor")
Signed-off-by: Zenghui Yu (Huawei) <zenghui.yu@linux.dev>
Link: https://patch.msgid.link/20260317115748.47332-1-zenghui.yu@linux.dev
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
2026-03-17 13:40:00 +00:00
Joe Damato
ba17de9854 iommu/amd: Block identity domain when SNP enabled
Previously, commit 8388f7df93 ("iommu/amd: Do not support
IOMMU_DOMAIN_IDENTITY after SNP is enabled") prevented users from
changing the IOMMU domain to identity if SNP was enabled.

This resulted in an error when writing to sysfs:

  # echo "identity" > /sys/kernel/iommu_groups/50/type
  -bash: echo: write error: Cannot allocate memory

However, commit 4402f2627d ("iommu/amd: Implement global identity
domain") changed the flow of the code, skipping the SNP guard and
allowing users to change the IOMMU domain to identity after a machine
has booted.

Once the user does that, they will probably try to bind and the
device/driver will start to do DMA which will trigger errors:

  iommu ivhd3: AMD-Vi: Event logged [ILLEGAL_DEV_TABLE_ENTRY device=0000:43:00.0 pasid=0x00000 address=0x3737b01000 flags=0x0020]
  iommu ivhd3: AMD-Vi: Control Reg : 0xc22000142148d
  AMD-Vi: DTE[0]: 6000000000000003
  AMD-Vi: DTE[1]: 0000000000000001
  AMD-Vi: DTE[2]: 2000003088b3e013
  AMD-Vi: DTE[3]: 0000000000000000
  bnxt_en 0000:43:00.0 (unnamed net_device) (uninitialized): Error (timeout: 500015) msg {0x0 0x0} len:0
  iommu ivhd3: AMD-Vi: Event logged [ILLEGAL_DEV_TABLE_ENTRY device=0000:43:00.0 pasid=0x00000 address=0x3737b01000 flags=0x0020]
  iommu ivhd3: AMD-Vi: Control Reg : 0xc22000142148d
  AMD-Vi: DTE[0]: 6000000000000003
  AMD-Vi: DTE[1]: 0000000000000001
  AMD-Vi: DTE[2]: 2000003088b3e013
  AMD-Vi: DTE[3]: 0000000000000000
  bnxt_en 0000:43:00.0: probe with driver bnxt_en failed with error -16

To prevent this from happening, create an attach wrapper for
identity_domain_ops which returns EINVAL if amd_iommu_snp_en is true.

With this commit applied:

  # echo "identity" > /sys/kernel/iommu_groups/62/type
  -bash: echo: write error: Invalid argument

Fixes: 4402f2627d ("iommu/amd: Implement global identity domain")
Signed-off-by: Joe Damato <joe@dama.to>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-17 14:02:02 +01:00
Lizhi Hou
06e14c36e2 iommu/sva: Fix crash in iommu_sva_unbind_device()
domain->mm->iommu_mm can be freed by iommu_domain_free():
  iommu_domain_free()
    mmdrop()
      __mmdrop()
        mm_pasid_drop()
After iommu_domain_free() returns, accessing domain->mm->iommu_mm may
dereference a freed mm structure, leading to a crash.

Fix this by moving the code that accesses domain->mm->iommu_mm to before
the call to iommu_domain_free().

Fixes: e37d5a2d60 ("iommu/sva: invalidate stale IOTLB entries for kernel address space")
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-17 14:00:36 +01:00
Randy Dunlap
45c6a2dc7e iommu/io-pgtable: fix all kernel-doc warnings in io-pgtable.h
Avoid kernel-doc warnings in io-pgtable.h:
- use the correct struct member names or kernel-doc format
- add a missing struct member description
- add a missing function return comment section

Warning: include/linux/io-pgtable.h:187 struct member 'coherent_walk' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_lpae_s1_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_lpae_s2_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_v7s_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_mali_lpae_cfg'
 not described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'apple_dart_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'amd' not described
 in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:223 struct member
 'read_and_clear_dirty' not described in 'io_pgtable_ops'
Warning: include/linux/io-pgtable.h:237 No description found for return
 value of 'alloc_io_pgtable_ops'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-17 13:58:45 +01:00
Nikola Z. Ivanov
069c8f5aeb net: usb: aqc111: Do not perform PM inside suspend callback
syzbot reports "task hung in rpm_resume"

This is caused by aqc111_suspend calling
the PM variant of its write_cmd routine.

The simplified call trace looks like this:

rpm_suspend()
  usb_suspend_both() - here udev->dev.power.runtime_status == RPM_SUSPENDING
    aqc111_suspend() - called for the usb device interface
      aqc111_write32_cmd()
        usb_autopm_get_interface()
          pm_runtime_resume_and_get()
            rpm_resume() - here we call rpm_resume() on our parent
              rpm_resume() - Here we wait for a status change that will never happen.

At this point we block another task which holds
rtnl_lock and locks up the whole networking stack.

Fix this by replacing the write_cmd calls with their _nopm variants

Reported-by: syzbot+48dc1e8dfc92faf1124c@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=48dc1e8dfc92faf1124c
Fixes: e58ba4544c ("net: usb: aqc111: Add support for wake on LAN by MAGIC packet")
Signed-off-by: Nikola Z. Ivanov <zlatistiv@gmail.com>
Link: https://patch.msgid.link/20260313141643.1181386-1-zlatistiv@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-17 13:36:12 +01:00
Antheas Kapenekakis
0a4d00e2e9 iommu: Fix mapping check for 0x0 to avoid re-mapping it
Commit 789a5913b2 ("iommu/amd: Use the generic iommu page table")
introduces the shared iommu page table for AMD IOMMU. Some bioses
contain an identity mapping for address 0x0, which is not parsed
properly (e.g., certain Strix Halo devices). This causes the DMA
components of the device to fail to initialize (e.g., the NVMe SSD
controller), leading to a failed post.

Specifically, on the GPD Win 5, the NVME and SSD GPU fail to mount,
making collecting errors difficult. While debugging, it was found that
a -EADDRINUSE error was emitted and its source was traced to
iommu_iova_to_phys(). After adding some debug prints, it was found that
phys_addr becomes 0, which causes the code to try to re-map the 0
address and fail, causing a cascade leading to a failed post. This is
because the GPD Win 5 contains a 0x0-0x1 identity mapping for DMA
devices, causing it to be repeated for each device.

The cause of this failure is the following check in
iommu_create_device_direct_mappings(), where address aliasing is handled
via the following check:

```
phys_addr = iommu_iova_to_phys(domain, addr);
if (!phys_addr) {
        map_size += pg_size;
        continue;
}
````

Obviously, the iommu_iova_to_phys() signature is faulty and aliases
unmapped and 0 together, causing the allocation code to try to
re-allocate the 0 address per device. However, it has too many
instantiations to fix. Therefore, use a ternary so that when addr
is 0, the check is done for address 1 instead.

Suggested-by: Robin Murphy <robin.murphy@arm.com>
Fixes: 789a5913b2 ("iommu/amd: Use the generic iommu page table")
Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-17 13:33:33 +01:00
Sebastian Reichel
4eae391a8e ASoC: dt-bindings: rockchip: Add compatible for RK3576 SPDIF
Add a compatible string for SPDIF on RK3576, which is similar to the
one on RK3568.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://patch.msgid.link/20260316-rk3576-spdif-v1-1-acb75088b560@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-17 12:30:51 +00:00
Lu Baolu
39c20c4e83 iommu/vt-d: Only handle IOPF for SVA when PRI is supported
In intel_svm_set_dev_pasid(), the driver unconditionally manages the IOPF
handling during a domain transition. However, commit a86fb77173
("iommu/vt-d: Allow SVA with device-specific IOPF") introduced support for
SVA on devices that handle page faults internally without utilizing the
PCI PRI. On such devices, the IOMMU-side IOPF infrastructure is not
required. Calling iopf_for_domain_replace() on these devices is incorrect
and can lead to unexpected failures during PASID attachment or unwinding.

Add a check for info->pri_supported to ensure that the IOPF queue logic
is only invoked for devices that actually rely on the IOMMU's PRI-based
fault handling.

Fixes: 17fce9d233 ("iommu/vt-d: Put iopf enablement in domain attach path")
Cc: stable@vger.kernel.org
Suggested-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20260310075520.295104-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-17 13:20:06 +01:00
Guanghui Feng
fe89277c9c iommu/vt-d: Fix intel iommu iotlb sync hardlockup and retry
During the qi_check_fault process after an IOMMU ITE event, requests at
odd-numbered positions in the queue are set to QI_ABORT, only satisfying
single-request submissions. However, qi_submit_sync now supports multiple
simultaneous submissions, and can't guarantee that the wait_desc will be
at an odd-numbered position. Therefore, if an item times out, IOMMU can't
re-initiate the request, resulting in an infinite polling wait.

This modifies the process by setting the status of all requests already
fetched by IOMMU and recorded as QI_IN_USE status (including wait_desc
requests) to QI_ABORT, thus enabling multiple requests to be resubmitted.

Fixes: 8a1d824625 ("iommu/vt-d: Multiple descriptors per qi_submit_sync()")
Cc: stable@vger.kernel.org
Signed-off-by: Guanghui Feng <guanghuifeng@linux.alibaba.com>
Tested-by: Shuai Xue <xueshuai@linux.alibaba.com>
Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Link: https://lore.kernel.org/r/20260306101516.3885775-1-guanghuifeng@linux.alibaba.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Fixes: 8a1d824625 ("iommu/vt-d: Multiple descriptors per  qi_submit_sync()")
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2026-03-17 13:20:06 +01:00
Daniel Borkmann
a0671125d4 clsact: Fix use-after-free in init/destroy rollback asymmetry
Fix a use-after-free in the clsact qdisc upon init/destroy rollback asymmetry.
The latter is achieved by first fully initializing a clsact instance, and
then in a second step having a replacement failure for the new clsact qdisc
instance. clsact_init() initializes ingress first and then takes care of the
egress part. This can fail midway, for example, via tcf_block_get_ext(). Upon
failure, the kernel will trigger the clsact_destroy() callback.

Commit 1cb6f0bae5 ("bpf: Fix too early release of tcx_entry") details the
way how the transition is happening. If tcf_block_get_ext on the q->ingress_block
ends up failing, we took the tcx_miniq_inc reference count on the ingress
side, but not yet on the egress side. clsact_destroy() tests whether the
{ingress,egress}_entry was non-NULL. However, even in midway failure on the
replacement, both are in fact non-NULL with a valid egress_entry from the
previous clsact instance.

What we really need to test for is whether the qdisc instance-specific ingress
or egress side previously got initialized. This adds a small helper for checking
the miniq initialization called mini_qdisc_pair_inited, and utilizes that upon
clsact_destroy() in order to fix the use-after-free scenario. Convert the
ingress_destroy() side as well so both are consistent to each other.

Fixes: 1cb6f0bae5 ("bpf: Fix too early release of tcx_entry")
Reported-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20260313065531.98639-1-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-17 12:09:16 +01:00
Tomi Valkeinen
a17ce4bc6f dmaengine: xilinx_dma: Fix reset related timeout with two-channel AXIDMA
A single AXIDMA controller can have one or two channels. When it has two
channels, the reset for both are tied together: resetting one channel
resets the other as well. This creates a problem where resetting one
channel will reset the registers for both channels, including clearing
interrupt enable bits for the other channel, which can then lead  to
timeouts as the driver is waiting for an interrupt which never comes.

The driver currently has a probe-time work around for this: when a
channel is created, the driver also resets and enables the
interrupts. With two channels the reset for the second channel will
clear the interrupt enables for the first one. The work around in the
driver is just to manually enable the interrupts again in
xilinx_dma_alloc_chan_resources().

This workaround only addresses the probe-time issue. When channels are
reset at runtime (e.g., in xilinx_dma_terminate_all() or during error
recovery), there's no corresponding mechanism to restore the other
channel's interrupt enables. This leads to one channel having its
interrupts disabled while the driver expects them to work, causing
timeouts and DMA failures.

A proper fix is a complicated matter, as we should not reset the other
channel when it's operating normally. So, perhaps, there should be some
kind of synchronization for a common reset, which is not trivial to
implement. To add to the complexity, the driver also supports other DMA
types, like VDMA, CDMA and MCDMA, which don't have a shared reset.

However, when the two-channel AXIDMA is used in the (assumably) normal
use case, providing DMA for a single memory-to-memory device, the common
reset is a bit smaller issue: when something bad happens on one channel,
or when one channel is terminated, the assumption is that we also want
to terminate the other channel. And thus resetting both at the same time
is "ok".

With that line of thinking we can implement a bit better work around
than just the current probe time work around: let's enable the
AXIDMA interrupts at xilinx_dma_start_transfer() instead.
This ensures interrupts are enabled whenever a transfer starts,
regardless of any prior resets that may have cleared them.

This approach is also more logical: enable interrupts only when needed
for a transfer, rather than at resource allocation time, and, I think,
all the other DMA types should also use this model, but I'm reluctant to
do such changes as I cannot test them.

The reset function still enables interrupts even though it's not needed
for AXIDMA anymore, but it's common code for all DMA types (VDMA, CDMA,
MCDMA), so leave it unchanged to avoid affecting other variants.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Fixes: c0bba3a99f ("dmaengine: vdma: Add Support for Xilinx AXI Direct Memory Access Engine")
Link: https://patch.msgid.link/20260311-xilinx-dma-fix-v2-1-a725abb66e3c@ideasonboard.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-17 16:33:32 +05:30
Marek Vasut
c7d812e33f dmaengine: xilinx: xilinx_dma: Fix unmasked residue subtraction
The segment .control and .status fields both contain top bits which are
not part of the buffer size, the buffer size is located only in the bottom
max_buffer_len bits. To avoid interference from those top bits, mask out
the size using max_buffer_len first, and only then subtract the values.

Fixes: a575d0b4e6 ("dmaengine: xilinx_dma: Introduce xilinx_dma_get_residue")
Signed-off-by: Marek Vasut <marex@nabladev.com>
Link: https://patch.msgid.link/20260316222530.163815-1-marex@nabladev.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-17 16:14:34 +05:30
Marek Vasut
f61d145999 dmaengine: xilinx: xilinx_dma: Fix residue calculation for cyclic DMA
The cyclic DMA calculation is currently entirely broken and reports
residue only for the first segment. The problem is twofold.

First, when the first descriptor finishes, it is moved from active_list
to done_list, but it is never returned back into the active_list. The
xilinx_dma_tx_status() expects the descriptor to be in the active_list
to report any meaningful residue information, which never happens after
the first descriptor finishes. Fix this up in xilinx_dma_start_transfer()
and if the descriptor is cyclic, lift it from done_list and place it back
into active_list list.

Second, the segment .status fields of the descriptor remain dirty. Once
the DMA did one pass on the descriptor, the .status fields are populated
with data by the DMA, but the .status fields are not cleared before reuse
during the next cyclic DMA round. The xilinx_dma_get_residue() recognizes
that as if the descriptor was complete and had 0 residue, which is bogus.
Reinitialize the status field before placing the descriptor back into the
active_list.

Fixes: c0bba3a99f ("dmaengine: vdma: Add Support for Xilinx AXI Direct Memory Access Engine")
Signed-off-by: Marek Vasut <marex@nabladev.com>
Link: https://patch.msgid.link/20260316221943.160375-1-marex@nabladev.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-17 16:14:20 +05:30
Marek Vasut
e9cc95397b dmaengine: xilinx: xilinx_dma: Fix dma_device directions
Unlike chan->direction , struct dma_device .directions field is a
bitfield. Turn chan->direction into a bitfield to make it compatible
with struct dma_device .directions .

Fixes: 7e01511443 ("dmaengine: xilinx_dma: Set dma_device directions")
Signed-off-by: Marek Vasut <marex@nabladev.com>
Link: https://patch.msgid.link/20260316221728.160139-1-marex@nabladev.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-17 16:14:06 +05:30
Paul Moses
d849a2f730 xfrm: iptfs: only publish mode_data after clone setup
iptfs_clone_state() stores x->mode_data before allocating the reorder
window. If that allocation fails, the code frees the cloned state and
returns -ENOMEM, leaving x->mode_data pointing at freed memory.

The xfrm clone unwind later runs destroy_state() through x->mode_data,
so the failed clone path tears down IPTFS state that clone_state()
already freed.

Keep the cloned IPTFS state private until all allocations succeed so
failed clones leave x->mode_data unset. The destroy path already
handles a NULL mode_data pointer.

Fixes: 6be02e3e4f ("xfrm: iptfs: handle reordering of received packets")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses <p@1g4.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-17 11:43:14 +01:00
ZhengYuan Huang
b17b79ff89 btrfs: reject root items with drop_progress and zero drop_level
[BUG]
When recovering relocation at mount time, merge_reloc_root() and
btrfs_drop_snapshot() both use BUG_ON(level == 0) to guard against
an impossible state: a non-zero drop_progress combined with a zero
drop_level in a root_item, which can be triggered:

------------[ cut here ]------------
kernel BUG at fs/btrfs/relocation.c:1545!
Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
CPU: 1 UID: 0 PID: 283 ... Tainted: 6.18.0+ #16 PREEMPT(voluntary)
Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
Hardware name: QEMU Ubuntu 24.04 PC v2, BIOS 1.16.3-debian-1.16.3-2
RIP: 0010:merge_reloc_root+0x1266/0x1650 fs/btrfs/relocation.c:1545
Code: ffff0000 00004589 d7e9acfa ffffe8a1 79bafebe 02000000
Call Trace:
 merge_reloc_roots+0x295/0x890 fs/btrfs/relocation.c:1861
 btrfs_recover_relocation+0xd6e/0x11d0 fs/btrfs/relocation.c:4195
 btrfs_start_pre_rw_mount+0xa4d/0x1810 fs/btrfs/disk-io.c:3130
 open_ctree+0x5824/0x5fe0 fs/btrfs/disk-io.c:3640
 btrfs_fill_super fs/btrfs/super.c:987 [inline]
 btrfs_get_tree_super fs/btrfs/super.c:1951 [inline]
 btrfs_get_tree_subvol fs/btrfs/super.c:2094 [inline]
 btrfs_get_tree+0x111c/0x2190 fs/btrfs/super.c:2128
 vfs_get_tree+0x9a/0x370 fs/super.c:1758
 fc_mount fs/namespace.c:1199 [inline]
 do_new_mount_fc fs/namespace.c:3642 [inline]
 do_new_mount fs/namespace.c:3718 [inline]
 path_mount+0x5b8/0x1ea0 fs/namespace.c:4028
 do_mount fs/namespace.c:4041 [inline]
 __do_sys_mount fs/namespace.c:4229 [inline]
 __se_sys_mount fs/namespace.c:4206 [inline]
 __x64_sys_mount+0x282/0x320 fs/namespace.c:4206
 ...
RIP: 0033:0x7f969c9a8fde
Code: 0f1f4000 48c7c2b0 fffffff7 d8648902 b8ffffff ffc3660f
---[ end trace 0000000000000000 ]---

The bug is reproducible on 7.0.0-rc2-next-20260310 with our dynamic
metadata fuzzing tool that corrupts btrfs metadata at runtime.

[CAUSE]
A non-zero drop_progress.objectid means an interrupted
btrfs_drop_snapshot() left a resume point on disk, and in that case
drop_level must be greater than 0 because the checkpoint is only
saved at internal node levels.

Although this invariant is enforced when the kernel writes the root
item, it is not validated when the root item is read back from disk.
That allows on-disk corruption to provide an invalid state with
drop_progress.objectid != 0 and drop_level == 0.

When relocation recovery later processes such a root item,
merge_reloc_root() reads drop_level and hits BUG_ON(level == 0). The
same invalid metadata can also trigger the corresponding BUG_ON() in
btrfs_drop_snapshot().

[FIX]
Fix this by validating the root_item invariant in tree-checker when
reading root items from disk: if drop_progress.objectid is non-zero,
drop_level must also be non-zero. Reject such malformed metadata with
-EUCLEAN before it reaches merge_reloc_root() or btrfs_drop_snapshot()
and triggers the BUG_ON.

After the fix, the same corruption is correctly rejected by tree-checker
and the BUG_ON is no longer triggered.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: ZhengYuan Huang <gality369@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:08 +01:00
Mark Harmstone
adbb0ebacc btrfs: check block group before marking it unused in balance_remap_chunks()
Fix a potential segfault in balance_remap_chunks(): if we quit early
because btrfs_inc_block_group_ro() fails, all the remaining items in the
chunks list will still have their bg value set to NULL. It's thus not
safe to dereference this pointer without checking first.

Reported-by: Chris Mason <clm@fb.com>
Link: https://lore.kernel.org/linux-btrfs/20260125120717.1578828-1-clm@meta.com/
Fixes: 81e5a4551c ("btrfs: allow balancing remap tree")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:08 +01:00
Mark Harmstone
057495ccc0 btrfs: hold block group reference during entire move_existing_remap()
There is a potential use-after-free in move_existing_remap(): we're calling
btrfs_put_block_group() on dest_bg, then passing it to
btrfs_add_block_group_free_space() a few lines later.

Fix this by getting the BG at the start of the function and putting it
near the end. This also means we're not doing a lookup twice for the
same thing.

Reported-by: Chris Mason <clm@fb.com>
Link: https://lore.kernel.org/linux-btrfs/20260125123908.2096548-1-clm@meta.com/
Fixes: bbea42dfb9 ("btrfs: move existing remaps before relocating block group")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:08 +01:00
Qu Wenruo
5118130e72 btrfs: fix an incorrect ASSERT() condition inside lzo_decompress_bio()
[BUG]
When running btrfs/284 with 64K page size and 4K fs block size, it
crashes with the following ASSERT() triggered:

  BTRFS info (device dm-3): use lzo compression, level 1
  assertion failed: folio_size(fi.folio) == sectorsize :: 0, in lzo.c:450
  ------------[ cut here ]------------
  kernel BUG at lzo.c:450!
  Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
  CPU: 4 UID: 0 PID: 329 Comm: kworker/u37:2 Tainted: G           OE       6.19.0-rc8-custom+ #185 PREEMPT(voluntary)
  Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
  Workqueue: btrfs-endio simple_end_io_work [btrfs]
  pc : lzo_decompress_bio+0x61c/0x630 [btrfs]
  lr : lzo_decompress_bio+0x61c/0x630 [btrfs]
  Call trace:
   lzo_decompress_bio+0x61c/0x630 [btrfs] (P)
   end_bbio_compressed_read+0x2a8/0x2c0 [btrfs]
   btrfs_bio_end_io+0xc4/0x258 [btrfs]
   btrfs_check_read_bio+0x424/0x7e0 [btrfs]
   simple_end_io_work+0x40/0xa8 [btrfs]
   process_one_work+0x168/0x3f0
   worker_thread+0x25c/0x398
   kthread+0x154/0x250
   ret_from_fork+0x10/0x20
  Code: 912a2021 b0000e00 91246000 940244e9 (d4210000)
  ---[ end trace 0000000000000000 ]---

[CAUSE]
Commit 37cc07cab7 ("btrfs: lzo: use folio_iter to handle
lzo_decompress_bio()") added the ASSERT() to make sure the folio size
matches the fs block size.

But the check is completely wrong, the original intention is to make
sure for bs > ps cases, we always got a large folio that covers a full fs
block.

However for bs < ps cases, a folio can never be smaller than page size,
and the ASSERT() gets triggered immediately.

[FIX]
Check the folio size against @min_folio_size instead, which will never
be smaller than PAGE_SIZE, and still cover bs > ps cases.

Fixes: 37cc07cab7 ("btrfs: lzo: use folio_iter to handle lzo_decompress_bio()")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:08 +01:00
Qu Wenruo
96a2d23589 btrfs: fix an incorrect ASSERT() condition inside zstd_decompress_bio()
[BUG]
When running btrfs/284 with 64K page size and 4K fs block size, it
crashes with the following ASSERT() triggered:

  assertion failed: folio_size(fi.folio) == blocksize :: 0, in fs/btrfs/zstd.c:603
  ------------[ cut here ]------------
  kernel BUG at fs/btrfs/zstd.c:603!
  Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
  CPU: 2 UID: 0 PID: 1183 Comm: kworker/u35:4 Not tainted 6.19.0-rc8-custom+ #185 PREEMPT(voluntary)
  Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
  Workqueue: btrfs-endio simple_end_io_work [btrfs]
  pc : zstd_decompress_bio+0x4f0/0x508 [btrfs]
  lr : zstd_decompress_bio+0x4f0/0x508 [btrfs]
  Call trace:
   zstd_decompress_bio+0x4f0/0x508 [btrfs] (P)
   end_bbio_compressed_read+0x260/0x2c0 [btrfs]
   btrfs_bio_end_io+0xc4/0x258 [btrfs]
   btrfs_check_read_bio+0x424/0x7e0 [btrfs]
   simple_end_io_work+0x40/0xa8 [btrfs]
   process_one_work+0x168/0x3f0
   worker_thread+0x25c/0x398
   kthread+0x154/0x250
   ret_from_fork+0x10/0x20
  ---[ end trace 0000000000000000 ]---

[CAUSE]
Commit 1914b94231 ("btrfs: zstd: use folio_iter to handle
zstd_decompress_bio()") added the ASSERT() to make sure the folio size
matches the fs block size.

But the check is completely wrong, the original intention is to make
sure for bs > ps cases, we always got a large folio that covers a full fs
block.

However for bs < ps cases, a folio can never be smaller than page size,
and the ASSERT() gets triggered immediately.

[FIX]
Check the folio size against @min_folio_size instead, which will never
be smaller than PAGE_SIZE, and still cover bs > ps cases.

Fixes: 1914b94231 ("btrfs: zstd: use folio_iter to handle zstd_decompress_bio()")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:08 +01:00
Qu Wenruo
3adf8f1415 btrfs: do not touch page cache for encoded writes
[BUG]
When running btrfs/284, the following ASSERT() will be triggered with
64K page size and 4K fs block size:

  assertion failed: folio_test_writeback(folio) :: 0, in subpage.c:476
  ------------[ cut here ]------------
  kernel BUG at subpage.c:476!
  Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
  CPU: 4 UID: 0 PID: 2313 Comm: kworker/u37:2 Tainted: G           OE       6.19.0-rc8-custom+ #185 PREEMPT(voluntary)
  Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
  Workqueue: btrfs-endio simple_end_io_work [btrfs]
  pc : btrfs_subpage_clear_writeback+0x148/0x160 [btrfs]
  lr : btrfs_subpage_clear_writeback+0x148/0x160 [btrfs]
  Call trace:
   btrfs_subpage_clear_writeback+0x148/0x160 [btrfs] (P)
   btrfs_folio_clamp_clear_writeback+0xb4/0xd0 [btrfs]
   end_compressed_writeback+0xe0/0x1e0 [btrfs]
   end_bbio_compressed_write+0x1e8/0x218 [btrfs]
   btrfs_bio_end_io+0x108/0x258 [btrfs]
   simple_end_io_work+0x68/0xa8 [btrfs]
   process_one_work+0x168/0x3f0
   worker_thread+0x25c/0x398
   kthread+0x154/0x250
   ret_from_fork+0x10/0x20
  ---[ end trace 0000000000000000 ]---

[CAUSE]
The offending bio is from an encoded write, where the compressed data is
directly written as a data extent, without touching the page cache.

However the encoded write still utilizes the regular buffered write path
for compressed data, by setting the compressed_bio::writeback flag.

When that flag is set, at end_bbio_compressed_write() btrfs will go
clearing the writeback flag of the folios in the page cache.

However for bs < ps cases, the subpage helper has one extra check to make
sure the folio has a writeback flag set in the first place.

But since it's an encoded write, we never go through page
cache, thus the folio has no writeback flag and triggers the ASSERT().

[FIX]
Do not set compressed_bio::writeback flag for encoded writes, and change
the ASSERT() in btrfs_submit_compressed_write() to make sure that flag
is not set.

Fixes: e1bc83f8b1 ("btrfs: get rid of compressed_folios[] usage for encoded writes")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:07 +01:00
Qu Wenruo
65ee606138 btrfs: fix a bug that makes encoded write bio larger than expected
[BUG]
When running btrfs/284 with 64K page size and 4K fs block size, the
following ASSERT() can be triggered:

  assertion failed: cb->bbio.bio.bi_iter.bi_size == disk_num_bytes :: 0, in inode.c:9991
  ------------[ cut here ]------------
  kernel BUG at inode.c:9991!
  Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
  CPU: 5 UID: 0 PID: 6787 Comm: btrfs Tainted: G           OE       6.19.0-rc8-custom+ #1 PREEMPT(voluntary)
  Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
  pc : btrfs_do_encoded_write+0x9b0/0x9c0 [btrfs]
  lr : btrfs_do_encoded_write+0x9b0/0x9c0 [btrfs]
  Call trace:
   btrfs_do_encoded_write+0x9b0/0x9c0 [btrfs] (P)
   btrfs_do_write_iter+0x1d8/0x208 [btrfs]
   btrfs_ioctl_encoded_write+0x3c8/0x6d0 [btrfs]
   btrfs_ioctl+0xeb0/0x2b60 [btrfs]
   __arm64_sys_ioctl+0xac/0x110
   invoke_syscall.constprop.0+0x64/0xe8
   el0_svc_common.constprop.0+0x40/0xe8
   do_el0_svc+0x24/0x38
   el0_svc+0x3c/0x1b8
   el0t_64_sync_handler+0xa0/0xe8
   el0t_64_sync+0x1a4/0x1a8
  Code: 91180021 90001080 9111a000 94039d54 (d4210000)
  ---[ end trace 0000000000000000 ]---

[CAUSE]
After commit e1bc83f8b1 ("btrfs: get rid of compressed_folios[] usage
for encoded writes"), the encoded write is changed to copy the content
from the iov into a folio, and queue the folio into the compressed bio.

However we always queue the full folio into the compressed bio, which
can make the compressed bio larger than the on-disk extent, if the folio
size is larger than the fs block size.

Although we have an ASSERT() to catch such problem, for kernels without
CONFIG_BTRFS_ASSERT, such larger than expected bio will just be
submitted, possibly overwrite the next data extent, causing data
corruption.

[FIX]
Instead of blindly queuing the full folio into the compressed bio, only
queue the rounded up range, which is the old behavior before that
offending commit.
This also means we no longer need to zero the tailing range until the
folio end (but still to the block boundary), as such range will not be
submitted anyway.

And since we're here, add a final ASSERT() into
btrfs_submit_compressed_write() as the last safety net for kernels with
btrfs assertions enabled

Fixes: e1bc83f8b1 ("btrfs: get rid of compressed_folios[] usage for encoded writes")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:07 +01:00
Filipe Manana
f9a4e3015d btrfs: reserve enough transaction items for qgroup ioctls
Currently our qgroup ioctls don't reserve any space, they just do a
transaction join, which does not reserve any space, neither for the quota
tree updates nor for the delayed refs generated when updating the quota
tree. The quota root uses the global block reserve, which is fine most of
the time since we don't expect a lot of updates to the quota root, or to
be too close to -ENOSPC such that other critical metadata updates need to
resort to the global reserve.

However this is not optimal, as not reserving proper space may result in a
transaction abort due to not reserving space for delayed refs and then
abusing the use of the global block reserve.

For example, the following reproducer (which is unlikely to model any
real world use case, but just to illustrate the problem), triggers such a
transaction abort due to -ENOSPC when running delayed refs:

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/nullb0
  MNT=/mnt/nullb0

  umount $DEV &> /dev/null
  # Limit device to 1G so that it's much faster to reproduce the issue.
  mkfs.btrfs -f -b 1G $DEV
  mount -o commit=600 $DEV $MNT

  fallocate -l 800M $MNT/filler
  btrfs quota enable $MNT

  for ((i = 1; i <= 400000; i++)); do
      btrfs qgroup create 1/$i $MNT
  done

  umount $MNT

When running this, we can see in dmesg/syslog that a transaction abort
happened:

  [436.490] BTRFS error (device nullb0): failed to run delayed ref for logical 30408704 num_bytes 16384 type 176 action 1 ref_mod 1: -28
  [436.493] ------------[ cut here ]------------
  [436.494] BTRFS: Transaction aborted (error -28)
  [436.495] WARNING: fs/btrfs/extent-tree.c:2247 at btrfs_run_delayed_refs+0xd9/0x110 [btrfs], CPU#4: umount/2495372
  [436.497] Modules linked in: btrfs loop (...)
  [436.508] CPU: 4 UID: 0 PID: 2495372 Comm: umount Tainted: G        W           6.19.0-rc8-btrfs-next-225+ #1 PREEMPT(full)
  [436.510] Tainted: [W]=WARN
  [436.511] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
  [436.513] RIP: 0010:btrfs_run_delayed_refs+0xdf/0x110 [btrfs]
  [436.514] Code: 0f 82 ea (...)
  [436.518] RSP: 0018:ffffd511850b7d78 EFLAGS: 00010292
  [436.519] RAX: 00000000ffffffe4 RBX: ffff8f120dad37e0 RCX: 0000000002040001
  [436.520] RDX: 0000000000000002 RSI: 00000000ffffffe4 RDI: ffffffffc090fd80
  [436.522] RBP: 0000000000000000 R08: 0000000000000001 R09: ffffffffc04d1867
  [436.523] R10: ffff8f18dc1fffa8 R11: 0000000000000003 R12: ffff8f173aa89400
  [436.524] R13: 0000000000000000 R14: ffff8f173aa89400 R15: 0000000000000000
  [436.526] FS:  00007fe59045d840(0000) GS:ffff8f192e22e000(0000) knlGS:0000000000000000
  [436.527] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [436.528] CR2: 00007fe5905ff2b0 CR3: 000000060710a002 CR4: 0000000000370ef0
  [436.530] Call Trace:
  [436.530]  <TASK>
  [436.530]  btrfs_commit_transaction+0x73/0xc00 [btrfs]
  [436.531]  ? btrfs_attach_transaction_barrier+0x1e/0x70 [btrfs]
  [436.532]  sync_filesystem+0x7a/0x90
  [436.533]  generic_shutdown_super+0x28/0x180
  [436.533]  kill_anon_super+0x12/0x40
  [436.534]  btrfs_kill_super+0x12/0x20 [btrfs]
  [436.534]  deactivate_locked_super+0x2f/0xb0
  [436.534]  cleanup_mnt+0xea/0x180
  [436.535]  task_work_run+0x58/0xa0
  [436.535]  exit_to_user_mode_loop+0xed/0x480
  [436.536]  ? __x64_sys_umount+0x68/0x80
  [436.536]  do_syscall_64+0x2a5/0xf20
  [436.537]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
  [436.537] RIP: 0033:0x7fe5906b6217
  [436.538] Code: 0d 00 f7 (...)
  [436.540] RSP: 002b:00007ffcd87a61f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  [436.541] RAX: 0000000000000000 RBX: 00005618b9ecadc8 RCX: 00007fe5906b6217
  [436.541] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00005618b9ecb100
  [436.542] RBP: 0000000000000000 R08: 00007ffcd87a4fe0 R09: 00000000ffffffff
  [436.544] R10: 0000000000000103 R11: 0000000000000246 R12: 00007fe59081626c
  [436.544] R13: 00005618b9ecb100 R14: 0000000000000000 R15: 00005618b9ecacc0
  [436.545]  </TASK>
  [436.545] ---[ end trace 0000000000000000 ]---

Fix this by changing the qgroup ioctls to use start transaction instead of
joining so that proper space is reserved for the delayed refs generated
for the updates to the quota root. This way we don't get any transaction
abort.

Reviewed-by: Boris Burkov <boris@bur.io>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:43:07 +01:00
Claudiu Beznea
89a8567d84 dmaengine: sh: rz-dmac: Move CHCTRL updates under spinlock
Both rz_dmac_disable_hw() and rz_dmac_irq_handle_channel() update the
CHCTRL register. To avoid concurrency issues when configuring
functionalities exposed by this registers, take the virtual channel lock.
All other CHCTRL updates were already protected by the same lock.

Previously, rz_dmac_disable_hw() disabled and re-enabled local IRQs, before
accessing CHCTRL registers but this does not ensure race-free access.
Remove the local IRQ disable/enable code as well.

Fixes: 5000d37042 ("dmaengine: sh: Add DMAC driver for RZ/G2L SoC")
Cc: stable@vger.kernel.org
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Link: https://patch.msgid.link/20260316133252.240348-3-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-17 16:11:11 +05:30
Claudiu Beznea
abb863e621 dmaengine: sh: rz-dmac: Protect the driver specific lists
The driver lists (ld_free, ld_queue) are used in
rz_dmac_free_chan_resources(), rz_dmac_terminate_all(),
rz_dmac_issue_pending(), and rz_dmac_irq_handler_thread(), all under
the virtual channel lock. Take the same lock in rz_dmac_prep_slave_sg()
and rz_dmac_prep_dma_memcpy() as well to avoid concurrency issues, since
these functions also check whether the lists are empty and update or
remove list entries.

Fixes: 5000d37042 ("dmaengine: sh: Add DMAC driver for RZ/G2L SoC")
Cc: stable@vger.kernel.org
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Link: https://patch.msgid.link/20260316133252.240348-2-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-17 16:11:11 +05:30
Lee Jones
e716edafed HID: multitouch: Check to ensure report responses match the request
It is possible for a malicious (or clumsy) device to respond to a
specific report's feature request using a completely different report
ID.  This can cause confusion in the HID core resulting in nasty
side-effects such as OOB writes.

Add a check to ensure that the report ID in the response, matches the
one that was requested.  If it doesn't, omit reporting the raw event and
return early.

Signed-off-by: Lee Jones <lee@kernel.org>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-03-17 11:36:16 +01:00
Filipe Manana
2b4cb4e58f btrfs: check for NULL root after calls to btrfs_csum_root()
btrfs_csum_root() can return a NULL pointer in case the root we are
looking for is not in the rb tree that tracks roots. So add checks to
every caller that is missing such check to log a message and return
an error.

Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-btrfs/20260208161657.3972997-1-clm@meta.com/
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:29:32 +01:00
Filipe Manana
5024282870 btrfs: check for NULL root after calls to btrfs_extent_root()
btrfs_extent_root() can return a NULL pointer in case the root we are
looking for is not in the rb tree that tracks roots. So add checks to
every caller that is missing such check to log a message and return
an error. The same applies to callers of btrfs_block_group_root(),
since it calls btrfs_extent_root().

Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-btrfs/20260208161657.3972997-1-clm@meta.com/
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-17 11:22:49 +01:00
Felix Gu
76f0930d6e irqchip/riscv-rpmi-sysmsi: Fix mailbox channel leak in rpmi_sysmsi_probe()
When riscv_acpi_get_gsi_info() fails, the mailbox channel previously
requested via mbox_request_channel() is not freed. Add the missing
mbox_free_channel() call to prevent the resource leak.

Fixes: 4752b0cfbc ("irqchip/riscv-rpmi-sysmsi: Add ACPI support")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: stable@vger.kernel.org
Reviewed-by: Rahul Pathak <rahul@summations.net>
Link: https://patch.msgid.link/20260315-sysmsi-v1-1-5f090c86c2ca@gmail.com
2026-03-17 11:16:15 +01:00
Jouni Högander
7caac659a8 drm/i915/psr: Compute PSR entry_setup_frames into intel_crtc_state
PSR entry_setup_frames is currently computed directly into struct
intel_dp:intel_psr:entry_setup_frames. This causes a problem if mode change
gets rejected after PSR compute config: Psr_entry_setup_frames computed for
this rejected state is in intel_dp:intel_psr:entry_setup_frame. Fix this by
computing it into intel_crtc_state and copy the value into
intel_dp:intel_psr:entry_setup_frames on PSR enable.

Fixes: 2b981d57e4 ("drm/i915/display: Support PSR entry VSC packet to be transmitted one frame earlier")
Cc: Mika Kahola <mika.kahola@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Link: https://patch.msgid.link/20260312083710.1593781-3-jouni.hogander@intel.com
(cherry picked from commit 8c229b4aa00262c13787982e998c61c0783285e0)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-17 11:17:46 +02:00
Jouni Högander
b0a4dba7b6 drm/i915/psr: Disable PSR on update_m_n and update_lrr
PSR/PR parameters might change based on update_m_n or update_lrr. Disable
on update_m_n and update_lrr to ensure proper parameters are taken into use
on next PSR enable in intel_psr_post_plane_update.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15771
Fixes: 2bc98c6f97 ("drm/i915/alpm: Compute ALPM parameters into crtc_state->alpm_state")
Cc: <stable@vger.kernel.org> # v6.19+
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Link: https://patch.msgid.link/20260312083710.1593781-2-jouni.hogander@intel.com
(cherry picked from commit 65852b56bfa929f99e28c96fd98b02058959da7f)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-17 11:17:43 +02:00
Alexey Nepomnyashih
bb120ad57d ALSA: firewire-lib: fix uninitialized local variable
Similar to commit d8dc872046 ("ALSA: firewire-lib: fix uninitialized
local variable"), the local variable `curr_cycle_time` in
process_rx_packets() is declared without initialization.

When the tracepoint event is not probed, the variable may appear to be
used without being initialized. In practice the value is only relevant
when the tracepoint is enabled, however initializing it avoids potential
use of an uninitialized value and improves code safety.

Initialize `curr_cycle_time` to zero.

Fixes: fef4e61b0b ("ALSA: firewire-lib: extend tracepoints event including CYCLE_TIME of 1394 OHCI")
Cc: stable@vger.kernel.org
Signed-off-by: Alexey Nepomnyashih <sdl@nppct.ru>
Link: https://patch.msgid.link/20260316191824.83249-1-sdl@nppct.ru
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-17 09:54:48 +01:00
Krzysztof Kozlowski
ffe6989c73 Merge tag 'v7.0-rockchip-drvfixes1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip into arm/fixes
Fixing a missing of_node_put() call.

* tag 'v7.0-rockchip-drvfixes1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip:
  soc: rockchip: grf: Add missing of_node_put() when returning

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
2026-03-17 09:35:40 +01:00
Paul SAGE
e4c00ba727 tg3: replace placeholder MAC address with device property
On some systems (e.g. iMac 20,1 with BCM57766), the tg3 driver reads
a default placeholder mac address (00:10:18:00:00:00) from the
mailbox. The correct value on those systems are stored in the
'local-mac-address' property.

This patch, detect the default value and tries to retrieve
the correct address from the device_get_mac_address
function instead.

The patch has been tested on two different systems:
- iMac 20,1 (BCM57766) model which use the local-mac-address property
- iMac 13,2 (BCM57766) model which can use the mailbox,
    NVRAM or MAC control registers

Tested-by: Rishon Jonathan R <mithicalaviator85@gmail.com>

Co-developed-by: Vincent MORVAN <vinc@42.fr>
Signed-off-by: Vincent MORVAN <vinc@42.fr>
Signed-off-by: Paul SAGE <paul.sage@42.fr>
Signed-off-by: Atharva Tiwari <atharvatiwarilinuxdev@gmail.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260314215432.3589-1-atharvatiwarilinuxdev@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 20:22:29 -07:00
Jakub Kicinski
b9ba668296 Merge branch 'net-usb-cdc_ncm-add-ndpoffset-to-ndp-nframes-bounds-check'
tobgaertner says:

====================
net: usb: cdc_ncm: add ndpoffset to NDP nframes bounds check

The nframes bounds check in cdc_ncm_rx_verify_ndp16() and
cdc_ncm_rx_verify_ndp32() does not account for ndpoffset,
allowing out-of-bounds reads when the NDP is placed near the
end of the NTB.
====================

Link: https://patch.msgid.link/20260314054640.2895026-1-tob.gaertner@me.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 20:14:50 -07:00
Tobi Gaertner
7791425515 net: usb: cdc_ncm: add ndpoffset to NDP32 nframes bounds check
The same bounds-check bug fixed for NDP16 in the previous patch also
exists in cdc_ncm_rx_verify_ndp32(). The DPE array size is validated
against the total skb length without accounting for ndpoffset, allowing
out-of-bounds reads when the NDP32 is placed near the end of the NTB.

Add ndpoffset to the nframes bounds check and use struct_size_t() to
express the NDP-plus-DPE-array size more clearly.

Compile-tested only.

Fixes: 0fa81b304a ("cdc_ncm: Implement the 32-bit version of NCM Transfer Block")
Signed-off-by: Tobi Gaertner <tob.gaertner@me.com>
Link: https://patch.msgid.link/20260314054640.2895026-3-tob.gaertner@me.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 20:14:48 -07:00
Tobi Gaertner
2aa8a4fa8d net: usb: cdc_ncm: add ndpoffset to NDP16 nframes bounds check
cdc_ncm_rx_verify_ndp16() validates that the NDP header and its DPE
entries fit within the skb. The first check correctly accounts for
ndpoffset:

  if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp16)) > skb_in->len)

but the second check omits it:

  if ((sizeof(struct usb_cdc_ncm_ndp16) +
       ret * (sizeof(struct usb_cdc_ncm_dpe16))) > skb_in->len)

This validates the DPE array size against the total skb length as if
the NDP were at offset 0, rather than at ndpoffset. When the NDP is
placed near the end of the NTB (large wNdpIndex), the DPE entries can
extend past the skb data buffer even though the check passes.
cdc_ncm_rx_fixup() then reads out-of-bounds memory when iterating
the DPE array.

Add ndpoffset to the nframes bounds check and use struct_size_t() to
express the NDP-plus-DPE-array size more clearly.

Fixes: ff06ab13a4 ("net: cdc_ncm: splitting rx_fixup for code reuse")
Signed-off-by: Tobi Gaertner <tob.gaertner@me.com>
Link: https://patch.msgid.link/20260314054640.2895026-2-tob.gaertner@me.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 20:14:48 -07:00
Lorenzo Bianconi
d4a533ad24 net: airoha: Remove airoha_dev_stop() in airoha_remove()
Do not run airoha_dev_stop routine explicitly in airoha_remove()
since ndo_stop() callback is already executed by unregister_netdev() in
__dev_close_many routine if necessary and, doing so, we will end up causing
an underflow in the qdma users atomic counters. Rely on networking subsystem
to stop the device removing the airoha_eth module.

Fixes: 23020f0493 ("net: airoha: Introduce ethernet support for EN7581 SoC")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260313-airoha-remove-ndo_stop-remove-net-v2-1-67542c3ceeca@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 20:06:05 -07:00
Jamal Hadi Salim
66360460ca net/sched: teql: Fix double-free in teql_master_xmit
Whenever a TEQL devices has a lockless Qdisc as root, qdisc_reset should
be called using the seq_lock to avoid racing with the datapath. Failure
to do so may cause crashes like the following:

[  238.028993][  T318] BUG: KASAN: double-free in skb_release_data (net/core/skbuff.c:1139)
[  238.029328][  T318] Free of addr ffff88810c67ec00 by task poc_teql_uaf_ke/318
[  238.029749][  T318]
[  238.029900][  T318] CPU: 3 UID: 0 PID: 318 Comm: poc_teql_ke Not tainted 7.0.0-rc3-00149-ge5b31d988a41 #704 PREEMPT(full)
[  238.029906][  T318] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  238.029910][  T318] Call Trace:
[  238.029913][  T318]  <TASK>
[  238.029916][  T318]  dump_stack_lvl (lib/dump_stack.c:122)
[  238.029928][  T318]  print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
[  238.029940][  T318]  ? skb_release_data (net/core/skbuff.c:1139)
[  238.029944][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
...
[  238.029957][  T318]  ? skb_release_data (net/core/skbuff.c:1139)
[  238.029969][  T318]  kasan_report_invalid_free (mm/kasan/report.c:221 mm/kasan/report.c:563)
[  238.029979][  T318]  ? skb_release_data (net/core/skbuff.c:1139)
[  238.029989][  T318]  check_slab_allocation (mm/kasan/common.c:231)
[  238.029995][  T318]  kmem_cache_free (mm/slub.c:2637 (discriminator 1) mm/slub.c:6168 (discriminator 1) mm/slub.c:6298 (discriminator 1))
[  238.030004][  T318]  skb_release_data (net/core/skbuff.c:1139)
...
[  238.030025][  T318]  sk_skb_reason_drop (net/core/skbuff.c:1256)
[  238.030032][  T318]  pfifo_fast_reset (./include/linux/ptr_ring.h:171 ./include/linux/ptr_ring.h:309 ./include/linux/skb_array.h:98 net/sched/sch_generic.c:827)
[  238.030039][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
...
[  238.030054][  T318]  qdisc_reset (net/sched/sch_generic.c:1034)
[  238.030062][  T318]  teql_destroy (./include/linux/spinlock.h:395 net/sched/sch_teql.c:157)
[  238.030071][  T318]  __qdisc_destroy (./include/net/pkt_sched.h:328 net/sched/sch_generic.c:1077)
[  238.030077][  T318]  qdisc_graft (net/sched/sch_api.c:1062 net/sched/sch_api.c:1053 net/sched/sch_api.c:1159)
[  238.030089][  T318]  ? __pfx_qdisc_graft (net/sched/sch_api.c:1091)
[  238.030095][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  238.030102][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  238.030106][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  238.030114][  T318]  tc_get_qdisc (net/sched/sch_api.c:1529 net/sched/sch_api.c:1556)
...
[  238.072958][  T318] Allocated by task 303 on cpu 5 at 238.026275s:
[  238.073392][  T318]  kasan_save_stack (mm/kasan/common.c:58)
[  238.073884][  T318]  kasan_save_track (mm/kasan/common.c:64 (discriminator 5) mm/kasan/common.c:79 (discriminator 5))
[  238.074230][  T318]  __kasan_slab_alloc (mm/kasan/common.c:369)
[  238.074578][  T318]  kmem_cache_alloc_node_noprof (./include/linux/kasan.h:253 mm/slub.c:4542 mm/slub.c:4869 mm/slub.c:4921)
[  238.076091][  T318]  kmalloc_reserve (net/core/skbuff.c:616 (discriminator 107))
[  238.076450][  T318]  __alloc_skb (net/core/skbuff.c:713)
[  238.076834][  T318]  alloc_skb_with_frags (./include/linux/skbuff.h:1383 net/core/skbuff.c:6763)
[  238.077178][  T318]  sock_alloc_send_pskb (net/core/sock.c:2997)
[  238.077520][  T318]  packet_sendmsg (net/packet/af_packet.c:2926 net/packet/af_packet.c:3019 net/packet/af_packet.c:3108)
[  238.081469][  T318]
[  238.081870][  T318] Freed by task 299 on cpu 1 at 238.028496s:
[  238.082761][  T318]  kasan_save_stack (mm/kasan/common.c:58)
[  238.083481][  T318]  kasan_save_track (mm/kasan/common.c:64 (discriminator 5) mm/kasan/common.c:79 (discriminator 5))
[  238.085348][  T318]  kasan_save_free_info (mm/kasan/generic.c:587 (discriminator 1))
[  238.085900][  T318]  __kasan_slab_free (mm/kasan/common.c:287)
[  238.086439][  T318]  kmem_cache_free (mm/slub.c:6168 (discriminator 3) mm/slub.c:6298 (discriminator 3))
[  238.087007][  T318]  skb_release_data (net/core/skbuff.c:1139)
[  238.087491][  T318]  consume_skb (net/core/skbuff.c:1451)
[  238.087757][  T318]  teql_master_xmit (net/sched/sch_teql.c:358)
[  238.088116][  T318]  dev_hard_start_xmit (./include/linux/netdevice.h:5324 ./include/linux/netdevice.h:5333 net/core/dev.c:3871 net/core/dev.c:3887)
[  238.088468][  T318]  sch_direct_xmit (net/sched/sch_generic.c:347)
[  238.088820][  T318]  __qdisc_run (net/sched/sch_generic.c:420 (discriminator 1))
[  238.089166][  T318]  __dev_queue_xmit (./include/net/sch_generic.h:229 ./include/net/pkt_sched.h:121 ./include/net/pkt_sched.h:117 net/core/dev.c:4196 net/core/dev.c:4802)

Workflow to reproduce:
1. Initialize a TEQL topology (dummy0 and ifb0 as slaves, teql0 up).
2. Start multiple sender workers continuously transmitting packets
   through teql0 to drive teql_master_xmit().
3. In parallel, repeatedly delete and re-add the root qdisc on
   dummy0 and ifb0 via RTNETLINK, forcing frequent teardown and reset activity
   (teql_destroy() / qdisc_reset()).
4. After running both workloads concurrently for several iterations,
   KASAN reports slab-use-after-free or double-free in the skb free path.

Fix this by moving dev_reset_queue to sch_generic.h and calling it, instead
of qdisc_reset, in teql_destroy since it handles both the lock and lockless
cases correctly for root qdiscs.

Fixes: 96009c7d50 ("sched: replace __QDISC_STATE_RUNNING bit with a spin lock")
Reported-by: Xianrui Dong <keenanat2000@gmail.com>
Tested-by: Xianrui Dong <keenanat2000@gmail.com>
Co-developed-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260315155422.147256-1-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 19:40:32 -07:00
Jiayuan Chen
6d5e453836 net/smc: fix NULL dereference and UAF in smc_tcp_syn_recv_sock()
Syzkaller reported a panic in smc_tcp_syn_recv_sock() [1].

smc_tcp_syn_recv_sock() is called in the TCP receive path
(softirq) via icsk_af_ops->syn_recv_sock on the clcsock (TCP
listening socket). It reads sk_user_data to get the smc_sock
pointer. However, when the SMC listen socket is being closed
concurrently, smc_close_active() sets clcsock->sk_user_data
to NULL under sk_callback_lock, and then the smc_sock itself
can be freed via sock_put() in smc_release().

This leads to two issues:

1) NULL pointer dereference: sk_user_data is NULL when
   accessed.
2) Use-after-free: sk_user_data is read as non-NULL, but the
   smc_sock is freed before its fields (e.g., queued_smc_hs,
   ori_af_ops) are accessed.

The race window looks like this (the syzkaller crash [1]
triggers via the SYN cookie path: tcp_get_cookie_sock() ->
smc_tcp_syn_recv_sock(), but the normal tcp_check_req() path
has the same race):

  CPU A (softirq)              CPU B (process ctx)

  tcp_v4_rcv()
    TCP_NEW_SYN_RECV:
    sk = req->rsk_listener
    sock_hold(sk)
    /* No lock on listener */
                               smc_close_active():
                                 write_lock_bh(cb_lock)
                                 sk_user_data = NULL
                                 write_unlock_bh(cb_lock)
                                 ...
                                 smc_clcsock_release()
                                 sock_put(smc->sk) x2
                                   -> smc_sock freed!
    tcp_check_req()
      smc_tcp_syn_recv_sock():
        smc = user_data(sk)
          -> NULL or dangling
        smc->queued_smc_hs
          -> crash!

Note that the clcsock and smc_sock are two independent objects
with separate refcounts. TCP stack holds a reference on the
clcsock, which keeps it alive, but this does NOT prevent the
smc_sock from being freed.

Fix this by using RCU and refcount_inc_not_zero() to safely
access smc_sock. Since smc_tcp_syn_recv_sock() is called in
the TCP three-way handshake path, taking read_lock_bh on
sk_callback_lock is too heavy and would not survive a SYN
flood attack. Using rcu_read_lock() is much more lightweight.

- Set SOCK_RCU_FREE on the SMC listen socket so that
  smc_sock freeing is deferred until after the RCU grace
  period. This guarantees the memory is still valid when
  accessed inside rcu_read_lock().
- Use rcu_read_lock() to protect reading sk_user_data.
- Use refcount_inc_not_zero(&smc->sk.sk_refcnt) to pin the
  smc_sock. If the refcount has already reached zero (close
  path completed), it returns false and we bail out safely.

Note: smc_hs_congested() has a similar lockless read of
sk_user_data without rcu_read_lock(), but it only checks for
NULL and accesses the global smc_hs_wq, never dereferencing
any smc_sock field, so it is not affected.

Reproducer was verified with mdelay injection and smc_run,
the issue no longer occurs with this patch applied.

[1] https://syzkaller.appspot.com/bug?extid=827ae2bfb3a3529333e9

Fixes: 8270d9c210 ("net/smc: Limit backlog connections")
Reported-by: syzbot+827ae2bfb3a3529333e9@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/67eaf9b8.050a0220.3c3d88.004a.GAE@google.com/T/
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260312092909.48325-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 19:31:28 -07:00
Eric Dumazet
b7405dcf73 bonding: prevent potential infinite loop in bond_header_parse()
bond_header_parse() can loop if a stack of two bonding devices is setup,
because skb->dev always points to the hierarchy top.

Add new "const struct net_device *dev" parameter to
(struct header_ops)->parse() method to make sure the recursion
is bounded, and that the final leaf parse method is called.

Fixes: 950803f725 ("bonding: fix type confusion in bond_setup_by_slave()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Tested-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Link: https://patch.msgid.link/20260315104152.1436867-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-16 19:29:45 -07:00
Sheng Yong
eade540403 erofs: set fileio bio failed in short read case
For file-backed mount, IO requests are handled by vfs_iocb_iter_read().
However, it can be interrupted by SIGKILL, returning the number of
bytes actually copied. Unused folios in bio are unexpectedly marked
as uptodate.

  vfs_read
    filemap_read
      filemap_get_pages
        filemap_readahead
          erofs_fileio_readahead
            erofs_fileio_rq_submit
              vfs_iocb_iter_read
                filemap_read
                  filemap_get_pages  <= detect signal
              erofs_fileio_ki_complete  <= set all folios uptodate

This patch addresses this by setting short read bio with an error
directly.

Fixes: bc804a8d7e ("erofs: handle end of filesystem properly for file-backed mounts")
Reported-by: chenguanyou <chenguanyou@xiaomi.com>
Signed-off-by: Yunlei He <heyunlei@xiaomi.com>
Signed-off-by: Sheng Yong <shengyong1@xiaomi.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2026-03-17 10:27:31 +08:00
Joseph Salisbury
50bfd2a22b cifs: smb1: fix comment typo
The file contains a spelling error in a source comment (resposne).

Typos in comments reduce readability and make text searches less reliable
for developers and maintainers.

Replace 'resposne' with 'response' in the affected comment. This is a
comment-only cleanup and does not change behavior.

[v2: Removed Fixes: and Cc: to stable tags.]

Signed-off-by: Joseph Salisbury <joseph.salisbury@oracle.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-16 21:12:40 -05:00
Smita Koralahalli
75cea0776d cxl/hdm: Avoid incorrect DVSEC fallback when HDM decoders are enabled
Check the global CXL_HDM_DECODER_ENABLE bit instead of looping over
per-decoder COMMITTED bits to determine whether to fall back to DVSEC
range emulation. When the HDM decoder capability is globally enabled,
ignore DVSEC range registers regardless of individual decoder commit
state.

should_emulate_decoders() currently loops over per-decoder COMMITTED
bits, which leads to an incorrect DVSEC fallback when those bits are
zero. One way to trigger this is to destroy a region and bounce the
memdev:

  cxl disable-region region0
  cxl destroy-region region0
  cxl disable-memdev mem0
  cxl enable-memdev mem0

Region teardown zeroes the HDM decoder registers including the committed
bits. The subsequent memdev re-probe finds uncommitted decoders and falls
back to DVSEC emulation, even though HDM remains globally enabled.

Observed failures:

  should_emulate_decoders: cxl_port endpoint6: decoder6.0: committed: 0 base: 0x0_00000000 size: 0x0_00000000
  devm_cxl_setup_hdm: cxl_port endpoint6: Fallback map 1 range register
  ..
  devm_cxl_add_region: cxl_acpi ACPI0017:00: decoder0.0: created region0
  __construct_region: cxl_pci 0000:e1:00.0: mem1:decoder6.0:
  __construct_region region0 res: [mem 0x850000000-0x284fffffff flags 0x200] iw: 1 ig: 4096
  cxl region0: pci0000:e0:port1 cxl_port_setup_targets expected iw: 1 ig: 4096 ..
  cxl region0: pci0000:e0:port1 cxl_port_setup_targets got iw: 1 ig: 256 state: disabled ..
  cxl_port endpoint6: failed to attach decoder6.0 to region0: -6
  ..
  devm_cxl_add_region: cxl_acpi ACPI0017:00: decoder0.0: created region4
  alloc_hpa: cxl region4: HPA allocation error (-34) ..

Fixes: 52cc48ad2a ("cxl/hdm: Limit emulation to the number of range registers")
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260316201950.224567-1-Smita.KoralahalliChannabasappa@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-03-16 16:58:32 -07:00
Josh Poimboeuf
4e50192164 objtool: Fix Clang jump table detection
With Clang, there can be a conditional forward jump between the load of
the jump table address and the indirect branch.

Fixes the following warning:

  vmlinux.o: warning: objtool: ___bpf_prog_run+0x1c5: sibling call from callable instruction with modified stack frame

Reported-by: Arnd Bergmann <arnd@arndb.de>
Closes: https://lore.kernel.org/a426d669-58bb-4be1-9eaa-6f3d83109e2d@app.fastmail.com
Link: https://patch.msgid.link/7d8600caed08901b6679767488acd639f6df9688.1773071992.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-16 15:31:25 -07:00
Manivannan Sadhasivam
ee226656cd PCI/pwrctrl: Create pwrctrl devices only for PCI device nodes
A PCI host bridge node can have non-PCI child nodes (OPP tables, USB
hub, etc.) as well as PCI device child nodes.

Ensure that pwrctrl devices are only created for PCI device nodes by
checking for the 'pci' prefix in the compatible property.

Fixes: 4c41324892 ("PCI/pwrctrl: Add APIs to create, destroy pwrctrl devices")
Reported-by: Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
Closes: https://lore.kernel.org/all/20260212-rb3gen2-upd-gl3590-v1-1-18fb04bb32b0@oss.qualcomm.com
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260223-pwrctrl-fixes-7-0-v2-2-97566dfb1809@oss.qualcomm.com
2026-03-16 17:30:43 -05:00
Manivannan Sadhasivam
cf3287fb2c PCI/pwrctrl: Ensure that remote endpoint node parent has supply requirement
If OF graph is used in the PCI device node, the pwrctrl core creates a
pwrctrl device even if the remote endpoint doesn't have power supply
requirements. Since the device doesn't have any power supply requirements,
there was no pwrctrl driver to probe, leading to PCI controller driver
probe deferral as it waits for all pwrctrl drivers to probe before starting
bus scan.

This issue happens with Qcom ath12k devices with WSI interface attached to
the Qcom IPQ platforms.

Fix this issue by checking for the existence of at least one power supply
property in the remote endpoint parent node. To consolidate all the checks,
create a new helper pci_pwrctrl_is_required() and move all the checks
there.

Fixes: 9db826206f ("PCI/pwrctrl: Create pwrctrl device if graph port is found")
Reported-by: Raj Kumar Bhagat <raj.bhagat@oss.qualcomm.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Raj Kumar Bhagat <raj.bhagat@oss.qualcomm.com>
Reviewed-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260223-pwrctrl-fixes-7-0-v2-1-97566dfb1809@oss.qualcomm.com
2026-03-16 17:30:39 -05:00
Jeff Layton
5133b61aaf nfsd: fix heap overflow in NFSv4.0 LOCK replay cache
The NFSv4.0 replay cache uses a fixed 112-byte inline buffer
(rp_ibuf[NFSD4_REPLAY_ISIZE]) to store encoded operation responses.
This size was calculated based on OPEN responses and does not account
for LOCK denied responses, which include the conflicting lock owner as
a variable-length field up to 1024 bytes (NFS4_OPAQUE_LIMIT).

When a LOCK operation is denied due to a conflict with an existing lock
that has a large owner, nfsd4_encode_operation() copies the full encoded
response into the undersized replay buffer via read_bytes_from_xdr_buf()
with no bounds check. This results in a slab-out-of-bounds write of up
to 944 bytes past the end of the buffer, corrupting adjacent heap memory.

This can be triggered remotely by an unauthenticated attacker with two
cooperating NFSv4.0 clients: one sets a lock with a large owner string,
then the other requests a conflicting lock to provoke the denial.

We could fix this by increasing NFSD4_REPLAY_ISIZE to allow for a full
opaque, but that would increase the size of every stateowner, when most
lockowners are not that large.

Instead, fix this by checking the encoded response length against
NFSD4_REPLAY_ISIZE before copying into the replay buffer. If the
response is too large, set rp_buflen to 0 to skip caching the replay
payload. The status is still cached, and the client already received the
correct response on the original request.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Cc: stable@kernel.org
Reported-by: Nicholas Carlini <npc@anthropic.com>
Tested-by: Nicholas Carlini <npc@anthropic.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-03-16 16:58:01 -04:00
Josh Poimboeuf
6f93f7b068 livepatch/klp-build: Fix inconsistent kernel version
If .config hasn't been synced with auto.conf, any recent changes to
CONFIG_LOCALVERSION* may not get reflected in the kernel version name.

Use "make syncconfig" to force them to sync, and "make -s kernelrelease"
to get the version instead of having to construct it manually.

Fixes: 24ebfcd65a ("livepatch/klp-build: Introduce klp-build script for generating livepatch modules")
Closes: https://lore.kernel.org/20260217160645.3434685-10-joe.lawrence@redhat.com
Reported-by: Joe Lawrence <joe.lawrence@redhat.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
Acked-by: Song Liu <song@kernel.org>
Link: https://patch.msgid.link/20260310203751.1479229-10-joe.lawrence@redhat.com
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-16 12:50:17 -07:00
Joe Lawrence
28e367a969 objtool/klp: fix mkstemp() failure with long paths
The elf_create_file() function fails with EINVAL when the build directory
path is long enough to truncate the "XXXXXX" suffix in the 256-byte
tmp_name buffer.

Simplify the code to remove the unnecessary dirname()/basename() split
and concatenation.  Instead, allocate the exact number of bytes needed for
the path.

Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
Link: https://patch.msgid.link/20260310203751.1479229-3-joe.lawrence@redhat.com
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-16 12:40:31 -07:00
Joe Lawrence
2f2600decb objtool/klp: fix data alignment in __clone_symbol()
Commit 356e4b2f5b ("objtool: Fix data alignment in elf_add_data()")
corrected the alignment of data within a section (honoring the section's
sh_addralign).  Apply the same alignment when klp-diff mode clones a
symbol, adjusting the new symbol's offset for the output section's
sh_addralign.

Fixes: dd590d4d57 ("objtool/klp: Introduce klp diff subcommand for diffing object files")
Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
Link: https://patch.msgid.link/20260310203751.1479229-2-joe.lawrence@redhat.com
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-16 12:39:45 -07:00
Bart Van Assche
29ab768277 PM: runtime: Fix a race condition related to device removal
The following code in pm_runtime_work() may dereference the dev->parent
pointer after the parent device has been freed:

	/* Maybe the parent is now able to suspend. */
	if (parent && !parent->power.ignore_children) {
		spin_unlock(&dev->power.lock);

		spin_lock(&parent->power.lock);
		rpm_idle(parent, RPM_ASYNC);
		spin_unlock(&parent->power.lock);

		spin_lock(&dev->power.lock);
	}

Fix this by inserting a flush_work() call in pm_runtime_remove().

Without this patch blktest block/001 triggers the following complaint
sporadically:

BUG: KASAN: slab-use-after-free in lock_acquire+0x70/0x160
Read of size 1 at addr ffff88812bef7198 by task kworker/u553:1/3081
Workqueue: pm pm_runtime_work
Call Trace:
 <TASK>
 dump_stack_lvl+0x61/0x80
 print_address_description.constprop.0+0x8b/0x310
 print_report+0xfd/0x1d7
 kasan_report+0xd8/0x1d0
 __kasan_check_byte+0x42/0x60
 lock_acquire.part.0+0x38/0x230
 lock_acquire+0x70/0x160
 _raw_spin_lock+0x36/0x50
 rpm_suspend+0xc6a/0xfe0
 rpm_idle+0x578/0x770
 pm_runtime_work+0xee/0x120
 process_one_work+0xde3/0x1410
 worker_thread+0x5eb/0xfe0
 kthread+0x37b/0x480
 ret_from_fork+0x6cb/0x920
 ret_from_fork_asm+0x11/0x20
 </TASK>

Allocated by task 4314:
 kasan_save_stack+0x2a/0x50
 kasan_save_track+0x18/0x40
 kasan_save_alloc_info+0x3d/0x50
 __kasan_kmalloc+0xa0/0xb0
 __kmalloc_noprof+0x311/0x990
 scsi_alloc_target+0x122/0xb60 [scsi_mod]
 __scsi_scan_target+0x101/0x460 [scsi_mod]
 scsi_scan_channel+0x179/0x1c0 [scsi_mod]
 scsi_scan_host_selected+0x259/0x2d0 [scsi_mod]
 store_scan+0x2d2/0x390 [scsi_mod]
 dev_attr_store+0x43/0x80
 sysfs_kf_write+0xde/0x140
 kernfs_fop_write_iter+0x3ef/0x670
 vfs_write+0x506/0x1470
 ksys_write+0xfd/0x230
 __x64_sys_write+0x76/0xc0
 x64_sys_call+0x213/0x1810
 do_syscall_64+0xee/0xfc0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53

Freed by task 4314:
 kasan_save_stack+0x2a/0x50
 kasan_save_track+0x18/0x40
 kasan_save_free_info+0x3f/0x50
 __kasan_slab_free+0x67/0x80
 kfree+0x225/0x6c0
 scsi_target_dev_release+0x3d/0x60 [scsi_mod]
 device_release+0xa3/0x220
 kobject_cleanup+0x105/0x3a0
 kobject_put+0x72/0xd0
 put_device+0x17/0x20
 scsi_device_dev_release+0xacf/0x12c0 [scsi_mod]
 device_release+0xa3/0x220
 kobject_cleanup+0x105/0x3a0
 kobject_put+0x72/0xd0
 put_device+0x17/0x20
 scsi_device_put+0x7f/0xc0 [scsi_mod]
 sdev_store_delete+0xa5/0x120 [scsi_mod]
 dev_attr_store+0x43/0x80
 sysfs_kf_write+0xde/0x140
 kernfs_fop_write_iter+0x3ef/0x670
 vfs_write+0x506/0x1470
 ksys_write+0xfd/0x230
 __x64_sys_write+0x76/0xc0
 x64_sys_call+0x213/0x1810

Reported-by: Ming Lei <ming.lei@redhat.com>
Closes: https://lore.kernel.org/all/ZxdNvLNI8QaOfD2d@fedora/
Reported-by: syzbot+6c905ab800f20cf4086c@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68c13942.050a0220.2ff435.000b.GAE@google.com/
Fixes: 5e928f77a0 ("PM: Introduce core framework for run-time PM of I/O devices (rev. 17)")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260312182720.2776083-1-bvanassche@acm.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-16 20:33:09 +01:00
Rafael J. Wysocki
f4c31b07b1 sched: idle: Consolidate the handling of two special cases
There are two special cases in the idle loop that are handled
inconsistently even though they are analogous.

The first one is when a cpuidle driver is absent and the default CPU
idle time power management implemented by the architecture code is used.
In that case, the scheduler tick is stopped every time before invoking
default_idle_call().

The second one is when a cpuidle driver is present, but there is only
one idle state in its table.  In that case, the scheduler tick is never
stopped at all.

Since each of these approaches has its drawbacks, reconcile them with
the help of one simple heuristic.  Namely, stop the tick if the CPU has
been woken up by it in the previous iteration of the idle loop, or let
it tick otherwise.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Qais Yousef <qyousef@layalina.io>
Reviewed-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Fixes: ed98c34919 ("sched: idle: Do not stop the tick before cpuidle_idle_call()")
[ rjw: Added Fixes tag, changelog edits ]
Link: https://patch.msgid.link/4741364.LvFx2qVVIh@rafael.j.wysocki
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-16 20:29:47 +01:00
Linus Torvalds
8a91ebb337 Merge tag 'mm-hotfixes-stable-2026-03-16-12-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "6 hotfixes.  4 are cc:stable.  3 are for MM.

  All are singletons - please see the changelogs for details"

* tag 'mm-hotfixes-stable-2026-03-16-12-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  MAINTAINERS: update email address for Ignat Korchagin
  mm/huge_memory: fix early failure try_to_migrate() when split huge pmd for shared THP
  mm/rmap: fix incorrect pte restoration for lazyfree folios
  mm/huge_memory: fix use of NULL folio in move_pages_huge_pmd()
  build_bug.h: correct function parameters names in kernel-doc
  crash_dump: don't log dm-crypt key bytes in read_key_from_user_keying
2026-03-16 12:21:00 -07:00
Johan Hovold
dee0774bbb spi: fix statistics allocation
The controller per-cpu statistics is not allocated until after the
controller has been registered with driver core, which leaves a window
where accessing the sysfs attributes can trigger a NULL-pointer
dereference.

Fix this by moving the statistics allocation to controller allocation
while tying its lifetime to that of the controller (rather than using
implicit devres).

Fixes: 6598b91b5a ("spi: spi.c: Convert statistics to per-cpu u64_stats_t")
Cc: stable@vger.kernel.org	# 6.0
Cc: David Jander <david@protonic.nl>
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://patch.msgid.link/20260312151817.32100-3-johan@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 18:36:03 +00:00
Johan Hovold
8634e05b08 spi: fix use-after-free on controller registration failure
Make sure to deregister from driver core also in the unlikely event that
per-cpu statistics allocation fails during controller registration to
avoid use-after-free (of driver resources) and unclocked register
accesses.

Fixes: 6598b91b5a ("spi: spi.c: Convert statistics to per-cpu u64_stats_t")
Cc: stable@vger.kernel.org	# 6.0
Cc: David Jander <david@protonic.nl>
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://patch.msgid.link/20260312151817.32100-2-johan@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 18:36:03 +00:00
Zhang Heng
0bdf27abaf ALSA: hda/realtek: add quirk for ASUS Strix G16 G615JMR
The machine is equipped with ALC294 and requires the
ALC287_FIXUP_TXNW2781_I2C_ASUS quirk for the amplifier to work properly.
Since the machine's PCI SSID is also 1043:1204, HDA_CODEC_QUIRK is
used to retain the previous quirk.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221173
Cc: <stable@vger.kernel.org>
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260316022843.2809968-1-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-16 18:04:36 +01:00
Sean Rhodes
a6919f2a01 ALSA: hda/realtek: Sequence GPIO2 on Star Labs StarFighter
The initial StarFighter quirk fixed the runtime suspend pop by muting
speakers in the shutup callback before power-down. Further hardware
validation showed that the speaker path is controlled directly by LINE2
EAPD on NID 0x1b together with GPIO2 for the external amplifier.

Replace the shutup-delay workaround with explicit sequencing of those
controls at playback start and stop:
- assert LINE2 EAPD and drive GPIO2 high on PREPARE
- deassert LINE2 EAPD and drive GPIO2 low on CLEANUP

This avoids the runtime suspend pop without a sleep, and also fixes pops
around G3 entry and display-manager start that the original workaround
did not cover.

Fixes: 1cb3c20688 ("ALSA: hda/realtek: Fix speaker pop on Star Labs StarFighter")
Tested-by: Sean Rhodes <sean@starlabs.systems>
Signed-off-by: Sean Rhodes <sean@starlabs.systems>
Link: https://patch.msgid.link/20260315201127.33744-1-sean@starlabs.systems
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-16 18:04:00 +01:00
Christian Borntraeger
ab5119735e KVM: s390: vsie: Avoid injecting machine check on signal
The recent XFER_TO_GUEST_WORK change resulted in a situation, where the
vsie code would interpret a signal during work as a machine check during
SIE as both use the EINTR return code.
The exit_reason of the sie64a function has nothing to do with the
kvm_run exit_reason. Rename it and define a specific code for machine
checks instead of abusing -EINTR.
rename exit_reason into sie_return to avoid the naming conflict
and change the code flow in vsie.c to have a separate variable for rc
and sie_return.

Fixes: 2bd1337a12 ("KVM: s390: Use generic VIRT_XFER_TO_GUEST_WORK functions")
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-16 16:56:39 +01:00
Christian Borntraeger
1ca90f4ae5 KVM: s390: log machine checks more aggressively
KVM will reinject machine checks that happen during guest activity.
From a host perspective this machine check is no longer visible
and even for the guest, the guest might decide to only kill a
userspace program or even ignore the machine check.
As this can be a disruptive event nevertheless, we should log this
not only in the VM debug event (that gets lost after guest shutdown)
but also on the global KVM event as well as syslog.
Consolidate the logging and log with loglevel 2 and higher.

Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Acked-by: Hendrik Brueckner <brueckner@linux.ibm.com>
2026-03-16 16:56:39 +01:00
Janosch Frank
0c6294d98a KVM: s390: selftests: Add IRQ routing address offset tests
This test tries to setup routes which have address + offset
combinations which cross a page.

Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
2026-03-16 16:56:39 +01:00
Janosch Frank
dcf96f7ad5 KVM: s390: Limit adapter indicator access to mapped page
While we check the address for errors, we don't seem to check the bit
offsets and since they are 32 and 64 bits a lot of memory can be
reached indirectly via those offsets.

Fixes: 8422359877 ("KVM: s390: irq routing for adapter interrupts.")
Suggested-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
2026-03-16 16:56:39 +01:00
Janosch Frank
b00be77302 s390/mm: Add missing secure storage access fixups for donated memory
There are special cases where secure storage access exceptions happen
in a kernel context for pages that don't have the PG_arch_1 bit
set. That bit is set for non-exported guest secure storage (memory)
but is absent on storage donated to the Ultravisor since the kernel
isn't allowed to export donated pages.

Prior to this patch we would try to export the page by calling
arch_make_folio_accessible() which would instantly return since the
arch bit is absent signifying that the page was already exported and
no further action is necessary. This leads to secure storage access
exception loops which can never be resolved.

With this patch we unconditionally try to export and if that fails we
fixup.

Fixes: 084ea4d611 ("s390/mm: add (non)secure page access exceptions handlers")
Reported-by: Heiko Carstens <hca@linux.ibm.com>
Suggested-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
2026-03-16 16:56:28 +01:00
Linus Torvalds
2d1373e424 Merge tag 'for-7.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - fix logging of new dentries when logging parent directory and there
   are conflicting inodes (e.g. deleted directory)

 - avoid taking big device lock for zone setup, this is not necessary
   during mount

 - tune message verbosity when auto-reclaiming zones when low on space

 - fix slightly misleading message of root item check

* tag 'for-7.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: tree-checker: fix misleading root drop_level error message
  btrfs: log new dentries when logging parent dir of a conflicting inode
  btrfs: don't take device_list_mutex when querying zone info
  btrfs: pass 'verbose' parameter to btrfs_relocate_block_group
2026-03-16 08:53:06 -07:00
Ian Forbes
c6cb77c474 drm/vmwgfx: Don't overwrite KMS surface dirty tracker
We were overwriting the surface's dirty tracker here causing a memory leak.

Reported-by: Mika Penttilä <mpenttil@redhat.com>
Closes: https://lore.kernel.org/dri-devel/8c53f3c6-c6de-46fe-a8ca-d98dd52b3abe@redhat.com/
Fixes: 965544150d ("drm/vmwgfx: Refactor cursor handling")
Signed-off-by: Ian Forbes <ian.forbes@broadcom.com>
Reviewed-by: Maaz Mombasawala <maaz.mombasawala@broadcom.com>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Link: https://patch.msgid.link/20260302200330.66763-1-ian.forbes@broadcom.com
2026-03-16 11:39:42 -04:00
Randy Dunlap
c7feff27ea drm/vmwgfx: fix kernel-doc warnings in vmwgfx_drv.h
Fix 45+ kernel-doc warnings in vmwgfx_drv.h:
- spell a struct name correctly
- don't have structs between kernel-doc and its struct
- end description of struct members with ':'
- start all kernel-doc lines with " *"
- mark private struct member and enum value with "private:"
- add kernel-doc for enum vmw_dma_map_mode
- add missing struct member comments
- add missing function parameter comments
- convert "/**" to "/*" for non-kernel-doc comments
- add missing "Returns:" comments for several functions
- correct a function parameter name

to eliminate kernel-doc warnings (examples):

Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:128 struct vmw_bo; error:
 Cannot parse struct or union!
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:151 struct member 'used_prio'
 not described in 'vmw_resource'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:151 struct member 'mob_node'
 not described in 'vmw_resource'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:199 bad line: SM4 device.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:270 struct member 'private'
 not described in 'vmw_res_cache_entry'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:280 Enum value
 'vmw_dma_alloc_coherent' not described in enum 'vmw_dma_map_mode'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:280 Enum value
 'vmw_dma_map_bind' not described in enum 'vmw_dma_map_mode'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:295 struct member 'addrs'
 not described in 'vmw_sg_table'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:295 struct member 'mode'
 not described in 'vmw_sg_table'
vmwgfx_drv.h:309: warning: Excess struct member 'num_regions' description
 in 'vmw_sg_table'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:402 struct member 'filp'
 not described in 'vmw_sw_context'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:732 This comment starts with
 '/**', but isn't a kernel-doc comment.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:742 This comment starts with
 '/**', but isn't a kernel-doc comment.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:762 This comment starts with
 '/**', but isn't a kernel-doc comment.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:887 No description found for
 return value of 'vmw_fifo_caps'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:901 No description found for
 return value of 'vmw_is_cursor_bypass3_enabled'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:906 This comment starts with
 '/**', but isn't a kernel-doc comment.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:961 This comment starts with
 '/**', but isn't a kernel-doc comment.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:996 This comment starts with
 '/**', but isn't a kernel-doc comment.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:1082 cannot understand
 function prototype: 'const struct dma_buf_ops vmw_prime_dmabuf_ops;'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:1303 struct member 'do_cpy'
 not described in 'vmw_diff_cpy'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:1385 function parameter 'fmt'
 not described in 'VMW_DEBUG_KMS'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:1389 This comment starts with
 '/**', but isn't a kernel-doc comment.
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:1426 function parameter 'vmw'
 not described in 'vmw_fifo_mem_read'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:1426 No description found for
 return value of 'vmw_fifo_mem_read'
Warning: drivers/gpu/drm/vmwgfx/vmwgfx_drv.h:1441 function parameter
 'fifo_reg' not described in 'vmw_fifo_mem_write'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Link: https://patch.msgid.link/20260219215548.470810-1-rdunlap@infradead.org
2026-03-16 11:39:22 -04:00
Lee Jones
f7a4c78bfe HID: logitech-hidpp: Prevent use-after-free on force feedback initialisation failure
Presently, if the force feedback initialisation fails when probing the
Logitech G920 Driving Force Racing Wheel for Xbox One, an error number
will be returned and propagated before the userspace infrastructure
(sysfs and /dev/input) has been torn down.  If userspace ignores the
errors and continues to use its references to these dangling entities, a
UAF will promptly follow.

We have 2 options; continue to return the error, but ensure that all of
the infrastructure is torn down accordingly or continue to treat this
condition as a warning by emitting the message but returning success.
It is thought that the original author's intention was to emit the
warning but keep the device functional, less the force feedback feature,
so let's go with that.

Signed-off-by: Lee Jones <lee@kernel.org>
Reviewed-by: Günther Noack <gnoack@google.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-03-16 16:21:48 +01:00
Benjamin Tissoires
2b658c1c44 HID: bpf: prevent buffer overflow in hid_hw_request
right now the returned value is considered to be always valid. However,
when playing with HID-BPF, the return value can be arbitrary big,
because it's the return value of dispatch_hid_bpf_raw_requests(), which
calls the struct_ops and we have no guarantees that the value makes
sense.

Fixes: 8bd0488b5e ("HID: bpf: add HID-BPF hooks for hid_hw_raw_requests")
Cc: stable@vger.kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-03-16 16:21:06 +01:00
Benjamin Tissoires
5d4c6c132e selftests/hid: fix compilation when bpf_wq and hid_device are not exported
This can happen in situations when CONFIG_HID_SUPPORT is set to no, or
some complex situations where struct bpf_wq is not exported.

So do the usual dance of hiding them before including vmlinux.h, and
then redefining them and make use of CO-RE to have the correct offsets.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202603111558.KLCIxsZB-lkp@intel.com/
Fixes: fe8d561db3 ("selftests/hid: add wq test for hid_bpf_input_report()")
Cc: stable@vger.kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Reviewed-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-03-16 16:21:06 +01:00
Lee Jones
0a3fe972a7 HID: core: Mitigate potential OOB by removing bogus memset()
The memset() in hid_report_raw_event() has the good intention of
clearing out bogus data by zeroing the area from the end of the incoming
data string to the assumed end of the buffer.  However, as we have
previously seen, doing so can easily result in OOB reads and writes in
the subsequent thread of execution.

The current suggestion from one of the HID maintainers is to remove the
memset() and simply return if the incoming event buffer size is not
large enough to fill the associated report.

Suggested-by Benjamin Tissoires <bentiss@kernel.org>

Signed-off-by: Lee Jones <lee@kernel.org>
[bentiss: changed the return value]
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-03-16 16:21:06 +01:00
Luke Wang
5e3486e640 mmc: sdhci: fix timing selection for 1-bit bus width
When 1-bit bus width is used with HS200/HS400 capabilities set,
mmc_select_hs200() returns 0 without actually switching. This
causes mmc_select_timing() to skip mmc_select_hs(), leaving eMMC
in legacy mode (26MHz) instead of High Speed SDR (52MHz).

Per JEDEC eMMC spec section 5.3.2, 1-bit mode supports High Speed
SDR. Drop incompatible HS200/HS400/UHS/DDR caps early so timing
selection falls through to mmc_select_hs() correctly.

Fixes: f2119df6b7 ("mmc: sd: add support for signal voltage switch procedure")
Signed-off-by: Luke Wang <ziniu.wang_1@nxp.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-03-16 16:08:54 +01:00
Maramaina Naresh
ba3402f6c8 spi: geni-qcom: Fix CPHA and CPOL mode change detection
setup_fifo_params computes mode_changed from spi->mode flags but tests
it against SE_SPI_CPHA and SE_SPI_CPOL, which are register offsets,
not SPI mode bits. This causes CPHA and CPOL updates to be skipped
on mode switches, leaving the controller with stale clock phase
and polarity settings.

Fix this by using SPI_CPHA and SPI_CPOL to detect mode changes before
updating the corresponding registers.

Fixes: 781c3e71c9 ("spi: spi-geni-qcom: rework setup_fifo_params")
Signed-off-by: Maramaina Naresh <naresh.maramaina@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Link: https://patch.msgid.link/20260316-spi-geni-cpha-cpol-fix-v1-1-4cb44c176b79@oss.qualcomm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 14:43:32 +00:00
Andy Shevchenko
09e70e4f11 regmap: Synchronize cache for the page selector
If the selector register is represented in each page, its value
according to the debugfs is stale because it gets synchronized
only after the real page switch happens. Hence the regmap cache
initialisation from the HW inherits outdated data in the selector
register.

Synchronize cache for the page selector just in time.

Before (offset followed by hexdump, the first byte is selector):

    // Real registers
    18: 05 ff 00 00 ff 0f 00 00 f0 00 00 00
    ...
    // Virtual (per port)
    40: 05 ff 00 00 e0 e0 00 00 00 00 00 1f
    50: 00 ff 00 00 e0 e0 00 00 00 00 00 1f
    60: 01 ff 00 00 ff ff 00 00 00 00 00 00
    70: 02 ff 00 00 cf f3 00 00 00 00 00 0c
    80: 03 ff 00 00 00 00 00 00 00 00 00 ff
    90: 04 ff 00 00 ff 0f 00 00 f0 00 00 00

After:

    // Real registers
    18: 05 ff 00 00 ff 0f 00 00 f0 00 00 00
    ...
    // Virtual (per port)
    40: 00 ff 00 00 e0 e0 00 00 00 00 00 1f
    50: 01 ff 00 00 e0 e0 00 00 00 00 00 1f
    60: 02 ff 00 00 ff ff 00 00 00 00 00 00
    70: 03 ff 00 00 cf f3 00 00 00 00 00 0c
    80: 04 ff 00 00 00 00 00 00 00 00 00 ff
    90: 05 ff 00 00 ff 0f 00 00 f0 00 00 00

Fixes: 6863ca6227 ("regmap: Add support for register indirect addressing.")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20260302184753.2693803-1-andriy.shevchenko@linux.intel.com
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 13:50:34 +00:00
Leo Yan
81f86728a9 tools headers: Skip arm64 cputype.h check
Some definitions in the arm64 kernel's cputype.h are kernel specific and
cause perf build failures when the header is synced into tools.

Stop checking arm64's cputype.h.  In the future, the header in tools
will be updated manually when teaching tools about new CPUs.

Signed-off-by: Leo Yan <leo.yan@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-16 09:26:28 -03:00
Shigeru Yoshida
6f770b73d0 dma: swiotlb: add KMSAN annotations to swiotlb_bounce()
When a device performs DMA to a bounce buffer, KMSAN is unaware of
the write and does not mark the data as initialized.  When
swiotlb_bounce() later copies the bounce buffer back to the original
buffer, memcpy propagates the uninitialized shadow to the original
buffer, causing false positive uninit-value reports.

Fix this by calling kmsan_unpoison_memory() on the bounce buffer
before copying it back in the DMA_FROM_DEVICE path, so that memcpy
naturally propagates initialized shadow to the destination.

Suggested-by: Alexander Potapenko <glider@google.com>
Link: https://lore.kernel.org/CAG_fn=WUGta-paG1BgsGRoAR+fmuCgh3xo=R3XdzOt_-DqSdHw@mail.gmail.com/
Fixes: 7ade4f1077 ("dma: kmsan: unpoison DMA mappings")
Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260315082750.2375581-1-syoshida@redhat.com
2026-03-16 11:29:20 +01:00
Daniel Schaefer
4bc7bc4579 HID: intel-thc-hid: Set HID_PHYS with PCI BDF
Currently HID_PHYS is empty, which means userspace tools (e.g. fwupd)
that depend on it for distinguishing the devices, are unable to do so.
Other drivers like i2c-hid, usbhid, surface-hid, all populate it.

With this change it's set to, for example: HID_PHYS=0000:00:10.0

Each function has just a single HID device, as far as I can tell, so
there is no need to add a suffix.

Tested with fwupd 2.1.1, can avoid https://github.com/fwupd/fwupd/pull/9995

Cc: Even Xu <even.xu@intel.com>
Cc: Xinpeng Sun <xinpeng.sun@intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <bentiss@kernel.org>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Daniel Schaefer <git@danielschaefer.me>
Reviewed-by: Even Xu <even.xu@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-03-16 11:25:07 +01:00
Eric Dumazet
eb2d16a7d5 af_key: validate families in pfkey_send_migrate()
syzbot was able to trigger a crash in skb_put() [1]

Issue is that pfkey_send_migrate() does not check old/new families,
and that set_ipsecrequest() @family argument was truncated,
thus possibly overfilling the skb.

Validate families early, do not wait set_ipsecrequest().

[1]

skbuff: skb_over_panic: text:ffffffff8a752120 len:392 put:16 head:ffff88802a4ad040 data:ffff88802a4ad040 tail:0x188 end:0x180 dev:<NULL>
 kernel BUG at net/core/skbuff.c:214 !
Call Trace:
 <TASK>
  skb_over_panic net/core/skbuff.c:219 [inline]
  skb_put+0x159/0x210 net/core/skbuff.c:2655
  skb_put_zero include/linux/skbuff.h:2788 [inline]
  set_ipsecrequest net/key/af_key.c:3532 [inline]
  pfkey_send_migrate+0x1270/0x2e50 net/key/af_key.c:3636
  km_migrate+0x155/0x260 net/xfrm/xfrm_state.c:2848
  xfrm_migrate+0x2140/0x2450 net/xfrm/xfrm_policy.c:4705
  xfrm_do_migrate+0x8ff/0xaa0 net/xfrm/xfrm_user.c:3150

Fixes: 08de61beab ("[PFKEYV2]: Extension for dynamic update of endpoint address(es)")
Reported-by: syzbot+b518dfc8e021988fbd55@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/69b5933c.050a0220.248e02.00f2.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-16 10:58:21 +01:00
Minwoo Ra
29fe3a61bc xfrm: prevent policy_hthresh.work from racing with netns teardown
A XFRM_MSG_NEWSPDINFO request can queue the per-net work item
policy_hthresh.work onto the system workqueue.

The queued callback, xfrm_hash_rebuild(), retrieves the enclosing
struct net via container_of(). If the net namespace is torn down
before that work runs, the associated struct net may already have
been freed, and xfrm_hash_rebuild() may then dereference stale memory.

xfrm_policy_fini() already flushes policy_hash_work during teardown,
but it does not synchronize policy_hthresh.work.

Synchronize policy_hthresh.work in xfrm_policy_fini() as well, so the
queued work cannot outlive the net namespace teardown and access a
freed struct net.

Fixes: 880a6fab8f ("xfrm: configure policy hash table thresholds by netlink")
Signed-off-by: Minwoo Ra <raminwo0202@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-16 10:58:21 +01:00
Damien Le Moal
ce5ae93d1a ata: libata-core: disable LPM on ADATA SU680 SSD
ADATA SU680 SSDs suffer from NCQ read and write commands timeouts or bus
errors when link power management (LPM) is enabled. Flag these devices
with the ATA_QUIRK_NOLPM quirk to prevent the use of LPM and avoid these
command failures.

Reported-by: Mohammad Khaled Bayan <mhd.khaled.bayan@gmail.com>
Closes: https://bugs.launchpad.net/ubuntu/+source/linux-hwe-6.17/+bug/2144060
Cc: stable@vger.kernel.org
Tested-by: Mohammad-Khaled Bayan <mhd.khaled.bayan@gmail.com>
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2026-03-16 09:05:18 +01:00
Imre Deak
ac57eb3b7d drm/i915/dmc: Fix an unlikely NULL pointer deference at probe
intel_dmc_update_dc6_allowed_count() oopses when DMC hasn't been
initialized, and dmc is thus NULL.

That would be the case when the call path is
intel_power_domains_init_hw() -> {skl,bxt,icl}_display_core_init() ->
gen9_set_dc_state() -> intel_dmc_update_dc6_allowed_count(), as
intel_power_domains_init_hw() is called *before* intel_dmc_init().

However, gen9_set_dc_state() calls intel_dmc_update_dc6_allowed_count()
conditionally, depending on the current and target DC states. At probe,
the target is disabled, but if DC6 is enabled, the function is called,
and an oops follows. Apparently it's quite unlikely that DC6 is enabled
at probe, as we haven't seen this failure mode before.

It is also strange to have DC6 enabled at boot, since that would require
the DMC firmware (loaded by BIOS); the BIOS loading the DMC firmware and
the driver stopping / reprogramming the firmware is a poorly specified
sequence and as such unlikely an intentional BIOS behaviour. It's more
likely that BIOS is leaving an unintentionally enabled DC6 HW state
behind (without actually loading the required DMC firmware for this).

The tracking of the DC6 allowed counter only works if starting /
stopping the counter depends on the _SW_ DC6 state vs. the current _HW_
DC6 state (since stopping the counter requires the DC5 counter captured
when the counter was started). Thus, using the HW DC6 state is incorrect
and it also leads to the above oops. Fix both issues by using the SW DC6
state for the tracking.

This is v2 of the fix originally sent by Jani, updated based on the
first Link: discussion below.

Link: https://lore.kernel.org/all/3626411dc9e556452c432d0919821b76d9991217@intel.com
Link: https://lore.kernel.org/all/20260228130946.50919-2-ltao@redhat.com
Fixes: 88c1f9a4d3 ("drm/i915/dmc: Create debugfs entry for dc6 counter")
Cc: Mohammed Thasleem <mohammed.thasleem@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Tao Liu <ltao@redhat.com>
Cc: <stable@vger.kernel.org> # v6.16+
Tested-by: Tao Liu <ltao@redhat.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patch.msgid.link/20260309164803.1918158-1-imre.deak@intel.com
(cherry picked from commit 2344b93af8eb5da5d496b4e0529d35f0f559eaf0)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-16 09:41:15 +02:00
Bibo Mao
c252c12d1f LoongArch: KVM: Fix typo issue in kvm_vm_init_features()
Most of VM feature detections are integer OR operations, and integer
assignment operation will clear previous integer OR operation. So here
change all integer assignment operations to integer OR operations.

Fixes: 82db90bf46 ("LoongArch: KVM: Move feature detection in kvm_vm_init_features()")
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-16 10:36:02 +08:00
Tiezhu Yang
b254c629a9 LoongArch: BPF: Make arch_protect_bpf_trampoline() return 0
Occasionally there exist "text_copy_cb: operation failed" when executing
the bpf selftests, the reason is copy_to_kernel_nofault() failed and the
ecode of ESTAT register is 0x4 (PME: Page Modification Exception) due to
the pte is not writeable. The root cause is that there is another place
to set the pte entry as readonly which is in the generic weak version of
arch_protect_bpf_trampoline().

There are two ways to fix this race condition issue: the direct way is
to modify the generic weak arch_protect_bpf_trampoline() to add a mutex
lock for set_memory_rox(), but the other simple and proper way is to
just make arch_protect_bpf_trampoline() return 0 in the arch-specific
code because LoongArch has already use the BPF prog pack allocator for
trampoline.

Here are the trimmed kernel log messages:

  copy_to_kernel_nofault: memory access failed, ecode 0x4
  copy_to_kernel_nofault: the caller is text_copy_cb+0x50/0xa0
  text_copy_cb: operation failed
  ------------[ cut here ]------------
  bpf_prog_pack bug: missing bpf_arch_text_invalidate?
  WARNING: kernel/bpf/core.c:1008 at bpf_prog_pack_free+0x200/0x228
  ...
  Call Trace:
  [<9000000000248914>] show_stack+0x64/0x188
  [<9000000000241308>] dump_stack_lvl+0x6c/0x9c
  [<90000000002705bc>] __warn+0x9c/0x200
  [<9000000001c428c0>] __report_bug+0xa8/0x1c0
  [<9000000001c42b5c>] report_bug+0x64/0x120
  [<9000000001c7dcd0>] do_bp+0x270/0x3c0
  [<9000000000246f40>] handle_bp+0x120/0x1c0
  [<900000000047b030>] bpf_prog_pack_free+0x200/0x228
  [<900000000047b2ec>] bpf_jit_binary_pack_free+0x24/0x60
  [<900000000026989c>] bpf_jit_free+0x54/0xb0
  [<900000000029e10c>] process_one_work+0x184/0x610
  [<900000000029ef8c>] worker_thread+0x24c/0x388
  [<90000000002a902c>] kthread+0x13c/0x170
  [<9000000001c7dfe8>] ret_from_kernel_thread+0x28/0x1c0
  [<9000000000246624>] ret_from_kernel_thread_asm+0xc/0x88

  ---[ end trace 0000000000000000 ]---

Here is a simple shell script to reproduce:

  #!/bin/bash

  for ((i=1; i<=1000; i++))
  do
    echo "Under testing $i ..."
    dmesg -c > /dev/null
    ./test_progs -t fentry_attach_stress > /dev/null
    dmesg -t | grep "text_copy_cb: operation failed"
    if [ $? -eq 0 ]; then
      break
    fi
  done

Cc: stable@vger.kernel.org
Fixes: 4ab17e762b ("LoongArch: BPF: Use BPF prog pack allocator")
Acked-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-16 10:36:01 +08:00
Tiezhu Yang
d3b8491961 LoongArch: No need to flush icache if text copy failed
If copy_to_kernel_nofault() failed, no need to flush icache and just
return immediately.

Cc: stable@vger.kernel.org
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-16 10:36:01 +08:00
Tiezhu Yang
431ce839da LoongArch: Check return values for set_memory_{rw,rox}
set_memory_rw() and set_memory_rox() may fail, so we should check the
return values and return immediately in larch_insn_text_copy().

Cc: stable@vger.kernel.org
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-16 10:36:01 +08:00
Tiezhu Yang
a47f0754bd LoongArch: Give more information if kmem access failed
If memory access such as copy_{from, to}_kernel_nofault() failed, its
users do not know what happened, so it is very useful to print the
exception code for such cases. Furthermore, it is better to print the
caller function to know where is the entry.

Here are the low level call chains:

  copy_from_kernel_nofault()
    copy_from_kernel_nofault_loop()
      __get_kernel_nofault()

  copy_to_kernel_nofault()
    copy_to_kernel_nofault_loop()
      __put_kernel_nofault()

Cc: stable@vger.kernel.org
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-16 10:36:01 +08:00
Xi Ruoyao
8a69d02481 LoongArch: Fix calling smp_processor_id() in preemptible code
Fix the warning:

  BUG: using smp_processor_id() in preemptible [00000000] code: systemd/1
  caller is larch_insn_text_copy+0x40/0xf0

Simply changing it to raw_smp_processor_id() is not enough: if preempt
and CPU hotplug happens after raw_smp_processor_id() but before calling
stop_machine(), the CPU where raw_smp_processor_id() has run may become
offline when stop_machine() and no CPU will run copy_to_kernel_nofault()
in text_copy_cb().  Thus guard the larch_insn_text_copy() calls with
cpus_read_lock() and change stop_machine() to stop_machine_cpuslocked()
to prevent this.

I've considered moving the locks inside larch_insn_text_copy() but
doing so seems not an easy hack. In bpf_arch_text_poke() obviously the
memcpy() call must be guarded by text_mutex, so we have to leave the
acquire of text_mutex out of larch_insn_text_copy(). But in the entire
kernel the acquire of mutexes is always after cpus_read_lock(), so we
cannot put cpus_read_lock() into larch_insn_text_copy() while leaving
the text_mutex acquire out (or we risk a deadlock due to inconsistent
lock acquire order). So let's fix the bug first and leave the posssible
refactor as future work.

Fixes: 9fbd18cf4c ("LoongArch: BPF: Add dynamic code modification support")
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-16 10:36:01 +08:00
Thomas Weißschuh
c8b8f3c50f LoongArch: Only use SC.Q when supported by the assembler
The 128-bit atomic cmpxchg implementation uses the SC.Q instruction.
Older versions of GNU AS do not support that instruction, erroring out:

ERROR:root:{standard input}: Assembler messages:
{standard input}:4831: Error: no match insn: sc.q	$t0,$t1,$r14
{standard input}:6407: Error: no match insn: sc.q	$t0,$t1,$r23
{standard input}:10856: Error: no match insn: sc.q	$t0,$t1,$r14

make[4]: *** [../scripts/Makefile.build:289: mm/slub.o] Error 1

(Binutils 2.41)

So test support for SC.Q in Kconfig and disable the atomics if the
instruction is not available.

Fixes: f0e4b1b6e2 ("LoongArch: Add 128-bit atomic cmpxchg support")
Closes: https://lore.kernel.org/lkml/20260216082834-edc51c46-7b7a-4295-8ea5-4d9a3ca2224f@linutronix.de/
Reviewed-by: Xi Ruoyao <xry111@xry111.site>
Acked-by: Hengqi Chen <hengqi.chen@gmail.com>
Tested-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2026-03-16 10:36:00 +08:00
Paulo Alcantara
12b4c5d98c smb: client: fix krb5 mount with username option
Customer reported that some of their krb5 mounts were failing against
a single server as the client was trying to mount the shares with
wrong credentials.  It turned out the client was reusing SMB session
from first mount to try mounting the other shares, even though a
different username= option had been specified to the other mounts.

By using username mount option along with sec=krb5 to search for
principals from keytab is supported by cifs.upcall(8) since
cifs-utils-4.8.  So fix this by matching username mount option in
match_session() even with Kerberos.

For example, the second mount below should fail with -ENOKEY as there
is no 'foobar' principal in keytab (/etc/krb5.keytab).  The client
ends up reusing SMB session from first mount to perform the second
one, which is wrong.

```
$ ktutil
ktutil:  add_entry -password -p testuser -k 1 -e aes256-cts
Password for testuser@ZELDA.TEST:
ktutil:  write_kt /etc/krb5.keytab
ktutil:  quit
$ klist -ke
Keytab name: FILE:/etc/krb5.keytab
KVNO Principal
 ---- ----------------------------------------------------------------
   1 testuser@ZELDA.TEST (aes256-cts-hmac-sha1-96)
$ mount.cifs //w22-root2/scratch /mnt/1 -o sec=krb5,username=testuser
$ mount.cifs //w22-root2/scratch /mnt/2 -o sec=krb5,username=foobar
$ mount -t cifs | grep -Po 'username=\K\w+'
testuser
testuser
```

Reported-by: Oscar Santos <ossantos@redhat.com>
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Cc: David Howells <dhowells@redhat.com>
Cc: linux-cifs@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-15 20:53:09 -05:00
Felix Gu
7fc5e2f560 spi: axiado: Fix double-free in ax_spi_probe()
ctlr is allocated using devm_spi_alloc_host(), which automatically
handles reference counting via the devm framework.

Calling spi_controller_put() manually in the probe error path is
redundant and results in a double-free.

Fixes: e75a6b00ad ("spi: axiado: Add driver for Axiado SPI DB controller")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260302-axiado-v1-1-1132819f1cb7@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 00:28:54 +00:00
Hasun Park
399b6fd37a ASoC: amd: acp: add PX13 SoundWire machine link for rt721+tas2783x2
Add an ACP70 SoundWire machine entry for ASUS PX13
(HN7306EA/HN7306EAC) with rt721 and two TAS2783 amps on link1.

Describe rt721 with jack/DMIC endpoints on this platform and add
explicit left/right TAS2783 speaker endpoint mapping via name prefixes.

Signed-off-by: Hasun Park <hasunpark@gmail.com>
Link: https://patch.msgid.link/20260308151654.29059-3-hasunpark@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 00:14:35 +00:00
Hasun Park
d0426510a9 ASoC: amd: acp: add DMI override for ACP70 flag
Some ASUS ProArt PX13 systems expose ACP ACPI config flags that can
select a non-working fallback path.

Add a DMI override in snd_amd_acp_find_config() for ACP70+ boards and
return 0 so ACP ACPI flag-based selection is skipped on this platform.

This keeps machine driver selection on the intended SoundWire path.

Signed-off-by: Hasun Park <hasunpark@gmail.com>
Link: https://patch.msgid.link/20260308151654.29059-2-hasunpark@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 00:14:34 +00:00
Guangshuo Li
fe757092d2 ASoC: sma1307: fix double free of devm_kzalloc() memory
A previous change added NULL checks and cleanup for allocation
failures in sma1307_setting_loaded().

However, the cleanup for mode_set entries is wrong. Those entries are
allocated with devm_kzalloc(), so they are device-managed resources and
must not be freed with kfree(). Manually freeing them in the error path
can lead to a double free when devres later releases the same memory.

Drop the manual kfree() loop and let devres handle the cleanup.

Fixes: 0ec6bd1670 ("ASoC: sma1307: Add NULL check in sma1307_setting_loaded()")
Cc: stable@vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
Link: https://patch.msgid.link/20260313040611.391479-1-lgs201920130244@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-16 00:05:44 +00:00
Felix Gu
b8db955299 spi: amlogic-spisg: Fix memory leak in aml_spisg_probe()
In aml_spisg_probe(), ctlr is allocated by
spi_alloc_target()/spi_alloc_host(), but fails to call
spi_controller_put() in several error paths. This leads
to a memory leak whenever the driver fails to probe after
the initial allocation.

Convert to use devm_spi_alloc_host()/devm_spi_alloc_target()
to fix the memory leak.

Fixes: cef9991e04 ("spi: Add Amlogic SPISG driver")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260308-spisg-v1-1-2cace5cafc24@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-15 23:58:47 +00:00
Felix Gu
a00da54d06 spi: amlogic: spifc-a4: Remove redundant clock cleanup
The driver uses devm_clk_get_enabled() which enables the clock and
registers a callback to automatically disable it when the device
is unbound.

Remove the redundant aml_sfc_disable_clk() call in the error paths
and remove callback.

Fixes: 4670db6f32 ("spi: amlogic: add driver for Amlogic SPI Flash Controller")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260308-spifc-a4-1-v1-1-77e286c26832@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-15 23:57:53 +00:00
Linus Torvalds
f338e77383 Linux 7.0-rc4 2026-03-15 13:52:05 -07:00
Linus Torvalds
5c2fe8d11a Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI fixes from James Bottomley:
 "The one core change is a re-roll of the tag allocation fix from the
  last pull request that uses the correct goto to unroll all the
  allocations. The remianing fixes are all small ones in drivers"

* tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi:
  scsi: hisi_sas: Fix NULL pointer exception during user_scan()
  scsi: qla2xxx: Completely fix fcport double free
  scsi: ufs: core: Fix SError in ufshcd_rtc_work() during UFS suspend
  scsi: core: Fix error handling for scsi_alloc_sdev()
2026-03-15 13:15:39 -07:00
Linus Torvalds
d9bf296c39 Merge tag 'probes-fixes-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull probes fixes from Masami Hiramatsu:

 - Avoid crash when rmmod/insmod after ftrace killed

   This fixes a kernel crash caused by kprobes on the symbol in a module
   which is unloaded after ftrace_kill() is called.

 - Remove unneeded warnings from __arm_kprobe_ftrace()

   Remove unneeded WARN messages which can be triggered if the kprobe is
   using ftrace and it fails to enable the ftrace. Since kprobes
   correctly handle such failure, we don't need to warn it.

* tag 'probes-fixes-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  kprobes: Remove unneeded warnings from __arm_kprobe_ftrace()
  kprobes: avoid crash when rmmod/insmod after ftrace killed
2026-03-15 13:08:05 -07:00
Linus Torvalds
62cda74c79 Merge tag 'bootconfig-fixes-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull bootconfig fixes from Masami Hiramatsu:

 - fix off-by-one in xbc_verify_tree() unclosed brace error. This fixes
   a wrong error place in unclosed brace error message

 - check bounds before writing in __xbc_open_brace(). This fixes to
   check the array index before setting array, so that the bootconfig
   can support 16th-depth nested brace correctly

 - fix snprintf truncation check in xbc_node_compose_key_after(). This
   fixes to handle the return value of snprintf() correctly in case of
   the return value == size

 - Add bootconfig tests about braces Add test cases for checking error
   position about unclosed brace and ensuring supporting 16th depth
   nested braces correctly

* tag 'bootconfig-fixes-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  bootconfig: Add bootconfig tests about braces
  lib/bootconfig: fix snprintf truncation check in xbc_node_compose_key_after()
  lib/bootconfig: check bounds before writing in __xbc_open_brace()
  lib/bootconfig: fix off-by-one in xbc_verify_tree() unclosed brace error
2026-03-15 12:50:05 -07:00
Linus Torvalds
11e8c7e947 Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "Quite a large pull request, partly due to skipping last week and
  therefore having material from ~all submaintainers in this one. About
  a fourth of it is a new selftest, and a couple more changes are large
  in number of files touched (fixing a -Wflex-array-member-not-at-end
  compiler warning) or lines changed (reformatting of a table in the API
  documentation, thanks rST).

  But who am I kidding---it's a lot of commits and there are a lot of
  bugs being fixed here, some of them on the nastier side like the
  RISC-V ones.

  ARM:

   - Correctly handle deactivation of interrupts that were activated
     from LRs. Since EOIcount only denotes deactivation of interrupts
     that are not present in an LR, start EOIcount deactivation walk
     *after* the last irq that made it into an LR

   - Avoid calling into the stubs to probe for ICH_VTR_EL2.TDS when pKVM
     is already enabled -- not only thhis isn't possible (pKVM will
     reject the call), but it is also useless: this can only happen for
     a CPU that has already booted once, and the capability will not
     change

   - Fix a couple of low-severity bugs in our S2 fault handling path,
     affecting the recently introduced LS64 handling and the even more
     esoteric handling of hwpoison in a nested context

   - Address yet another syzkaller finding in the vgic initialisation,
     where we would end-up destroying an uninitialised vgic with nasty
     consequences

   - Address an annoying case of pKVM failing to boot when some of the
     memblock regions that the host is faulting in are not page-aligned

   - Inject some sanity in the NV stage-2 walker by checking the limits
     against the advertised PA size, and correctly report the resulting
     faults

  PPC:

   - Fix a PPC e500 build error due to a long-standing wart that was
     exposed by the recent conversion to kmalloc_obj(); rip out all the
     ugliness that led to the wart

  RISC-V:

   - Prevent speculative out-of-bounds access using array_index_nospec()
     in APLIC interrupt handling, ONE_REG regiser access, AIA CSR
     access, float register access, and PMU counter access

   - Fix potential use-after-free issues in kvm_riscv_gstage_get_leaf(),
     kvm_riscv_aia_aplic_has_attr(), and kvm_riscv_aia_imsic_has_attr()

   - Fix potential null pointer dereference in
     kvm_riscv_vcpu_aia_rmw_topei()

   - Fix off-by-one array access in SBI PMU

   - Skip THP support check during dirty logging

   - Fix error code returned for Smstateen and Ssaia ONE_REG interface

   - Check host Ssaia extension when creating AIA irqchip

  x86:

   - Fix cases where CPUID mitigation features were incorrectly marked
     as available whenever the kernel used scattered feature words for
     them

   - Validate _all_ GVAs, rather than just the first GVA, when
     processing a range of GVAs for Hyper-V's TLB flush hypercalls

   - Fix a brown paper bug in add_atomic_switch_msr()

   - Use hlist_for_each_entry_srcu() when traversing mask_notifier_list,
     to fix a lockdep warning; KVM doesn't hold RCU, just irq_srcu

   - Ensure AVIC VMCB fields are initialized if the VM has an in-kernel
     local APIC (and AVIC is enabled at the module level)

   - Update CR8 write interception when AVIC is (de)activated, to fix a
     bug where the guest can run in perpetuity with the CR8 intercept
     enabled

   - Add a quirk to skip the consistency check on FREEZE_IN_SMM, i.e. to
     allow L1 hypervisors to set FREEZE_IN_SMM. This reverts (by
     default) an unintentional tightening of userspace ABI in 6.17, and
     provides some amount of backwards compatibility with hypervisors
     who want to freeze PMCs on VM-Entry

   - Validate the VMCS/VMCB on return to a nested guest from SMM,
     because either userspace or the guest could stash invalid values in
     memory and trigger the processor's consistency checks

  Generic:

   - Remove a subtle pseudo-overlay of kvm_stats_desc, which, aside from
     being unnecessary and confusing, triggered compiler warnings due to
     -Wflex-array-member-not-at-end

   - Document that vcpu->mutex is take outside of kvm->slots_lock and
     kvm->slots_arch_lock, which is intentional and desirable despite
     being rather unintuitive

  Selftests:

   - Increase the maximum number of NUMA nodes in the guest_memfd
     selftest to 64 (from 8)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (43 commits)
  KVM: selftests: Verify SEV+ guests can read and write EFER, CR0, CR4, and CR8
  Documentation: kvm: fix formatting of the quirks table
  KVM: x86: clarify leave_smm() return value
  selftests: kvm: add a test that VMX validates controls on RSM
  selftests: kvm: extract common functionality out of smm_test.c
  KVM: SVM: check validity of VMCB controls when returning from SMM
  KVM: VMX: check validity of VMCS controls when returning from SMM
  KVM: SVM: Set/clear CR8 write interception when AVIC is (de)activated
  KVM: SVM: Initialize AVIC VMCB fields if AVIC is enabled with in-kernel APIC
  KVM: x86: Introduce KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM
  KVM: x86: Fix SRCU list traversal in kvm_fire_mask_notifiers()
  KVM: VMX: Fix a wrong MSR update in add_atomic_switch_msr()
  KVM: x86: hyper-v: Validate all GVAs during PV TLB flush
  KVM: x86: synthesize CPUID bits only if CPU capability is set
  KVM: PPC: e500: Rip out "struct tlbe_ref"
  KVM: PPC: e500: Fix build error due to using kmalloc_obj() with wrong type
  KVM: selftests: Increase 'maxnode' for guest_memfd tests
  KVM: arm64: pkvm: Don't reprobe for ICH_VTR_EL2.TDS on CPU hotplug
  KVM: arm64: vgic: Pick EOIcount deactivations from AP-list tail
  KVM: arm64: Remove the redundant ISB in __kvm_at_s1e2()
  ...
2026-03-15 12:22:10 -07:00
Linus Torvalds
4f3df2e5ea Merge tag 'powerpc-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Madhavan Srinivasan:

 - Fix KUAP warning in VMX usercopy path

 - Fix lockdep warning during PCI enumeration

 - Fix to move CMA reservations to arch_mm_preinit

 - Fix to check current->mm is alive before getting user callchain

Thanks to Aboorva Devarajan, Christophe Leroy (CS GROUP), Dan Horák,
Nicolin Chen, Nilay Shroff, Qiao Zhao, Ritesh Harjani (IBM), Saket Kumar
Bhaskar, Sayali Patil, Shrikanth Hegde, Venkat Rao Bagalkote, and Viktor
Malik.

* tag 'powerpc-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/iommu: fix lockdep warning during PCI enumeration
  powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx
  powerpc: fix KUAP warning in VMX usercopy path
  powerpc, perf: Check that current->mm is alive before getting user callchain
  powerpc/mem: Move CMA reservations to arch_mm_preinit
2026-03-15 11:36:11 -07:00
Linus Torvalds
13af67f599 Merge tag 'x86-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fix from Ingo Molnar:
 "Work around S2RAM hang if the firmware unexpectedly re-enables the
  x2apic hardware while it was disabled by the kernel.

  Force-disable it again and issue a warning into the syslog"

* tag 'x86-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic: Disable x2apic on resume if the kernel expects so
2026-03-15 11:26:36 -07:00
Linus Torvalds
164cb546e9 Merge tag 'timers-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fix from Ingo Molnar:
 "Fix function tracer recursion bug by marking jiffies_64_to_clock_t()
  notrace"

* tag 'timers-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  time/jiffies: Mark jiffies_64_to_clock_t() notrace
2026-03-15 11:14:09 -07:00
Linus Torvalds
63724e9519 Merge tag 'sched-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
 "More MM-CID fixes, mostly fixing hangs/races:

   - Fix CID hangs due to a race between concurrent forks

   - Fix vfork()/CLONE_VM MMCID bug causing hangs

   - Remove pointless preemption guard

   - Fix CID task list walk performance regression on large systems
     by removing the known-flaky and slow counting logic using
     for_each_process_thread() in mm_cid_*fixup_tasks_to_cpus(), and
     implementing a simple sched_mm_cid::node list instead"

* tag 'sched-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/mmcid: Avoid full tasklist walks
  sched/mmcid: Remove pointless preempt guard
  sched/mmcid: Handle vfork()/CLONE_VM correctly
  sched/mmcid: Prevent CID stalls due to concurrent forks
2026-03-15 10:49:47 -07:00
Linus Torvalds
9745031130 Merge tag 'objtool-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull objtool fixes from Ingo Molnar:

 - Fix cross-build bug by using HOSTCFLAGS for HAVE_XXHASH test

 - Fix klp bug by fixing detection of corrupt static branch/call entries

 - Handle unsupported pr_debug() usage more gracefully

 - Fix hypothetical klp bug by avoiding NULL pointer dereference when
   printing code symbol name

 - Fix data alignment bug in elf_add_data() causing mangled strings

 - Fix confusing ERROR_INSN() error message

 - Handle unexpected Clang RSP musical chairs causing false positive
   warnings

 - Fix another objtool stack overflow in validate_branch()

* tag 'objtool-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  objtool: Fix another stack overflow in validate_branch()
  objtool: Handle Clang RSP musical chairs
  objtool: Fix ERROR_INSN() error message
  objtool: Fix data alignment in elf_add_data()
  objtool: Use HOSTCFLAGS for HAVE_XXHASH test
  objtool/klp: Avoid NULL pointer dereference when printing code symbol name
  objtool/klp: Disable unsupported pr_debug() usage
  objtool/klp: Fix detection of corrupt static branch/call entries
2026-03-15 10:36:01 -07:00
Linus Torvalds
be2e3750ce Merge tag 'irq-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq fixes from Ingo Molnar:
 "Two fixes for the riscv-aplic irqchip driver:

   - Fix probing dependency bug on probing failure

   - Fix double register_syscore() bug"

* tag 'irq-urgent-2026-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  irqchip/riscv-aplic: Register syscore operations only once
  irqchip/riscv-aplic: Do not clear ACPI dependencies on probe failure
2026-03-15 10:32:57 -07:00
Marc Zyngier
1744a6ef48 KVM: arm64: Discard PC update state on vcpu reset
Our vcpu reset suffers from a particularly interesting flaw, as it
does not correctly deal with state that will have an effect on the
execution flow out of reset.

Take the following completely random example, never seen in the wild
and that never resulted in a couple of sleepless nights: /s

- vcpu-A issues a PSCI_CPU_OFF using the SMC conduit

- SMC being a trapped instruction (as opposed to HVC which is always
  normally executed), we annotate the vcpu as needing to skip the
  next instruction, which is the SMC itself

- vcpu-A is now safely off

- vcpu-B issues a PSCI_CPU_ON for vcpu-A, providing a starting PC

- vcpu-A gets reset, get the new PC, and is sent on its merry way

- right at the point of entering the guest, we notice that a PC
  increment is pending (remember the earlier SMC?)

- vcpu-A skips its first instruction...

What could possibly go wrong?

Well, I'm glad you asked. For pKVM as a NV guest, that first instruction
is extremely significant, as it indicates whether the CPU is booting
or resuming. Having skipped that instruction, nothing makes any sense
anymore, and CPU hotplugging fails.

This is all caused by the decoupling of PC update from the handling
of an exception that triggers such update, making it non-obvious
what affects what when.

Fix this train wreck by discarding all the PC-affecting state on
vcpu reset.

Fixes: f5e3068061 ("KVM: arm64: Move __adjust_pc out of line")
Cc: stable@vger.kernel.org
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Link: https://patch.msgid.link/20260312140850.822968-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-15 15:11:22 +00:00
Andrii Kovalchuk
793b008cd3 ALSA: hda/realtek: Add HP ENVY Laptop 13-ba0xxx quirk
Add a PCI quirk for HP ENVY Laptop 13-ba0xxx (PCI device ID 0x8756)
to enable proper mute LED and mic mute behavior using the
ALC245_FIXUP_HP_X360_MUTE_LEDS fixup.

Signed-off-by: Andrii Kovalchuk <coderpy4@proton.me>
Link: https://patch.msgid.link/u0s-uRVegF9BN0t-4JnOUwsIAR-mVc4U4FJfJHdEHX7ro_laErHD9y35NebWybcN16gVaVHPJo1ap3AoJ1a2gqJImPvThgeNt_SYVY1KaDw=@proton.me
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-15 10:28:39 +01:00
Helge Deller
2c98a8fbd6 parisc: Flush correct cache in cacheflush() syscall
The assembly flush instructions were swapped for I- and D-cache flags:

SYSCALL_DEFINE3(cacheflush, ...)
{
	if (cache & DCACHE) {
			"fic ...\n"
	}
	if (cache & ICACHE && error == 0) {
			"fdc ...\n"
	}

Fix it by using fdc for DCACHE, and fic for ICACHE flushing.

Reported-by: Felix Lechner <felix.lechner@lease-up.com>
Fixes: c6d96328fe ("parisc: Add cacheflush() syscall")
Cc: <stable@vger.kernel.org> # v6.5+
Signed-off-by: Helge Deller <deller@gmx.de>
2026-03-15 09:28:49 +01:00
Linus Torvalds
9a48d4a130 Merge tag 'i3c/fixes-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux
Pull i3c fixes from Alexandre Belloni:
 "This introduces the I3C_OR_I2C symbol which is not a fix per se but is
  affecting multiple subsystems so it is included to ease
  synchronization.

  Apart from that, Adrian is mostly fixing the mipi-i3c-hci driver DMA
  handling, and I took the opportunity to add two fixes for the dw-i3c
  driver.

  Subsystem:
   - simplify combined i3c/i2c dependencies

  Drivers:
   - dw: handle 2C properly, fix possible race condition
   - mipi-i3c-hci: many DMA related fixes"

* tag 'i3c/fixes-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux:
  i3c: dw-i3c-master: Set SIR_REJECT in DAT on device attach and reattach
  i3c: master: dw-i3c: Fix missing of_node for virtual I2C adapter
  i3c: mipi-i3c-hci: Fallback to software reset when bus disable fails
  i3c: mipi-i3c-hci: Fix handling of shared IRQs during early initialization
  i3c: mipi-i3c-hci: Fix race in DMA error handling in interrupt context
  i3c: mipi-i3c-hci: Consolidate common xfer processing logic
  i3c: mipi-i3c-hci: Restart DMA ring correctly after dequeue abort
  i3c: mipi-i3c-hci: Add missing TID field to no-op command descriptor
  i3c: mipi-i3c-hci: Correct RING_CTRL_ABORT handling in DMA dequeue
  i3c: mipi-i3c-hci: Fix race between DMA ring dequeue and interrupt handler
  i3c: mipi-i3c-hci: Fix race in DMA ring dequeue
  i3c: mipi-i3c-hci: Fix race in DMA ring enqueue for parallel xfers
  i3c: mipi-i3c-hci: Consolidate spinlocks
  i3c: mipi-i3c-hci: Factor out DMA mapping from queuing path
  i3c: mipi-i3c-hci: Fix Hot-Join NACK
  i3c: mipi-i3c-hci: Use ETIMEDOUT instead of ETIME for timeout errors
  i3c: simplify combined i3c/i2c dependencies
2026-03-14 16:25:10 -07:00
Linus Torvalds
f26de90c68 Merge tag 'i2c-for-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
Pull i2c fix from Wolfram Sang:
 "Designware DT binding maintainer update"

* tag 'i2c-for-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux:
  dt-bindings: i2c: dw: Update maintainer
2026-03-14 16:15:49 -07:00
Linus Torvalds
267594792a Merge tag 'rust-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ojeda/linux
Pull Rust fixes from Miguel Ojeda:
 "Toolchain and infrastructure:

   - Remap paths to avoid absolute ones starting with the upcoming Rust
     1.95.0 release. This improves build reproducibility, avoids leaking
     the exact path and avoids having the same path appear in two forms

     The approach here avoids remapping debug information as well, in
     order to avoid breaking tools that used the paths to access source
     files, which was the previous attempt that needed to be reverted

   - Allow 'unused_features' lint for the upcoming Rust 1.96.0 release.
     While well-intentioned, we do not benefit much from the new lint

   - Emit dependency information into '$(depfile)' directly to avoid a
     temporary '.d' file (it was an old approach)

  'kernel' crate:

   - 'str' module: fix warning under '!CONFIG_BLOCK' by making
     'NullTerminatedFormatter' public

   - 'cpufreq' module: suppress false positive Clippy warning

  'pin-init' crate:

   - Remove '#[disable_initialized_field_access]' attribute which was
     unsound. This means removing the support for structs with unaligned
     fields (through the 'repr(packed)' attribute), for now

     And document the load-bearing fact of field accessors (i.e. that
     they are required for soundness)

   - Replace shadowed return token by 'unsafe'-to-create token in order
     to remain sound in the face of the likely upcoming Type Alias Impl
     Trait (TAIT) and the next trait solver in upstream Rust"

* tag 'rust-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ojeda/linux:
  rust: kbuild: allow `unused_features`
  rust: cpufreq: suppress clippy::double_parens in Policy doctest
  rust: pin-init: replace shadowed return token by `unsafe`-to-create token
  rust: pin-init: internal: init: document load-bearing fact of field accessors
  rust: pin-init: internal: init: remove `#[disable_initialized_field_access]`
  rust: build: remap path to avoid absolute path
  rust: kbuild: emit dep-info into $(depfile) directly
  rust: str: make NullTerminatedFormatter public
2026-03-14 12:35:16 -07:00
Jakub Kicinski
43d222fbcd Merge branch 'net-macb-fix-ethernet-malfunction-on-amd-versal-board-after-suspend'
Kevin Hao says:

====================
net: macb: Fix Ethernet malfunction on AMD Versal board after suspend

On Versal boards, the tx/rx queue pointer registers are cleared after suspend,
which causes Ethernet malfunction. This patch series addresses this issue by
reinitializing the tx/rx queue pointer registers and the rx ring.
====================

Link: https://patch.msgid.link/20260312-macb-versal-v1-0-467647173fa4@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 12:19:49 -07:00
Kevin Hao
718d0766ce net: macb: Reinitialize tx/rx queue pointer registers and rx ring during resume
On certain platforms, such as AMD Versal boards, the tx/rx queue pointer
registers are cleared after suspend, and the rx queue pointer register
is also disabled during suspend if WOL is enabled. Previously, we assumed
that these registers would be restored by macb_mac_link_up(). However,
in commit bf9cf80cab, macb_init_buffers() was moved from
macb_mac_link_up() to macb_open(). Therefore, we should call
macb_init_buffers() to reinitialize the tx/rx queue pointer registers
during resume.

Due to the reset of these two registers, we also need to adjust the
tx/rx rings accordingly. The tx ring will be handled by
gem_shuffle_tx_rings() in macb_mac_link_up(), so we only need to
initialize the rx ring here.

Fixes: bf9cf80cab ("net: macb: Fix tx/rx malfunction after phy link down and up")
Reported-by: Quanyang Wang <quanyang.wang@windriver.com>
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Tested-by: Quanyang Wang <quanyang.wang@windriver.com>
Cc: stable@vger.kernel.org
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260312-macb-versal-v1-2-467647173fa4@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 12:19:47 -07:00
Kevin Hao
1a7124ecd6 net: macb: Introduce gem_init_rx_ring()
Extract the initialization code for the GEM RX ring into a new function.
This change will be utilized in a subsequent patch. No functional changes
are introduced.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260312-macb-versal-v1-1-467647173fa4@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 12:19:47 -07:00
Meghana Malladi
719d3e7169 net: ti: icssg-prueth: Fix memory leak in XDP_DROP for non-zero-copy mode
Page recycling was removed from the XDP_DROP path in emac_run_xdp() to
avoid conflicts with AF_XDP zero-copy mode, which uses xsk_buff_free()
instead.

However, this causes a memory leak when running XDP programs that drop
packets in non-zero-copy mode (standard page pool mode). The pages are
never returned to the page pool, leading to OOM conditions.

Fix this by handling cleanup in the caller, emac_rx_packet().
When emac_run_xdp() returns ICSSG_XDP_CONSUMED for XDP_DROP, the
caller now recycles the page back to the page pool. The zero-copy
path, emac_rx_packet_zc() already handles cleanup correctly with
xsk_buff_free().

Fixes: 7a64bb388d ("net: ti: icssg-prueth: Add AF_XDP zero copy for RX")
Signed-off-by: Meghana Malladi <m-malladi@ti.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260311095441.1691636-1-m-malladi@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 12:14:44 -07:00
AlanSong-oc
ebba09f198 crypto: padlock-sha - Disable for Zhaoxin processor
For Zhaoxin processors, the XSHA1 instruction requires the total memory
allocated at %rdi register must be 32 bytes, while the XSHA1 and
XSHA256 instruction doesn't perform any operation when %ecx is zero.

Due to these requirements, the current padlock-sha driver does not work
correctly with Zhaoxin processors. It cannot pass the self-tests and
therefore does not activate the driver on Zhaoxin processors. This issue
has been reported in Debian [1]. The self-tests fail with the
following messages [2]:

alg: shash: sha1-padlock-nano test failed (wrong result) on test vector 0, cfg="init+update+final aligned buffer"
alg: self-tests for sha1 using sha1-padlock-nano failed (rc=-22)

alg: shash: sha256-padlock-nano test failed (wrong result) on test vector 0, cfg="init+update+final aligned buffer"
alg: self-tests for sha256 using sha256-padlock-nano failed (rc=-22)

Disable the padlock-sha driver on Zhaoxin processors with the CPU family
0x07 and newer. Following the suggestion in [3], support for PHE will be
added to lib/crypto/ instead.

[1] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1113996
[2] https://linux-hardware.org/?probe=271fabb7a4&log=dmesg
[3] https://lore.kernel.org/linux-crypto/aUI4CGp6kK7mxgEr@gondor.apana.org.au/

Fixes: 63dc06cd12 ("crypto: padlock-sha - Use API partial block handling")
Cc: stable@vger.kernel.org
Signed-off-by: AlanSong-oc <AlanSong-oc@zhaoxin.com>
Link: https://lore.kernel.org/r/20260313080150.9393-2-AlanSong-oc@zhaoxin.com
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-14 11:34:06 -07:00
Dipayaan Roy
fa103fc8f5 net: mana: fix use-after-free in mana_hwc_destroy_channel() by reordering teardown
A potential race condition exists in mana_hwc_destroy_channel() where
hwc->caller_ctx is freed before the HWC's Completion Queue (CQ) and
Event Queue (EQ) are destroyed. This allows an in-flight CQ interrupt
handler to dereference freed memory, leading to a use-after-free or
NULL pointer dereference in mana_hwc_handle_resp().

mana_smc_teardown_hwc() signals the hardware to stop but does not
synchronize against IRQ handlers already executing on other CPUs. The
IRQ synchronization only happens in mana_hwc_destroy_cq() via
mana_gd_destroy_eq() -> mana_gd_deregister_irq(). Since this runs
after kfree(hwc->caller_ctx), a concurrent mana_hwc_rx_event_handler()
can dereference freed caller_ctx (and rxq->msg_buf) in
mana_hwc_handle_resp().

Fix this by reordering teardown to reverse-of-creation order: destroy
the TX/RX work queues and CQ/EQ before freeing hwc->caller_ctx. This
ensures all in-flight interrupt handlers complete before the memory they
access is freed.

Fixes: ca9c54d2d6 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/abHA3AjNtqa1nx9k@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 10:40:50 -07:00
Linus Torvalds
69237f8c1f Merge tag 'usb-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
Pull USB fixes from Greg KH:
 "Here is a large chunk of USB driver fixes for 7.0-rc4. Included in
  here are:

   - usb gadget reverts due to reported issues, and then a follow-on fix
     to hopefully resolve the reported overall problem

   - xhci driver fixes

   - dwc3 driver fixes

   - usb core "killable" bulk message api addition to fix a usbtmc
     driver bug where userspace could hang the driver for forever

   - small USB driver fixes for reported issues

   - new usb device quirks

  All except the last USB device quirk change have been in linux-next
  with no reported issues. That one came in too late, and is 'obviously
  correct' :)"

* tag 'usb-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb: (35 commits)
  USB: ezcap401 needs USB_QUIRK_NO_BOS to function on 10gbs usb speed
  usb: roles: get usb role switch from parent only for usb-b-connector
  Revert "tcpm: allow looking for role_sw device in the main node"
  usb: gadget: f_ncm: Fix net_device lifecycle with device_move
  Revert "usb: gadget: u_ether: add gether_opts for config caching"
  Revert "usb: gadget: u_ether: use <linux/hex.h> header file"
  Revert "usb: gadget: u_ether: Add auto-cleanup helper for freeing net_device"
  Revert "usb: gadget: f_ncm: align net_device lifecycle with bind/unbind"
  Revert "usb: legacy: ncm: Fix NPE in gncm_bind"
  Revert "usb: gadget: f_ncm: Fix atomic context locking issue"
  usb: typec: altmode/displayport: set displayport signaling rate in configure message
  usb: dwc3: pci: add support for the Intel Nova Lake -H
  usb/core/quirks: Add Huawei ME906S-device to wakeup quirk
  usb: gadget: uvc: fix interval_duration calculation
  xhci: Fix NULL pointer dereference when reading portli debugfs files
  usb: xhci: Prevent interrupt storm on host controller error (HCE)
  usb: xhci: Fix memory leak in xhci_disable_slot()
  usb: class: cdc-wdm: fix reordering issue in read code path
  usb: renesas_usbhs: fix use-after-free in ISR during device removal
  usb: cdc-acm: Restore CAP_BRK functionnality to CH343
  ...
2026-03-14 09:43:12 -07:00
Justin Chen
6cfc3bc02b net: bcmgenet: increase WoL poll timeout
Some systems require more than 5ms to get into WoL mode. Increase the
timeout value to 50ms.

Fixes: c51de7f397 ("net: bcmgenet: add Wake-on-LAN support code")
Signed-off-by: Justin Chen <justin.chen@broadcom.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20260312191852.3904571-1-justin.chen@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 09:39:17 -07:00
Linus Torvalds
5c75125672 Merge tag 'char-misc-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char / misc / IIO driver fixes from Greg KH:
 "Here are some char/misc/iio/binder fixes for 7.0-rc4. Nothing major in
  here, just the usual:

   - lots of iio driver fixes for reported issues

   - rust binder fixes for problems found

   - gpib driver binding to the wrong device fix

   - firmware driver fix

  All of these have been in linux-next with no reported issues"

* tag 'char-misc-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (28 commits)
  gpib: lpvo_usb: fix unintended binding of FTDI 8U232AM devices
  firmware: stratix10-svc: Add Multi SVC clients support
  rust_binder: use lock_vma_under_rcu() in use_page_slow()
  rust_binder: call set_notification_done() without proc lock
  rust_binder: avoid reading the written value in offsets array
  rust_binder: check ownership before using vma
  rust_binder: fix oneway spam detection
  firmware: stratix10-rsu: Fix NULL pointer dereference when RSU is disabled
  iio: imu: adis: Fix NULL pointer dereference in adis_init
  iio: imu: inv_icm45600: fix regulator put warning when probe fails
  iio: buffer: Fix wait_queue not being removed
  iio: gyro: mpu3050-core: fix pm_runtime error handling
  iio: gyro: mpu3050-i2c: fix pm_runtime error handling
  iio: adc: ad7768-1: Fix ERR_PTR dereference in ad7768_fill_scale_tbl
  iio: chemical: sps30_serial: fix buffer size in sps30_serial_read_meas()
  iio: chemical: sps30_i2c: fix buffer size in sps30_i2c_read_meas()
  iio: magnetometer: tlv493d: remove erroneous shift in X-axis data
  iio: proximity: hx9023s: Protect against division by zero in set_samp_freq
  iio: proximity: hx9023s: fix assignment order for __counted_by
  iio: chemical: bme680: Fix measurement wait duration calculation
  ...
2026-03-14 09:38:49 -07:00
Linus Torvalds
4dad25aa60 Merge tag 'staging-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
Pull staging driver fixes from Greg KH:
 "Here are three small staging driver fixes for 7.0-rc4 that resolve
  some reported problems. They are:

   - two rtl8723bs data validation bugfixes

   - sm750fb removal path bugfix

  All of these have been in linux-next for many weeks with no reported
  issues"

* tag 'staging-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging:
  staging: rtl8723bs: fix potential out-of-bounds read in rtw_restruct_wmm_ie
  staging: rtl8723bs: properly validate the data in rtw_get_ie_ex()
  staging: sm750fb: add missing pci_release_region on error and removal
2026-03-14 09:33:58 -07:00
Jakub Kicinski
eeeff8dda6 Merge tag 'nf-26-03-13' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Florian Westphal says:

====================
netfilter: updates for net

This is a much earlier pull request than usual, due to the large
backlog.  We are aware of several unfixed issues, in particular
in ctnetlink, patches are being worked on.

The following patchset contains Netfilter fixes for *net*:

1) fix a use-after-free in ctnetlink, from Hyunwoo Kim, broken
   since v3.10.
2) add missing netlink range checks in ctnetlink, broken since v2.6
   days.
3) fix content length truncation in sip conntrack helper,
   from Lukas Johannes Möller.  Broken since 2.6.34.
4) Revert a recent patch to add stronger checks for overlapping ranges
   in nf_tables rbtree set type.
   Patch is correct, but several nftables version have a bug (now fixed)
   that trigger the checks incorrectly.
5) Reset mac header before the vlan push to avoid warning splat (and
   make things functional). From Eric Woudstra.
6) Add missing bounds check in H323 conntrack helper, broken since this
   helper was added 20 years ago, from Jenny Guanni Qu.
7) Fix a memory leak in the dynamic set infrastructure, from Pablo Neira
   Ayuso.  Broken since v5.11.
8+9) a few spots failed to purge skbs queued to userspace via nfqueue,
   this causes RCU escape / use-after-free. Also from Pablo. broken
   since v3.4 added the CT target to xtables.
10) Fix undefined behaviour in xt_time, use u32 for a shift-by-31
    operation, not s32, from Jenny Guanni Qu.
11) H323 conntrack helper lacks a check for length variable becoming
    negative after decrement, causes major out-of-bounds read due to
    cast to unsigned size later, also from Jenny.
    Both issues exist since 2.6 days.

* tag 'nf-26-03-13' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_conntrack_h323: check for zero length in DecodeQ931()
  netfilter: xt_time: use unsigned int for monthday bit shift
  netfilter: xt_CT: drop pending enqueued packets on template removal
  netfilter: nft_ct: drop pending enqueued packets on removal
  nf_tables: nft_dynset: fix possible stateful expression memleak in error path
  netfilter: nf_conntrack_h323: fix OOB read in decode_int() CONS case
  netfilter: nf_flow_table_ip: reset mac header before vlan push
  netfilter: revert nft_set_rbtree: validate open interval overlap
  netfilter: nf_conntrack_sip: fix Content-Length u32 truncation in sip_help_tcp()
  netfilter: conntrack: add missing netlink policy validations
  netfilter: ctnetlink: fix use-after-free in ctnetlink_dump_exp_ct()
====================

Link: https://patch.msgid.link/20260313150614.21177-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 09:13:58 -07:00
Jakub Kicinski
74c1e2737b Merge tag 'for-net-2026-03-12' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
Luiz Augusto von Dentz says:

====================
bluetooth pull request for net:

 - hci_sync: Fix hci_le_create_conn_sync
 - MGMT: Fix list corruption and UAF in command complete handlers
 - L2CAP: Disconnect if received packet's SDU exceeds IMTU
 - L2CAP: Disconnect if sum of payload sizes exceed SDU
 - L2CAP: Fix accepting multiple L2CAP_ECRED_CONN_REQ
 - L2CAP: Fix type confusion in l2cap_ecred_reconf_rsp()
 - L2CAP: Validate L2CAP_INFO_RSP payload length before access
 - L2CAP: Fix use-after-free in l2cap_unregister_user
 - ISO: Fix defer tests being unstable
 - HIDP: Fix possible UAF
 - SMP: make SM/PER/KDU/BI-04-C happy
 - qca: fix ROM version reading on WCN3998 chips

* tag 'for-net-2026-03-12' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth:
  Bluetooth: qca: fix ROM version reading on WCN3998 chips
  Bluetooth: L2CAP: Validate L2CAP_INFO_RSP payload length before access
  Bluetooth: L2CAP: Fix type confusion in l2cap_ecred_reconf_rsp()
  Bluetooth: L2CAP: Fix accepting multiple L2CAP_ECRED_CONN_REQ
  Bluetooth: L2CAP: Fix use-after-free in l2cap_unregister_user
  Bluetooth: HIDP: Fix possible UAF
  Bluetooth: MGMT: Fix list corruption and UAF in command complete handlers
  Bluetooth: hci_sync: Fix hci_le_create_conn_sync
  Bluetooth: ISO: Fix defer tests being unstable
  Bluetooth: SMP: make SM/PER/KDU/BI-04-C happy
  Bluetooth: LE L2CAP: Disconnect if sum of payload sizes exceed SDU
  Bluetooth: LE L2CAP: Disconnect if received packet's SDU exceeds IMTU
====================

Link: https://patch.msgid.link/20260312200655.1215688-1-luiz.dentz@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 08:39:28 -07:00
Jeff Layton
17ad31b3a4 sunrpc: fix cache_request leak in cache_release
When a reader's file descriptor is closed while in the middle of reading
a cache_request (rp->offset != 0), cache_release() decrements the
request's readers count but never checks whether it should free the
request.

In cache_read(), when readers drops to 0 and CACHE_PENDING is clear, the
cache_request is removed from the queue and freed along with its buffer
and cache_head reference. cache_release() lacks this cleanup.

The only other path that frees requests with readers == 0 is
cache_dequeue(), but it runs only when CACHE_PENDING transitions from
set to clear. If that transition already happened while readers was
still non-zero, cache_dequeue() will have skipped the request, and no
subsequent call will clean it up.

Add the same cleanup logic from cache_read() to cache_release(): after
decrementing readers, check if it reached 0 with CACHE_PENDING clear,
and if so, dequeue and free the cache_request.

Reported-by: NeilBrown <neilb@ownmail.net>
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Cc: stable@kernel.org
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-03-14 11:37:13 -04:00
Chuck Lever
e7fcf179b8 NFSD: Hold net reference for the lifetime of /proc/fs/nfs/exports fd
The /proc/fs/nfs/exports proc entry is created at module init
and persists for the module's lifetime. exports_proc_open()
captures the caller's current network namespace and stores
its svc_export_cache in seq->private, but takes no reference
on the namespace. If the namespace is subsequently torn down
(e.g. container destruction after the opener does setns() to a
different namespace), nfsd_net_exit() calls nfsd_export_shutdown()
which frees the cache. Subsequent reads on the still-open fd
dereference the freed cache_detail, walking a freed hash table.

Hold a reference on the struct net for the lifetime of the open
file descriptor. This prevents nfsd_net_exit() from running --
and thus prevents nfsd_export_shutdown() from freeing the cache
-- while any exports fd is open. cache_detail already stores
its net pointer (cd->net, set by cache_create_net()), so
exports_release() can retrieve it without additional per-file
storage.

Reported-by: Misbah Anjum N <misanjum@linux.ibm.com>
Closes: https://lore.kernel.org/linux-nfs/dcd371d3a95815a84ba7de52cef447b8@linux.ibm.com/
Fixes: 96d851c4d2 ("nfsd: use proper net while reading "exports" file")
Cc: stable@vger.kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Tested-by: Olga Kornievskaia <okorniev@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-03-14 11:34:25 -04:00
Chuck Lever
48db892356 NFSD: Defer sub-object cleanup in export put callbacks
svc_export_put() calls path_put() and auth_domain_put() immediately
when the last reference drops, before the RCU grace period. RCU
readers in e_show() and c_show() access both ex_path (via
seq_path/d_path) and ex_client->name (via seq_escape) without
holding a reference. If cache_clean removes the entry and drops the
last reference concurrently, the sub-objects are freed while still
in use, producing a NULL pointer dereference in d_path.

Commit 2530766492 ("nfsd: fix UAF when access ex_uuid or
ex_stats") moved kfree of ex_uuid and ex_stats into the
call_rcu callback, but left path_put() and auth_domain_put() running
before the grace period because both may sleep and call_rcu
callbacks execute in softirq context.

Replace call_rcu/kfree_rcu with queue_rcu_work(), which defers the
callback until after the RCU grace period and executes it in process
context where sleeping is permitted. This allows path_put() and
auth_domain_put() to be moved into the deferred callback alongside
the other resource releases. Apply the same fix to expkey_put(),
which has the identical pattern with ek_path and ek_client.

A dedicated workqueue scopes the shutdown drain to only NFSD
export release work items; flushing the shared
system_unbound_wq would stall on unrelated work from other
subsystems. nfsd_export_shutdown() uses rcu_barrier() followed
by flush_workqueue() to ensure all deferred release callbacks
complete before the export caches are destroyed.

Reported-by: Misbah Anjum N <misanjum@linux.ibm.com>
Closes: https://lore.kernel.org/linux-nfs/dcd371d3a95815a84ba7de52cef447b8@linux.ibm.com/
Fixes: c224edca7a ("nfsd: no need get cache ref when protected by rcu")
Fixes: 1b10f0b603 ("SUNRPC: no need get cache ref when protected by rcu")
Cc: stable@vger.kernel.org
Reviwed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Tested-by: Olga Kornievskaia <okorniev@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-03-14 11:34:25 -04:00
Deepanshu Kartikey
9228148795 atm: lec: fix use-after-free in sock_def_readable()
A race condition exists between lec_atm_close() setting priv->lecd
to NULL and concurrent access to priv->lecd in send_to_lecd(),
lec_handle_bridge(), and lec_atm_send(). When the socket is freed
via RCU while another thread is still using it, a use-after-free
occurs in sock_def_readable() when accessing the socket's wait queue.

The root cause is that lec_atm_close() clears priv->lecd without
any synchronization, while callers dereference priv->lecd without
any protection against concurrent teardown.

Fix this by converting priv->lecd to an RCU-protected pointer:
- Mark priv->lecd as __rcu in lec.h
- Use rcu_assign_pointer() in lec_atm_close() and lecd_attach()
  for safe pointer assignment
- Use rcu_access_pointer() for NULL checks that do not dereference
  the pointer in lec_start_xmit(), lec_push(), send_to_lecd() and
  lecd_attach()
- Use rcu_read_lock/rcu_dereference/rcu_read_unlock in send_to_lecd(),
  lec_handle_bridge() and lec_atm_send() to safely access lecd
- Use rcu_assign_pointer() followed by synchronize_rcu() in
  lec_atm_close() to ensure all readers have completed before
  proceeding. This is safe since lec_atm_close() is called from
  vcc_release() which holds lock_sock(), a sleeping lock.
- Remove the manual sk_receive_queue drain from lec_atm_close()
  since vcc_destroy_socket() already drains it after lec_atm_close()
  returns.

v2: Switch from spinlock + sock_hold/put approach to RCU to properly
    fix the race. The v1 spinlock approach had two issues pointed out
    by Eric Dumazet:
    1. priv->lecd was still accessed directly after releasing the
       lock instead of using a local copy.
    2. The spinlock did not prevent packets being queued after
       lec_atm_close() drains sk_receive_queue since timer and
       workqueue paths bypass netif_stop_queue().

Note: Syzbot patch testing was attempted but the test VM terminated
    unexpectedly with "Connection to localhost closed by remote host",
    likely due to a QEMU AHCI emulation issue unrelated to this fix.
    Compile testing with "make W=1 net/atm/lec.o" passes cleanly.

Reported-by: syzbot+f50072212ab792c86925@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=f50072212ab792c86925
Link: https://lore.kernel.org/all/20260309093614.502094-1-kartikey406@gmail.com/T/ [v1]
Signed-off-by: Deepanshu Kartikey <kartikey406@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260309155908.508768-1-kartikey406@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-14 08:05:47 -07:00
Matthew Schwartz
59f68dc1d8 ALSA: hda/realtek: Add quirk for ASUS ROG Flow Z13-KJP GZ302EAC
Fixes lack of audio output on the ASUS ROG Flow Z13-KJP GZ302EAC model,
similar to the ASUS ROG Flow Z13 GZ302EA.

Signed-off-by: Matthew Schwartz <matthew.schwartz@linux.dev>
Link: https://patch.msgid.link/20260313172503.285846-1-matthew.schwartz@linux.dev
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-14 14:32:05 +01:00
Zhang Heng
7bae956cac ALSA: hda/realtek: add quirk for Lenovo Yoga 7 2-in-1 16AKP10
This machine is equipped with ALC287 and requires the quirk
ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN to fix the issue
where the bass speakers are not configured and the speaker
volume cannot be controlled.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221210
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260313080624.1395362-1-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-14 14:31:37 +01:00
Nathan Chancellor
9e22e9c4a5 ARM: multi_v7_defconfig: Drop duplicate CONFIG_TI_PRUSS=m
Commit ee1ab82ee0 ("ARM: defconfig: move entries") added a duplicate
instance of CONFIG_TI_PRUSS=m, causing a Kconfig warning:

  arch/arm/configs/multi_v7_defconfig:1152:warning: override: reassigning to symbol TI_PRUSS

Drop the first instance, as the second instance added by the
aforementioned change is where savedefconfig puts it.

Fixes: ee1ab82ee0 ("ARM: defconfig: move entries")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://patch.msgid.link/20260305-arm-defconfig-drop-duplicate-ti-pruss-v1-1-2839e3b42a8b@kernel.org
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
2026-03-14 12:34:25 +01:00
Krzysztof Kozlowski
e2dcc248c3 Merge tag 'renesas-fixes-for-v7.0-tag1' of https://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel into arm/fixes
Renesas fixes for v7.0

  - Fix SD card initialization on the RZ/T2H and RZ/N2H EVK boards,
  - Remove WDT nodes meant for other CPU cores on the RZ/V2H(P) SoC,
  - Fix Clock Pulse Generator registers on the RZ/T2H and RZ/N2H SoCs,
  - Fix Versa3-related boot hangs on the RZ/G3S SoM,
  - Fix Extended SPI interrupts on the R-Car X5H SoC.

* tag 'renesas-fixes-for-v7.0-tag1' of https://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel:
  arm64: dts: renesas: r8a78000: Fix out-of-range SPI interrupt numbers
  arm64: dts: renesas: rzg3s-smarc-som: Set bypass for Versa3 PLL2
  arm64: dts: renesas: r9a09g087: Fix CPG register region sizes
  arm64: dts: renesas: r9a09g077: Fix CPG register region sizes
  arm64: dts: renesas: r9a09g057: Remove wdt{0,2,3} nodes
  arm64: dts: renesas: rzv2-evk-cn15-sd: Add ramp delay for SD0 regulator
  arm64: dts: renesas: rzt2h-n2h-evk: Add ramp delay for SD0 card regulator

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
2026-03-14 12:01:58 +01:00
0d4aef630b batman-adv: avoid OGM aggregation when skb tailroom is insufficient
When OGM aggregation state is toggled at runtime, an existing forwarded
packet may have been allocated with only packet_len bytes, while a later
packet can still be selected for aggregation. Appending in this case can
hit skb_put overflow conditions.

Reject aggregation when the target skb tailroom cannot accommodate the new
packet. The caller then falls back to creating a new forward packet
instead of appending.

Fixes: c6c8fea297 ("net: Add batman-adv meshing protocol")
Cc: stable@vger.kernel.org
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Ao Zhou <n05ec@lzu.edu.cn>
Signed-off-by: Yang Yang <n05ec@lzu.edu.cn>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
2026-03-14 08:29:47 +01:00
Guenter Roeck
5c52607c43 crypto: ccp - Fix leaking the same page twice
Commit 551120148b ("crypto: ccp - Fix a case where SNP_SHUTDOWN is
missed") fixed a case where SNP is left in INIT state if page reclaim
fails. It removes the transition to the INIT state for this command and
adjusts the page state management.

While doing this, it added a call to snp_leak_pages() after a call to
snp_reclaim_pages() failed. Since snp_reclaim_pages() already calls
snp_leak_pages() internally on the pages it fails to reclaim, calling
it again leaks the exact same page twice.

Fix by removing the extra call to snp_leak_pages().

The problem was found by an experimental code review agent based on
gemini-3.1-pro while reviewing backports into v6.18.y.

Assisted-by: Gemini:gemini-3.1-pro
Fixes: 551120148b ("crypto: ccp - Fix a case where SNP_SHUTDOWN is missed")
Cc: Tycho Andersen (AMD) <tycho@kernel.org>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Tycho Andersen (AMD) <tycho@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2026-03-14 14:01:37 +09:00
Linus Walleij
4819c64e61 Merge tag 'renesas-pinctrl-fixes-for-v7.0-tag1' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers into fixes
pinctrl: renesas: Fixes for v7.0

  - Fix device node leaks and invalid wait contexts on RZ/T2H and
    RZ/N2H,
  - Fix GPIO .get() callback on RZ/A1.

Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-03-14 00:37:21 +01:00
Linus Torvalds
1c9982b496 Merge tag 'drm-fixes-2026-03-14' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "The weekly drm fixes. This is mostly msm fixes across the functions,
  with amdgpu and i915. It also has a core rust fix and changes in
  nova-core to take advantage of it, and otherwise just has some minor
  driver fixes, and marks loongsoon as orphaned.

  rust:
   - Fix safety issue in dma_read! and dma_write!

  nova-core:
   - Fix UB in DmaGspMem pointer accessors
   - Fix stack overflow in GSP memory allocation

  loongsoon:
   - mark drm driver as unmaintained

  msm:
   - Core:
      - Adjusted msm_iommu_pagetable_prealloc_allocate() allocation type
   - DPU:
      - Fixed blue screens on Hamoa laptops by reverting the LM
        reservation
      - Fixed the size of the LM block on several platforms
      - Dropped usage of %pK (again)
      - Fixed smatch warning on SSPP v13+ code
      - Fixed INTF_6 interrupts on Lemans
   - DSI:
      - Fixed DSI PHY revision on Kaanapali
      - Fixed pixel clock calculation for the bonded DSI mode panels
        with compression enabled
   - DT bindings:
      - Fixed DisplayPort description on Glymur
      - Fixed model name in SM8750 MDSS schema
   - GPU:
      - Added MODULE_DEVICE_TABLE to the GPU driver
      - Fix bogus protect error on X2-85
      - Fix dma_free_attrs() buffer size
      - Gen8 UBWC fix for Glymur

  i915:
   - Avoid hang when configuring VRR [icl]
   - Fix sg_table overflow with >4GB folios
   - Fix PSR Selective Update handling
   - Fix eDP ALPM read-out sequence

  amdgpu:
   - SMU13 fix
   - SMU14 fix
   - Fixes for bringup hw testing
   - Kerneldoc fix
   - GC12 idle power fix for compute workloads
   - DCCG fixes

  amdkfd:
   - Fix missing BO unreserve in an error path

  ivpu:
   - drop unnecessary bootparams register setting

  amdxdna:
   - fix runtime/suspend resume deadlock

  bridge:
   - ti-sn65dsi83: fix DSI rounding and dual LVDS

  gud:
   - fix NULL crtc dereference on display disable"

* tag 'drm-fixes-2026-03-14' of https://gitlab.freedesktop.org/drm/kernel: (44 commits)
  drm/amd: Set num IP blocks to 0 if discovery fails
  drm/amdkfd: Unreserve bo if queue update failed
  drm/amd/display: Check for S0i3 to be done before DCCG init on DCN21
  drm/amd/display: Add missing DCCG register entries for DCN20-DCN316
  gpu: nova-core: gsp: fix UB in DmaGspMem pointer accessors
  drm/loongson: Mark driver as orphaned
  accel/amdxdna: Fix runtime suspend deadlock when there is pending job
  gpu: nova-core: fix stack overflow in GSP memory allocation
  accel/ivpu: Remove boot params address setting via MMIO register
  drm/i915/dp: Read ALPM caps after DPCD init
  drm/i915/psr: Write DSC parameters on Selective Update in ET mode
  drm/i915/dsc: Add helper for writing DSC Selective Update ET parameters
  drm/i915/dsc: Add Selective Update register definitions
  drm/i915/psr: Repeat Selective Update area alignment
  drm/i915: Fix potential overflow of shmem scatterlist length
  drm/i915/vrr: Configure VRR timings after enabling TRANS_DDI_FUNC_CTL
  drm/bridge: ti-sn65dsi83: halve horizontal syncs for dual LVDS output
  drm/bridge: ti-sn65dsi83: fix CHA_DSI_CLK_RANGE rounding
  drm/gud: fix NULL crtc dereference on display disable
  drm/sitronix/st7586: fix bad pixel data due to byte swap
  ...
2026-03-13 15:38:55 -07:00
Linus Torvalds
9abff5748e Merge tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fixes from Tejun Heo:

 - Improve workqueue stall diagnostics: dump all busy workers (not just
   running ones), show wall-clock duration of in-flight work items, and
   add a sample module for reproducing stalls

 - Fix POOL_BH vs WQ_BH flag namespace mismatch in pr_cont_worker_id()

 - Rename pool->watchdog_ts to pool->last_progress_ts and related
   functions for clarity

* tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Rename show_cpu_pool{s,}_hog{s,}() to reflect broadened scope
  workqueue: Add stall detector sample module
  workqueue: Show all busy workers in stall diagnostics
  workqueue: Show in-flight work item duration in stall diagnostics
  workqueue: Rename pool->watchdog_ts to pool->last_progress_ts
  workqueue: Use POOL_BH instead of WQ_BH when checking pool flags
2026-03-13 15:11:05 -07:00
Linus Torvalds
b073bcb8d4 Merge tag 'cgroup-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:

 - Hide PF_EXITING tasks from cgroup.procs to avoid exposing dead tasks
   that haven't been removed yet, fixing a systemd timeout issue on
   PREEMPT_RT

 - Call rebuild_sched_domains() directly in CPU hotplug instead of
   deferring to a workqueue, fixing a race where online/offline CPUs
   could briefly appear in stale sched domains

* tag 'cgroup-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: Don't expose dead tasks in cgroup
  cgroup/cpuset: Call rebuild_sched_domains() directly in hotplug
2026-03-13 15:06:31 -07:00
Linus Torvalds
8369b2e97d Merge tag 'sched_ext-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:

 - Fix data races flagged by KCSAN: add missing READ_ONCE()/WRITE_ONCE()
   annotations for lock-free accesses to module parameters and dsq->seq

 - Fix silent truncation of upper 32 enqueue flags (SCX_ENQ_PREEMPT and
   above) when passed through the int sched_class interface

 - Documentation updates: scheduling class precedence, task ownership
   state machine, example scheduler descriptions, config list cleanup

 - Selftest fix for format specifier and buffer length in
   file_write_long()

* tag 'sched_ext-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Use WRITE_ONCE() for the write side of scx_enable helper pointer
  sched_ext: Fix enqueue_task_scx() truncation of upper enqueue flags
  sched_ext: Documentation: Update sched-ext.rst
  sched_ext: Use READ_ONCE() for scx_slice_bypass_us in scx_bypass()
  sched_ext: Documentation: Mention scheduling class precedence
  sched_ext: Document task ownership state machine
  sched_ext: Use READ_ONCE() for lock-free reads of module param variables
  sched_ext/selftests: Fix format specifier and buffer length in file_write_long()
  sched_ext: Use WRITE_ONCE() for the write side of dsq->seq update
2026-03-13 14:54:56 -07:00
Linus Torvalds
8040dc41d2 Merge tag 'perf-tools-fixes-for-v7.0-1-2026-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix stale build ID in module MMAP2 records in events synthesized for
   pre-existing processes

 - Fix rust cross compilation

 - hashmap__new() error pointer return handling fixes

 - Fix off-by-one bug in outside of functions check on the disasm code

 - Update header copies of kernel headers, including prctl.h, mount.h,
   fs.h, irq_vectors.h, perf_event.h, gfp_types.h, kvm.h, cpufeatures.h
   msr-index.h, also the syscall tables files that introduced the
   'rseq_slice_yield' syscall

 - Finish removal of ETM_OPT_* on the ARM coresight support, needed to
   sync the coresight-pmu.h header with the kernel sources

 - Make in-target rule robust against too long argument error

* tag 'perf-tools-fixes-for-v7.0-1-2026-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (22 commits)
  perf synthetic-events: Fix stale build ID in module MMAP2 records
  perf annotate loongarch: Fix off-by-one bug in outside check
  perf ftrace: Fix hashmap__new() error checking
  perf annotate: Fix hashmap__new() error checking
  perf cs-etm: Sync coresight-pmu.h header with the kernel sources
  perf cs-etm: Finish removal of ETM_OPT_*
  tools headers UAPI: Update tools' copy of linux/coresight-pmu.h
  tools headers: Update the syscall tables and unistd.h, to support the new 'rseq_slice_yield' syscall
  perf disasm: Fix off-by-one bug in outside check
  tools arch x86: Sync msr-index.h to pick MSR_{OMR_[0-3],CORE_PERF_GLOBAL_STATUS_SET}
  tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources
  tools headers x86 cpufeatures: Sync with the kernel sources
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools headers: Update the linux/gfp_types.h copy with the kernel sources
  perf beauty: Update the linux/perf_event.h copy with the kernel sources
  perf beauty: Update the arch/x86/include/asm/irq_vectors.h copy with the kernel sources
  perf beauty: Sync UAPI linux/fs.h with kernel sources
  perf beauty: Sync linux/mount.h copy with the kernel sources
  tools build: Fix rust cross compilation
  perf build: Prevent "argument list too long" error
  ...
2026-03-13 14:24:15 -07:00
Linus Torvalds
8d9968859c Merge tag 's390-7.0-5' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 fixes from Vasily Gorbik:

 - Revert IRQ entry/exit path optimization that incorrectly cleared
   some PSW bits before irqentry_exit(), causing boot failures with
   linux-next and HRTIMER_REARM_DEFERRED (which only uncovered the
   problem)

 - Fix zcrypt code to show CCA card serial numbers even when the
   default crypto domain is offline by selecting any domain available,
   preventing empty sysfs entries

* tag 's390-7.0-5' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/zcrypt: Enable AUTOSEL_DOM for CCA serialnr sysfs attribute
  s390: Revert "s390/irq/idle: Remove psw bits early"
2026-03-13 14:18:13 -07:00
Saurabh Sengar
b2ae73d954 MAINTAINERS: Update maintainers for Hyper-V DRM driver
Add myself, Dexuan, and Long as maintainers. Deepak is stepping down
from these responsibilities.

Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-13 21:12:33 +00:00
Stanislav Kinsburskii
6922db2504 mshv: Fix use-after-free in mshv_map_user_memory error path
In the error path of mshv_map_user_memory(), calling vfree() directly on
the region leaves the MMU notifier registered. When userspace later unmaps
the memory, the notifier fires and accesses the freed region, causing a
use-after-free and potential kernel panic.

Replace vfree() with mshv_partition_put() to properly unregister
the MMU notifier before freeing the region.

Fixes: b9a66cd5cc ("mshv: Add support for movable memory regions")
Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-13 21:11:18 +00:00
Linus Torvalds
2c361c9b7f Merge tag 'ceph-for-7.0-rc4' of https://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
 "A small pile of CephFS and messenger bug fixes, all marked for stable"

* tag 'ceph-for-7.0-rc4' of https://github.com/ceph/ceph-client:
  libceph: Fix potential out-of-bounds access in ceph_handle_auth_reply()
  libceph: Use u32 for non-negative values in ceph_monmap_decode()
  MAINTAINERS: update email address of Dongsheng Yang
  libceph: reject preamble if control segment is empty
  libceph: admit message frames only in CEPH_CON_S_OPEN state
  libceph: prevent potential out-of-bounds reads in process_message_header()
  ceph: do not skip the first folio of the next object in writeback
  ceph: fix memory leaks in ceph_mdsc_build_path()
  ceph: add a bunch of missing ceph_path_info initializers
  ceph: fix i_nlink underrun during async unlink
2026-03-13 14:03:58 -07:00
Linus Torvalds
399af66228 Merge tag 'xfs-fixes-7.0-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino:
 "A couple race fixes found on the new healthmon mechanism, and another
  flushing dquots during filesystem shutdown"

* tag 'xfs-fixes-7.0-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix integer overflow in bmap intent sort comparator
  xfs: fix undersized l_iclog_roundoff values
  xfs: ensure dquot item is deleted from AIL only after log shutdown
  xfs: remove redundant set null for ip->i_itemp
  xfs: fix returned valued from xfs_defer_can_append
  xfs: Remove redundant NULL check after __GFP_NOFAIL
  xfs: fix race between healthmon unmount and read_iter
  xfs: remove scratch field from struct xfs_gc_bio
2026-03-13 10:49:15 -07:00
Linus Torvalds
d874ca0522 Merge tag 'v7.0-rc3-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6
Pull smb client fixes from Steve French:
 - Fix reconnect when using non-default port
 - Fix default retransmission behavior
 - Fix open handle reuse in cifs_open
 - Fix export for smb2-mapperror-test
 - Fix potential corruption on write retry
 - Fix potentially uninitialized superblock flags
 - Fix missing O_DIRECT and O_SYNC flags on create

* tag 'v7.0-rc3-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6:
  cifs: make default value of retrans as zero
  smb: client: fix open handle lookup in cifs_open()
  smb: client: fix iface port assignment in parse_server_interfaces
  smb/client: only export symbol for 'smb2maperror-test' module
  smb: client: fix in-place encryption corruption in SMB2_write()
  smb: client: fix sbflags initialization
  smb: client: fix atomic open with O_DIRECT & O_SYNC
2026-03-13 10:46:32 -07:00
Linus Torvalds
b36eb6e3f5 Merge tag 'spi-fix-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
Pull spi fixes from Mark Brown:
 "A couple of device ID and quirk updates, plus a bunch of small fixes
  most of which (other than the Cadence one) are unremarkable error
  handling fixes"

* tag 'spi-fix-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi:
  spi: atcspi200: Handle invalid buswidth and fix compiler warning
  spi: dt-bindings: sun6i: Allow Dual SPI and Quad SPI for newer SoCs
  spi: intel-pci: Add support for Nova Lake mobile SPI flash
  spi: cadence-qspi: Fix requesting of APB and AHB clocks on JH7110
  spi: rockchip-sfc: Fix double-free in remove() callback
  spi: atcspi200: Fix double-free in atcspi_configure_dma()
  spi: amlogic: spifc-a4: Fix DMA mapping error handling
2026-03-13 10:31:10 -07:00
Linus Torvalds
ff30ea1fb1 Merge tag 'regulator-fix-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
Pull regulator fixes from Mark Brown:
 "A couple of small driver specific fixes for pca9450, cleaning up
  logging and fixing warnings due to confusion with interrupt type"

* tag 'regulator-fix-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator:
  regulator: pca9450: Correct probed name for PCA9452
  regulator: pca9450: Correct interrupt type
2026-03-13 10:29:45 -07:00
Vyacheslav Vahnenko
d0d9b1f4f5 USB: ezcap401 needs USB_QUIRK_NO_BOS to function on 10gbs usb speed
Add USB_QUIRK_NO_BOS for ezcap401 capture card, without it dmesg will show
"unable to get BOS descriptor or descriptor too short" and "unable to
read config index 0 descriptor/start: -71" errors and device will not
able to work at full speed at 10gbs

Signed-off-by: Vyacheslav Vahnenko <vahnenko2003@gmail.com>
Cc: stable <stable@kernel.org>
Link: https://patch.msgid.link/20260313123638.20481-1-vahnenko2003@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-13 18:19:07 +01:00
Linus Torvalds
56cf10db2a Merge tag 'sound-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
Pull sound fixes from Takashi Iwai:
 "There have been continuous flux but most of them are device-specific
  small fixes, while we see a few core fixes at this time (minor PCM fix
  for linked streams and a few ASoC core fixes for delayed work, etc)

  Core:
   - PCM: Fix use-after-free in linked stream drain

  ASoC:
   - core: Fixes for delayed works, empty DMI string handling and DT overlay
   - qcom: qdsp6: Fix ADSP stop/start crash via component removal ordering
   - tegra: Add support for Tegra238 audio graph card
   - amd: Fix missing error checks for clock acquisition
   - rt1011: Fix incorrect DAPM context retrieval helper

  HD-audio:
   - Add quirk for Gigabyte H610M, ASUS UM6702RC, HP 14s-dr5xxx, and
     ThinkPad X390

  USB-audio:
   - Scarlett2: Fix NULL dereference for malformed endpoint descriptors
   - Add quirk for SPACETOUCH"

* tag 'sound-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound:
  ASoC: amd: acp-mach-common: Add missing error check for clock acquisition
  ASoC: detect empty DMI strings
  ASoC: amd: acp3x-rt5682-max9836: Add missing error check for clock acquisition
  ALSA: usb-audio: Add iface reset and delay quirk for SPACETOUCH USB Audio
  ASoC: codecs: rt1011: Use component to get the dapm context in spk_mode_put
  ALSA: usb-audio: Check endpoint numbers at parsing Scarlett2 mixer interfaces
  ASoC: simple-card-utils: fix graph_util_is_ports0() for DT overlays
  ASoC: soc-core: flush delayed work before removing DAIs and widgets
  ASoC: soc-core: drop delayed_work_pending() check before flush
  ASoC: tegra: Add support for Tegra238 soundcard
  ALSA: hda/realtek: Add headset jack quirk for Thinkpad X390
  ALSA: hda/realtek: add HP Laptop 14s-dr5xxx mute LED quirk
  ALSA: hda/realtek: add quirk for ASUS UM6702RC
  ALSA: pcm: fix use-after-free on linked stream runtime in snd_pcm_drain()
  ALSA: hda/realtek: Add quirk for Gigabyte Technology to fix headphone
  firmware: cs_dsp: Fix fragmentation regression in firmware download
  ASoC: qcom: qdsp6: Fix q6apm remove ordering during ADSP stop and start
2026-03-13 10:15:14 -07:00
Linus Torvalds
73548503dc Merge tag 'block-7.0-20260312' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - Fix nvme-pci IRQ race and slab-out-of-bounds access
      - Fix recursive workqueue locking for target async events
      - Various cleanups

 - Fix a potential NULL pointer dereference in ublk on size setting

 - ublk automatic partition scanning fix

 - Two s390 dasd fixes

* tag 'block-7.0-20260312' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  nvme: Annotate struct nvme_dhchap_key with __counted_by
  nvme-core: do not pass empty queue_limits to blk_mq_alloc_queue()
  nvme-pci: Fix race bug in nvme_poll_irqdisable()
  nvmet: move async event work off nvmet-wq
  nvme-pci: Fix slab-out-of-bounds in nvme_dbbuf_set
  s390/dasd: Copy detected format information to secondary device
  s390/dasd: Move quiesce state with pprc swap
  ublk: don't clear GD_SUPPRESS_PART_SCAN for unprivileged daemons
  ublk: fix NULL pointer dereference in ublk_ctrl_set_size()
2026-03-13 10:13:06 -07:00
Linus Torvalds
e67bf352a0 Merge tag 'io_uring-7.0-20260312' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:

 - Fix an inverted true/false comment on task_no_new_privs, from the
   BPF filtering changes merged in this release

 - Use the migration disabling way of running the BPF filters, as the
   io_uring side doesn't do that already

 - Fix an issue with ->rings stability under resize, both for local
   task_work additions and for eventfd signaling

 - Fix an issue with SQE mixed mode, where a bounds check wasn't correct
   for having a 128b SQE

 - Fix an issue where a legacy provided buffer group is changed to to
   ring mapped one while legacy buffers from that group are in flight

* tag 'io_uring-7.0-20260312' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/kbuf: check if target buffer list is still legacy on recycle
  io_uring: fix physical SQE bounds check for SQE_MIXED 128-byte ops
  io_uring/eventfd: use ctx->rings_rcu for flags checking
  io_uring: ensure ctx->rings is stable for task work flags manipulation
  io_uring/bpf_filter: use bpf_prog_run_pin_on_cpu() to prevent migration
  io_uring/register: fix comment about task_no_new_privs
2026-03-13 10:09:35 -07:00
Linus Torvalds
8174dafb2d Merge tag 'slab-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab fixes from Vlastimil Babka:

 - Fix for a memory leak that can occur when already so low on memory
   that we can't allocate a new slab anymore (Qing Wang)

 - Fix for a case where slabobj_ext array for a slab might be allocated
   from the same slab, making it permanently non-freeable (Harry Yoo)

* tag 'slab-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  slab: fix memory leak when refill_sheaf() fails
  mm/slab: fix an incorrect check in obj_exts_alloc_size()
2026-03-13 10:07:33 -07:00
Linus Torvalds
92e989acfb Merge tag 'pwrseq-fixes-for-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux
Pull power sequencing fix from Bartosz Golaszewski:

 - fix OF-node reference leak in pwrseq-pcie-m2

* tag 'pwrseq-fixes-for-v7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux:
  power: sequencing: pcie-m2: Fix device node reference leak in probe
2026-03-13 10:06:00 -07:00
Jenny Guanni Qu
f173d0f4c0 netfilter: nf_conntrack_h323: check for zero length in DecodeQ931()
In DecodeQ931(), the UserUserIE code path reads a 16-bit length from
the packet, then decrements it by 1 to skip the protocol discriminator
byte before passing it to DecodeH323_UserInformation(). If the encoded
length is 0, the decrement wraps to -1, which is then passed as a
large value to the decoder, leading to an out-of-bounds read.

Add a check to ensure len is positive after the decrement.

Fixes: 5e35941d99 ("[NETFILTER]: Add H.323 conntrack/NAT helper")
Reported-by: Klaudia Kloc <klaudia@vidocsecurity.com>
Reported-by: Dawid Moczadło <dawid@vidocsecurity.com>
Tested-by: Jenny Guanni Qu <qguanni@gmail.com>
Signed-off-by: Jenny Guanni Qu <qguanni@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:15 +01:00
Jenny Guanni Qu
00050ec08c netfilter: xt_time: use unsigned int for monthday bit shift
The monthday field can be up to 31, and shifting a signed integer 1
by 31 positions (1 << 31) is undefined behavior in C, as the result
overflows a 32-bit signed int. Use 1U to ensure well-defined behavior
for all valid monthday values.

Change the weekday shift to 1U as well for consistency.

Fixes: ee4411a1b1 ("[NETFILTER]: x_tables: add xt_time match")
Reported-by: Klaudia Kloc <klaudia@vidocsecurity.com>
Reported-by: Dawid Moczadło <dawid@vidocsecurity.com>
Tested-by: Jenny Guanni Qu <qguanni@gmail.com>
Signed-off-by: Jenny Guanni Qu <qguanni@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:15 +01:00
Pablo Neira Ayuso
f62a218a94 netfilter: xt_CT: drop pending enqueued packets on template removal
Templates refer to objects that can go away while packets are sitting in
nfqueue refer to:

- helper, this can be an issue on module removal.
- timeout policy, nfnetlink_cttimeout might remove it.

The use of templates with zone and event cache filter are safe, since
this just copies values.

Flush these enqueued packets in case the template rule gets removed.

Fixes: 24de58f465 ("netfilter: xt_CT: allow to attach timeout policy + glue code")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:15 +01:00
Pablo Neira Ayuso
36eae0956f netfilter: nft_ct: drop pending enqueued packets on removal
Packets sitting in nfqueue might hold a reference to:

- templates that specify the conntrack zone, because a percpu area is
  used and module removal is possible.
- conntrack timeout policies and helper, where object removal leave
  a stale reference.

Since these objects can just go away, drop enqueued packets to avoid
stale reference to them.

If there is a need for finer grain removal, this logic can be revisited
to make selective packet drop upon dependencies.

Fixes: 7e0b2b57f0 ("netfilter: nft_ct: add ct timeout support")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:15 +01:00
Pablo Neira Ayuso
0548a13b5a nf_tables: nft_dynset: fix possible stateful expression memleak in error path
If cloning the second stateful expression in the element via GFP_ATOMIC
fails, then the first stateful expression remains in place without being
released.

   unreferenced object (percpu) 0x607b97e9cab8 (size 16):
     comm "softirq", pid 0, jiffies 4294931867
     hex dump (first 16 bytes on cpu 3):
       00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
     backtrace (crc 0):
       pcpu_alloc_noprof+0x453/0xd80
       nft_counter_clone+0x9c/0x190 [nf_tables]
       nft_expr_clone+0x8f/0x1b0 [nf_tables]
       nft_dynset_new+0x2cb/0x5f0 [nf_tables]
       nft_rhash_update+0x236/0x11c0 [nf_tables]
       nft_dynset_eval+0x11f/0x670 [nf_tables]
       nft_do_chain+0x253/0x1700 [nf_tables]
       nft_do_chain_ipv4+0x18d/0x270 [nf_tables]
       nf_hook_slow+0xaa/0x1e0
       ip_local_deliver+0x209/0x330

Fixes: 563125a73a ("netfilter: nftables: generalize set extension to support for several expressions")
Reported-by: Gurpreet Shergill <giki.shergill@proton.me>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:15 +01:00
Jenny Guanni Qu
1e3a359316 netfilter: nf_conntrack_h323: fix OOB read in decode_int() CONS case
In decode_int(), the CONS case calls get_bits(bs, 2) to read a length
value, then calls get_uint(bs, len) without checking that len bytes
remain in the buffer. The existing boundary check only validates the
2 bits for get_bits(), not the subsequent 1-4 bytes that get_uint()
reads. This allows a malformed H.323/RAS packet to cause a 1-4 byte
slab-out-of-bounds read.

Add a boundary check for len bytes after get_bits() and before
get_uint().

Fixes: 5e35941d99 ("[NETFILTER]: Add H.323 conntrack/NAT helper")
Reported-by: Klaudia Kloc <klaudia@vidocsecurity.com>
Reported-by: Dawid Moczadło <dawid@vidocsecurity.com>
Signed-off-by: Jenny Guanni Qu <qguanni@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:15 +01:00
Eric Woudstra
a3aca98aec netfilter: nf_flow_table_ip: reset mac header before vlan push
With double vlan tagged packets in the fastpath, getting the error:

skb_vlan_push got skb with skb->data not at mac header (offset 18)

Call skb_reset_mac_header() before calling skb_vlan_push().

Fixes: c653d5a78f ("netfilter: flowtable: inline vlan encapsulation in xmit path")
Signed-off-by: Eric Woudstra <ericwouds@gmail.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:15 +01:00
Florian Westphal
598adea720 netfilter: revert nft_set_rbtree: validate open interval overlap
This reverts commit 648946966a ("netfilter: nft_set_rbtree: validate
open interval overlap").

There have been reports of nft failing to laod valid rulesets after this
patch was merged into -stable.

I can reproduce several such problem with recent nft versions, including
nft 1.1.6 which is widely shipped by distributions.

We currently have little choice here.
This commit can be resurrected at some point once the nftables fix that
triggers the false overlap positive has appeared in common distros
(see e83e32c8d1cd ("mnl: restore create element command with large batches" in
 nftables.git).

Fixes: 648946966a ("netfilter: nft_set_rbtree: validate open interval overlap")
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:14 +01:00
Lukas Johannes Möller
fbce58e719 netfilter: nf_conntrack_sip: fix Content-Length u32 truncation in sip_help_tcp()
sip_help_tcp() parses the SIP Content-Length header with
simple_strtoul(), which returns unsigned long, but stores the result in
unsigned int clen.  On 64-bit systems, values exceeding UINT_MAX are
silently truncated before computing the SIP message boundary.

For example, Content-Length 4294967328 (2^32 + 32) is truncated to 32,
causing the parser to miscalculate where the current message ends.  The
loop then treats trailing data in the TCP segment as a second SIP
message and processes it through the SDP parser.

Fix this by changing clen to unsigned long to match the return type of
simple_strtoul(), and reject Content-Length values that exceed the
remaining TCP payload length.

Fixes: f5b321bd37 ("netfilter: nf_conntrack_sip: add TCP support")
Signed-off-by: Lukas Johannes Möller <research@johannes-moeller.dev>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:14 +01:00
Florian Westphal
f900e1d77e netfilter: conntrack: add missing netlink policy validations
Hyunwoo Kim reports out-of-bounds access in sctp and ctnetlink.

These attributes are used by the kernel without any validation.
Extend the netlink policies accordingly.

Quoting the reporter:
  nlattr_to_sctp() assigns the user-supplied CTA_PROTOINFO_SCTP_STATE
  value directly to ct->proto.sctp.state without checking that it is
  within the valid range. [..]

  and: ... with exp->dir = 100, the access at
  ct->master->tuplehash[100] reads 5600 bytes past the start of a
  320-byte nf_conn object, causing a slab-out-of-bounds read confirmed by
  UBSAN.

Fixes: 076a0ca026 ("netfilter: ctnetlink: add NAT support for expectations")
Fixes: a258860e01 ("netfilter: ctnetlink: add full support for SCTP to ctnetlink")
Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:14 +01:00
Hyunwoo Kim
5cb81eeda9 netfilter: ctnetlink: fix use-after-free in ctnetlink_dump_exp_ct()
ctnetlink_dump_exp_ct() stores a conntrack pointer in cb->data for the
netlink dump callback ctnetlink_exp_ct_dump_table(), but drops the
conntrack reference immediately after netlink_dump_start().  When the
dump spans multiple rounds, the second recvmsg() triggers the dump
callback which dereferences the now-freed conntrack via nfct_help(ct),
leading to a use-after-free on ct->ext.

The bug is that the netlink_dump_control has no .start or .done
callbacks to manage the conntrack reference across dump rounds.  Other
dump functions in the same file (e.g. ctnetlink_get_conntrack) properly
use .start/.done callbacks for this purpose.

Fix this by adding .start and .done callbacks that hold and release the
conntrack reference for the duration of the dump, and move the
nfct_help() call after the cb->args[0] early-return check in the dump
callback to avoid dereferencing ct->ext unnecessarily.

 BUG: KASAN: slab-use-after-free in ctnetlink_exp_ct_dump_table+0x4f/0x2e0
 Read of size 8 at addr ffff88810597ebf0 by task ctnetlink_poc/133

 CPU: 1 UID: 0 PID: 133 Comm: ctnetlink_poc Not tainted 7.0.0-rc2+ #3 PREEMPTLAZY
 Call Trace:
  <TASK>
  ctnetlink_exp_ct_dump_table+0x4f/0x2e0
  netlink_dump+0x333/0x880
  netlink_recvmsg+0x3e2/0x4b0
  ? aa_sk_perm+0x184/0x450
  sock_recvmsg+0xde/0xf0

 Allocated by task 133:
  kmem_cache_alloc_noprof+0x134/0x440
  __nf_conntrack_alloc+0xa8/0x2b0
  ctnetlink_create_conntrack+0xa1/0x900
  ctnetlink_new_conntrack+0x3cf/0x7d0
  nfnetlink_rcv_msg+0x48e/0x510
  netlink_rcv_skb+0xc9/0x1f0
  nfnetlink_rcv+0xdb/0x220
  netlink_unicast+0x3ec/0x590
  netlink_sendmsg+0x397/0x690
  __sys_sendmsg+0xf4/0x180

 Freed by task 0:
  slab_free_after_rcu_debug+0xad/0x1e0
  rcu_core+0x5c3/0x9c0

Fixes: e844a92843 ("netfilter: ctnetlink: allow to dump expectation per master conntrack")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-13 15:31:14 +01:00
Krzysztof Kozlowski
fb75437b44 arm_mpam: Force __iomem casts
Code allocates standard kernel memory to pass to the MPAM, which expects
__iomem.  The code is safe, because __iomem accessors should work fine
on kernel mapped memory, however leads to sparse warnings:

  test_mpam_devices.c:327:42: warning: incorrect type in initializer (different address spaces)
  test_mpam_devices.c:327:42:    expected char [noderef] __iomem *buf
  test_mpam_devices.c:327:42:    got void *
  test_mpam_devices.c:342:24: warning: cast removes address space '__iomem' of expression

Cast the pointer to memory via __force to silence them.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512160133.eAzPdJv2-lkp@intel.com/
Acked-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-13 14:17:30 +00:00
Ben Horgan
c1376f1ff3 arm_mpam: Disable preemption when making accesses to fake MSC in kunit test
Accesses to MSC must be made from a cpu that is affine to that MSC and the
driver checks this in __mpam_write_reg() using smp_processor_id(). A fake
in-memory MSC is used for testing. When using that, it doesn't matter which
cpu we access it from but calling smp_processor_id() from a preemptible
context gives warnings when running with CONFIG_DEBUG_PREEMPT.

Add a test helper that wraps mpam_reset_msc_bitmap() with preemption
disabled to ensure all (fake) MSC accesses are made with preemption
disabled.

Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-13 14:17:30 +00:00
Ben Horgan
4ad79c874e arm_mpam: Fix null pointer dereference when restoring bandwidth counters
When an MSC supporting memory bandwidth monitoring is brought offline and
then online, mpam_restore_mbwu_state() calls __ris_msmon_read() via ipi to
restore the configuration of the bandwidth counters. It doesn't care about
the value read, mbwu_arg.val, and doesn't set it leading to a null pointer
dereference when __ris_msmon_read() adds to it. This results in a kernel
oops with a call trace such as:

Call trace:
__ris_msmon_read+0x19c/0x64c (P)
mpam_restore_mbwu_state+0xa0/0xe8
smp_call_on_cpu_callback+0x1c/0x38
process_one_work+0x154/0x4b4
worker_thread+0x188/0x310
kthread+0x11c/0x130
ret_from_fork+0x10/0x20

Provide a local variable for val to avoid __ris_msmon_read() dereferencing
a null pointer when adding to val.

Fixes: 41e8a14950 ("arm_mpam: Track bandwidth counter state for power management")
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-13 14:17:30 +00:00
Pepper Gray
d499e9627d arm64/scs: Fix handling of advance_loc4
DW_CFA_advance_loc4 is defined but no handler is implemented. Its
CFA opcode defaults to EDYNSCS_INVALID_CFA_OPCODE triggering an
error which wrongfully prevents modules from loading.

Link: https://bugs.gentoo.org/971060
Signed-off-by: Pepper Gray <hello@peppergray.xyz>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-13 14:15:34 +00:00
Masami Hiramatsu (Google)
5ef268cb7a kprobes: Remove unneeded warnings from __arm_kprobe_ftrace()
Remove unneeded warnings for handled errors from __arm_kprobe_ftrace()
because all caller handled the error correctly.

Link: https://lore.kernel.org/all/177261531182.1312989.8737778408503961141.stgit@mhiramat.tok.corp.google.com/

Reported-by: Zw Tang <shicenci@gmail.com>
Closes: https://lore.kernel.org/all/CAPHJ_V+J6YDb_wX2nhXU6kh466Dt_nyDSas-1i_Y8s7tqY-Mzw@mail.gmail.com/
Fixes: 9c89bb8e32 ("kprobes: treewide: Cleanup the error messages for kprobes")
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-13 23:15:26 +09:00
Masami Hiramatsu (Google)
e113f0b46d kprobes: avoid crash when rmmod/insmod after ftrace killed
After we hit ftrace is killed by some errors, the kernel crash if
we remove modules in which kprobe probes.

BUG: unable to handle page fault for address: fffffbfff805000d
PGD 817fcc067 P4D 817fcc067 PUD 817fc8067 PMD 101555067 PTE 0
Oops: Oops: 0000 [#1] SMP KASAN PTI
CPU: 4 UID: 0 PID: 2012 Comm: rmmod Tainted: G        W  OE
Tainted: [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
RIP: 0010:kprobes_module_callback+0x89/0x790
RSP: 0018:ffff88812e157d30 EFLAGS: 00010a02
RAX: 1ffffffff805000d RBX: dffffc0000000000 RCX: ffffffff86a8de90
RDX: ffffed1025c2af9b RSI: 0000000000000008 RDI: ffffffffc0280068
RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed1025c2af9a
R10: ffff88812e157cd7 R11: 205d323130325420 R12: 0000000000000002
R13: ffffffffc0290488 R14: 0000000000000002 R15: ffffffffc0280040
FS:  00007fbc450dd740(0000) GS:ffff888420331000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffffbfff805000d CR3: 000000010f624000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 notifier_call_chain+0xc6/0x280
 blocking_notifier_call_chain+0x60/0x90
 __do_sys_delete_module.constprop.0+0x32a/0x4e0
 do_syscall_64+0x5d/0xfa0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

This is because the kprobe on ftrace does not correctly handles
the kprobe_ftrace_disabled flag set by ftrace_kill().

To prevent this error, check kprobe_ftrace_disabled in
__disarm_kprobe_ftrace() and skip all ftrace related operations.

Link: https://lore.kernel.org/all/176473947565.1727781.13110060700668331950.stgit@mhiramat.tok.corp.google.com/

Reported-by: Ye Bin <yebin10@huawei.com>
Closes: https://lore.kernel.org/all/20251125020536.2484381-1-yebin@huaweicloud.com/
Fixes: ae6aa16fdc ("kprobes: introduce ftrace based optimization")
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-13 23:14:14 +09:00
Deepanshu Kartikey
7d73872d94 wifi: mac80211: check tdls flag in ieee80211_tdls_oper
When NL80211_TDLS_ENABLE_LINK is called, the code only checks if the
station exists but not whether it is actually a TDLS station. This
allows the operation to proceed for non-TDLS stations, causing
unintended side effects like modifying channel context and HT
protection before failing.

Add a check for sta->sta.tdls early in the ENABLE_LINK case, before
any side effects occur, to ensure the operation is only allowed for
actual TDLS peers.

Reported-by: syzbot+56b6a844a4ea74487b7b@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=56b6a844a4ea74487b7b
Tested-by: syzbot+56b6a844a4ea74487b7b@syzkaller.appspotmail.com
Suggested-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Deepanshu Kartikey <kartikey406@gmail.com>
Link: https://patch.msgid.link/20260313092417.520807-1-kartikey406@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-13 14:51:38 +01:00
ZhengYuan Huang
fc1cd1f18c btrfs: tree-checker: fix misleading root drop_level error message
Fix tree-checker error message to report "invalid root drop_level"
instead of the misleading "invalid root level".

Fixes: 259ee7754b ("btrfs: tree-checker: Add ROOT_ITEM check")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: ZhengYuan Huang <gality369@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-13 12:49:25 +01:00
Filipe Manana
9573a365ff btrfs: log new dentries when logging parent dir of a conflicting inode
If we log the parent directory of a conflicting inode, we are not logging
the new dentries of the directory, so when we finish we have the parent
directory's inode marked as logged but we did not log its new dentries.
As a consequence if the parent directory is explicitly fsynced later and
it does not have any new changes since we logged it, the fsync is a no-op
and after a power failure the new dentries are missing.

Example scenario:

  $ mkdir foo

  $ sync

  $rmdir foo

  $ mkdir dir1
  $ mkdir dir2

  # A file with the same name and parent as the directory we just deleted
  # and was persisted in a past transaction. So the deleted directory's
  # inode is a conflicting inode of this new file's inode.
  $ touch foo

  $ ln foo dir2/link

  # The fsync on dir2 will log the parent directory (".") because the
  # conflicting inode (deleted directory) does not exists anymore, but it
  # it does not log its new dentries (dir1).
  $ xfs_io -c "fsync" dir2

  # This fsync on the parent directory is no-op, since the previous fsync
  # logged it (but without logging its new dentries).
  $ xfs_io -c "fsync" .

  <power failure>

  # After log replay dir1 is missing.

Fix this by ensuring we log new dir dentries whenever we log the parent
directory of a no longer existing conflicting inode.

A test case for fstests will follow soon.

Reported-by: Vyacheslav Kovalevsky <slava.kovalevskiy.2014@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/182055fa-e9ce-4089-9f5f-4b8a23e8dd91@gmail.com/
Fixes: a3baaf0d78 ("Btrfs: fix fsync after succession of renames and unlink/rmdir")
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-13 12:48:59 +01:00
Johannes Thumshirn
77603ab104 btrfs: don't take device_list_mutex when querying zone info
Shin'ichiro reported sporadic hangs when running generic/013 in our CI
system. When enabling lockdep, there is a lockdep splat when calling
btrfs_get_dev_zone_info_all_devices() in the mount path that can be
triggered by i.e. generic/013:

  ======================================================
  WARNING: possible circular locking dependency detected
  7.0.0-rc1+ #355 Not tainted
  ------------------------------------------------------
  mount/1043 is trying to acquire lock:
  ffff8881020b5470 (&vblk->vdev_mutex){+.+.}-{4:4}, at: virtblk_report_zones+0xda/0x430

  but task is already holding lock:
  ffff888102a738e0 (&fs_devs->device_list_mutex){+.+.}-{4:4}, at: btrfs_get_dev_zone_info_all_devices+0x45/0x90

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #4 (&fs_devs->device_list_mutex){+.+.}-{4:4}:
	 __mutex_lock+0xa3/0x1360
	 btrfs_create_pending_block_groups+0x1f4/0x9d0
	 __btrfs_end_transaction+0x3e/0x2e0
	 btrfs_zoned_reserve_data_reloc_bg+0x2f8/0x390
	 open_ctree+0x1934/0x23db
	 btrfs_get_tree.cold+0x105/0x26c
	 vfs_get_tree+0x28/0xb0
	 __do_sys_fsconfig+0x324/0x680
	 do_syscall_64+0x92/0x4f0
	 entry_SYSCALL_64_after_hwframe+0x76/0x7e

  -> #3 (btrfs_trans_num_extwriters){++++}-{0:0}:
	 join_transaction+0xc2/0x5c0
	 start_transaction+0x17c/0xbc0
	 btrfs_zoned_reserve_data_reloc_bg+0x2b4/0x390
	 open_ctree+0x1934/0x23db
	 btrfs_get_tree.cold+0x105/0x26c
	 vfs_get_tree+0x28/0xb0
	 __do_sys_fsconfig+0x324/0x680
	 do_syscall_64+0x92/0x4f0
	 entry_SYSCALL_64_after_hwframe+0x76/0x7e

  -> #2 (btrfs_trans_num_writers){++++}-{0:0}:
	 lock_release+0x163/0x4b0
	 __btrfs_end_transaction+0x1c7/0x2e0
	 btrfs_dirty_inode+0x6f/0xd0
	 touch_atime+0xe5/0x2c0
	 btrfs_file_mmap_prepare+0x65/0x90
	 __mmap_region+0x4b9/0xf00
	 mmap_region+0xf7/0x120
	 do_mmap+0x43d/0x610
	 vm_mmap_pgoff+0xd6/0x190
	 ksys_mmap_pgoff+0x7e/0xc0
	 do_syscall_64+0x92/0x4f0
	 entry_SYSCALL_64_after_hwframe+0x76/0x7e

  -> #1 (&mm->mmap_lock){++++}-{4:4}:
	 __might_fault+0x68/0xa0
	 _copy_to_user+0x22/0x70
	 blkdev_copy_zone_to_user+0x22/0x40
	 virtblk_report_zones+0x282/0x430
	 blkdev_report_zones_ioctl+0xfd/0x130
	 blkdev_ioctl+0x20f/0x2c0
	 __x64_sys_ioctl+0x86/0xd0
	 do_syscall_64+0x92/0x4f0
	 entry_SYSCALL_64_after_hwframe+0x76/0x7e

  -> #0 (&vblk->vdev_mutex){+.+.}-{4:4}:
	 __lock_acquire+0x1522/0x2680
	 lock_acquire+0xd5/0x2f0
	 __mutex_lock+0xa3/0x1360
	 virtblk_report_zones+0xda/0x430
	 blkdev_report_zones_cached+0x162/0x190
	 btrfs_get_dev_zones+0xdc/0x2e0
	 btrfs_get_dev_zone_info+0x219/0xe80
	 btrfs_get_dev_zone_info_all_devices+0x62/0x90
	 open_ctree+0x1200/0x23db
	 btrfs_get_tree.cold+0x105/0x26c
	 vfs_get_tree+0x28/0xb0
	 __do_sys_fsconfig+0x324/0x680
	 do_syscall_64+0x92/0x4f0
	 entry_SYSCALL_64_after_hwframe+0x76/0x7e

  other info that might help us debug this:

  Chain exists of:
    &vblk->vdev_mutex --> btrfs_trans_num_extwriters --> &fs_devs->device_list_mutex

   Possible unsafe locking scenario:

	 CPU0                    CPU1
	 ----                    ----
    lock(&fs_devs->device_list_mutex);
				 lock(btrfs_trans_num_extwriters);
				 lock(&fs_devs->device_list_mutex);
    lock(&vblk->vdev_mutex);

   *** DEADLOCK ***

  3 locks held by mount/1043:
   #0: ffff88811063e878 (&fc->uapi_mutex){+.+.}-{4:4}, at: __do_sys_fsconfig+0x2ae/0x680
   #1: ffff88810cb9f0e8 (&type->s_umount_key#31/1){+.+.}-{4:4}, at: alloc_super+0xc0/0x3e0
   #2: ffff888102a738e0 (&fs_devs->device_list_mutex){+.+.}-{4:4}, at: btrfs_get_dev_zone_info_all_devices+0x45/0x90

  stack backtrace:
  CPU: 2 UID: 0 PID: 1043 Comm: mount Not tainted 7.0.0-rc1+ #355 PREEMPT(full)
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-9.fc43 06/10/2025
  Call Trace:
   <TASK>
   dump_stack_lvl+0x5b/0x80
   print_circular_bug.cold+0x18d/0x1d8
   check_noncircular+0x10d/0x130
   __lock_acquire+0x1522/0x2680
   ? vmap_small_pages_range_noflush+0x3ef/0x820
   lock_acquire+0xd5/0x2f0
   ? virtblk_report_zones+0xda/0x430
   ? lock_is_held_type+0xcd/0x130
   __mutex_lock+0xa3/0x1360
   ? virtblk_report_zones+0xda/0x430
   ? virtblk_report_zones+0xda/0x430
   ? __pfx_copy_zone_info_cb+0x10/0x10
   ? virtblk_report_zones+0xda/0x430
   virtblk_report_zones+0xda/0x430
   ? __pfx_copy_zone_info_cb+0x10/0x10
   blkdev_report_zones_cached+0x162/0x190
   ? __pfx_copy_zone_info_cb+0x10/0x10
   btrfs_get_dev_zones+0xdc/0x2e0
   btrfs_get_dev_zone_info+0x219/0xe80
   btrfs_get_dev_zone_info_all_devices+0x62/0x90
   open_ctree+0x1200/0x23db
   btrfs_get_tree.cold+0x105/0x26c
   ? rcu_is_watching+0x18/0x50
   vfs_get_tree+0x28/0xb0
   __do_sys_fsconfig+0x324/0x680
   do_syscall_64+0x92/0x4f0
   entry_SYSCALL_64_after_hwframe+0x76/0x7e
  RIP: 0033:0x7f615e27a40e
  RSP: 002b:00007fff11b18fb8 EFLAGS: 00000246 ORIG_RAX: 00000000000001af
  RAX: ffffffffffffffda RBX: 000055572e92ab10 RCX: 00007f615e27a40e
  RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003
  RBP: 00007fff11b19100 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
  R13: 000055572e92bc40 R14: 00007f615e3faa60 R15: 000055572e92bd08
   </TASK>

Don't hold the device_list_mutex while calling into
btrfs_get_dev_zone_info() in btrfs_get_dev_zone_info_all_devices() to
mitigate the issue. This is safe, as no other thread can touch the device
list at the moment of execution.

Reported-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-13 12:47:51 +01:00
Johannes Thumshirn
4f6abe9c74 btrfs: pass 'verbose' parameter to btrfs_relocate_block_group
Function `btrfs_relocate_chunk()` always passes verbose=true to
`btrfs_relocate_block_group()` instead of the `verbose` parameter passed
into it by it's callers.

While user initiated rebalancing should be logged in the Kernel's log
buffer. This causes excessive log spamming from automatic rebalancing,
e.g. on zoned filesystems running low on usable space.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-13 12:47:35 +01:00
Hyunwoo Kim
daf8e3b253 xfrm: Fix work re-schedule after cancel in xfrm_nat_keepalive_net_fini()
After cancel_delayed_work_sync() is called from
xfrm_nat_keepalive_net_fini(), xfrm_state_fini() flushes remaining
states via __xfrm_state_delete(), which calls
xfrm_nat_keepalive_state_updated() to re-schedule nat_keepalive_work.

The following is a simple race scenario:

           cpu0                             cpu1

cleanup_net() [Round 1]
  ops_undo_list()
    xfrm_net_exit()
      xfrm_nat_keepalive_net_fini()
        cancel_delayed_work_sync(nat_keepalive_work);
      xfrm_state_fini()
        xfrm_state_flush()
          xfrm_state_delete(x)
            __xfrm_state_delete(x)
              xfrm_nat_keepalive_state_updated(x)
                schedule_delayed_work(nat_keepalive_work);
  rcu_barrier();
  net_complete_free();
  net_passive_dec(net);
    llist_add(&net->defer_free_list, &defer_free_list);

cleanup_net() [Round 2]
  rcu_barrier();
  net_complete_free()
    kmem_cache_free(net_cachep, net);
                                     nat_keepalive_work()
                                       // on freed net

To prevent this, cancel_delayed_work_sync() is replaced with
disable_delayed_work_sync().

Fixes: f531d13bdf ("xfrm: support sending NAT keepalives in ESP in UDP states")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-13 12:36:59 +01:00
Wang Jun
995a418a6c auxdisplay: lcd2s: add error handling for i2c transfers
The lcd2s_print() and lcd2s_gotoxy() functions currently ignore the
return value of lcd2s_i2c_master_send(), which can fail. This can lead
to silent data loss or incorrect cursor positioning.

Add proper error checking: if the number of bytes sent does not match
the expected length, return -EIO; otherwise propagate any error code
from the I2C transfer.

Signed-off-by: Wang Jun <1742789905@qq.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
2026-03-13 11:00:06 +01:00
Masami Hiramatsu (Google)
e2715ea5fb bootconfig: Add bootconfig tests about braces
Add more bootconfig tests for checking the error message of
non closing brace and max number of nested braces.

Link: https://lore.kernel.org/all/177337553551.416919.11217619471547711262.stgit@devnote2/

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-13 17:49:30 +09:00
Josh Law
1120a36bb1 lib/bootconfig: fix snprintf truncation check in xbc_node_compose_key_after()
snprintf() returns the number of characters that would have been
written excluding the NUL terminator.  Output is truncated when the
return value is >= the buffer size, not just > the buffer size.

When ret == size, the current code takes the non-truncated path,
advancing buf by ret and reducing size to 0.  This is wrong because
the output was actually truncated (the last character was replaced by
NUL).  Fix by using >= so the truncation path is taken correctly.

Link: https://lore.kernel.org/all/20260312191143.28719-4-objecting@objecting.org/

Fixes: 76db5a27a8 ("bootconfig: Add Extra Boot Config support")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Law <objecting@objecting.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-13 17:48:27 +09:00
Josh Law
560f763baa lib/bootconfig: check bounds before writing in __xbc_open_brace()
The bounds check for brace_index happens after the array write.
While the current call pattern prevents an actual out-of-bounds
access (the previous call would have returned an error), the
write-before-check pattern is fragile and would become a real
out-of-bounds write if the error return were ever not propagated.

Move the bounds check before the array write so the function is
self-contained and safe regardless of caller behavior.

Link: https://lore.kernel.org/all/20260312191143.28719-3-objecting@objecting.org/

Fixes: ead1e19ad9 ("lib/bootconfig: Fix a bug of breaking existing tree nodes")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Law <objecting@objecting.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-13 17:46:09 +09:00
Nicolas Pitre
5eb608319b vt: save/restore unicode screen buffer for alternate screen
The alternate screen support added by commit 23743ba647 ("vt: add
support for smput/rmput escape codes") only saves and restores the
regular screen buffer (vc_origin), but completely ignores the corresponding
unicode screen buffer (vc_uni_lines) creating a messed-up display.

Add vc_saved_uni_lines to save the unicode screen buffer when entering
the alternate screen, and restore it when leaving. Also ensure proper
cleanup in reset_terminal() and vc_deallocate().

Fixes: 23743ba647 ("vt: add support for smput/rmput escape codes")
Cc: stable <stable@kernel.org>
Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://patch.msgid.link/5o2p6qp3-91pq-0p17-or02-1oors4417ns7@onlyvoer.pbz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-13 09:15:58 +01:00
Steffen Klassert
904eaf90a5 Merge branch 'xfrm-fix-most-sparse-warnings'
Sabrina Dubroca says:

====================
xfrm: fix most sparse warnings

This series fixes most of the sparse warnings currently reported about
RCU pointers for files under net/xfrm. There's no actual bug in the
current code, we only need to use the correct helpers in each context.
====================

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-13 08:44:04 +01:00
Nilay Shroff
82f73ef9c4 powerpc/iommu: fix lockdep warning during PCI enumeration
Commit a75b2be249 ("iommu: Add iommu_driver_get_domain_for_dev()
helper") introduced iommu_driver_get_domain_for_dev() for driver
code paths that hold iommu_group->mutex while attaching a device
to an IOMMU domain.

The same commit also added a lockdep assertion in
iommu_get_domain_for_dev() to ensure that callers do not hold
iommu_group->mutex when invoking it.

On powerpc platforms, when PCI device ownership is switched from
BLOCKED to the PLATFORM domain, the attach callback
spapr_tce_platform_iommu_attach_dev() still calls
iommu_get_domain_for_dev(). This happens while iommu_group->mutex
is held during domain switching, which triggers the lockdep warning
below during PCI enumeration:

WARNING: drivers/iommu/iommu.c:2252 at iommu_get_domain_for_dev+0x38/0x80, CPU#2: swapper/0/1
Modules linked in:
CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Not tainted 7.0.0-rc2+ #35 PREEMPT
Hardware name: IBM,9105-22A Power11 (architected) 0x820200 0xf000007 of:IBM,FW1120.00 (RB1120_115) hv:phyp pSeries
NIP:  c000000000c244c4 LR: c00000000005b5a4 CTR: c00000000005b578
REGS: c00000000a7bf280 TRAP: 0700   Not tainted  (7.0.0-rc2+)
MSR:  8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 22004422  XER: 0000000a
CFAR: c000000000c24508 IRQMASK: 0
GPR00: c00000000005b5a4 c00000000a7bf520 c000000001dc8100 0000000000000001
GPR04: c00000000f972f10 0000000000000000 0000000000000000 0000000000000001
GPR08: 0000001ffbc60000 0000000000000001 0000000000000000 0000000000000000
GPR12: c00000000005b578 c000001fffffe480 c000000000011618 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: ffffffffffffefff 0000000000000000 c000000002d30eb0 0000000000000001
GPR24: c0000000017881f8 0000000000000000 0000000000000001 c00000000f972e00
GPR28: c00000000bbba0d0 0000000000000000 c00000000bbba0d0 c00000000f972e00
NIP [c000000000c244c4] iommu_get_domain_for_dev+0x38/0x80
LR [c00000000005b5a4] spapr_tce_platform_iommu_attach_dev+0x2c/0x98
Call Trace:
 iommu_get_domain_for_dev+0x68/0x80 (unreliable)
 spapr_tce_platform_iommu_attach_dev+0x2c/0x98
 __iommu_attach_device+0x44/0x220
 __iommu_device_set_domain+0xf4/0x194
 __iommu_group_set_domain_internal+0xec/0x228
 iommu_setup_default_domain+0x5f4/0x6a4
 __iommu_probe_device+0x674/0x724
 iommu_probe_device+0x50/0xb4
 iommu_add_device+0x48/0x198
 pci_dma_dev_setup_pSeriesLP+0x198/0x4f0
 pcibios_bus_add_device+0x80/0x464
 pci_bus_add_device+0x40/0x100
 pci_bus_add_devices+0x54/0xb0
 pcibios_init+0xd8/0x140
 do_one_initcall+0x8c/0x598
 kernel_init_freeable+0x3ec/0x850
 kernel_init+0x34/0x270
 ret_from_kernel_user_thread+0x14/0x1c

Fix this by using iommu_driver_get_domain_for_dev() instead of
iommu_get_domain_for_dev() in spapr_tce_platform_iommu_attach_dev(),
which is the appropriate helper for callers holding the group mutex.

Cc: stable@vger.kernel.org
Fixes: a75b2be249 ("iommu: Add iommu_driver_get_domain_for_dev() helper")
Closes: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/d5c834ff-4c95-44dd-8bef-57242d63aeee@linux.ibm.com/
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
[Maddy: Added Closes, tested and reviewed by tags]
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260310082129.3630996-1-nilay@linux.ibm.com
2026-03-13 12:13:35 +05:30
Sabrina Dubroca
99600f79b2 mpls: add missing unregister_netdevice_notifier to mpls_init
If mpls_init() fails after registering mpls_dev_notifier, it never
gets removed. Add the missing unregister_netdevice_notifier() call to
the error handling path.

Fixes: 5be2062e30 ("mpls: Handle error of rtnl_register_module().")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/7c55363c4f743d19e2306204a134407c90a69bbb.1773228081.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-12 19:25:59 -07:00
Eric Dumazet
8431c602f5 ip_tunnel: adapt iptunnel_xmit_stats() to NETDEV_PCPU_STAT_DSTATS
Blamed commits forgot that vxlan/geneve use udp_tunnel[6]_xmit_skb() which
call iptunnel_xmit_stats().

iptunnel_xmit_stats() was assuming tunnels were only using
NETDEV_PCPU_STAT_TSTATS.

@syncp offset in pcpu_sw_netstats and pcpu_dstats is different.

32bit kernels would either have corruptions or freezes if the syncp
sequence was overwritten.

This patch also moves pcpu_stat_type closer to dev->{t,d}stats to avoid
a potential cache line miss since iptunnel_xmit_stats() needs to read it.

Fixes: 6fa6de3022 ("geneve: Handle stats using NETDEV_PCPU_STAT_DSTATS.")
Fixes: be226352e8 ("vxlan: Handle stats using NETDEV_PCPU_STAT_DSTATS.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Link: https://patch.msgid.link/20260311123110.1471930-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-12 19:24:45 -07:00
Jiayuan Chen
e1f0a18c95 net/rose: fix NULL pointer dereference in rose_transmit_link on reconnect
syzkaller reported a bug [1], and the reproducer is available at [2].

ROSE sockets use four sk->sk_state values: TCP_CLOSE, TCP_LISTEN,
TCP_SYN_SENT, and TCP_ESTABLISHED. rose_connect() already rejects
calls for TCP_ESTABLISHED (-EISCONN) and TCP_CLOSE with SS_CONNECTING
(-ECONNREFUSED), but lacks a check for TCP_SYN_SENT.

When rose_connect() is called a second time while the first connection
attempt is still in progress (TCP_SYN_SENT), it overwrites
rose->neighbour via rose_get_neigh(). If that returns NULL, the socket
is left with rose->state == ROSE_STATE_1 but rose->neighbour == NULL.
When the socket is subsequently closed, rose_release() sees
ROSE_STATE_1 and calls rose_write_internal() ->
rose_transmit_link(skb, NULL), causing a NULL pointer dereference.

Per connect(2), a second connect() while a connection is already in
progress should return -EALREADY. Add this missing check for
TCP_SYN_SENT to complete the state validation in rose_connect().

[1] https://syzkaller.appspot.com/bug?extid=d00f90e0af54102fb271
[2] https://gist.github.com/mrpre/9e6779e0d13e2c66779b1653fef80516

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Reported-by: syzbot+d00f90e0af54102fb271@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69694d6f.050a0220.58bed.0027.GAE@google.com/T/
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260311070611.76913-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-12 19:23:59 -07:00
Hyunwoo Kim
3715a00855 bridge: cfm: Fix race condition in peer_mep deletion
When a peer MEP is being deleted, cancel_delayed_work_sync() is called
on ccm_rx_dwork before freeing. However, br_cfm_frame_rx() runs in
softirq context under rcu_read_lock (without RTNL) and can re-schedule
ccm_rx_dwork via ccm_rx_timer_start() between cancel_delayed_work_sync()
returning and kfree_rcu() being called.

The following is a simple race scenario:

           cpu0                                     cpu1

mep_delete_implementation()
  cancel_delayed_work_sync(ccm_rx_dwork);
                                           br_cfm_frame_rx()
                                             // peer_mep still in hlist
                                             if (peer_mep->ccm_defect)
                                               ccm_rx_timer_start()
                                                 queue_delayed_work(ccm_rx_dwork)
  hlist_del_rcu(&peer_mep->head);
  kfree_rcu(peer_mep, rcu);
                                           ccm_rx_work_expired()
                                             // on freed peer_mep

To prevent this, cancel_delayed_work_sync() is replaced with
disable_delayed_work_sync() in both peer MEP deletion paths, so
that subsequent queue_delayed_work() calls from br_cfm_frame_rx()
are silently rejected.

The cc_peer_disable() helper retains cancel_delayed_work_sync()
because it is also used for the CC enable/disable toggle path where
the work must remain re-schedulable.

Fixes: dc32cbb3db ("bridge: cfm: Kernel space implementation of CFM. CCM frame RX added.")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/abBgYT5K_FI9rD1a@v4bel
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-12 18:33:52 -07:00
Josh Law
39ebc8d7f5 lib/bootconfig: fix off-by-one in xbc_verify_tree() unclosed brace error
__xbc_open_brace() pushes entries with post-increment
(open_brace[brace_index++]), so brace_index always points one past
the last valid entry.  xbc_verify_tree() reads open_brace[brace_index]
to report which brace is unclosed, but this is one past the last
pushed entry and contains stale/zero data, causing the error message
to reference the wrong node.

Use open_brace[brace_index - 1] to correctly identify the unclosed
brace.  brace_index is known to be > 0 here since we are inside the
if (brace_index) guard.

Link: https://lore.kernel.org/all/20260312191143.28719-2-objecting@objecting.org/

Fixes: ead1e19ad9 ("lib/bootconfig: Fix a bug of breaking existing tree nodes")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Law <objecting@objecting.org>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-13 10:29:21 +09:00
Dave Airlie
b28913e897 Merge tag 'drm-rust-fixes-2026-03-12' of https://gitlab.freedesktop.org/drm/rust/kernel into drm-fixes
Core Changes:

- Fix safety issue in dma_read! and dma_write!.

Driver Changes (Nova Core):

- Fix UB in DmaGspMem pointer accessors.
- Fix stack overflow in GSP memory allocation.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alice Ryhl <aliceryhl@google.com>
Link: https://patch.msgid.link/abNBSol3CLRCqlkZ@google.com
2026-03-13 10:40:17 +10:00
Dave Airlie
dd0365021b Merge tag 'amd-drm-fixes-7.0-2026-03-12' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.0-2026-03-12:

amdgpu:
- SMU13 fix
- SMU14 fix
- Fixes for bringup hw testing
- Kerneldoc fix
- GC12 idle power fix for compute workloads
- DCCG fixes

amdkfd:
- Fix missing BO unreserve in an error path

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260312180351.3874990-1-alexander.deucher@amd.com
2026-03-13 10:34:41 +10:00
Dave Airlie
8c835a10c0 Merge tag 'drm-intel-fixes-2026-03-12' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-fixes
- Avoid hang when configuring VRR [icl] (Ville Syrjälä)
- Fix sg_table overflow with >4GB folios (Janusz Krzysztofik)
- Fix PSR Selective Update handling [psr] (Jouni Högander)
- Fix eDP ALPM read-out sequence [dp] (Arun R Murthy)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tursulin@igalia.com>
Link: https://patch.msgid.link/abJ_MQ7o-5ghyaNW@linux
2026-03-13 08:48:00 +10:00
Dave Airlie
3c9eced537 Merge tag 'drm-misc-fixes-2026-03-12' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
A pixel byte swap fix for st7586, a null pointer dereference fix for
gud, two timings fixes for ti-sn65dsi83, an initialization fix for ivpu,
and a runtime suspend deadlock fix for amdxdna.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patch.msgid.link/20260312-accurate-ambrosial-trout-bfabf8@houat
2026-03-13 08:32:23 +10:00
Jens Axboe
7d0abefec4 Merge tag 'nvme-7.0-2026-03-12' of git://git.infradead.org/nvme into block-7.0
Pull NVMe fixes from Keith:

"- Fix nvme-pci IRQ race and slab-out-of-bounds access (Sungwoo Kim)
 - Fix recursive workqueue locking for target async events (Chaitanya)
 - Various cleanups (Maurizio Lombardi, Thorsten Blum)"

* tag 'nvme-7.0-2026-03-12' of git://git.infradead.org/nvme:
  nvme: Annotate struct nvme_dhchap_key with __counted_by
  nvme-core: do not pass empty queue_limits to blk_mq_alloc_queue()
  nvme-pci: Fix race bug in nvme_poll_irqdisable()
  nvmet: move async event work off nvmet-wq
  nvme-pci: Fix slab-out-of-bounds in nvme_dbbuf_set
2026-03-12 15:15:53 -06:00
Kuniyuki Iwashima
e5b31d988a af_unix: Give up GC if MSG_PEEK intervened.
Igor Ushakov reported that GC purged the receive queue of
an alive socket due to a race with MSG_PEEK with a nice repro.

This is the exact same issue previously fixed by commit
cbcf01128d ("af_unix: fix garbage collect vs MSG_PEEK").

After GC was replaced with the current algorithm, the cited
commit removed the locking dance in unix_peek_fds() and
reintroduced the same issue.

The problem is that MSG_PEEK bumps a file refcount without
interacting with GC.

Consider an SCC containing sk-A and sk-B, where sk-A is
close()d but can be recv()ed via sk-B.

The bad thing happens if sk-A is recv()ed with MSG_PEEK from
sk-B and sk-B is close()d while GC is checking unix_vertex_dead()
for sk-A and sk-B.

  GC thread                    User thread
  ---------                    -----------
  unix_vertex_dead(sk-A)
  -> true   <------.
                    \
                     `------   recv(sk-B, MSG_PEEK)
              invalidate !!    -> sk-A's file refcount : 1 -> 2

                               close(sk-B)
                               -> sk-B's file refcount : 2 -> 1
  unix_vertex_dead(sk-B)
  -> true

Initially, sk-A's file refcount is 1 by the inflight fd in sk-B
recvq.  GC thinks sk-A is dead because the file refcount is the
same as the number of its inflight fds.

However, sk-A's file refcount is bumped silently by MSG_PEEK,
which invalidates the previous evaluation.

At this moment, sk-B's file refcount is 2; one by the open fd,
and one by the inflight fd in sk-A.  The subsequent close()
releases one refcount by the former.

Finally, GC incorrectly concludes that both sk-A and sk-B are dead.

One option is to restore the locking dance in unix_peek_fds(),
but we can resolve this more elegantly thanks to the new algorithm.

The point is that the issue does not occur without the subsequent
close() and we actually do not need to synchronise MSG_PEEK with
the dead SCC detection.

When the issue occurs, close() and GC touch the same file refcount.
If GC sees the refcount being decremented by close(), it can just
give up garbage-collecting the SCC.

Therefore, we only need to signal the race during MSG_PEEK with
a proper memory barrier to make it visible to the GC.

Let's use seqcount_t to notify GC when MSG_PEEK occurs and let
it defer the SCC to the next run.

This way no locking is needed on the MSG_PEEK side, and we can
avoid imposing a penalty on every MSG_PEEK unnecessarily.

Note that we can retry within unix_scc_dead() if MSG_PEEK is
detected, but we do not do so to avoid hung task splat from
abusive MSG_PEEK calls.

Fixes: 118f457da9 ("af_unix: Remove lock dance in unix_peek_fds().")
Reported-by: Igor Ushakov <sysroot314@gmail.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260311054043.1231316-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-12 13:37:18 -07:00
Linus Torvalds
0257f64bda Merge tag 'pm-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki:

 - Make the idle loop skip the cpuidle governor .reflect() callback
   after it has skipped the .select() one (Rafael Wysocki)

 - Fix swapped power/energy unit labels in cpupower (Kaushlendra Kumar)

 - Add support for setting EPP via systemd service and intel_pstate
   turbo boost support to cpupower (Jan Kiszka, Zhang Rui)

* tag 'pm-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  sched: idle: Make skipping governor callbacks more consistent
  cpupower: Add intel_pstate turbo boost support for Intel platforms
  cpupower: Add support for setting EPP via systemd service
  cpupower: fix swapped power/energy unit labels
2026-03-12 13:01:37 -07:00
Linus Torvalds
61c0b2ae73 Merge tag 'acpi-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI fixes from Rafael Wysocki:

 - On some platforms, the ACPI companion object of the ACPI video bus
   platform device is shared with multiple other platform devices which
   leads to driver probe issues, so replace that device with an
   auxiliary one (which arguably is a better match for the given use
   case) and update the ACPI video bus driver accordingly (Rafael
   Wysocki)

 - Address sparse warnings in acpi_os_initialize() by adding __iomem to
   a local variable declaration (Ben Dooks)

* tag 'acpi-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  ACPI: OSL: fix __iomem type on return from acpi_os_map_generic_address()
  ACPI: video: Switch over to auxiliary bus type
2026-03-12 12:43:19 -07:00
Linus Torvalds
8004279c41 Merge tag 'nfs-for-7.0-2' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client fixes from Anna Schumaker:

 - Fix NFS KConfig typos

 - Decrement re_receiving on the early exit paths

 - return EISDIR on nfs3_proc_create if d_alias is a dir

* tag 'nfs-for-7.0-2' of git://git.linux-nfs.org/projects/anna/linux-nfs:
  NFS: Fix NFS KConfig typos
  xprtrdma: Decrement re_receiving on the early exit paths
  nfs: return EISDIR on nfs3_proc_create if d_alias is a dir
2026-03-12 12:38:17 -07:00
Dmitry Baryshkov
99b2c531e0 Bluetooth: qca: fix ROM version reading on WCN3998 chips
WCN3998 uses a bit different format for rom version:

[    5.479978] Bluetooth: hci0: setting up wcn399x
[    5.633763] Bluetooth: hci0: QCA Product ID   :0x0000000a
[    5.645350] Bluetooth: hci0: QCA SOC Version  :0x40010224
[    5.650906] Bluetooth: hci0: QCA ROM Version  :0x00001001
[    5.665173] Bluetooth: hci0: QCA Patch Version:0x00006699
[    5.679356] Bluetooth: hci0: QCA controller version 0x02241001
[    5.691109] Bluetooth: hci0: QCA Downloading qca/crbtfw21.tlv
[    6.680102] Bluetooth: hci0: QCA Downloading qca/crnv21.bin
[    6.842948] Bluetooth: hci0: QCA setup on UART is completed

Fixes: 523760b7ff ("Bluetooth: hci_qca: Added support for WCN3998")
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:29:29 -04:00
Lukas Johannes Möller
dd815e6e39 Bluetooth: L2CAP: Validate L2CAP_INFO_RSP payload length before access
l2cap_information_rsp() checks that cmd_len covers the fixed
l2cap_info_rsp header (type + result, 4 bytes) but then reads
rsp->data without verifying that the payload is present:

 - L2CAP_IT_FEAT_MASK calls get_unaligned_le32(rsp->data), which reads
   4 bytes past the header (needs cmd_len >= 8).

 - L2CAP_IT_FIXED_CHAN reads rsp->data[0], 1 byte past the header
   (needs cmd_len >= 5).

A truncated L2CAP_INFO_RSP with result == L2CAP_IR_SUCCESS triggers an
out-of-bounds read of adjacent skb data.

Guard each data access with the required payload length check.  If the
payload is too short, skip the read and let the state machine complete
with safe defaults (feat_mask and remote_fixed_chan remain zero from
kzalloc), so the info timer cleanup and l2cap_conn_start() still run
and the connection is not stalled.

Fixes: 4e8402a3f8 ("[Bluetooth] Retrieve L2CAP features mask on connection setup")
Cc: stable@vger.kernel.org
Signed-off-by: Lukas Johannes Möller <research@johannes-moeller.dev>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:29:07 -04:00
Lukas Johannes Möller
1514567569 Bluetooth: L2CAP: Fix type confusion in l2cap_ecred_reconf_rsp()
l2cap_ecred_reconf_rsp() casts the incoming data to struct
l2cap_ecred_conn_rsp (the ECRED *connection* response, 8 bytes with
result at offset 6) instead of struct l2cap_ecred_reconf_rsp (2 bytes
with result at offset 0).

This causes two problems:

 - The sizeof(*rsp) length check requires 8 bytes instead of the
   correct 2, so valid L2CAP_ECRED_RECONF_RSP packets are rejected
   with -EPROTO.

 - rsp->result reads from offset 6 instead of offset 0, returning
   wrong data when the packet is large enough to pass the check.

Fix by using the correct type.  Also pass the already byte-swapped
result variable to BT_DBG instead of the raw __le16 field.

Fixes: 15f02b9105 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Cc: stable@vger.kernel.org
Signed-off-by: Lukas Johannes Möller <research@johannes-moeller.dev>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:28:46 -04:00
Luiz Augusto von Dentz
5b3e205233 Bluetooth: L2CAP: Fix accepting multiple L2CAP_ECRED_CONN_REQ
Currently the code attempts to accept requests regardless of the
command identifier which may cause multiple requests to be marked
as pending (FLAG_DEFER_SETUP) which can cause more than
L2CAP_ECRED_MAX_CID(5) to be allocated in l2cap_ecred_rsp_defer
causing an overflow.

The spec is quite clear that the same identifier shall not be used on
subsequent requests:

'Within each signaling channel a different Identifier shall be used
for each successive request or indication.'
https://www.bluetooth.com/wp-content/uploads/Files/Specification/HTML/Core-62/out/en/host/logical-link-control-and-adaptation-protocol-specification.html#UUID-32a25a06-4aa4-c6c7-77c5-dcfe3682355d

So this attempts to check if there are any channels pending with the
same identifier and rejects if any are found.

Fixes: 15f02b9105 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:28:24 -04:00
Shaurya Rane
752a6c9596 Bluetooth: L2CAP: Fix use-after-free in l2cap_unregister_user
After commit ab4eedb790 ("Bluetooth: L2CAP: Fix corrupted list in
hci_chan_del"), l2cap_conn_del() uses conn->lock to protect access to
conn->users. However, l2cap_register_user() and l2cap_unregister_user()
don't use conn->lock, creating a race condition where these functions can
access conn->users and conn->hchan concurrently with l2cap_conn_del().

This can lead to use-after-free and list corruption bugs, as reported
by syzbot.

Fix this by changing l2cap_register_user() and l2cap_unregister_user()
to use conn->lock instead of hci_dev_lock(), ensuring consistent locking
for the l2cap_conn structure.

Reported-by: syzbot+14b6d57fb728e27ce23c@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=14b6d57fb728e27ce23c
Fixes: ab4eedb790 ("Bluetooth: L2CAP: Fix corrupted list in hci_chan_del")
Signed-off-by: Shaurya Rane <ssrane_b23@ee.vjti.ac.in>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:28:03 -04:00
Luiz Augusto von Dentz
dbf666e4fc Bluetooth: HIDP: Fix possible UAF
This fixes the following trace caused by not dropping l2cap_conn
reference when user->remove callback is called:

[   97.809249] l2cap_conn_free: freeing conn ffff88810a171c00
[   97.809907] CPU: 1 UID: 0 PID: 1419 Comm: repro_standalon Not tainted 7.0.0-rc1-dirty #14 PREEMPT(lazy)
[   97.809935] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014
[   97.809947] Call Trace:
[   97.809954]  <TASK>
[   97.809961]  dump_stack_lvl (lib/dump_stack.c:122)
[   97.809990]  l2cap_conn_free (net/bluetooth/l2cap_core.c:1808)
[   97.810017]  l2cap_conn_del (./include/linux/kref.h:66 net/bluetooth/l2cap_core.c:1821 net/bluetooth/l2cap_core.c:1798)
[   97.810055]  l2cap_disconn_cfm (net/bluetooth/l2cap_core.c:7347 (discriminator 1) net/bluetooth/l2cap_core.c:7340 (discriminator 1))
[   97.810086]  ? __pfx_l2cap_disconn_cfm (net/bluetooth/l2cap_core.c:7341)
[   97.810117]  hci_conn_hash_flush (./include/net/bluetooth/hci_core.h:2152 (discriminator 2) net/bluetooth/hci_conn.c:2644 (discriminator 2))
[   97.810148]  hci_dev_close_sync (net/bluetooth/hci_sync.c:5360)
[   97.810180]  ? __pfx_hci_dev_close_sync (net/bluetooth/hci_sync.c:5285)
[   97.810212]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810242]  ? up_write (./arch/x86/include/asm/atomic64_64.h:87 (discriminator 5) ./include/linux/atomic/atomic-arch-fallback.h:2852 (discriminator 5) ./include/linux/atomic/atomic-long.h:268 (discriminator 5) ./include/linux/atomic/atomic-instrumented.h:3391 (discriminator 5) kernel/locking/rwsem.c:1385 (discriminator 5) kernel/locking/rwsem.c:1643 (discriminator 5))
[   97.810267]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810290]  ? rcu_is_watching (./arch/x86/include/asm/atomic.h:23 ./include/linux/atomic/atomic-arch-fallback.h:457 ./include/linux/context_tracking.h:128 kernel/rcu/tree.c:752)
[   97.810320]  hci_unregister_dev (net/bluetooth/hci_core.c:504 net/bluetooth/hci_core.c:2716)
[   97.810346]  vhci_release (drivers/bluetooth/hci_vhci.c:691)
[   97.810375]  ? __pfx_vhci_release (drivers/bluetooth/hci_vhci.c:678)
[   97.810404]  __fput (fs/file_table.c:470)
[   97.810430]  task_work_run (kernel/task_work.c:235)
[   97.810451]  ? __pfx_task_work_run (kernel/task_work.c:201)
[   97.810472]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810495]  ? do_raw_spin_unlock (./include/asm-generic/qspinlock.h:128 (discriminator 5) kernel/locking/spinlock_debug.c:142 (discriminator 5))
[   97.810527]  do_exit (kernel/exit.c:972)
[   97.810547]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810574]  ? __pfx_do_exit (kernel/exit.c:897)
[   97.810594]  ? lock_acquire (kernel/locking/lockdep.c:470 (discriminator 6) kernel/locking/lockdep.c:5870 (discriminator 6) kernel/locking/lockdep.c:5825 (discriminator 6))
[   97.810616]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810639]  ? do_raw_spin_lock (kernel/locking/spinlock_debug.c:95 (discriminator 4) kernel/locking/spinlock_debug.c:118 (discriminator 4))
[   97.810664]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810688]  ? find_held_lock (kernel/locking/lockdep.c:5350 (discriminator 1))
[   97.810721]  do_group_exit (kernel/exit.c:1093)
[   97.810745]  get_signal (kernel/signal.c:3007 (discriminator 1))
[   97.810772]  ? security_file_permission (./arch/x86/include/asm/jump_label.h:37 security/security.c:2366)
[   97.810803]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810826]  ? vfs_read (fs/read_write.c:555)
[   97.810854]  ? __pfx_get_signal (kernel/signal.c:2800)
[   97.810880]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810905]  ? __pfx_vfs_read (fs/read_write.c:555)
[   97.810932]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.810960]  arch_do_signal_or_restart (arch/x86/kernel/signal.c:337 (discriminator 1))
[   97.810990]  ? __pfx_arch_do_signal_or_restart (arch/x86/kernel/signal.c:334)
[   97.811021]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.811055]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.811078]  ? ksys_read (fs/read_write.c:707)
[   97.811106]  ? __pfx_ksys_read (fs/read_write.c:707)
[   97.811137]  exit_to_user_mode_loop (kernel/entry/common.c:66 kernel/entry/common.c:98)
[   97.811169]  ? rcu_is_watching (./arch/x86/include/asm/atomic.h:23 ./include/linux/atomic/atomic-arch-fallback.h:457 ./include/linux/context_tracking.h:128 kernel/rcu/tree.c:752)
[   97.811192]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.811215]  ? trace_hardirqs_off (./include/trace/events/preemptirq.h:36 (discriminator 33) kernel/trace/trace_preemptirq.c:95 (discriminator 33) kernel/trace/trace_preemptirq.c:90 (discriminator 33))
[   97.811240]  do_syscall_64 (./include/linux/irq-entry-common.h:226 ./include/linux/irq-entry-common.h:256 ./include/linux/entry-common.h:325 arch/x86/entry/syscall_64.c:100)
[   97.811268]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   97.811292]  ? exc_page_fault (arch/x86/mm/fault.c:1480 (discriminator 3) arch/x86/mm/fault.c:1527 (discriminator 3))
[   97.811318]  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[   97.811338] RIP: 0033:0x445cfe
[   97.811352] Code: Unable to access opcode bytes at 0x445cd4.

Code starting with the faulting instruction
===========================================
[   97.811360] RSP: 002b:00007f65c41c6dc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[   97.811378] RAX: fffffffffffffe00 RBX: 00007f65c41c76c0 RCX: 0000000000445cfe
[   97.811391] RDX: 0000000000000400 RSI: 00007f65c41c6e40 RDI: 0000000000000004
[   97.811403] RBP: 00007f65c41c7250 R08: 0000000000000000 R09: 0000000000000000
[   97.811415] R10: 0000000000000000 R11: 0000000000000246 R12: ffffffffffffffe8
[   97.811428] R13: 0000000000000000 R14: 00007fff780a8c00 R15: 00007f65c41c76c0
[   97.811453]  </TASK>
[   98.402453] ==================================================================
[   98.403560] BUG: KASAN: use-after-free in __mutex_lock (kernel/locking/mutex.c:199 kernel/locking/mutex.c:694 kernel/locking/mutex.c:776)
[   98.404541] Read of size 8 at addr ffff888113ee40a8 by task khidpd_00050004/1430
[   98.405361]
[   98.405563] CPU: 1 UID: 0 PID: 1430 Comm: khidpd_00050004 Not tainted 7.0.0-rc1-dirty #14 PREEMPT(lazy)
[   98.405588] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014
[   98.405600] Call Trace:
[   98.405607]  <TASK>
[   98.405614]  dump_stack_lvl (lib/dump_stack.c:122)
[   98.405641]  print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
[   98.405667]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.405691]  ? __virt_addr_valid (arch/x86/mm/physaddr.c:55)
[   98.405724]  ? __mutex_lock (kernel/locking/mutex.c:199 kernel/locking/mutex.c:694 kernel/locking/mutex.c:776)
[   98.405748]  kasan_report (mm/kasan/report.c:221 mm/kasan/report.c:597)
[   98.405778]  ? __mutex_lock (kernel/locking/mutex.c:199 kernel/locking/mutex.c:694 kernel/locking/mutex.c:776)
[   98.405807]  __mutex_lock (kernel/locking/mutex.c:199 kernel/locking/mutex.c:694 kernel/locking/mutex.c:776)
[   98.405832]  ? do_raw_spin_lock (kernel/locking/spinlock_debug.c:95 (discriminator 4) kernel/locking/spinlock_debug.c:118 (discriminator 4))
[   98.405859]  ? l2cap_unregister_user (./include/linux/list.h:381 (discriminator 2) net/bluetooth/l2cap_core.c:1723 (discriminator 2))
[   98.405888]  ? __pfx_do_raw_spin_lock (kernel/locking/spinlock_debug.c:114)
[   98.405915]  ? __pfx___mutex_lock (kernel/locking/mutex.c:775)
[   98.405939]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.405963]  ? lock_acquire (kernel/locking/lockdep.c:470 (discriminator 6) kernel/locking/lockdep.c:5870 (discriminator 6) kernel/locking/lockdep.c:5825 (discriminator 6))
[   98.405984]  ? find_held_lock (kernel/locking/lockdep.c:5350 (discriminator 1))
[   98.406015]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406038]  ? lock_release (kernel/locking/lockdep.c:5536 kernel/locking/lockdep.c:5889 kernel/locking/lockdep.c:5875)
[   98.406061]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406085]  ? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:119 ./arch/x86/include/asm/irqflags.h:159 ./include/linux/spinlock_api_smp.h:178 kernel/locking/spinlock.c:194)
[   98.406107]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406130]  ? __timer_delete_sync (kernel/time/timer.c:1592)
[   98.406158]  ? l2cap_unregister_user (./include/linux/list.h:381 (discriminator 2) net/bluetooth/l2cap_core.c:1723 (discriminator 2))
[   98.406186]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406210]  l2cap_unregister_user (./include/linux/list.h:381 (discriminator 2) net/bluetooth/l2cap_core.c:1723 (discriminator 2))
[   98.406263]  hidp_session_thread (./include/linux/instrumented.h:112 ./include/linux/atomic/atomic-instrumented.h:400 ./include/linux/refcount.h:389 ./include/linux/refcount.h:432 ./include/linux/refcount.h:450 ./include/linux/kref.h:64 net/bluetooth/hidp/core.c:996 net/bluetooth/hidp/core.c:1305)
[   98.406293]  ? __pfx_hidp_session_thread (net/bluetooth/hidp/core.c:1264)
[   98.406323]  ? kthread (kernel/kthread.c:433)
[   98.406340]  ? __pfx_hidp_session_wake_function (net/bluetooth/hidp/core.c:1251)
[   98.406370]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406393]  ? find_held_lock (kernel/locking/lockdep.c:5350 (discriminator 1))
[   98.406424]  ? __pfx_hidp_session_wake_function (net/bluetooth/hidp/core.c:1251)
[   98.406453]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406476]  ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:79 (discriminator 1))
[   98.406499]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406523]  ? kthread (kernel/kthread.c:433)
[   98.406539]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406565]  ? kthread (kernel/kthread.c:433)
[   98.406581]  ? __pfx_hidp_session_thread (net/bluetooth/hidp/core.c:1264)
[   98.406610]  kthread (kernel/kthread.c:467)
[   98.406627]  ? __pfx_kthread (kernel/kthread.c:412)
[   98.406645]  ret_from_fork (arch/x86/kernel/process.c:164)
[   98.406674]  ? __pfx_ret_from_fork (arch/x86/kernel/process.c:153)
[   98.406704]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.406728]  ? __pfx_kthread (kernel/kthread.c:412)
[   98.406747]  ret_from_fork_asm (arch/x86/entry/entry_64.S:258)
[   98.406774]  </TASK>
[   98.406780]
[   98.433693] The buggy address belongs to the physical page:
[   98.434405] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0xffff888113ee7c40 pfn:0x113ee4
[   98.435557] flags: 0x200000000000000(node=0|zone=2)
[   98.436198] raw: 0200000000000000 ffffea0004244308 ffff8881f6f3ebc0 0000000000000000
[   98.437195] raw: ffff888113ee7c40 0000000000000000 00000000ffffffff 0000000000000000
[   98.438115] page dumped because: kasan: bad access detected
[   98.438951]
[   98.439211] Memory state around the buggy address:
[   98.439871]  ffff888113ee3f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   98.440714]  ffff888113ee4000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   98.441580] >ffff888113ee4080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   98.442458]                                   ^
[   98.443011]  ffff888113ee4100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   98.443889]  ffff888113ee4180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   98.444768] ==================================================================
[   98.445719] Disabling lock debugging due to kernel taint
[   98.448074] l2cap_conn_free: freeing conn ffff88810c22b400
[   98.450012] CPU: 1 UID: 0 PID: 1430 Comm: khidpd_00050004 Tainted: G    B               7.0.0-rc1-dirty #14 PREEMPT(lazy)
[   98.450040] Tainted: [B]=BAD_PAGE
[   98.450047] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014
[   98.450059] Call Trace:
[   98.450065]  <TASK>
[   98.450071]  dump_stack_lvl (lib/dump_stack.c:122)
[   98.450099]  l2cap_conn_free (net/bluetooth/l2cap_core.c:1808)
[   98.450125]  l2cap_conn_put (net/bluetooth/l2cap_core.c:1822)
[   98.450154]  session_free (net/bluetooth/hidp/core.c:990)
[   98.450181]  hidp_session_thread (net/bluetooth/hidp/core.c:1307)
[   98.450213]  ? __pfx_hidp_session_thread (net/bluetooth/hidp/core.c:1264)
[   98.450271]  ? kthread (kernel/kthread.c:433)
[   98.450293]  ? __pfx_hidp_session_wake_function (net/bluetooth/hidp/core.c:1251)
[   98.450339]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.450368]  ? find_held_lock (kernel/locking/lockdep.c:5350 (discriminator 1))
[   98.450406]  ? __pfx_hidp_session_wake_function (net/bluetooth/hidp/core.c:1251)
[   98.450442]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.450471]  ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:79 (discriminator 1))
[   98.450499]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.450528]  ? kthread (kernel/kthread.c:433)
[   98.450547]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.450578]  ? kthread (kernel/kthread.c:433)
[   98.450598]  ? __pfx_hidp_session_thread (net/bluetooth/hidp/core.c:1264)
[   98.450637]  kthread (kernel/kthread.c:467)
[   98.450657]  ? __pfx_kthread (kernel/kthread.c:412)
[   98.450680]  ret_from_fork (arch/x86/kernel/process.c:164)
[   98.450715]  ? __pfx_ret_from_fork (arch/x86/kernel/process.c:153)
[   98.450752]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[   98.450782]  ? __pfx_kthread (kernel/kthread.c:412)
[   98.450804]  ret_from_fork_asm (arch/x86/entry/entry_64.S:258)
[   98.450836]  </TASK>

Fixes: b4f34d8d9d ("Bluetooth: hidp: add new session-management helpers")
Reported-by: soufiane el hachmi <kilwa10@gmail.com>
Tested-by: soufiane el hachmi <kilwa10@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:27:46 -04:00
Wang Tao
17f89341cb Bluetooth: MGMT: Fix list corruption and UAF in command complete handlers
Commit 302a1f674c ("Bluetooth: MGMT: Fix possible UAFs") introduced
mgmt_pending_valid(), which not only validates the pending command but
also unlinks it from the pending list if it is valid. This change in
semantics requires updates to several completion handlers to avoid list
corruption and memory safety issues.

This patch addresses two left-over issues from the aforementioned rework:

1. In mgmt_add_adv_patterns_monitor_complete(), mgmt_pending_remove()
is replaced with mgmt_pending_free() in the success path. Since
mgmt_pending_valid() already unlinks the command at the beginning of
the function, calling mgmt_pending_remove() leads to a double list_del()
and subsequent list corruption/kernel panic.

2. In set_mesh_complete(), the use of mgmt_pending_foreach() in the error
path is removed. Since the current command is already unlinked by
mgmt_pending_valid(), this foreach loop would incorrectly target other
pending mesh commands, potentially freeing them while they are still being
processed concurrently (leading to UAFs). The redundant mgmt_cmd_status()
is also simplified to use cmd->opcode directly.

Fixes: 302a1f674c ("Bluetooth: MGMT: Fix possible UAFs")
Signed-off-by: Wang Tao <wangtao554@huawei.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:27:25 -04:00
Michael Grzeschik
2cabe7ff10 Bluetooth: hci_sync: Fix hci_le_create_conn_sync
While introducing hci_le_create_conn_sync the functionality
of hci_connect_le was ported to hci_le_create_conn_sync including
the disable of the scan before starting the connection.

When this code was run non synchronously the immediate call that was
setting the flag HCI_LE_SCAN_INTERRUPTED had an impact. Since the
completion handler for the LE_SCAN_DISABLE was not immediately called.
In the completion handler of the LE_SCAN_DISABLE event, this flag is
checked to set the state of the hdev to DISCOVERY_STOPPED.

With the synchronised approach the later setting of the
HCI_LE_SCAN_INTERRUPTED flag has not the same effect. The completion
handler would immediately fire in the LE_SCAN_DISABLE call, check for
the flag, which is then not yet set and do nothing.

To fix this issue and make the function call work as before, we move the
setting of the flag HCI_LE_SCAN_INTERRUPTED before disabling the scan.

Fixes: 8e8b92ee60 ("Bluetooth: hci_sync: Add hci_le_create_conn_sync")
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:27:05 -04:00
Luiz Augusto von Dentz
62bcaa6b35 Bluetooth: ISO: Fix defer tests being unstable
iso-tester defer tests seem to fail with hci_conn_hash_lookup_cig
being unable to resolve a cig in set_cig_params_sync due a race
where it is run immediatelly before hci_bind_cis is able to set
the QoS settings into the hci_conn object.

So this moves the assigning of the QoS settings to be done directly
by hci_le_set_cig_params to prevent that from happening again.

Fixes: 26afbd826e ("Bluetooth: Add initial implementation of CIS connections")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:26:48 -04:00
Christian Eggers
0e4d4dcc1a Bluetooth: SMP: make SM/PER/KDU/BI-04-C happy
The last test step ("Test with Invalid public key X and Y, all set to
0") expects to get an "DHKEY check failed" instead of "unspecified".

Fixes: 6d19628f53 ("Bluetooth: SMP: Fail if remote and local public keys are identical")
Signed-off-by: Christian Eggers <ceggers@arri.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:26:30 -04:00
Christian Eggers
b6a2bf43aa Bluetooth: LE L2CAP: Disconnect if sum of payload sizes exceed SDU
Core 6.0, Vol 3, Part A, 3.4.3:
"... If the sum of the payload sizes for the K-frames exceeds the
specified SDU length, the receiver shall disconnect the channel."

This fixes L2CAP/LE/CFC/BV-27-C (running together with 'l2test -r -P
0x0027 -V le_public').

Fixes: aac23bf636 ("Bluetooth: Implement LE L2CAP reassembly")
Signed-off-by: Christian Eggers <ceggers@arri.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:26:10 -04:00
Christian Eggers
e1d9a66889 Bluetooth: LE L2CAP: Disconnect if received packet's SDU exceeds IMTU
Core 6.0, Vol 3, Part A, 3.4.3:
"If the SDU length field value exceeds the receiver's MTU, the receiver
shall disconnect the channel..."

This fixes L2CAP/LE/CFC/BV-26-C (running together with 'l2test -r -P
0x0027 -V le_public -I 100').

Fixes: aac23bf636 ("Bluetooth: Implement LE L2CAP reassembly")
Signed-off-by: Christian Eggers <ceggers@arri.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-03-12 15:22:52 -04:00
Linus Torvalds
e0b38d286e Merge tag 'for-7.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - detect possible file name hash collision earlier so it does not lead
   to transaction abort

 - handle b-tree leaf overflows when snapshotting a subvolume with set
   received UUID, leading to transaction abort

 - in zoned mode, reorder relocation block group initialization after
   the transaction kthread start

 - fix orphan cleanup state tracking of subvolume, this could lead to
   invalid dentries under some conditions

 - add locking around updates of dynamic reclain state update

 - in subpage mode, add missing RCU unlock when trying to releae extent
   buffer

 - remap tree fixes:
     - add missing description strings for the newly added remap tree
     - properly update search key when iterating backrefs

* tag 'for-7.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: remove duplicated definition of btrfs_printk_in_rcu()
  btrfs: remove unnecessary transaction abort in the received subvol ioctl
  btrfs: abort transaction on failure to update root in the received subvol ioctl
  btrfs: fix transaction abort on set received ioctl due to item overflow
  btrfs: fix transaction abort when snapshotting received subvolumes
  btrfs: fix transaction abort on file creation due to name hash collision
  btrfs: read key again after incrementing slot in move_existing_remaps()
  btrfs: add missing RCU unlock in error path in try_release_subpage_extent_buffer()
  btrfs: set BTRFS_ROOT_ORPHAN_CLEANUP during subvol create
  btrfs: zoned: move btrfs_zoned_reserve_data_reloc_bg() after kthread start
  btrfs: hold space_info->lock when clearing periodic reclaim ready
  btrfs: print-tree: add remap tree definitions
2026-03-12 12:15:27 -07:00
Linus Torvalds
2c7e63d702 Merge tag 'net-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
 "Including fixes from CAN and netfilter.

  Current release - regressions:

   - eth: mana: Null service_wq on setup error to prevent double destroy

  Previous releases - regressions:

   - nexthop: fix percpu use-after-free in remove_nh_grp_entry

   - sched: teql: fix NULL pointer dereference in iptunnel_xmit on TEQL slave xmit

   - bpf: fix nd_tbl NULL dereference when IPv6 is disabled

   - neighbour: restore protocol != 0 check in pneigh update

   - tipc: fix divide-by-zero in tipc_sk_filter_connect()

   - eth:
      - mlx5:
         - fix crash when moving to switchdev mode
         - fix DMA FIFO desync on error CQE SQ recovery
      - iavf: fix PTP use-after-free during reset
      - bonding: fix type confusion in bond_setup_by_slave()
      - lan78xx: fix WARN in __netif_napi_del_locked on disconnect

  Previous releases - always broken:

   - core: add xmit recursion limit to tunnel xmit functions

   - net-shapers: don't free reply skb after genlmsg_reply()

   - netfilter:
      - fix stack out-of-bounds read in pipapo_drop()
      - fix OOB read in nfnl_cthelper_dump_table()

   - mctp:
      - fix device leak on probe failure
      - i2c: fix skb memory leak in receive path

   - can: keep the max bitrate error at 5%

   - eth:
      - bonding: fix nd_tbl NULL dereference when IPv6 is disabled
      - bnxt_en: fix RSS table size check when changing ethtool channels
      - amd-xgbe: prevent CRC errors during RX adaptation with AN disabled
      - octeontx2-af: devlink: fix NIX RAS reporter recovery condition"

* tag 'net-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (71 commits)
  net: prevent NULL deref in ip[6]tunnel_xmit()
  octeontx2-af: devlink: fix NIX RAS reporter to use RAS interrupt status
  octeontx2-af: devlink: fix NIX RAS reporter recovery condition
  net: ethernet: ti: am65-cpsw-nuss: Fix rx_filter value for PTP support
  net/mana: Null service_wq on setup error to prevent double destroy
  selftests: rtnetlink: add neighbour update test
  neighbour: restore protocol != 0 check in pneigh update
  net: dsa: realtek: Fix LED group port bit for non-zero LED group
  tipc: fix divide-by-zero in tipc_sk_filter_connect()
  net: dsa: microchip: Fix error path in PTP IRQ setup
  bpf: bpf_out_neigh_v6: Fix nd_tbl NULL dereference when IPv6 is disabled
  bpf: bpf_out_neigh_v4: Fix nd_tbl NULL dereference when IPv6 is disabled
  net: bonding: Fix nd_tbl NULL dereference when IPv6 is disabled
  ipv6: move the disable_ipv6_mod knob to core code
  net: bcmgenet: fix broken EEE by converting to phylib-managed state
  net-shapers: don't free reply skb after genlmsg_reply()
  net: dsa: mxl862xx: don't set user_mii_bus
  net: ethernet: arc: emac: quiesce interrupts before requesting IRQ
  page_pool: store detach_time as ktime_t to avoid false-negatives
  net: macb: Shuffle the tx ring before enabling tx
  ...
2026-03-12 11:33:35 -07:00
Rafael J. Wysocki
a076cc7474 Merge branch 'pm-tools'
Merge cpupower utility updates, including a fix and improvements of the
existing functionality, for 7.0-rc4.

* pm-tools:
  cpupower: Add intel_pstate turbo boost support for Intel platforms
  cpupower: Add support for setting EPP via systemd service
  cpupower: fix swapped power/energy unit labels
2026-03-12 19:00:30 +01:00
Linus Torvalds
281f36d4a9 Merge tag 'apparmor-pr-mainline-2026-03-09' of git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor
Pull AppArmor fixes from John Johansen:
 - fix race between freeing data and fs accessing it
 - fix race on unreferenced rawdata dereference
 - fix differential encoding verification
 - fix unconfined unprivileged local user can do privileged policy management
 - Fix double free of ns_name in aa_replace_profiles()
 - fix missing bounds check on DEFAULT table in verify_dfa()
 - fix side-effect bug in match_char() macro usage
 - fix: limit the number of levels of policy namespaces
 - replace recursive profile removal with iterative approach
 - fix memory leak in verify_header
 - validate DFA start states are in bounds in unpack_pdb

* tag 'apparmor-pr-mainline-2026-03-09' of git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor:
  apparmor: fix race between freeing data and fs accessing it
  apparmor: fix race on rawdata dereference
  apparmor: fix differential encoding verification
  apparmor: fix unprivileged local user can do privileged policy management
  apparmor: Fix double free of ns_name in aa_replace_profiles()
  apparmor: fix missing bounds check on DEFAULT table in verify_dfa()
  apparmor: fix side-effect bug in match_char() macro usage
  apparmor: fix: limit the number of levels of policy namespaces
  apparmor: replace recursive profile removal with iterative approach
  apparmor: fix memory leak in verify_header
  apparmor: validate DFA start states are in bounds in unpack_pdb
2026-03-12 10:58:02 -07:00
Rafael J. Wysocki
97d9960f35 Merge branch 'acpi-osl'
Merge an ACPI OS services layer (OSL) fix that addresses sparse warnings
in acpi_os_initialize() (Ben Dooks)

* acpi-osl:
  ACPI: OSL: fix __iomem type on return from acpi_os_map_generic_address()
2026-03-12 18:42:41 +01:00
Sean Christopherson
d2ea4ff1ce KVM: selftests: Verify SEV+ guests can read and write EFER, CR0, CR4, and CR8
Add "do no harm" testing of EFER, CR0, CR4, and CR8 for SEV+ guests to
verify that the guest can read and write the registers, without hitting
e.g. a #VC on SEV-ES guests due to KVM incorrectly trying to intercept a
register.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20260310211841.2552361-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-12 17:31:53 +01:00
Eric Dumazet
c38b8f5f79 net: prevent NULL deref in ip[6]tunnel_xmit()
Blamed commit missed that both functions can be called with dev == NULL.

Also add unlikely() hints for these conditions that only fuzzers can hit.

Fixes: 6f1a9140ec ("net: add xmit recursion limit to tunnel xmit functions")
Signed-off-by: Eric Dumazet <edumazet@google.com>
CC: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260312043908.2790803-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-12 16:03:41 +01:00
Jens Axboe
c2c185be5c io_uring/kbuf: check if target buffer list is still legacy on recycle
There's a gap between when the buffer was grabbed and when it
potentially gets recycled, where if the list is empty, someone could've
upgraded it to a ring provided type. This can happen if the request
is forced via io-wq. The legacy recycling is missing checking if the
buffer_list still exists, and if it's of the correct type. Add those
checks.

Cc: stable@vger.kernel.org
Fixes: c7fb19428d ("io_uring: add support for ring mapped supplied buffers")
Reported-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-12 08:59:25 -06:00
Ilpo Järvinen
a7b9ce39fb serial: 8250_dw: Ensure BUSY is deasserted
DW UART cannot write to LCR, DLL, and DLH while BUSY is asserted.
Existance of BUSY depends on uart_16550_compatible, if UART HW is
configured with it those registers can always be written.

There currently is dw8250_force_idle() which attempts to achieve
non-BUSY state by disabling FIFO, however, the solution is unreliable
when Rx keeps getting more and more characters.

Create a sequence of operations that ensures UART cannot keep BUSY
asserted indefinitely. The new sequence relies on enabling loopback mode
temporarily to prevent incoming Rx characters keeping UART BUSY.

Ensure no Tx in ongoing while the UART is switches into the loopback
mode (requires exporting serial8250_fifo_wait_for_lsr_thre() and adding
DMA Tx pause/resume functions).

According to tests performed by Adriana Nicolae <adriana@arista.com>,
simply disabling FIFO or clearing FIFOs only once does not always
ensure BUSY is deasserted but up to two tries may be needed. This could
be related to ongoing Rx of a character (a guess, not known for sure).
Therefore, retry FIFO clearing a few times (retry limit 4 is arbitrary
number but using, e.g., p->fifosize seems overly large). Tests
performed by others did not exhibit similar challenge but it does not
seem harmful to leave the FIFO clearing loop in place for all DW UARTs
with BUSY functionality.

Use the new dw8250_idle_enter/exit() to do divisor writes and LCR
writes. In case of plain LCR writes, opportunistically try to update
LCR first and only invoke dw8250_idle_enter() if the write did not
succeed (it has been observed that in practice most LCR writes do
succeed without complications).

This issue was first reported by qianfan Zhao who put lots of debugging
effort into understanding the solution space.

Fixes: c49436b657 ("serial: 8250_dw: Improve unwritable LCR workaround")
Fixes: 7d4008ebb1 ("tty: add a DesignWare 8250 driver")
Cc: stable <stable@kernel.org>
Reported-by: qianfan Zhao <qianfanguijin@163.com>
Link: https://lore.kernel.org/linux-serial/289bb78a-7509-1c5c-2923-a04ed3b6487d@163.com/
Reported-by: Adriana Nicolae <adriana@arista.com>
Link: https://lore.kernel.org/linux-serial/20250819182322.3451959-1-adriana@arista.com/
Reported-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-8-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:34:29 +01:00
Ilpo Järvinen
e0a368ae79 serial: 8250: Add late synchronize_irq() to shutdown to handle DW UART BUSY
When DW UART is !uart_16550_compatible, it can indicate BUSY at any
point (when under constant Rx pressure) unless a complex sequence of
steps is performed. Any LCR write can run a foul with the condition
that prevents writing LCR while the UART is BUSY, which triggers
BUSY_DETECT interrupt that seems unmaskable using IER bits.

Normal flow is that dw8250_handle_irq() handles BUSY_DETECT condition
by reading USR register. This BUSY feature, however, breaks the
assumptions made in serial8250_do_shutdown(), which runs
synchronize_irq() after clearing IER and assumes no interrupts can
occur after that point but then proceeds to update LCR, which on DW
UART can trigger an interrupt.

If serial8250_do_shutdown() releases the interrupt handler before the
handler has run and processed the BUSY_DETECT condition by read the USR
register, the IRQ is not deasserted resulting in interrupt storm that
triggers "irq x: nobody cared" warning leading to disabling the IRQ.

Add late synchronize_irq() into serial8250_do_shutdown() to ensure
BUSY_DETECT from DW UART is handled before port's interrupt handler is
released. Alternative would be to add DW UART specific shutdown
function but it would mostly duplicate the generic code and the extra
synchronize_irq() seems pretty harmless in serial8250_do_shutdown().

Fixes: 7d4008ebb1 ("tty: add a DesignWare 8250 driver")
Cc: stable <stable@kernel.org>
Reported-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-7-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:34:25 +01:00
Ilpo Järvinen
73a4ed8f9e serial: 8250_dw: Rework IIR_NO_INT handling to stop interrupt storm
INTC10EE UART can end up into an interrupt storm where it reports
IIR_NO_INT (0x1). If the storm happens during active UART operation, it
is promptly stopped by IIR value change due to Rx or Tx events.
However, when there is no activity, either due to idle serial line or
due to specific circumstances such as during shutdown that writes
IER=0, there is nothing to stop the storm.

During shutdown the storm is particularly problematic because
serial8250_do_shutdown() calls synchronize_irq() that will hang in
waiting for the storm to finish which never happens.

This problem can also result in triggering a warning:

  irq 45: nobody cared (try booting with the "irqpoll" option)
  [...snip...]
  handlers:
    serial8250_interrupt
  Disabling IRQ #45

Normal means to reset interrupt status by reading LSR, MSR, USR, or RX
register do not result in the UART deasserting the IRQ.

Add a quirk to INTC10EE UARTs to enable Tx interrupts if UART's Tx is
currently empty and inactive. Rework IIR_NO_INT to keep track of the
number of consecutive IIR_NO_INT, and on fourth one perform the quirk.
Enabling Tx interrupts should change IIR value from IIR_NO_INT to
IIR_THRI which has been observed to stop the storm.

Fixes: e92fad0249 ("serial: 8250_dw: Add ACPI ID for Granite Rapids-D UART")
Cc: stable <stable@kernel.org>
Reported-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-6-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:34:22 +01:00
Ilpo Järvinen
883c5a2bc9 serial: 8250_dw: Rework dw8250_handle_irq() locking and IIR handling
dw8250_handle_irq() takes port's lock multiple times with no good
reason to release it in between and calls serial8250_handle_irq()
that also takes port's lock.

Take port's lock only once in dw8250_handle_irq() and use
serial8250_handle_irq_locked() to avoid releasing port's lock in
between.

As IIR_NO_INT check in serial8250_handle_irq() was outside of port's
lock, it has to be done already in dw8250_handle_irq().

DW UART can, in addition to IIR_NO_INT, report BUSY_DETECT (0x7) which
collided with the IIR_NO_INT (0x1) check in serial8250_handle_irq()
(because & is used instead of ==) meaning that no other work is done by
serial8250_handle_irq() during an BUSY_DETECT interrupt.

This allows reorganizing code in dw8250_handle_irq() to do both
IIR_NO_INT and BUSY_DETECT handling right at the start simplifying
the logic.

Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Cc: stable <stable@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-5-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:34:18 +01:00
Ilpo Järvinen
8324a54f60 serial: 8250: Add serial8250_handle_irq_locked()
8250_port exports serial8250_handle_irq() to HW specific 8250 drivers.
It takes port's lock within but a HW specific 8250 driver may want to
take port's lock itself, do something, and then call the generic
handler in 8250_port but to do that, the caller has to release port's
lock for no good reason.

Introduce serial8250_handle_irq_locked() which a HW specific driver can
call while already holding port's lock.

As this is new export, put it straight into a namespace (where all 8250
exports should eventually be moved).

Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Cc: stable <stable@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-4-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:34:15 +01:00
Ilpo Järvinen
8002d6d6d0 serial: 8250_dw: Avoid unnecessary LCR writes
When DW UART is configured with BUSY flag, LCR writes may not always
succeed which can make any LCR write complex and very expensive.
Performing write directly can trigger IRQ and the driver has to perform
complex and distruptive sequence while retrying the write.

Therefore, it's better to avoid doing LCR write that would not change
the value of the LCR register. Add LCR write avoidance code into the
8250_dw driver's .serial_out() functions.

Reported-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Cc: stable <stable@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-3-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:34:12 +01:00
Ilpo Järvinen
59a33d83bb serial: 8250: Protect LCR write in shutdown
The 8250_dw driver needs to potentially perform very complex operations
during LCR writes because its BUSY handling prevents updates to LCR
while UART is BUSY (which is not fully under our control without those
complex operations). Thus, LCR writes should occur under port's lock.

Move LCR write under port's lock in serial8250_do_shutdown(). Also
split the LCR RMW so that the logic is on a separate line for clarity.

Reported-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Cc: stable <stable@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-2-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:34:07 +01:00
Martin Roukala (né Peres)
9c0072bc33 serial: 8250_pci: add support for the AX99100
This is found in popular brands such as StarTech.com or Delock, and has
been a source of frustration to quite a few people, if I can trust
Amazon comments complaining about Linux support via the official
out-of-the-tree driver.

Signed-off-by: Martin Roukala (né Peres) <martin.roukala@mupuf.org>
Cc: stable <stable@kernel.org>
Link: https://patch.msgid.link/20260309-8250_pci_ax99100-v1-1-3328bdfd8e94@mupuf.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:31:51 +01:00
Jiayuan Chen
455ce986fa serial: core: fix infinite loop in handle_tx() for PORT_UNKNOWN
uart_write_room() and uart_write() behave inconsistently when
xmit_buf is NULL (which happens for PORT_UNKNOWN ports that were
never properly initialized):

- uart_write_room() returns kfifo_avail() which can be > 0
- uart_write() checks xmit_buf and returns 0 if NULL

This inconsistency causes an infinite loop in drivers that rely on
tty_write_room() to determine if they can write:

  while (tty_write_room(tty) > 0) {
      written = tty->ops->write(...);
      // written is always 0, loop never exits
  }

For example, caif_serial's handle_tx() enters an infinite loop when
used with PORT_UNKNOWN serial ports, causing system hangs.

Fix by making uart_write_room() also check xmit_buf and return 0 if
it's NULL, consistent with uart_write().

Reproducer: https://gist.github.com/mrpre/d9a694cc0e19828ee3bc3b37983fde13

Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Cc: stable <stable@kernel.org>
Link: https://patch.msgid.link/20260204074327.226165-1-jiayuan.chen@linux.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:31:41 +01:00
Maciej Andrzejewski ICEYE
d54801cd50 serial: uartlite: fix PM runtime usage count underflow on probe
ulite_probe() calls pm_runtime_put_autosuspend() at the end of probe
without holding a corresponding PM runtime reference for non-console
ports.

During ulite_assign(), uart_add_one_port() triggers uart_configure_port()
which calls ulite_pm() via uart_change_pm(). For non-console ports, the
UART core performs a balanced get/put cycle:

  uart_change_pm(ON)  -> ulite_pm() -> pm_runtime_get_sync()        +1
  uart_change_pm(OFF) -> ulite_pm() -> pm_runtime_put_autosuspend() -1

This leaves no spare reference for the pm_runtime_put_autosuspend() at
the end of probe. The PM runtime core prevents the count from actually
going below zero, and instead triggers a
"Runtime PM usage count underflow!" warning.

For console ports the bug is masked: the UART core skips the
uart_change_pm(OFF) call, so the UART core's unbalanced get happens to
pair with probe's trailing put.

Add pm_runtime_get_noresume() before pm_runtime_enable() to take an
explicit probe-owned reference that the trailing
pm_runtime_put_autosuspend() can release. This ensures a correct usage
count regardless of whether the port is a console.

Fixes: 5bbe10a694 ("tty: serial: uartlite: Add runtime pm support")
Cc: stable <stable@kernel.org>
Signed-off-by: Maciej Andrzejewski ICEYE <maciej.andrzejewski@m-works.net>
Link: https://patch.msgid.link/20260305123746.4152800-1-maciej.andrzejewski@m-works.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:30:51 +01:00
Peng Zhang
24b98e8664 serial: 8250: always disable IRQ during THRE test
commit 039d492637 ("serial: 8250: Toggle IER bits on only after irq
has been set up") moved IRQ setup before the THRE test, in combination
with commit 205d300aea ("serial: 8250: change lock order in
serial8250_do_startup()") the interrupt handler can run during the
test and race with its IIR reads. This can produce wrong THRE test
results and cause spurious registration of the
serial8250_backup_timeout timer. Unconditionally disable the IRQ for
the short duration of the test and re-enable it afterwards to avoid
the race.

Fixes: 039d492637 ("serial: 8250: Toggle IER bits on only after irq has been set up")
Depends-on: 205d300aea ("serial: 8250: change lock order in serial8250_do_startup()")
Cc: stable <stable@kernel.org>
Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Alban Bedel <alban.bedel@lht.dlh.de>
Tested-by: Maximilian Lueer <maximilian.lueer@lht.dlh.de>
Link: https://patch.msgid.link/20260224121639.579404-1-alban.bedel@lht.dlh.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:30:30 +01:00
Raul E Rangel
a424a34b8f serial: 8250: Fix TX deadlock when using DMA
`dmaengine_terminate_async` does not guarantee that the
`__dma_tx_complete` callback will run. The callback is currently the
only place where `dma->tx_running` gets cleared. If the transaction is
canceled and the callback never runs, then `dma->tx_running` will never
get cleared and we will never schedule new TX DMA transactions again.

This change makes it so we clear `dma->tx_running` after we terminate
the DMA transaction. This is "safe" because `serial8250_tx_dma_flush`
is holding the UART port lock. The first thing the callback does is also
grab the UART port lock, so access to `dma->tx_running` is serialized.

Fixes: 9e512eaaf8 ("serial: 8250: Fix fifo underflow on flush")
Cc: stable <stable@kernel.org>
Signed-off-by: Raul E Rangel <rrangel@google.com>
Link: https://patch.msgid.link/20260209135815.1.I16366ecb0f62f3c96fe3dd5763fcf6f3c2b4d8cd@changeid
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-12 15:29:59 +01:00
Miguel Ojeda
592c61f3bf rust: kbuild: allow unused_features
Starting with the upcoming Rust 1.96.0 (to be released 2026-05-28),
`rustc` introduces the new lint `unused_features` [1], which warns [2]:

    warning: feature `used_with_arg` is declared but not used
     --> <crate attribute>:1:93
      |
    1 | #![feature(asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,used_with_arg)]
      |                                                                                             ^^^^^^^^^^^^^
      |
      = note: `#[warn(unused_features)]` (part of `#[warn(unused)]`) on by default

The original goal of using `-Zcrate-attr` automatically was that there
is a consistent set of features enabled and managed globally for all
Rust kernel code (modulo exceptions like the `rust/` crated).

While we could require crates to enable features manually (even if we
still keep the `-Zallow-features=` list, i.e. removing the `-Zcrate-attr`
list), it is not really worth making all developers worry about it just
for a new lint.

The features are expected to eventually become stable anyway (most already
did), and thus having to remove features in every file that may use them
is not worth it either.

Thus just allow the new lint globally.

The lint actually existed for a long time, which is why `rustc` does
not complain about an unknown lint in the stable versions we support,
but it was "disabled" years ago [3], and now it was made to work again.

For extra context, the new implementation of the lint has already been
improved to avoid linting about features that became stable thanks to
Benno's report and the ensuing discussion [4] [5], but while that helps,
it is still the case that we may have features enabled that are not used
for one reason or another in a particular crate.

Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs).
Link: https://github.com/rust-lang/rust/pull/152164 [1]
Link: https://github.com/Rust-for-Linux/pin-init/pull/114 [2]
Link: https://github.com/rust-lang/rust/issues/44232 [3]
Link: https://github.com/rust-lang/rust/issues/153523 [4]
Link: https://github.com/rust-lang/rust/pull/153610 [5]
Reviewed-by: Benno Lossin <lossin@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Link: https://patch.msgid.link/20260312111014.74198-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-12 15:15:16 +01:00
Adam Ford
b22c526569 pmdomain: mediatek: Fix power domain count
The wrong value of the number of domains is wrong which leads to
failures when trying to enumerate nested power domains.

 PM: genpd_xlate_onecell: invalid domain index 0
 PM: genpd_xlate_onecell: invalid domain index 1
 PM: genpd_xlate_onecell: invalid domain index 3
 PM: genpd_xlate_onecell: invalid domain index 4
 PM: genpd_xlate_onecell: invalid domain index 5
 PM: genpd_xlate_onecell: invalid domain index 13
 PM: genpd_xlate_onecell: invalid domain index 14

Attempts to use these power domains fail, so fix this by
using the correct value of calculated power domains.

Signed-off-by: Adam Ford <aford173@gmail.com>
Fixes: 88914db077 ("pmdomain: mediatek: Add support for Hardware Voter power domains")
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-03-12 14:51:52 +01:00
Aditya Garg
1965445e13 HID: appletb-kbd: add .resume method in PM
Upon resuming from suspend, the Touch Bar driver was missing a resume
method in order to restore the original mode the Touch Bar was on before
suspending. It is the same as the reset_resume method.

[jkosina@suse.com: rebased on top of the pm_ptr() conversion]
Cc: stable@vger.kernel.org
Signed-off-by: Aditya Garg <gargaditya08@live.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-03-12 14:01:44 +01:00
Adrian Freund
70031e70ca HID: logitech-hidpp: Enable MX Master 4 over bluetooth
The Logitech MX Master 4 can be connected over bluetooth or through a
Logitech Bolt receiver. This change adds support for non-standard HID
features, such as high resolution scrolling when the mouse is connected
over bluetooth.
Because no Logitech Bolt receiver driver exists yet those features
won't be available when the mouse is connected through the receiver.

Signed-off-by: Adrian Freund <adrian@freund.io>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-03-12 13:43:37 +01:00
Takashi Iwai
9250673cf2 Merge tag 'asoc-fix-v7.0-rc3' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v7.0

Quite a large pull request, but nothing too concerning here - everything
is fairly small.  We've got a couple of smaller core fixes for races on
card teardown from Matteo Cotifava, a fix for handling dodgy DMI
information generated by u-boot, some driver specific fixes and some new
device IDs for Tegra.
2026-03-12 12:59:28 +01:00
Dapeng Mi
e7fcc54524 perf/x86/intel: Fix OMR snoop information parsing issues
When omr_source is 0x2, the omr_snoop (bit[6]) and omr_promoted (bit[7])
fields are combined to represent the snoop information. However, the
omr_promoted field was not left-shifted by 1 bit, resulting in incorrect
snoop information.

Besides, the snoop information parsing is not accurate for some OMR
sources, like the snoop information should be SNOOP_NONE for these memory
access (omr_source >= 7) instead of SNOOP_HIT.

Fix these issues.

Closes: https://lore.kernel.org/all/CAP-5=fW4zLWFw1v38zCzB9-cseNSTTCtup=p2SDxZq7dPayVww@mail.gmail.com/
Fixes: d2bdcde962 ("perf/x86/intel: Add support for PEBS memory auxiliary info field in DMR")
Reported-by: Ian Rogers <irogers@google.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://patch.msgid.link/20260311075201.2951073-1-dapeng1.mi@linux.intel.com
2026-03-12 11:29:16 +01:00
Dapeng Mi
1d07bbd7ea perf/x86/intel: Add missing branch counters constraint apply
When running the command:
'perf record -e "{instructions,instructions:p}" -j any,counter sleep 1',
a "shift-out-of-bounds" warning is reported on CWF.

  UBSAN: shift-out-of-bounds in /kbuild/src/consumer/arch/x86/events/intel/lbr.c:970:15
  shift exponent 64 is too large for 64-bit type 'long long unsigned int'
  ......
  intel_pmu_lbr_counters_reorder.isra.0.cold+0x2a/0xa7
  intel_pmu_lbr_save_brstack+0xc0/0x4c0
  setup_arch_pebs_sample_data+0x114b/0x2400

The warning occurs because the second "instructions:p" event, which
involves branch counters sampling, is incorrectly programmed to fixed
counter 0 instead of the general-purpose (GP) counters 0-3 that support
branch counters sampling. Currently only GP counters 0-3 support branch
counters sampling on CWF, any event involving branch counters sampling
should be programed on GP counters 0-3. Since the counter index of fixed
counter 0 is 32, it leads to the "src" value in below code is right
shifted 64 bits and trigger the "shift-out-of-bounds" warning.

cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;

The root cause is the loss of the branch counters constraint for the
new event in the branch counters sampling event group. Since it isn't
yet part of the sibling list. This results in the second
"instructions:p" event being programmed on fixed counter 0 incorrectly
instead of the appropriate GP counters 0-3.

To address this, we apply the missing branch counters constraint for
the last event in the group. Additionally, we introduce a new function,
`intel_set_branch_counter_constr()`, to apply the branch counters
constraint and avoid code duplication.

Fixes: 3374491619 ("perf/x86/intel: Support branch counters logging")
Reported-by: Xudong Hao <xudong.hao@intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260228053320.140406-2-dapeng1.mi@linux.intel.com
Cc: stable@vger.kernel.org
2026-03-12 11:29:16 +01:00
Peter Zijlstra
4b9ce67196 perf: Make sure to use pmu_ctx->pmu for groups
Oliver reported that x86_pmu_del() ended up doing an out-of-bound memory access
when group_sched_in() fails and needs to roll back.

This *should* be handled by the transaction callbacks, but he found that when
the group leader is a software event, the transaction handlers of the wrong PMU
are used. Despite the move_group case in perf_event_open() and group_sched_in()
using pmu_ctx->pmu.

Turns out, inherit uses event->pmu to clone the events, effectively undoing the
move_group case for all inherited contexts. Fix this by also making inherit use
pmu_ctx->pmu, ensuring all inherited counters end up in the same pmu context.

Similarly, __perf_event_read() should use equally use pmu_ctx->pmu for the
group case.

Fixes: bd27568117 ("perf: Rewrite core context handling")
Reported-by: Oliver Rosenberg <olrose55@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://patch.msgid.link/20260309133713.GB606826@noisy.programming.kicks-ass.net
2026-03-12 11:29:16 +01:00
Peter Zijlstra
f1cac6ac62 x86/perf: Make sure to program the counter value for stopped events on migration
Both Mi Dapeng and Ian Rogers noted that not everything that sets HES_STOPPED
is required to EF_UPDATE. Specifically the 'step 1' loop of rescheduling
explicitly does EF_UPDATE to ensure the counter value is read.

However, then 'step 2' simply leaves the new counter uninitialized when
HES_STOPPED, even though, as noted above, the thing that stopped them might not
be aware it needs to EF_RELOAD -- since it didn't EF_UPDATE on stop.

One such location that is affected is throttling, throttle does pmu->stop(, 0);
and unthrottle does pmu->start(, 0); possibly restarting an uninitialized counter.

Fixes: a4eaf7f146 ("perf: Rework the PMU methods")
Reported-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Reported-by: Ian Rogers <irogers@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://patch.msgid.link/20260311204035.GX606826@noisy.programming.kicks-ass.net
2026-03-12 11:29:15 +01:00
Breno Leitao
8d5fae6011 perf/x86: Move event pointer setup earlier in x86_pmu_enable()
A production AMD EPYC system crashed with a NULL pointer dereference
in the PMU NMI handler:

  BUG: kernel NULL pointer dereference, address: 0000000000000198
  RIP: x86_perf_event_update+0xc/0xa0
  Call Trace:
   <NMI>
   amd_pmu_v2_handle_irq+0x1a6/0x390
   perf_event_nmi_handler+0x24/0x40

The faulting instruction is `cmpq $0x0, 0x198(%rdi)` with RDI=0,
corresponding to the `if (unlikely(!hwc->event_base))` check in
x86_perf_event_update() where hwc = &event->hw and event is NULL.

drgn inspection of the vmcore on CPU 106 showed a mismatch between
cpuc->active_mask and cpuc->events[]:

  active_mask: 0x1e (bits 1, 2, 3, 4)
  events[1]:   0xff1100136cbd4f38  (valid)
  events[2]:   0x0                 (NULL, but active_mask bit 2 set)
  events[3]:   0xff1100076fd2cf38  (valid)
  events[4]:   0xff1100079e990a90  (valid)

The event that should occupy events[2] was found in event_list[2]
with hw.idx=2 and hw.state=0x0, confirming x86_pmu_start() had run
(which clears hw.state and sets active_mask) but events[2] was
never populated.

Another event (event_list[0]) had hw.state=0x7 (STOPPED|UPTODATE|ARCH),
showing it was stopped when the PMU rescheduled events, confirming the
throttle-then-reschedule sequence occurred.

The root cause is commit 7e772a93eb ("perf/x86: Fix NULL event access
and potential PEBS record loss") which moved the cpuc->events[idx]
assignment out of x86_pmu_start() and into step 2 of x86_pmu_enable(),
after the PERF_HES_ARCH check. This broke any path that calls
pmu->start() without going through x86_pmu_enable() -- specifically
the unthrottle path:

  perf_adjust_freq_unthr_events()
    -> perf_event_unthrottle_group()
      -> perf_event_unthrottle()
        -> event->pmu->start(event, 0)
          -> x86_pmu_start()     // sets active_mask but not events[]

The race sequence is:

  1. A group of perf events overflows, triggering group throttle via
     perf_event_throttle_group(). All events are stopped: active_mask
     bits cleared, events[] preserved (x86_pmu_stop no longer clears
     events[] after commit 7e772a93eb).

  2. While still throttled (PERF_HES_STOPPED), x86_pmu_enable() runs
     due to other scheduling activity. Stopped events that need to
     move counters get PERF_HES_ARCH set and events[old_idx] cleared.
     In step 2 of x86_pmu_enable(), PERF_HES_ARCH causes these events
     to be skipped -- events[new_idx] is never set.

  3. The timer tick unthrottles the group via pmu->start(). Since
     commit 7e772a93eb removed the events[] assignment from
     x86_pmu_start(), active_mask[new_idx] is set but events[new_idx]
     remains NULL.

  4. A PMC overflow NMI fires. The handler iterates active counters,
     finds active_mask[2] set, reads events[2] which is NULL, and
     crashes dereferencing it.

Move the cpuc->events[hwc->idx] assignment in x86_pmu_enable() to
before the PERF_HES_ARCH check, so that events[] is populated even
for events that are not immediately started. This ensures the
unthrottle path via pmu->start() always finds a valid event pointer.

Fixes: 7e772a93eb ("perf/x86: Fix NULL event access and potential PEBS record loss")
Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260310-perf-v2-1-4a3156fce43c@debian.org
2026-03-12 11:29:15 +01:00
Bart Van Assche
14de1552a4 include/linux/local_lock_internal.h: Make this header file again compatible with sparse
There are two versions of the __this_cpu_local_lock() definitions in
include/linux/local_lock_internal.h: one version that relies on the
Clang overloading functionality and another version that does not.
Select the latter version when using sparse. This patch fixes the
following errors reported by sparse:

   include/linux/local_lock_internal.h:331:40: sparse: sparse: multiple definitions for function '__this_cpu_local_lock'
   include/linux/local_lock_internal.h:325:37: sparse:  the previous one is here

Closes: https://lore.kernel.org/oe-kbuild-all/202603062334.wgI5htP0-lkp@intel.com/
Fixes: d3febf16de ("locking/local_lock: Support Clang's context analysis")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Marco Elver <elver@google.com>
Link: https://patch.msgid.link/20260311231455.1961413-1-bvanassche@acm.org
2026-03-12 11:29:11 +01:00
John Hubbard
487f9b3dc6 rust: cpufreq: suppress clippy::double_parens in Policy doctest
The kernel fmt! proc macro wraps each format argument as &(arg). Passing a
tuple such as (a, b) produces &((a, b)) after expansion. Clippy flags that
as double_parens, but it is a false positive fixed in Clippy 1.92 [1] [2].

Suppress the warning on the affected doctest function with a reason
attribute so it can be removed once the minimum toolchain moves past 1.92.

[ We may end up deciding to support per-version Clippy lints, in which
  case we will need [3].

  In the future, if [4] gets fixed, we may be able to use
  `Delimiter::None` as Gary suggested in [5].

  Link: https://lore.kernel.org/rust-for-linux/20260307170929.153892-1-ojeda@kernel.org/ [3]
  Link: https://github.com/rust-lang/rust/issues/67062 [4]
  Link: https://lore.kernel.org/rust-for-linux/DGUA5GY2DGYN.3PG0FKLG7GFN1@garyguo.net/ [5]

    - Miguel ]

Link: https://github.com/rust-lang/rust-clippy/issues/15852 [1]
Link: https://github.com/rust-lang/rust-clippy/pull/15939 [2]
Suggested-by: Gary Guo <gary@garyguo.net>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/20260312041934.362840-2-jhubbard@nvidia.com
[ Reworded to replace GitHub-like short link with full URLs in Link tags.
  Reworded reason string to match the style of a couple others we have
  elsewhere. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-12 11:01:23 +01:00
Benno Lossin
fdbaa9d2b7 rust: pin-init: replace shadowed return token by unsafe-to-create token
We use a unit struct `__InitOk` in the closure generated by the
initializer macros as the return value. We shadow it by creating a
struct with the same name again inside of the closure, preventing early
returns of `Ok` in the initializer (before all fields have been
initialized).

In the face of Type Alias Impl Trait (TAIT) and the next trait solver,
this solution no longer works [1]. The shadowed struct can be named
through type inference. In addition, there is an RFC proposing to add
the feature of path inference to Rust, which would similarly allow [2].

Thus remove the shadowed token and replace it with an `unsafe` to create
token.

The reason we initially used the shadowing solution was because an
alternative solution used a builder pattern. Gary writes [3]:

    In the early builder-pattern based InitOk, having a single InitOk
    type for token is unsound because one can launder an InitOk token
    used for one place to another initializer. I used a branded lifetime
    solution, and then you figured out that using a shadowed type would
    work better because nobody could construct it at all.

The laundering issue does not apply to the approach we ended up with
today.

With this change, the example by Tim Chirananthavat in [1] no longer
compiles and results in this error:

    error: cannot construct `pin_init::__internal::InitOk` with struct literal syntax due to private fields
      --> src/main.rs:26:17
       |
    26 |                 InferredType {}
       |                 ^^^^^^^^^^^^
       |
       = note: private field `0` that was not provided
    help: you might have meant to use the `new` associated function
       |
    26 -                 InferredType {}
    26 +                 InferredType::new()
       |

Applying the suggestion of using the `::new()` function, results in
another expected error:

    error[E0133]: call to unsafe function `pin_init::__internal::InitOk::new` is unsafe and requires unsafe block
      --> src/main.rs:26:17
       |
    26 |                 InferredType::new()
       |                 ^^^^^^^^^^^^^^^^^^^ call to unsafe function
       |
       = note: consult the function's documentation for information on how to avoid undefined behavior

Reported-by: Tim Chirananthavat <theemathas@gmail.com>
Link: https://github.com/rust-lang/rust/issues/153535 [1]
Link: https://github.com/rust-lang/rfcs/pull/3444#issuecomment-4016145373 [2]
Link: https://github.com/rust-lang/rust/issues/153535#issuecomment-4017620804 [3]
Fixes: fc6c6baa1f ("rust: init: add initialization macros")
Cc: stable@vger.kernel.org
Signed-off-by: Benno Lossin <lossin@kernel.org>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Link: https://patch.msgid.link/20260311105056.1425041-1-lossin@kernel.org
[ Added period as mentioned. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-12 08:46:17 +01:00
Sabrina Dubroca
d87f8bc47f xfrm: avoid RCU warnings around the per-netns netlink socket
net->xfrm.nlsk is used in 2 types of contexts:
 - fully under RCU, with rcu_read_lock + rcu_dereference and a NULL check
 - in the netlink handlers, with requests coming from a userspace socket

In the 2nd case, net->xfrm.nlsk is guaranteed to stay non-NULL and the
object is alive, since we can't enter the netns destruction path while
the user socket holds a reference on the netns.

After adding the __rcu annotation to netns_xfrm.nlsk (which silences
sparse warnings in the RCU users and __net_init code), we need to tell
sparse that the 2nd case is safe. Add a helper for that.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:16:02 +01:00
Sabrina Dubroca
103b4f5b40 xfrm: add rcu_access_pointer to silence sparse warning for xfrm_input_afinfo
xfrm_input_afinfo is __rcu, we should use rcu_access_pointer to avoid
a sparse warning:
net/xfrm/xfrm_input.c:78:21: error: incompatible types in comparison expression (different address spaces):
net/xfrm/xfrm_input.c:78:21:    struct xfrm_input_afinfo const [noderef] __rcu *
net/xfrm/xfrm_input.c:78:21:    struct xfrm_input_afinfo const *

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:57 +01:00
Sabrina Dubroca
2da6901866 xfrm: policy: silence sparse warning in xfrm_policy_unregister_afinfo
xfrm_policy_afinfo is __rcu, use rcu_access_pointer to silence:

net/xfrm/xfrm_policy.c:4152:43: error: incompatible types in comparison expression (different address spaces):
net/xfrm/xfrm_policy.c:4152:43:    struct xfrm_policy_afinfo const [noderef] __rcu *
net/xfrm/xfrm_policy.c:4152:43:    struct xfrm_policy_afinfo const *

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:50 +01:00
Sabrina Dubroca
b1f9c67781 xfrm: policy: fix sparse warnings in xfrm_policy_{init,fini}
In xfrm_policy_init:
add rcu_assign_pointer to fix warning:
net/xfrm/xfrm_policy.c:4238:29: warning: incorrect type in assignment (different address spaces)
net/xfrm/xfrm_policy.c:4238:29:    expected struct hlist_head [noderef] __rcu *table
net/xfrm/xfrm_policy.c:4238:29:    got struct hlist_head *

add rcu_dereference_protected to silence warning:
net/xfrm/xfrm_policy.c:4265:36: warning: incorrect type in argument 1 (different address spaces)
net/xfrm/xfrm_policy.c:4265:36:    expected struct hlist_head *n
net/xfrm/xfrm_policy.c:4265:36:    got struct hlist_head [noderef] __rcu *table

The netns is being created, no concurrent access is possible yet.

In xfrm_policy_fini, net is going away, there shouldn't be any
concurrent changes to the hashtables, so we can use
rcu_dereference_protected to silence warnings:
net/xfrm/xfrm_policy.c:4291:17: warning: incorrect type in argument 1 (different address spaces)
net/xfrm/xfrm_policy.c:4291:17:    expected struct hlist_head const *h
net/xfrm/xfrm_policy.c:4291:17:    got struct hlist_head [noderef] __rcu *table
net/xfrm/xfrm_policy.c:4292:36: warning: incorrect type in argument 1 (different address spaces)
net/xfrm/xfrm_policy.c:4292:36:    expected struct hlist_head *n
net/xfrm/xfrm_policy.c:4292:36:    got struct hlist_head [noderef] __rcu *table

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:43 +01:00
Sabrina Dubroca
05b8673963 xfrm: state: silence sparse warnings during netns exit
Silence sparse warnings in xfrm_state_fini:
net/xfrm/xfrm_state.c:3327:9: warning: incorrect type in argument 1 (different address spaces)
net/xfrm/xfrm_state.c:3327:9:    expected struct hlist_head const *h
net/xfrm/xfrm_state.c:3327:9:    got struct hlist_head [noderef] __rcu *state_byseq

Add xfrm_state_deref_netexit() to wrap those calls. The netns is going
away, we don't have to worry about the state_by* pointers being
changed behind our backs.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:38 +01:00
Sabrina Dubroca
f468fdd52b xfrm: remove rcu/state_hold from xfrm_state_lookup_spi_proto
xfrm_state_lookup_spi_proto is called under xfrm_state_lock by
xfrm_alloc_spi, no need to take a reference on the state and pretend
to be under RCU.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:33 +01:00
Sabrina Dubroca
33cefb76a8 xfrm: state: add xfrm_state_deref_prot to state_by* walk under lock
We're under xfrm_state_lock for all those walks, we can use
xfrm_state_deref_prot to silence sparse warnings such as:

net/xfrm/xfrm_state.c:933:17: warning: dereference of noderef expression

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:26 +01:00
Sabrina Dubroca
55b5bc0314 xfrm: state: fix sparse warnings around XFRM_STATE_INSERT
We're under xfrm_state_lock in all those cases, use
xfrm_state_deref_prot(state_by*) to avoid sparse warnings:

net/xfrm/xfrm_state.c:2597:25: warning: cast removes address space '__rcu' of expression
net/xfrm/xfrm_state.c:2597:25: warning: incorrect type in argument 2 (different address spaces)
net/xfrm/xfrm_state.c:2597:25:    expected struct hlist_head *h
net/xfrm/xfrm_state.c:2597:25:    got struct hlist_head [noderef] __rcu *

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:19 +01:00
Sabrina Dubroca
e2f845f672 xfrm: state: fix sparse warnings in xfrm_state_init
Use rcu_assign_pointer, and tmp variables for freeing on the error
path without accessing net->xfrm.state_by*.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:15:11 +01:00
Sabrina Dubroca
9f455aac17 xfrm: state: fix sparse warnings on xfrm_state_hold_rcu
In all callers, x is not an __rcu pointer. We can drop the annotation to
avoid sparse warnings:

net/xfrm/xfrm_state.c:58:39: warning: incorrect type in argument 1 (different address spaces)
net/xfrm/xfrm_state.c:58:39:    expected struct refcount_struct [usertype] *r
net/xfrm/xfrm_state.c:58:39:    got struct refcount_struct [noderef] __rcu *
net/xfrm/xfrm_state.c:1166:42: warning: incorrect type in argument 1 (different address spaces)
net/xfrm/xfrm_state.c:1166:42:    expected struct xfrm_state [noderef] __rcu *x
net/xfrm/xfrm_state.c:1166:42:    got struct xfrm_state *[assigned] x
(repeated for each caller)

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-12 07:14:56 +01:00
Sayali Patil
146c9ab38b powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx
The new PowerPC VMX fast path (__copy_tofrom_user_power7_vmx) is not
exercised by existing copyloops selftests. This patch updates
the selftest to exercise the VMX variant, ensuring the VMX copy path
is validated.

Changes include:
  - COPY_LOOP=test___copy_tofrom_user_power7_vmx with -D VMX_TEST is used
    in existing selftest build targets.
  - Inclusion of ../utils.c to provide get_auxv_entry() for hardware
    feature detection.
  - At runtime, the test skips execution if Altivec is not available.
  - Copy sizes above VMX_COPY_THRESHOLD are used to ensure the VMX
    path is taken.

This enables validation of the VMX fast path without affecting systems
that do not support Altivec.

Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260304122201.153049-2-sayalip@linux.ibm.com
2026-03-12 11:03:48 +05:30
Sayali Patil
6bc9c0a905 powerpc: fix KUAP warning in VMX usercopy path
On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing enabled,
KUAP warnings can be triggered from the VMX usercopy path under memory
stress workloads.

KUAP requires that no subfunctions are called once userspace access has
been enabled. The existing VMX copy implementation violates this
requirement by invoking enter_vmx_usercopy() from the assembly path after
userspace access has already been enabled. If preemption occurs
in this window, the AMR state may not be preserved correctly,
leading to unexpected userspace access state and resulting in
KUAP warnings.

Fix this by restructuring the VMX usercopy flow so that VMX selection
and VMX state management are centralized in raw_copy_tofrom_user(),
which is invoked by the raw_copy_{to,from,in}_user() wrappers.

The new flow is:

  - raw_copy_{to,from,in}_user() calls raw_copy_tofrom_user()
  - raw_copy_tofrom_user() decides whether to use the VMX path
    based on size and CPU capability
  - Call enter_vmx_usercopy() before enabling userspace access
  - Enable userspace access as per the copy direction
    and perform the VMX copy
  - Disable userspace access as per the copy direction
  - Call exit_vmx_usercopy()
  - Fall back to the base copy routine if the VMX copy faults

With this change, the VMX assembly routines no longer perform VMX state
management or call helper functions; they only implement the
copy operations.
The previous feature-section based VMX selection inside
__copy_tofrom_user_power7() is removed, and a dedicated
__copy_tofrom_user_power7_vmx() entry point is introduced.

This ensures correct KUAP ordering, avoids subfunction calls
while KUAP is unlocked, and eliminates the warnings while preserving
the VMX fast path.

Fixes: de78a9c42a ("powerpc: Add a framework for Kernel Userspace Access Protection")
Reported-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Closes: https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/
Suggested-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Reviewed-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Co-developed-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260304122201.153049-1-sayalip@linux.ibm.com
2026-03-12 11:03:47 +05:30
Viktor Malik
e9bbfb4bfa powerpc, perf: Check that current->mm is alive before getting user callchain
It may happen that mm is already released, which leads to kernel panic.
This adds the NULL check for current->mm, similarly to
commit 20afc60f89 ("x86, perf: Check that current->mm is alive before getting user callchain").

I was getting this panic when running a profiling BPF program
(profile.py from bcc-tools):

    [26215.051935] Kernel attempted to read user page (588) - exploit attempt? (uid: 0)
    [26215.051950] BUG: Kernel NULL pointer dereference on read at 0x00000588
    [26215.051952] Faulting instruction address: 0xc00000000020fac0
    [26215.051957] Oops: Kernel access of bad area, sig: 11 [#1]
    [...]
    [26215.052049] Call Trace:
    [26215.052050] [c000000061da6d30] [c00000000020fc10] perf_callchain_user_64+0x2d0/0x490 (unreliable)
    [26215.052054] [c000000061da6dc0] [c00000000020f92c] perf_callchain_user+0x1c/0x30
    [26215.052057] [c000000061da6de0] [c0000000005ab2a0] get_perf_callchain+0x100/0x360
    [26215.052063] [c000000061da6e70] [c000000000573bc8] bpf_get_stackid+0x88/0xf0
    [26215.052067] [c000000061da6ea0] [c008000000042258] bpf_prog_16d4ab9ab662f669_do_perf_event+0xf8/0x274
    [...]

In addition, move storing the top-level stack entry to generic
perf_callchain_user to make sure the top-evel entry is always captured,
even if current->mm is NULL.

Fixes: 20002ded4d ("perf_counter: powerpc: Add callchain support")
Signed-off-by: Viktor Malik <vmalik@redhat.com>
Tested-by: Qiao Zhao <qzhao@redhat.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Saket Kumar Bhaskar <skb99@linux.ibm.com>
[Maddy: fixed message to avoid checkpatch format style error]
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260309144045.169427-1-vmalik@redhat.com
2026-03-12 11:02:55 +05:30
Ritesh Harjani (IBM)
0a8321dde0 powerpc/mem: Move CMA reservations to arch_mm_preinit
commit 4267739cab ("arch, mm: consolidate initialization of SPARSE memory model"),
changed the initialization order of "pageblock_order" from...
start_kernel()
    - setup_arch()
       - initmem_init()
         - sparse_init()
           - set_pageblock_order();  // this sets the pageblock_order
       - xxx_cma_reserve();

to...
start_kernel()
    - setup_arch()
       - xxx_cma_reserve();
    - mm_core_init_early()
       - free_area_init()
          - sparse_init()
             - set_pageblock_order() // this sets the pageblock_order.

So this means, pageblock_order is not initialized before these cma
reservation function calls, hence we are seeing CMA failures like...

[    0.000000] kvm_cma_reserve: reserving 3276 MiB for global area
[    0.000000] cma: pageblock_order not yet initialized. Called during early boot?
[    0.000000] cma: Failed to reserve 3276 MiB
....
[    0.000000][    T0] cma: pageblock_order not yet initialized. Called during early boot?
[    0.000000][    T0] cma: Failed to reserve 1024 MiB

This patch moves these CMA reservations to arch_mm_preinit() which
happens in mm_core_init() (which happens after pageblock_order is
initialized), but before the memblock moves the free memory to buddy.

Fixes: 4267739cab ("arch, mm: consolidate initialization of SPARSE memory model")
Suggested-by: Mike Rapoport <rppt@kernel.org>
Reported-and-tested-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Closes: https://lore.kernel.org/linuxppc-dev/4c338a29-d190-44f3-8874-6cfa0a031f0b@linux.ibm.com/
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Tested-by: Dan Horák <dan@danny.cz>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/6e532cf0db5be99afbe20eed699163d5e86cd71f.1772303986.git.ritesh.list@gmail.com
2026-03-12 10:57:31 +05:30
Dave Airlie
50ae4ce2a6 Merge tag 'drm-msm-fixes-2026-03-06' of https://gitlab.freedesktop.org/drm/msm into drm-fixes
Fixes for v7.0:

Core:
- Adjusted msm_iommu_pagetable_prealloc_allocate() allocation type

DPU:
- Fixed blue screens on Hamoa laptops by reverting the LM reservation
- Fixed the size of the LM block on several platforms
- Dropped usage of %pK (again)
- Fixed smatch warning on SSPP v13+ code
- Fixed INTF_6 interrupts on Lemans

DSI:
- Fixed DSI PHY revision on Kaanapali
- Fixed pixel clock calculation for the bonded DSI mode panels with
  compression enabled

DT bindings:
- Fixed DisplayPort description on Glymur
- Fixed model name in SM8750 MDSS schema

GPU:
- Added MODULE_DEVICE_TABLE to the GPU driver
- Fix bogus protect error on X2-85
- Fix dma_free_attrs() buffer size
- Gen8 UBWC fix for Glymur

From: Rob Clark <rob.clark@oss.qualcomm.com>
Link: https://patch.msgid.link/CACSVV00wZ95gFDLfzJ0Ywb8rsjPSjZ1aHdwE4smnyuZ=Fg-g8Q@mail.gmail.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
2026-03-12 14:38:07 +10:00
Mukesh R
0fc773b0e4 mshv: pass struct mshv_user_mem_region by reference
For unstated reasons, function mshv_partition_ioctl_set_memory passes
struct mshv_user_mem_region by value instead of by reference. Change
it to pass by reference.

Signed-off-by: Mukesh R <mrathor@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-12 04:32:21 +00:00
Uros Bizjak
afeb96cb18 x86/hyperv: Use any general-purpose register when saving %cr2 and %cr8
hv_hvcrash_ctxt_save() in arch/x86/hyperv/hv_crash.c currently saves %cr2
and %cr8 using %eax ("=a"). This unnecessarily forces a specific register.
Update the inline assembly to use a general-purpose register ("=r") for
both %cr2 and %cr8. This makes the code more flexible for the compiler
while producing the same saved context contents.

No functional changes.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Long Li <longli@microsoft.com>
Cc: Thomas Gleixner <tglx@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-12 04:25:20 +00:00
Uros Bizjak
2536091d58 x86/hyperv: Use current_stack_pointer to avoid asm() in hv_hvcrash_ctxt_save()
Use current_stack_pointer to avoid asm() when saving %rsp to the
crash context memory in hv_hvcrash_ctxt_save(). The new code is
more readable and results in exactly the same object file.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Long Li <longli@microsoft.com>
Cc: Thomas Gleixner <tglx@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-12 04:25:19 +00:00
Uros Bizjak
3484127c19 x86/hyperv: Save segment registers directly to memory in hv_hvcrash_ctxt_save()
hv_hvcrash_ctxt_save() in arch/x86/hyperv/hv_crash.c currently saves
segment registers via a general-purpose register (%eax). Update the
code to save segment registers (cs, ss, ds, es, fs, gs) directly to
the crash context memory using movw. This avoids unnecessary use of
a general-purpose register, making the code simpler and more efficient.

The size of the corresponding object file improves as follows:

   text    data     bss     dec     hex filename
   4167     176     200    4543    11bf hv_crash-old.o
   4151     176     200    4527    11af hv_crash-new.o

No functional change occurs to the saved context contents; this is
purely a code-quality improvement.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Long Li <longli@microsoft.com>
Cc: Thomas Gleixner <tglx@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-12 04:25:19 +00:00
Alok Tiwari
87f7dff3ec octeontx2-af: devlink: fix NIX RAS reporter to use RAS interrupt status
The NIX RAS health report path uses nix_af_rvu_err when handling the
NIX_AF_RVU_RAS case, so the report prints the ERR interrupt status rather
than the RAS interrupt status.

Use nix_af_rvu_ras for the NIX_AF_RVU_RAS report.

Fixes: 5ed66306ea ("octeontx2-af: Add devlink health reporters for NIX")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Link: https://patch.msgid.link/20260310184824.1183651-2-alok.a.tiwari@oracle.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 20:39:19 -07:00
Alok Tiwari
dc26ca99b8 octeontx2-af: devlink: fix NIX RAS reporter recovery condition
The NIX RAS health reporter recovery routine checks nix_af_rvu_int to
decide whether to re-enable NIX_AF_RAS interrupts. This is the RVU
interrupt status field and is unrelated to RAS events, so the recovery
flow may incorrectly skip re-enabling NIX_AF_RAS interrupts.

Check nix_af_rvu_ras instead before writing NIX_AF_RAS_ENA_W1S.

Fixes: 5ed66306ea ("octeontx2-af: Add devlink health reporters for NIX")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Link: https://patch.msgid.link/20260310184824.1183651-1-alok.a.tiwari@oracle.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 20:39:19 -07:00
Chintan Vankar
840c9d13cb net: ethernet: ti: am65-cpsw-nuss: Fix rx_filter value for PTP support
The "rx_filter" member of "hwtstamp_config" structure is an enum field and
does not support bitwise OR combination of multiple filter values. It
causes error while linuxptp application tries to match rx filter version.
Fix this by storing the requested filter type in a new port field.

Fixes: 97248adb5a ("net: ti: am65-cpsw: Update hw timestamping filter for PTPv1 RX packets")
Signed-off-by: Chintan Vankar <c-vankar@ti.com>
Link: https://patch.msgid.link/20260310160940.109822-1-c-vankar@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 20:28:37 -07:00
Shiraz Saleem
87c2302813 net/mana: Null service_wq on setup error to prevent double destroy
In mana_gd_setup() error path, set gc->service_wq to NULL after
destroy_workqueue() to match the cleanup in mana_gd_cleanup().
This prevents a use-after-free if the workqueue pointer is checked
after a failed setup.

Fixes: f975a09552 ("net: mana: Fix double destroy_workqueue on service rescan PCI path")
Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260309172443.688392-1-kotaranov@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 20:21:45 -07:00
Jakub Kicinski
ead0540548 Merge tag 'nf-26-03-10' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Florian Westphal says:

====================
netfilter: updates for net

Due to large volume of backlogged patches its unlikely I will make the
2nd planned PR this week, so several legit fixes will be pushed back
to next week.  Sorry for the inconvenience but I am out of ideas and
alternatives.

1) syzbot managed to add/remove devices to a flowtable, due to a bug in
   the flowtable netdevice notifier this gets us a double-add and
   eventually UaF when device is removed again (we only expect one
   entry, duplicate remains past net_device end-of-life).
   From Phil Sutter, bug added in 6.16.

2) Yiming Qian reports another nf_tables transaction handling bug:
   in some cases error unwind misses to undo certain set elements,
   resulting in refcount underflow and use-after-free, bug added in 6.4.

3) Jenny Guanni Qu found out-of-bounds read in pipapo set type.
   While the value is never used, it still rightfully triggers KASAN
   splats.  Bug exists since this set type was added in 5.6.

4) a few x_tables modules contain copypastry tcp option parsing code which
    can read 1 byte past the option area.  This bug is ancient, fix from
    David Dull.

5) nfnetlink_queue leaks kernel memory if userspace provides bad
   NFQA_VLAN/NFQA_L2HDR attributes.  From Hyunwoo Kim, bug stems from
   from 4.7 days.

6) nfnetlink_cthelper has incorrect loop restart logic which may result
   in reading one pointer past end of array. From 3.6 days, fix also from
   Hyunwoo Kim.

7) xt_IDLETIMER v0 extension must reject working with timers added
   by revision v1, else we get list corruption. Bug added in v5.7.
   From Yifan Wu, Juefei Pu and Yuan Tan via Xin Lu.

* tag 'nf-26-03-10' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: xt_IDLETIMER: reject rev0 reuse of ALARM timer labels
  netfilter: nfnetlink_cthelper: fix OOB read in nfnl_cthelper_dump_table()
  netfilter: nfnetlink_queue: fix entry leak in bridge verdict error path
  netfilter: x_tables: guard option walkers against 1-byte tail reads
  netfilter: nft_set_pipapo: fix stack out-of-bounds read in pipapo_drop()
  netfilter: nf_tables: always walk all pending catchall elements
  netfilter: nf_tables: Fix for duplicate device in netdev hooks
====================

Link: https://patch.msgid.link/20260310132050.630-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 19:12:59 -07:00
Jakub Kicinski
14ad51036c Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2026-03-10 (ice, iavf, i40e, e1000e, e1000)

Nikolay Aleksandrov changes return code of RDMA related ice devlink get
parameters when irdma is not enabled to -EOPNOTSUPP as current return
of -ENODEV causes issues with devlink output.

Petr Oros resolves a couple of issues in iavf; freeing PTP resources
before reset and disable. Fixing contention issues with the netdev lock
between reset and some ethtool operations.

Alok Tiwari corrects an incorrect comparison of cloud filter values and
adjust some passed arguments to sizeof() for consistency on i40e.

Matt Vollrath removes an incorrect decrement for DMA error on e1000 and
e1000e drivers.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  e1000/e1000e: Fix leak in DMA error cleanup
  i40e: fix src IP mask checks and memcpy argument names in cloud filter
  iavf: fix incorrect reset handling in callbacks
  iavf: fix PTP use-after-free during reset
  drivers: net: ice: fix devlink parameters get without irdma
====================

Link: https://patch.msgid.link/20260310205654.4109072-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 19:08:16 -07:00
Jakub Kicinski
e62a22f434 Merge branch 'neighbour-fix-update-of-proxy-neighbour'
Sabrina Dubroca says:

====================
neighbour: fix update of proxy neighbour

While re-reading some "old" patches I ran into a small change of
behavior in commit dc2a27e524 ("neighbour: Update pneigh_entry in
pneigh_create().").

The old behavior was not consistent between ->protocol and ->flags,
and didn't offer a way to clear protocol, so maybe it's better to
change that (7-years-old [1]) behavior. But then we should change
non-proxy neighbours as well to keep neigh/pneigh consistent.

[1] df9b0e30d4 ("neighbor: Add protocol attribute")
====================

Link: https://patch.msgid.link/cover.1772894876.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 19:04:58 -07:00
Sabrina Dubroca
68e76fc12d selftests: rtnetlink: add neighbour update test
Check that protocol and flags are updated correctly for
neighbour and pneigh entries.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/d28f72b5b4ff4c9ecbbbde06146a938dcc4c264a.1772894876.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 19:04:55 -07:00
Sabrina Dubroca
cbada10488 neighbour: restore protocol != 0 check in pneigh update
Prior to commit dc2a27e524 ("neighbour: Update pneigh_entry in
pneigh_create()."), a pneigh's protocol was updated only when the
value of the NDA_PROTOCOL attribute was non-0. While moving the code,
that check was removed. This is a small change of user-visible
behavior, and inconsistent with the (non-proxy) neighbour behavior.

Fixes: dc2a27e524 ("neighbour: Update pneigh_entry in pneigh_create().")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/38c61de1bb032871a886aff9b9b52fe1cdd4cada.1772894876.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 19:04:55 -07:00
Marek Behún
e8f0dc024c net: dsa: realtek: Fix LED group port bit for non-zero LED group
The rtl8366rb_led_group_port_mask() function always returns LED port
bit in LED group 0; the switch statement returns the same thing in all
non-default cases.

This means that the driver does not currently support configuring LEDs
in non-zero LED groups.

Fix this.

Fixes: 32d6170054 ("net: dsa: realtek: add LED drivers for rtl8366rb")
Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20260311111237.29002-1-kabel@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 19:03:21 -07:00
Mehul Rao
6c5a9baa15 tipc: fix divide-by-zero in tipc_sk_filter_connect()
A user can set conn_timeout to any value via
setsockopt(TIPC_CONN_TIMEOUT), including values less than 4.  When a
SYN is rejected with TIPC_ERR_OVERLOAD and the retry path in
tipc_sk_filter_connect() executes:

    delay %= (tsk->conn_timeout / 4);

If conn_timeout is in the range [0, 3], the integer division yields 0,
and the modulo operation triggers a divide-by-zero exception, causing a
kernel oops/panic.

Fix this by clamping conn_timeout to a minimum of 4 at the point of use
in tipc_sk_filter_connect().

Oops: divide error: 0000 [#1] SMP KASAN NOPTI
CPU: 0 UID: 0 PID: 119 Comm: poc-F144 Not tainted 7.0.0-rc2+
RIP: 0010:tipc_sk_filter_rcv (net/tipc/socket.c:2236 net/tipc/socket.c:2362)
Call Trace:
 tipc_sk_backlog_rcv (include/linux/instrumented.h:82 include/linux/atomic/atomic-instrumented.h:32 include/net/sock.h:2357 net/tipc/socket.c:2406)
 __release_sock (include/net/sock.h:1185 net/core/sock.c:3213)
 release_sock (net/core/sock.c:3797)
 tipc_connect (net/tipc/socket.c:2570)
 __sys_connect (include/linux/file.h:62 include/linux/file.h:83 net/socket.c:2098)

Fixes: 6787927475 ("tipc: buffer overflow handling in listener socket")
Cc: stable@vger.kernel.org
Signed-off-by: Mehul Rao <mehulrao@gmail.com>
Reviewed-by: Tung Nguyen <tung.quang.nguyen@est.tech>
Link: https://patch.msgid.link/20260310170730.28841-1-mehulrao@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 18:56:28 -07:00
Bastien Curutchet (Schneider Electric)
99c8c16a4a net: dsa: microchip: Fix error path in PTP IRQ setup
If request_threaded_irq() fails during the PTP message IRQ setup, the
newly created IRQ mapping is never disposed. Indeed, the
ksz_ptp_irq_setup()'s error path only frees the mappings that were
successfully set up.

Dispose the newly created mapping if the associated
request_threaded_irq() fails at setup.

Cc: stable@vger.kernel.org
Fixes: d0b8fec8ae ("net: dsa: microchip: Fix symetry in ksz_ptp_msg_irq_{setup/free}()")
Signed-off-by: Bastien Curutchet (Schneider Electric) <bastien.curutchet@bootlin.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://patch.msgid.link/20260309-ksz-ptp-irq-fix-v1-1-757b3b985955@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 18:06:22 -07:00
Jakub Kicinski
20c1be4cc8 Merge branch 'net-bpf-nd_tbl-fixes-for-when-ipv6-disable-1'
Ricardo B. Marlière says:

====================
{net,bpf}: nd_tbl fixes for when ipv6.disable=1

Please consider merging these four patches to fix three crashes that were
found after this report:

https://lore.kernel.org/all/CAHXs0ORzd62QOG-Fttqa2Cx_A_VFp=utE2H2VTX5nqfgs7LDxQ@mail.gmail.com

The first patch from Jakub Kicinski is a preparation in order to enable
the use ipv6_mod_enabled() even when CONFIG_IPV6=n.
====================

Link: https://patch.msgid.link/20260307-net-nd_tbl_fixes-v4-0-e2677e85628c@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 17:53:41 -07:00
Ricardo B. Marlière
d56b5d1634 bpf: bpf_out_neigh_v6: Fix nd_tbl NULL dereference when IPv6 is disabled
When booting with the 'ipv6.disable=1' parameter, the nd_tbl is never
initialized because inet6_init() exits before ndisc_init() is called which
initializes it. If bpf_redirect_neigh() is called with explicit AF_INET6
nexthop parameters, __bpf_redirect_neigh_v6() can skip the IPv6 FIB lookup
and call bpf_out_neigh_v6() directly. bpf_out_neigh_v6() then calls
ip_neigh_gw6(), which uses ipv6_stub->nd_tbl.

 BUG: kernel NULL pointer dereference, address: 0000000000000248
 Oops: Oops: 0000 [#1] SMP NOPTI
 RIP: 0010:skb_do_redirect+0x44f/0xf40
 Call Trace:
  <TASK>
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? __tcf_classify.constprop.0+0x83/0x160
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? tcf_classify+0x2b/0x50
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? tc_run+0xb8/0x120
  ? srso_alias_return_thunk+0x5/0xfbef5
  __dev_queue_xmit+0x6fa/0x1000
  ? srso_alias_return_thunk+0x5/0xfbef5
  packet_sendmsg+0x10da/0x1700
  ? srso_alias_return_thunk+0x5/0xfbef5
  __sys_sendto+0x1f3/0x220
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x101/0xf80
  ? exc_page_fault+0x6e/0x170
  ? srso_alias_return_thunk+0x5/0xfbef5
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
  </TASK>

Fix this by adding an early check in bpf_out_neigh_v6(). If IPv6 is
disabled, drop the packet before neighbor lookup.

Suggested-by: Fernando Fernandez Mancera <fmancera@suse.de>
Fixes: ba452c9e99 ("bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop")
Signed-off-by: Ricardo B. Marlière <rbm@suse.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://patch.msgid.link/20260307-net-nd_tbl_fixes-v4-4-e2677e85628c@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 17:53:38 -07:00
Ricardo B. Marlière
dcb4e22314 bpf: bpf_out_neigh_v4: Fix nd_tbl NULL dereference when IPv6 is disabled
When booting with the 'ipv6.disable=1' parameter, the nd_tbl is never
initialized because inet6_init() exits before ndisc_init() is called which
initializes it. If bpf_redirect_neigh() is called from tc with an explicit
nexthop of nh_family == AF_INET6, bpf_out_neigh_v4() takes the AF_INET6
branch and calls ip_neigh_gw6(), which relies on ipv6_stub->nd_tbl.

 BUG: kernel NULL pointer dereference, address: 0000000000000248
 Oops: Oops: 0000 [#1] SMP NOPTI
 RIP: 0010:skb_do_redirect+0xb93/0xf00
 Call Trace:
  <TASK>
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? __tcf_classify.constprop.0+0x83/0x160
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? tcf_classify+0x2b/0x50
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? tc_run+0xb8/0x120
  ? srso_alias_return_thunk+0x5/0xfbef5
  __dev_queue_xmit+0x6fa/0x1000
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? alloc_skb_with_frags+0x58/0x200
  packet_sendmsg+0x10da/0x1700
  ? srso_alias_return_thunk+0x5/0xfbef5
  __sys_sendto+0x1f3/0x220
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x101/0xf80
  ? exc_page_fault+0x6e/0x170
  ? srso_alias_return_thunk+0x5/0xfbef5
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
  </TASK>

Fix this by adding an early check in the AF_INET6 branch of
bpf_out_neigh_v4(). If IPv6 is disabled, unlock RCU and drop the packet.

Suggested-by: Fernando Fernandez Mancera <fmancera@suse.de>
Fixes: ba452c9e99 ("bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop")
Signed-off-by: Ricardo B. Marlière <rbm@suse.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://patch.msgid.link/20260307-net-nd_tbl_fixes-v4-3-e2677e85628c@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 17:53:38 -07:00
Ricardo B. Marlière
30021e969d net: bonding: Fix nd_tbl NULL dereference when IPv6 is disabled
When booting with the 'ipv6.disable=1' parameter, the nd_tbl is never
initialized because inet6_init() exits before ndisc_init() is called
which initializes it. If bonding ARP/NS validation is enabled, an IPv6
NS/NA packet received on a slave can reach bond_validate_na(), which
calls bond_has_this_ip6(). That path calls ipv6_chk_addr() and can
crash in __ipv6_chk_addr_and_flags().

 BUG: kernel NULL pointer dereference, address: 00000000000005d8
 Oops: Oops: 0000 [#1] SMP NOPTI
 RIP: 0010:__ipv6_chk_addr_and_flags+0x69/0x170
 Call Trace:
  <IRQ>
  ipv6_chk_addr+0x1f/0x30
  bond_validate_na+0x12e/0x1d0 [bonding]
  ? __pfx_bond_handle_frame+0x10/0x10 [bonding]
  bond_rcv_validate+0x1a0/0x450 [bonding]
  bond_handle_frame+0x5e/0x290 [bonding]
  ? srso_alias_return_thunk+0x5/0xfbef5
  __netif_receive_skb_core.constprop.0+0x3e8/0xe50
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? update_cfs_rq_load_avg+0x1a/0x240
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? __enqueue_entity+0x5e/0x240
  __netif_receive_skb_one_core+0x39/0xa0
  process_backlog+0x9c/0x150
  __napi_poll+0x30/0x200
  ? srso_alias_return_thunk+0x5/0xfbef5
  net_rx_action+0x338/0x3b0
  handle_softirqs+0xc9/0x2a0
  do_softirq+0x42/0x60
  </IRQ>
  <TASK>
  __local_bh_enable_ip+0x62/0x70
  __dev_queue_xmit+0x2d3/0x1000
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? packet_parse_headers+0x10a/0x1a0
  packet_sendmsg+0x10da/0x1700
  ? kick_pool+0x5f/0x140
  ? srso_alias_return_thunk+0x5/0xfbef5
  ? __queue_work+0x12d/0x4f0
  __sys_sendto+0x1f3/0x220
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x101/0xf80
  ? exc_page_fault+0x6e/0x170
  ? srso_alias_return_thunk+0x5/0xfbef5
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
  </TASK>

Fix this by checking ipv6_mod_enabled() before dispatching IPv6 packets to
bond_na_rcv(). If IPv6 is disabled, return early from bond_rcv_validate()
and avoid the path to ipv6_chk_addr().

Suggested-by: Fernando Fernandez Mancera <fmancera@suse.de>
Fixes: 4e24be018e ("bonding: add new parameter ns_targets")
Signed-off-by: Ricardo B. Marlière <rbm@suse.com>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://patch.msgid.link/20260307-net-nd_tbl_fixes-v4-2-e2677e85628c@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 17:53:37 -07:00
Jakub Kicinski
94a4b1f959 ipv6: move the disable_ipv6_mod knob to core code
From: Jakub Kicinski <kuba@kernel.org>

Make sure disable_ipv6_mod itself is not part of the IPv6 module,
in case core code wants to refer to it. We will remove support
for IPv6=m soon, this change helps make fixes we commit before
that less messy.

Link: https://patch.msgid.link/20260307-net-nd_tbl_fixes-v4-1-e2677e85628c@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-11 17:53:37 -07:00
Shyam Prasad N
e3beefd3af cifs: make default value of retrans as zero
When retrans mount option was introduced, the default value was set
as 1. However, in the light of some bugs that this has exposed recently
we should change it to 0 and retain the old behaviour before this option
was introduced.

Cc: <stable@vger.kernel.org>
Reviewed-by: Bharath SM <bharathsm@microsoft.com>
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-11 18:46:42 -05:00
Paulo Alcantara
40e75e42f4 smb: client: fix open handle lookup in cifs_open()
When looking up open handles to be re-used in cifs_open(), calling
cifs_get_{writable,readable}_path() is wrong as it will look up for
the first matching open handle, and if @file->f_flags doesn't match,
it will ignore the remaining open handles in
cifsInodeInfo::openFileList that might potentially match
@file->f_flags.

For writable and readable handles, fix this by calling
__cifs_get_writable_file() and __find_readable_file(), respectively,
with FIND_OPEN_FLAGS set.

With the patch, the following program ends up with two opens instead
of three sent over the wire.

```
  #define _GNU_SOURCE
  #include <unistd.h>
  #include <string.h>
  #include <fcntl.h>

  int main(int argc, char *argv[])
  {
          int fd;

          fd = open("/mnt/1/foo", O_CREAT | O_WRONLY | O_TRUNC, 0664);
          close(fd);
          fd = open("/mnt/1/foo", O_DIRECT | O_WRONLY);
          close(fd);
          fd = open("/mnt/1/foo", O_WRONLY);
          close(fd);
          fd = open("/mnt/1/foo", O_DIRECT | O_WRONLY);
          close(fd);
          return 0;
  }
```

```
$ mount.cifs //srv/share /mnt/1 -o ...
$ gcc test.c && ./a.out
```

Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Cc: David Howells <dhowells@redhat.com>
Cc: linux-cifs@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-11 18:46:40 -05:00
Henrique Carvalho
d4c7210d2f smb: client: fix iface port assignment in parse_server_interfaces
parse_server_interfaces() initializes interface socket addresses with
CIFS_PORT. When the mount uses a non-default port this overwrites the
configured destination port.

Later, cifs_chan_update_iface() copies this sockaddr into server->dstaddr,
causing reconnect attempts to use the wrong port after server interface
updates.

Use the existing port from server->dstaddr instead.

Cc: stable@vger.kernel.org
Fixes: fe856be475 ("CIFS: parse and store info on iface queries")
Tested-by: Dr. Thomas Orgis <thomas.orgis@uni-hamburg.de>
Reviewed-by: Enzo Matsumiya <ematsumiya@suse.de>
Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-11 18:46:28 -05:00
Adrian Ng Ho Yin
f311a05784 i3c: dw-i3c-master: Set SIR_REJECT in DAT on device attach and reattach
The DesignWare I3C master controller ACKs IBIs as soon as a valid
Device Address Table (DAT) entry is present. This can create a race
between device attachment (after DAA) and the point where the client
driver enables IBIs via i3c_device_enable_ibi().

Set DEV_ADDR_TABLE_SIR_REJECT in the DAT entry during
attach_i3c_dev() and reattach_i3c_dev() so that IBIs are rejected
by default. The bit is managed thereafter by the existing
dw_i3c_master_set_sir_enabled() function, which clears it in
enable_ibi() after ENEC is issued, and restores it in disable_ibi()
after DISEC.

Fixes: 1dd728f5d4 ("i3c: master: Add driver for Synopsys DesignWare IP")
Signed-off-by: Adrian Ng Ho Yin <adrianhoyin.ng@altera.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/53f5b8cbdd8af789ec38b95b02873f32f9182dd6.1770962368.git.adrianhoyin.ng@altera.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:50:29 +01:00
Peter Yin
f26ecaa0f0 i3c: master: dw-i3c: Fix missing of_node for virtual I2C adapter
The DesignWare I3C master driver creates a virtual I2C adapter to
provide backward compatibility with I2C devices. However, the current
implementation does not associate this virtual adapter with any
Device Tree node.

Propagate the of_node from the I3C master platform device to the
virtual I2C adapter's device structure. This ensures that standard
I2C aliases are correctly resolved and bus numbering remains consistent.

Signed-off-by: Peter Yin <peteryin.openbmc@gmail.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260302075645.1492766-1-peteryin.openbmc@gmail.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:48:26 +01:00
Adrian Hunter
9a258d1336 i3c: mipi-i3c-hci: Fallback to software reset when bus disable fails
Disruption of the MIPI I3C HCI controller's internal state can cause
i3c_hci_bus_disable() to fail when attempting to shut down the bus.

In the code paths where bus disable is invoked - bus clean-up and runtime
suspend - the controller does not need to remain operational afterward, so
a full controller reset is a safe recovery mechanism.

Add a fallback to issue a software reset when disabling the bus fails.
This ensures the bus is reliably halted even if the controller's state
machine is stuck or unresponsive.

The fallback is used both during bus clean-up and in the runtime suspend
path.  In the latter case, ensure interrupts are quiesced after reset.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-15-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:02 +01:00
Adrian Hunter
c6396b835a i3c: mipi-i3c-hci: Fix handling of shared IRQs during early initialization
Shared interrupts may fire unexpectedly, including during periods when the
controller is not yet fully initialized. Commit b9a15012a1
("i3c: mipi-i3c-hci: Add optional Runtime PM support") addressed this issue
for the runtime-suspended state, but the same problem can also occur before
the bus is enabled for the first time.

Ensure the IRQ handler ignores interrupts until initialization is complete
by making consistent use of the existing irq_inactive flag.  The flag is
now set to false immediately before enabling the bus.

To guarantee correct ordering with respect to the IRQ handler, protect
all transitions of irq_inactive with the same spinlock used inside the
handler.

Fixes: b8460480f6 ("i3c: mipi-i3c-hci: Allow for Multi-Bus Instances")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-14-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:02 +01:00
Adrian Hunter
e44d271922 i3c: mipi-i3c-hci: Fix race in DMA error handling in interrupt context
The DMA ring halts whenever a transfer encounters an error. The interrupt
handler previously attempted to detect this situation and restart the ring
if a transfer completed at the same time. However, this restart logic runs
entirely in interrupt context and is inherently racy: it interacts with
other paths manipulating the ring state, and fully serializing it within
the interrupt handler is not practical.

Move this error-recovery logic out of the interrupt handler and into the
transfer-processing path (i3c_hci_process_xfer()), where serialization and
state management are already controlled. Introduce a new optional I/O-ops
callback, handle_error(), invoked when a completed transfer reports an
error. For DMA operation, the implementation simply calls the existing
dequeue function, which safely aborts and restarts the ring when needed.

This removes the fragile ring-restart logic from the interrupt handler and
centralizes error handling where proper sequencing can be ensured.

Fixes: ccdb2e0e3b ("i3c: mipi-i3c-hci: Add Intel specific quirk to ring resuming")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-13-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:02 +01:00
Adrian Hunter
7ac45bc68f i3c: mipi-i3c-hci: Consolidate common xfer processing logic
Several parts of the MIPI I3C HCI driver duplicate the same sequence for
queuing a transfer, waiting for completion, and handling timeouts. This
logic appears in five separate locations and will be affected by an
upcoming fix.

Refactor the repeated code into a new helper, i3c_hci_process_xfer(), and
store the timeout value in the hci_xfer structure so that callers do not
need to pass it as a separate parameter.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-12-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:02 +01:00
Adrian Hunter
b6d586431a i3c: mipi-i3c-hci: Restart DMA ring correctly after dequeue abort
The DMA dequeue path attempts to restart the ring after aborting an
in-flight transfer, but the current sequence is incomplete. The controller
must be brought out of the aborted state and the ring control registers
must be programmed in the correct order: first clearing ABORT, then
re-enabling the ring and asserting RUN_STOP to resume operation.

Add the missing controller resume step and update the ring control writes
so that the ring is restarted using the proper sequence.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-11-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:02 +01:00
Adrian Hunter
ec3cfd835f i3c: mipi-i3c-hci: Add missing TID field to no-op command descriptor
The internal control command descriptor used for no-op commands includes a
Transaction ID (TID) field, but the no-op command constructed in
hci_dma_dequeue_xfer() omitted it.  As a result, the hardware receives a
no-op descriptor without the expected TID.

This bug has gone unnoticed because the TID is currently not validated in
the no-op completion path, but the descriptor format requires it to be
present.

Add the missing TID field when generating a no-op descriptor so that its
layout matches the defined command structure.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-10-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:02 +01:00
Adrian Hunter
b795e68bf3 i3c: mipi-i3c-hci: Correct RING_CTRL_ABORT handling in DMA dequeue
The logic used to abort the DMA ring contains several flaws:

 1. The driver unconditionally issues a ring abort even when the ring has
    already stopped.
 2. The completion used to wait for abort completion is never
    re-initialized, resulting in incorrect wait behavior.
 3. The abort sequence unintentionally clears RING_CTRL_ENABLE, which
    resets hardware ring pointers and disrupts the controller state.
 4. If the ring is already stopped, the abort operation should be
    considered successful without attempting further action.

Fix the abort handling by checking whether the ring is running before
issuing an abort, re-initializing the completion when needed, ensuring that
RING_CTRL_ENABLE remains asserted during abort, and treating an already
stopped ring as a successful condition.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-9-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:02 +01:00
Adrian Hunter
f0b5159637 i3c: mipi-i3c-hci: Fix race between DMA ring dequeue and interrupt handler
The DMA ring bookkeeping in the MIPI I3C HCI driver is updated from two
contexts: the DMA ring dequeue path (hci_dma_dequeue_xfer()) and the
interrupt handler (hci_dma_xfer_done()).  Both modify the ring's
in-flight transfer state - specifically rh->src_xfers[] and
xfer->ring_entry - but without any serialization.  This allows the two
paths to race, potentially leading to inconsistent ring state.

Serialize access to the shared ring state by extending the existing
spinlock to cover the DMA dequeue path and the entire interrupt handler.
Since the core IRQ handler now holds this lock, remove the per-function
locking from the PIO and DMA sub-handlers.

Additionally, clear the completed entry in rh->src_xfers[] in
hci_dma_xfer_done() so it cannot be matched or completed again.

Finally, place the ring restart sequence under the same lock in
hci_dma_dequeue_xfer() to avoid concurrent enqueue or completion
operations while the ring state is being modified.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-8-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:01 +01:00
Adrian Hunter
1dca8aee80 i3c: mipi-i3c-hci: Fix race in DMA ring dequeue
The HCI DMA dequeue path (hci_dma_dequeue_xfer()) may be invoked for
multiple transfers that timeout around the same time.  However, the
function is not serialized and can race with itself.

When a timeout occurs, hci_dma_dequeue_xfer() stops the ring, processes
incomplete transfers, and then restarts the ring.  If another timeout
triggers a parallel call into the same function, the two instances may
interfere with each other - stopping or restarting the ring at unexpected
times.

Add a mutex so that hci_dma_dequeue_xfer() is serialized with respect to
itself.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-7-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:01 +01:00
Adrian Hunter
4decbbc8a8 i3c: mipi-i3c-hci: Fix race in DMA ring enqueue for parallel xfers
The I3C subsystem allows multiple transfers to be queued concurrently.
However, the MIPI I3C HCI driver's DMA enqueue path, hci_dma_queue_xfer(),
lacks sufficient serialization.

In particular, the allocation of the enqueue_ptr and its subsequent update
in the RING_OPERATION1 register, must be done atomically.  Otherwise, for
example, it would be possible for 2 transfers to be allocated the same
enqueue_ptr.

Extend the use of the existing spinlock for that purpose.  Keep a count of
the number of xfers enqueued so that it is easy to determine if the ring
has enough space.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-6-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:01 +01:00
Adrian Hunter
fa12bb903b i3c: mipi-i3c-hci: Consolidate spinlocks
The MIPI I3C HCI driver currently uses separate spinlocks for different
contexts (PIO vs. DMA rings).  This split is unnecessary and complicates
upcoming fixes.  The driver does not support concurrent PIO and DMA
operation, and it only supports a single DMA ring, so a single lock is
sufficient for all paths.

Introduce a unified spinlock in struct i3c_hci, switch both PIO and DMA
code to use it, and remove the per-context locks.

No functional change is intended in this patch.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-5-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:01 +01:00
Adrian Hunter
f3bcbfe1b8 i3c: mipi-i3c-hci: Factor out DMA mapping from queuing path
Prepare for fixing a race in the DMA ring enqueue path when handling
parallel transfers.  Move all DMA mapping out of hci_dma_queue_xfer()
and into a new helper that performs the mapping up front.

This refactoring allows the upcoming fix to extend the spinlock coverage
around the enqueue operation without performing DMA mapping under the
spinlock.

No functional change is intended in this patch.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-4-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:01 +01:00
Adrian Hunter
fa9586bd77 i3c: mipi-i3c-hci: Fix Hot-Join NACK
The MIPI I3C HCI host controller driver does not implement Hot-Join
handling, yet Hot-Join response control defaults to allowing devices to
Hot-Join the bus.  Configure HC_CONTROL_HOT_JOIN_CTRL to NACK all Hot-Join
attempts.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-3-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:01 +01:00
Adrian Hunter
4167b89144 i3c: mipi-i3c-hci: Use ETIMEDOUT instead of ETIME for timeout errors
The MIPI I3C HCI driver currently returns -ETIME for various timeout
conditions, while other I3C master drivers consistently use -ETIMEDOUT
for the same class of errors.  Align the HCI driver with the rest of the
subsystem by replacing all uses of -ETIME with -ETIMEDOUT.

Fixes: 9ad9a52cce ("i3c/master: introduce the mipi-i3c-hci driver")
Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260306072451.11131-2-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-03-11 22:10:01 +01:00
Chuck Lever
35b16a7a2c perf synthetic-events: Fix stale build ID in module MMAP2 records
perf_event__synthesize_modules() allocates a single union perf_event and
reuses it across every kernel module callback.

After the first module is processed, perf_record_mmap2__read_build_id()
sets PERF_RECORD_MISC_MMAP_BUILD_ID in header.misc and writes that
module's build ID into the event.

On subsequent iterations the callback overwrites start, len, pid, and
filename for the next module but never clears the stale build ID fields
or the MMAP_BUILD_ID flag.

When perf_record_mmap2__read_build_id() runs for the second module it
sees the flag, reads the stale build ID into a dso_id, and
__dso__improve_id() permanently poisons the DSO with the wrong build ID.

Every module after the first therefore receives the first module's build
ID in its MMAP2 record.

On a system with the sunrpc and nfsd modules loaded, this causes perf
script and perf report to show [unknown] for all module symbols.

The latent bug has existed since commit d9f2ecbc5e ("perf dso:
Move build_id to dso_id") introduced the PERF_RECORD_MISC_MMAP_BUILD_ID
check in perf_record_mmap2__read_build_id().

Commit 53b00ff358 ("perf record: Make --buildid-mmap the default")
then exposed it to all users by making the MMAP2-with-build-ID path the
default.  Both commits were merged in the same series.

Clear the MMAP_BUILD_ID flag and zero the build_id union before each
call to perf_record_mmap2__read_build_id() so that every module starts
with a clean slate.

Fixes: d9f2ecbc5e ("perf dso: Move build_id to dso_id")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-11 17:47:42 -03:00
Tom Ryan
6f02c6b196 io_uring: fix physical SQE bounds check for SQE_MIXED 128-byte ops
When IORING_SETUP_SQE_MIXED is used without IORING_SETUP_NO_SQARRAY,
the boundary check for 128-byte SQE operations in io_init_req()
validated the logical SQ head position rather than the physical SQE
index.

The existing check:

  !(ctx->cached_sq_head & (ctx->sq_entries - 1))

ensures the logical position isn't at the end of the ring, which is
correct for NO_SQARRAY rings where physical == logical. However, when
sq_array is present, an unprivileged user can remap any logical
position to an arbitrary physical index via sq_array. Setting
sq_array[N] = sq_entries - 1 places a 128-byte operation at the last
physical SQE slot, causing the 128-byte memcpy in
io_uring_cmd_sqe_copy() to read 64 bytes past the end of the SQE
array.

Replace the cached_sq_head alignment check with a direct validation
of the physical SQE index, which correctly handles both sq_array and
NO_SQARRAY cases.

Fixes: 1cba30bf9f ("io_uring: add support for IORING_SETUP_SQE_MIXED")
Signed-off-by: Tom Ryan <ryan36005@gmail.com>
Link: https://patch.msgid.link/20260310052003.72871-1-ryan36005@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-11 14:35:19 -06:00
Jens Axboe
177c694321 io_uring/eventfd: use ctx->rings_rcu for flags checking
Similarly to what commit e78f7b70e837 did for local task work additions,
use ->rings_rcu under RCU rather than dereference ->rings directly. See
that commit for more details.

Cc: stable@vger.kernel.org
Fixes: 79cfe9e59c ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-11 14:35:19 -06:00
Jens Axboe
9618908026 io_uring: ensure ctx->rings is stable for task work flags manipulation
If DEFER_TASKRUN | SETUP_TASKRUN is used and task work is added while
the ring is being resized, it's possible for the OR'ing of
IORING_SQ_TASKRUN to happen in the small window of swapping into the
new rings and the old rings being freed.

Prevent this by adding a 2nd ->rings pointer, ->rings_rcu, which is
protected by RCU. The task work flags manipulation is inside RCU
already, and if the resize ring freeing is done post an RCU synchronize,
then there's no need to add locking to the fast path of task work
additions.

Note: this is only done for DEFER_TASKRUN, as that's the only setup mode
that supports ring resizing. If this ever changes, then they too need to
use the io_ctx_mark_taskrun() helper.

Link: https://lore.kernel.org/io-uring/20260309062759.482210-1-naup96721@gmail.com/
Cc: stable@vger.kernel.org
Fixes: 79cfe9e59c ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-11 14:35:16 -06:00
Chen Ni
30c64fb983 ASoC: amd: acp-mach-common: Add missing error check for clock acquisition
The acp_card_rt5682_init() and acp_card_rt5682s_init() functions did not
check the return values of clk_get(). This could lead to a kernel crash
when the invalid pointers are later dereferenced by clock core
functions.

Fix this by:
1. Changing clk_get() to the device-managed devm_clk_get().
2. Adding IS_ERR() checks immediately after each clock acquisition.

Fixes: 8b72562668 ("ASoC: amd: acp: Add support for RT5682-VS codec")
Fixes: d4c750f2c7 ("ASoC: amd: acp: Add generic machine driver support for ACP cards")
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Link: https://patch.msgid.link/20260310044327.2582018-1-nichen@iscas.ac.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-11 19:18:17 +00:00
CL Wang
f879365c5b spi: atcspi200: Handle invalid buswidth and fix compiler warning
The kernel test robot reported a compile-time error regarding the
FIELD_PREP() value being too large for the TRANS_DUAL_QUAD field:

  error: FIELD_PREP: value too large for the field
  note: in expansion of macro 'TRANS_DUAL_QUAD'
  tc |= TRANS_DUAL_QUAD(ffs(op->data.buswidth) - 1);

This occurs because TRANS_DUAL_QUAD is defined as a 2-bit field, and
GCC's static analysis cannot deduce that `ffs(op->data.buswidth) - 1`
will strictly fall within the 0~3 range. Although the SPI framework
guarantees that `op->data.buswidth` is valid at runtime (e.g., 1, 2,
4, 8), an explicit bounds check is necessary to satisfy the compiler.

To resolve the build warning, introduce a safe fallback mechanism.
If an unexpected buswidth is encountered, the driver will trigger
a WARN_ON_ONCE to leave a trace and fall back to width_code = 0
(standard 1-bit SPI mode). This approach guarantees predictable
hardware behavior.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602140738.P7ZozxzI-lkp@intel.com/
Suggested-by: Pei Xiao <xiaopei01@kylinos.cn>
Signed-off-by: CL Wang <cl634@andestech.com>
Link: https://patch.msgid.link/20260303024737.1791196-1-cl634@andestech.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-11 19:08:43 +00:00
Paolo Bonzini
dca01b0a26 Documentation: kvm: fix formatting of the quirks table
A recently added quirk does not fit in the left column of the table,
so it all has to be reformatted and realigned.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 19:16:52 +01:00
Mario Limonciello
3646ff2878 drm/amd: Set num IP blocks to 0 if discovery fails
If discovery has failed for any reason (such as no support for a block)
then there is no need to unwind all the IP blocks in fini. In this
condition there can actually be failures during the unwind too.

Reset num_ip_blocks to zero during failure path and skip the unnecessary
cleanup path.

Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit fae5984296b981c8cc3acca35b701c1f332a6cd8)
Cc: stable@vger.kernel.org
2026-03-11 14:04:08 -04:00
Philip Yang
2ce75a0b7e drm/amdkfd: Unreserve bo if queue update failed
Error handling path should unreserve bo then return failed.

Fixes: 305cd109b7 ("drm/amdkfd: Validate user queue update")
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Alex Sierra <alex.sierra@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit c24afed7de9ecce341825d8ab55a43a254348b33)
2026-03-11 14:02:45 -04:00
Ivan Lipski
becbab4a5a drm/amd/display: Check for S0i3 to be done before DCCG init on DCN21
[WHY]
On DCN21, dccg2_init() is called in dcn10_init_hw() before
bios_golden_init(). During S0i3 resume, BIOS sets MICROSECOND_TIME_BASE_DIV
to 0x00120464 as a marker. dccg2_init() overwrites this to 0x00120264,
causing dcn21_s0i3_golden_init_wa() to misdetect the state and skip golden
init.
Eventually during the resume sequence, a flip timeout occurs.

[HOW]
Skip DCCG on dccg2_is_s0i3_golden_init_wa_done() on DCN21.

Fixes: 4c595e7511 ("drm/amd/display: Migrate DCCG registers access from hwseq to dccg component.")
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit c61eda434336cf2c033aa35efdc9a08b31d2fdfa)
2026-03-11 14:01:39 -04:00
Ivan Lipski
33efc6346e drm/amd/display: Add missing DCCG register entries for DCN20-DCN316
Commit 4c595e7511 ("drm/amd/display: Migrate DCCG registers access
from hwseq to dccg component.") moved register writes from hwseq to
dccg2_*() functions but did not add the registers to the DCCG register
list macros. The struct fields default to 0, so REG_WRITE() targets
MMIO offset 0, causing a GPU hang on resume (seen on DCN21/DCN30
during IGT kms_cursor_crc@cursor-suspend).

Add
- MICROSECOND_TIME_BASE_DIV
- MILLISECOND_TIME_BASE_DIV
- DCCG_GATE_DISABLE_CNTL
- DCCG_GATE_DISABLE_CNTL2
- DC_MEM_GLOBAL_PWR_REQ_CNTL
to macros in  dcn20_dccg.h, dcn301_dccg.h, dcn31_dccg.h, and dcn314_dccg.h.

Fixes: 4c595e7511 ("drm/amd/display: Migrate DCCG registers access from hwseq to dccg component.")
Reported-by: Rafael Passos <rafael@rcpassos.me>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit e6e2b956fc814de766d3480be7018297c41d3ce0)
2026-03-11 14:01:16 -04:00
Paolo Bonzini
6b1ca262a9 KVM: x86: clarify leave_smm() return value
The return value of vmx_leave_smm() is unrelated from that of
nested_vmx_enter_non_root_mode().  Check explicitly for success
(which happens to be 0) and return 1 just like everywhere
else in vmx_leave_smm().

Likewise, in svm_leave_smm() return 0/1 instead of the 0/1/-errno
returned by tenter_svm_guest_mode().

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:12 +01:00
Paolo Bonzini
3e745694b0 selftests: kvm: add a test that VMX validates controls on RSM
Add a test checking that invalid eVMCS contents are validated after an
RSM instruction is emulated.

The failure mode is simply that the RSM succeeds, because KVM virtualizes
NMIs anyway while running L2; the two pin-based execution controls used
by the test are entirely handled by KVM and not by the processor.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:12 +01:00
Paolo Bonzini
c52b534f26 selftests: kvm: extract common functionality out of smm_test.c
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:12 +01:00
Paolo Bonzini
be5fa8737d KVM: SVM: check validity of VMCB controls when returning from SMM
The VMCB12 is stored in guest memory and can be mangled while in SMM; it
is then reloaded by svm_leave_smm(), but it is not checked again for
validity.

Move the cached vmcb12 control and save consistency checks out of
svm_set_nested_state() and into a helper, and reuse it in
svm_leave_smm().

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Paolo Bonzini
5a30e8aea0 KVM: VMX: check validity of VMCS controls when returning from SMM
The VMCS12 is not available while in SMM.  However, it can be overwritten
if userspace manages to trigger copy_enlightened_to_vmcs12() - for example
via KVM_GET_NESTED_STATE.

Because of this, the VMCS12 has to be checked for validity before it is
used to generate the VMCS02.  Move the check code out of vmx_set_nested_state()
(the other "not a VMLAUNCH/VMRESUME" path that emulates a nested vmentry)
and reuse it in vmx_leave_smm().

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Sean Christopherson
87d0f901a9 KVM: SVM: Set/clear CR8 write interception when AVIC is (de)activated
Explicitly set/clear CR8 write interception when AVIC is (de)activated to
fix a bug where KVM leaves the interception enabled after AVIC is
activated.  E.g. if KVM emulates INIT=>WFS while AVIC is deactivated, CR8
will remain intercepted in perpetuity.

On its own, the dangling CR8 intercept is "just" a performance issue, but
combined with the TPR sync bug fixed by commit d02e48830e ("KVM: SVM:
Sync TPR from LAPIC into VMCB::V_TPR even if AVIC is active"), the danging
intercept is fatal to Windows guests as the TPR seen by hardware gets
wildly out of sync with reality.

Note, VMX isn't affected by the bug as TPR_THRESHOLD is explicitly ignored
when Virtual Interrupt Delivery is enabled, i.e. when APICv is active in
KVM's world.  I.e. there's no need to trigger update_cr8_intercept(), this
is firmly an SVM implementation flaw/detail.

WARN if KVM gets a CR8 write #VMEXIT while AVIC is active, as KVM should
never enter the guest with AVIC enabled and CR8 writes intercepted.

Fixes: 3bbf3565f4 ("svm: Do not intercept CR8 when enable AVIC")
Cc: stable@vger.kernel.org
Cc: Jim Mattson <jmattson@google.com>
Cc: Naveen N Rao (AMD) <naveen@kernel.org>
Cc: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Reviewed-by: Naveen N Rao (AMD) <naveen@kernel.org>
Reviewed-by: Jim Mattson <jmattson@google.com>
Link: https://patch.msgid.link/20260203190711.458413-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[Squash fix to avic_deactivate_vmcb. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Sean Christopherson
3989a6d036 KVM: SVM: Initialize AVIC VMCB fields if AVIC is enabled with in-kernel APIC
Initialize all per-vCPU AVIC control fields in the VMCB if AVIC is enabled
in KVM and the VM has an in-kernel local APIC, i.e. if it's _possible_ the
vCPU could activate AVIC at any point in its lifecycle.  Configuring the
VMCB if and only if AVIC is active "works" purely because of optimizations
in kvm_create_lapic() to speculatively set apicv_active if AVIC is enabled
*and* to defer updates until the first KVM_RUN.  In quotes because KVM
likely won't do the right thing if kvm_apicv_activated() is false, i.e. if
a vCPU is created while APICv is inhibited at the VM level for whatever
reason.  E.g. if the inhibit is *removed* before KVM_REQ_APICV_UPDATE is
handled in KVM_RUN, then __kvm_vcpu_update_apicv() will elide calls to
vendor code due to seeing "apicv_active == activate".

Cleaning up the initialization code will also allow fixing a bug where KVM
incorrectly leaves CR8 interception enabled when AVIC is activated without
creating a mess with respect to whether AVIC is activated or not.

Cc: stable@vger.kernel.org
Fixes: 67034bb9dd ("KVM: SVM: Add irqchip_split() checks before enabling AVIC")
Fixes: 6c3e4422dd ("svm: Add support for dynamic APICv")
Reviewed-by: Naveen N Rao (AMD) <naveen@kernel.org>
Reviewed-by: Jim Mattson <jmattson@google.com>
Link: https://patch.msgid.link/20260203190711.458413-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Jim Mattson
e2ffe85b6d KVM: x86: Introduce KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM
Add KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM to allow L1 to set
FREEZE_IN_SMM in vmcs12's GUEST_IA32_DEBUGCTL field, as permitted
prior to commit 6b1dd26544 ("KVM: VMX: Preserve host's
DEBUGCTLMSR_FREEZE_IN_SMM while running the guest").  Enable the quirk
by default for backwards compatibility (like all quirks); userspace
can disable it via KVM_CAP_DISABLE_QUIRKS2 for consistency with the
constraints on WRMSR(IA32_DEBUGCTL).

Note that the quirk only bypasses the consistency check.  The vmcs02 bit is
still owned by the host, and PMCs are not frozen during virtualized SMM.
In particular, if a host administrator decides that PMCs should not be
frozen during physical SMM, then L1 has no say in the matter.

Fixes: 095686e6fc ("KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested VM-Enter")
Cc: stable@vger.kernel.org
Signed-off-by: Jim Mattson <jmattson@google.com>
Link: https://patch.msgid.link/20260205231537.1278753-1-jmattson@google.com
[sean: tag for stable@, clean-up and fix goofs in the comment and docs]
Signed-off-by: Sean Christopherson <seanjc@google.com>
[Rename quirk. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Li RongQing
b54e4707a6 KVM: x86: Fix SRCU list traversal in kvm_fire_mask_notifiers()
The mask_notifier_list is protected by kvm->irq_srcu, but the traversal
in kvm_fire_mask_notifiers() incorrectly uses hlist_for_each_entry_rcu().
This leads to lockdep warnings because the standard RCU iterator expects
to be under rcu_read_lock(), not SRCU.

Replace the RCU variant with hlist_for_each_entry_srcu() and provide
the proper srcu_read_lock_held() annotation to ensure correct
synchronization and silence lockdep.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Link: https://patch.msgid.link/20260204091206.2617-1-lirongqing@baidu.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Namhyung Kim
f78e627a01 KVM: VMX: Fix a wrong MSR update in add_atomic_switch_msr()
The previous change had a bug to update a guest MSR with a host value.

Fixes: c3d6a7210a ("KVM: VMX: Dedup code for adding MSR to VMCS's auto list")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://patch.msgid.link/20260220220216.389475-1-namhyung@kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Manuel Andreas
a5264387c2 KVM: x86: hyper-v: Validate all GVAs during PV TLB flush
In KVM guests with Hyper-V hypercalls enabled, the hypercalls
HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST and HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX
allow a guest to request invalidation of portions of a virtual TLB.
For this, the hypercall parameter includes a list of GVAs that are supposed
to be invalidated.

Currently, only the base GVA is checked to be canonical. In reality, this
check needs to be performed for the entire range of GVAs, as checking only
the base GVA enables guests running on Intel hardware to trigger a
WARN_ONCE in the host (see Fixes commit below).

Move the check for non-canonical addresses to be performed for every GVA
of the supplied range to avoid the splat, and to be more in line with the
Hyper-V specification, since, although unlikely, a range starting with an
invalid GVA may still contain GVAs that are valid.

Fixes: fa787ac07b ("KVM: x86/hyper-v: Skip non-canonical addresses during PV TLB flush")
Signed-off-by: Manuel Andreas <manuel.andreas@tum.de>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Link: https://patch.msgid.link/00a7a31b-573b-4d92-91f8-7d7e2f88ea48@tum.de
[sean: massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Carlos López
4b3b8a8b0d KVM: x86: synthesize CPUID bits only if CPU capability is set
KVM incorrectly synthesizes CPUID bits for KVM-only leaves, as the
following branch in kvm_cpu_cap_init() is never taken:

    if (leaf < NCAPINTS)
        kvm_cpu_caps[leaf] &= kernel_cpu_caps[leaf];

This means that bits set via SYNTHESIZED_F() for KVM-only leaves are
unconditionally set. This for example can cause issues for SEV-SNP
guests running on Family 19h CPUs, as TSA_SQ_NO and TSA_L1_NO are
always enabled by KVM in 80000021[ECX]. When userspace issues a
SNP_LAUNCH_UPDATE command to update the CPUID page for the guest, SNP
firmware will explicitly reject the command if the page sets sets these
bits on vulnerable CPUs.

To fix this, check in SYNTHESIZED_F() that the corresponding X86
capability is set before adding it to to kvm_cpu_cap_features.

Fixes: 31272abd59 ("KVM: SVM: Advertise TSA CPUID bits to guests")
Link: https://lore.kernel.org/all/20260208164233.30405-1-clopez@suse.de/
Signed-off-by: Carlos López <clopez@suse.de>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Link: https://patch.msgid.link/20260209153108.70667-2-clopez@suse.de
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:11 +01:00
Sean Christopherson
fe2a08eca5 KVM: PPC: e500: Rip out "struct tlbe_ref"
Complete the ~13 year journey started by commit 47bf379742
("kvm/ppc/e500: eliminate tlb_refs"), and actually remove "struct
tlbe_ref".

No functional change intended (verified disassembly of e500_mmu.o and
e500_mmu_host.o is identical before and after).

Link: https://patch.msgid.link/20260303190339.974325-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:10 +01:00
Sean Christopherson
4c01346396 KVM: PPC: e500: Fix build error due to using kmalloc_obj() with wrong type
Fix a build error in kvmppc_e500_tlb_init() that was introduced by the
conversion to use kzalloc_objs(), as KVM confusingly uses the size of the
structure that is one and only field in tlbe_priv:

  arch/powerpc/kvm/e500_mmu.c:923:33: error: assignment to 'struct tlbe_priv *'
    from incompatible pointer type 'struct tlbe_ref *' [-Wincompatible-pointer-types]
  923 |         vcpu_e500->gtlb_priv[0] = kzalloc_objs(struct tlbe_ref,
      |                                 ^

KVM has been flawed since commit 0164c0f0c4 ("KVM: PPC: e500: clear up
confusion between host and guest entries"), but the issue went unnoticed
until kmalloc_obj() came along and enforced types, as "struct tlbe_priv"
was just a wrapper of "struct tlbe_ref" (why on earth the two ever existed
separately...).

Fixes: 69050f8d6d ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types")
Cc: Kees Cook <kees@kernel.org>
Reviewed-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Link: https://patch.msgid.link/20260303190339.974325-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:10 +01:00
Kai Huang
cf534a09fb KVM: selftests: Increase 'maxnode' for guest_memfd tests
Increase 'maxnode' when using 'get_mempolicy' syscall in guest_memfd
mmap and NUMA policy tests to fix a failure on one Intel GNR platform.

On a CXL-capable platform, the memory affinity of CXL memory regions may
not be covered by the SRAT.  Since each CXL memory region is enumerated
via a CFMWS table, at early boot the kernel parses all CFMWS tables to
detect all CXL memory regions and assigns a 'faked' NUMA node for each
of them, starting from the highest NUMA node ID enumerated via the SRAT.

This increases the 'nr_node_ids'.  E.g., on the aforementioned Intel GNR
platform which has 4 NUMA nodes and 18 CFMWS tables, it increases to 22.

This results in the 'get_mempolicy' syscall failure on that platform,
because currently 'maxnode' is hard-coded to 8 but the 'get_mempolicy'
syscall requires the 'maxnode' to be not smaller than the 'nr_node_ids'.

Increase the 'maxnode' to the number of bits of 'nodemask', which is
'unsigned long', to fix this.

This may not cover all systems.  Perhaps a better way is to always set
the 'nodemask' and 'maxnode' based on the actual maximum NUMA node ID on
the system, but for now just do the simple way.

Reported-by: Yi Lai <yi1.lai@intel.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221014
Closes: https://lore.kernel.org/all/bug-221014-28872@https.bugzilla.kernel.org%2F
Signed-off-by: Kai Huang <kai.huang@intel.com>
Reviewed-by: Yuan Yao <yaoyuan@linux.alibaba.com>
Link: https://patch.msgid.link/20260302205158.178058-1-kai.huang@intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-03-11 18:41:10 +01:00
Paolo Bonzini
6e827fa2c5 Merge tag 'kvmarm-fixes-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 7.0, take #3

- Correctly handle deeactivation of out-of-LRs interrupts by
  starting the EOIcount deactivation walk *after* the last irq
  that made it into an LR. This avoids deactivating irqs that
  are in the LRs and that the vcpu hasn't deactivated yet.

- Avoid calling into the stubs to probe for ICH_VTR_EL2.TDS when
  pKVM is already enabled -- not only thhis isn't possible (pKVM
  will reject the call), but it is also useless: this can only
  happen for a CPU that has already booted once, and the capability
  will not change.
2026-03-11 18:41:09 +01:00
Paolo Bonzini
94fe3e6515 Merge tag 'kvm-x86-generic-7.0-rc3' of https://github.com/kvm-x86/linux into HEAD
KVM generic changes for 7.0

 - Remove a subtle pseudo-overlay of kvm_stats_desc, which, aside from being
   unnecessary and confusing, triggered compiler warnings due to
   -Wflex-array-member-not-at-end.

 - Document that vcpu->mutex is take outside of kvm->slots_lock and
   kvm->slots_arch_lock, which is intentional and desirable despite being
   rather unintuitive.
2026-03-11 18:01:55 +01:00
Paolo Bonzini
40c2ffcac0 Merge tag 'kvm-riscv-fixes-7.0-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv fixes for 7.0, take #1

- Prevent speculative out-of-bounds access using array_index_nospec()
  in APLIC interrupt handling, ONE_REG regiser access, AIA CSR access,
  float register access, and PMU counter access
- Fix potential use-after-free issues in kvm_riscv_gstage_get_leaf(),
  kvm_riscv_aia_aplic_has_attr(), and kvm_riscv_aia_imsic_has_attr()
- Fix potential null pointer dereference in kvm_riscv_vcpu_aia_rmw_topei()
- Fix off-by-one array access in SBI PMU
- Skip THP support check during dirty logging
- Fix error code returned for Smstateen and Ssaia ONE_REG interface
- Check host Ssaia extension when creating AIA irqchip
2026-03-11 18:01:03 +01:00
Paolo Bonzini
de353e3fcc Merge tag 'kvmarm-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 7.0, take #2

- Fix a couple of low-severity bugs in our S2 fault handling path,
  affecting the recently introduced LS64 handling and the even more
  esoteric handling of hwpoison in a nested context

- Address yet another syzkaller finding in the vgic initialisation,
  were we would end-up destroying an uninitialised vgic, with nasty
  consequences

- Address an annoying case of pKVM failing to boot when some of the
  memblock regions that the host is faulting in are not page-aligned

- Inject some sanity in the NV stage-2 walker by checking the limits
  against the advertised PA size, and correctly report the resulting
  faults

- Drop an unnecessary ISB when emulating an EL2 S1 address translation
2026-03-11 18:00:54 +01:00
Qing Wang
464b1c1158 slab: fix memory leak when refill_sheaf() fails
When refill_sheaf() partially fills one sheaf (e.g., fills 5 objects
but need to fill 10), it will update sheaf->size and return -ENOMEM.
However, the callers (alloc_full_sheaf() and __pcs_replace_empty_main())
directly call free_empty_sheaf() on failure, which only does kfree(sheaf),
causing the partially allocated objects memory in sheaf->objects[] leaked.

Fix this by calling sheaf_flush_unused() before free_empty_sheaf() to
free objects of sheaf->objects[]. And also add a WARN_ON() in
free_empty_sheaf() to catch any future cases where a non-empty sheaf is
being freed.

Fixes: ed30c4adfc ("slab: add optimized sheaf refill from partial list")
Signed-off-by: Qing Wang <wangqing7171@gmail.com>
Link: https://patch.msgid.link/20260311093617.4155965-1-wangqing7171@gmail.com
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-03-11 17:55:26 +01:00
Ard Biesheuvel
3fde5281b8 x86/hyperv: Use __naked attribute to fix stackless C function
hv_crash_c_entry() is a C function that is entered without a stack,
and this is only allowed for functions that have the __naked attribute,
which informs the compiler that it must not emit the usual prologue and
epilogue or emit any other kind of instrumentation that relies on a
stack frame.

So split up the function, and set the __naked attribute on the initial
part that sets up the stack, GDT, IDT and other pieces that are needed
for ordinary C execution. Given that function calls are not permitted
either, use the existing long return coded in an asm() block to call the
second part of the function, which is an ordinary function that is
permitted to call other functions as usual.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> # asm parts, not hv parts
Reviewed-by: Mukesh Rathor <mrathor@linux.microsoft.com>
Acked-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: linux-hyperv@vger.kernel.org
Fixes: 94212d3461 ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-11 16:54:24 +00:00
Wei Liu
edd20cb693 Revert "mshv: expose the scrub partition hypercall"
This reverts commit 36d6cbb621.

Calling this as a passthrough hypercall leaves the VM in an inconsistent
state. Revert before it is released.

Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-03-11 16:54:24 +00:00
Linus Torvalds
80234b5ab2 Merge tag 'rproc-v7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux
Pull remoteproc fixes from Bjorn Andersson:

 - Correct the early return from the i.MX remoteproc prepare
   operation, which prevented the platform-specific prepare
   function from being reached

 - Ensure that the Mediatek SCP clock is released during system
   suspend after the recent refactoring to avoid issues with the
   clock framework's prepare lock.

 - Correct the type of the subsys_name_len field in the sysmon
   event QMI message, as the recent introduction of big endian
   support in the QMI encoder highlighted the type mismatch and
   resulted in a failure to encode the message

 - Roll back the devm_ioremap_resource_wc() to a devm_ioremap_wc()
   in the Qualcomm WCNSS remoteproc driver, after reports that
   requesting this resource fails on some platforms

* tag 'rproc-v7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux:
  remoteproc: imx_rproc: Fix unreachable platform prepare_ops
  remoteproc: mediatek: Unprepare SCP clock during system suspend
  remoteproc: sysmon: Correct subsys_name_len type in QMI request
  remoteproc: qcom_wcnss: Fix reserved region mapping failure
2026-03-11 09:30:20 -07:00
Linus Torvalds
2b8e3fac9b Merge tag 'powerpc-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Madhavan Srinivasan:
 - Correct MSI allocation tracking
 - Always use 64 bits PTE for powerpc/e500
 - Fix inline assembly for clang build on PPC32
 - Fixes for clang build issues in powerpc64/ftrace
 - Fixes for powerpc64/bpf JIT and tailcall support
 - Cleanup MPC83XX devicetrees
 - Fix keymile vendor prefix
 - Fix to use big-endian types for crash variables

Thanks to Abhishek Dubey, Christophe Leroy (CS GROUP), Hari Bathini,
Heiko Schocher, J. Neuschäfer, Mahesh Salgaonkar, Nam Cao, Nilay Shroff,
Rob Herring (Arm), Saket Kumar Bhaskar, Sourabh Jain, Stan Johnson, and
Venkat Rao Bagalkote.

* tag 'powerpc-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (23 commits)
  powerpc/pseries: Correct MSI allocation tracking
  powerpc: dts: mpc83xx: Add unit addresses to /memory
  powerpc: dts: mpc8315erdb: Add missing #cells properties to SPI bus
  powerpc: dts: mpc8315erdb: Rename LED nodes to comply with schema
  powerpc: dts: mpc8315erdb: Use IRQ_TYPE_* macros
  powerpc: dts: mpc8313erdb: Use IRQ_TYPE_* macros
  powerpc: 83xx: km83xx: Fix keymile vendor prefix
  dt-bindings: powerpc: Add Freescale/NXP MPC83xx SoCs
  powerpc64/bpf: fix kfunc call support
  powerpc64/bpf: fix handling of BPF stack in exception callback
  powerpc64/bpf: remove BPF redzone protection in trampoline stack
  powerpc64/bpf: use consistent tailcall offset in trampoline
  powerpc64/bpf: fix the address returned by bpf_get_func_ip
  powerpc64/bpf: do not increment tailcall count when prog is NULL
  powerpc64/ftrace: workaround clang recording GEP in __patchable_function_entries
  powerpc64/ftrace: fix OOL stub count with clang
  powerpc64: make clang cross-build friendly
  powerpc/crash: adjust the elfcorehdr size
  powerpc/kexec/core: use big-endian types for crash variables
  powerpc/prom_init: Fixup missing #size-cells on PowerMac media-bay nodes
  ...
2026-03-11 08:35:31 -07:00
Kamal Dasu
da9ba4dcc0 mtd: rawnand: brcmnand: skip DMA during panic write
When oops_panic_write is set, the driver disables interrupts and
switches to PIO polling mode but still falls through into the DMA
path. DMA cannot be used reliably in panic context, so make the
DMA path an else branch to ensure only PIO is used during panic
writes.

Fixes: c1ac2dc34b ("mtd: rawnand: brcmnand: When oops in progress use pio and interrupt polling")
Signed-off-by: Kamal Dasu <kamal.dasu@broadcom.com>
Reviewed-by: William Zhang <william.zhang@broadcom.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-11 16:32:40 +01:00
Kamal Dasu
bab2bc6e85 mtd: rawnand: serialize lock/unlock against other NAND operations
nand_lock() and nand_unlock() call into chip->ops.lock_area/unlock_area
without holding the NAND device lock. On controllers that implement
SET_FEATURES via multiple low-level PIO commands, these can race with
concurrent UBI/UBIFS background erase/write operations that hold the
device lock, resulting in cmd_pending conflicts on the NAND controller.

Add nand_get_device()/nand_release_device() around the lock/unlock
operations to serialize them against all other NAND controller access.

Fixes: 92270086b7 ("mtd: rawnand: Add support for manufacturer specific lock/unlock operation")
Signed-off-by: Kamal Dasu <kamal.dasu@broadcom.com>
Reviewed-by: William Zhang <william.zhang@broadcom.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-11 16:32:36 +01:00
Casey Connolly
a9683730e8 ASoC: detect empty DMI strings
Some bootloaders like recent versions of U-Boot may install some DMI
properties with empty values rather than not populate them. This manages
to make its way through the validator and cleanup resulting in a rogue
hyphen being appended to the card longname.

Fixes: 4e01e5dbba ("ASoC: improve the DMI long card code in asoc-core")
Signed-off-by: Casey Connolly <casey.connolly@linaro.org>
Link: https://patch.msgid.link/20260306174707.283071-2-casey.connolly@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-11 15:28:35 +00:00
Miquel Raynal
073b2db724 dt-bindings: mtd: st,spear600-smi: Fix example
Example is wrong, the reg property of the flash is always matching the
node name.

Fixes: 68cd8ef484 ("dt-bindings: mtd: st,spear600-smi: convert to DT schema")
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-11 16:26:44 +01:00
Miquel Raynal
c21cac8cdc dt-bindings: mtd: st,spear600-smi: #address/size-cells is mandatory
These properties must be set because they overwrite the default values,
especially #size-cells which is 0 for most controllers and is 'const: 1'
here.

Fixes: 68cd8ef484 ("dt-bindings: mtd: st,spear600-smi: convert to DT schema")
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-11 16:26:44 +01:00
Miquel Raynal
db9a267650 dt-bindings: mtd: st,spear600-smi: Fix description
The description mixes two nodes. There is the controller, and there is
the flash. Describe the flash (which itself can be considered an mtd
device, unlike the top level controller), and move the st,smi-fast-mode
property inside, as this property is flash specific and should not live
in the parent controller node.

Fixes: 68cd8ef484 ("dt-bindings: mtd: st,spear600-smi: convert to DT schema")
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-03-11 16:26:44 +01:00
Danilo Krummrich
0073a17b46 gpu: nova-core: gsp: fix UB in DmaGspMem pointer accessors
The DmaGspMem pointer accessor methods (gsp_write_ptr, gsp_read_ptr,
cpu_read_ptr, cpu_write_ptr, advance_cpu_read_ptr,
advance_cpu_write_ptr) dereference a raw pointer to DMA memory, creating
an intermediate reference before calling volatile read/write methods.

This is undefined behavior since DMA memory can be concurrently modified
by the device.

Fix this by moving the implementations into a gsp_mem module in fw.rs
that uses the dma_read!() / dma_write!() macros, making the original
methods on DmaGspMem thin forwarding wrappers.

An alternative approach would have been to wrap the shared memory in
Opaque, but that would have required even more unsafe code.

Since the gsp_mem module lives in fw.rs (to access firmware-specific
binding field names), GspMem, Msgq and their relevant fields are
temporarily widened to pub(super). This will be reverted once IoView
projections are available.

Cc: Gary Guo <gary@garyguo.net>
Closes: https://lore.kernel.org/nouveau/DGUT14ILG35P.1UMNRKU93JUM1@kernel.org/
Fixes: 75f6b1de81 ("gpu: nova-core: gsp: Add GSP command queue bindings and handling")
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Link: https://patch.msgid.link/20260309225408.27714-1-dakr@kernel.org
[ Use pub(super) where possible; replace bitwise-and with modulo
  operator analogous to [1]. - Danilo ]
Link: https://lore.kernel.org/all/20260129-nova-core-cmdq1-v3-1-2ede85493a27@nvidia.com/ [1]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-11 16:24:15 +01:00
Johan Hovold
163cc462de gpib: lpvo_usb: fix unintended binding of FTDI 8U232AM devices
The LPVO USB GPIB adapter apparently uses an FTDI 8U232AM with the
default PID, but this device id is already handled by the ftdi_sio
serial driver.

Stop binding to the default PID to avoid breaking existing setups with
FTDI 8U232AM.

Anyone using this driver should blacklist the ftdi_sio driver and add
the device id manually through sysfs (e.g. using udev rules).

Fixes: fce79512a9 ("staging: gpib: Add LPVO DIY USB GPIB driver")
Fixes: e6ab504633 ("staging: gpib: Destage gpib")
Cc: Dave Penkler <dpenkler@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://patch.msgid.link/20260305151729.10501-2-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:23:32 +01:00
Xu Yang
8345b1539f usb: roles: get usb role switch from parent only for usb-b-connector
usb_role_switch_is_parent() was walking up to the parent node and checking
for the "usb-role-switch" property regardless of the type of the passed
fwnode. This could cause unrelated device nodes to be probed as potential
role switch parent, leading to spurious matches and "-EPROBE_DEFER" being
returned infinitely.

Till now only Type-B connector node will have a parent node which may
present "usb-role-switch" property and register the role switch device.
For Type-C connector node, its parent node will always be a Type-C chip
device which will never register the role switch device. However, it may
still present a non-boolean "usb-role-switch = <&usb_controller>" property
for historical compatibility.

So restrict the helper to only operate on Type-B connector when attempting
to get the role switch from parent node.

Fixes: 6fadd72943 ("usb: roles: get usb-role-switch from parent")
Cc: stable <stable@kernel.org>
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Tested-by: Arnaud Ferraris <arnaud.ferraris@collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://patch.msgid.link/20260309074313.2809867-3-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:22:22 +01:00
Xu Yang
6b275bfaa1 Revert "tcpm: allow looking for role_sw device in the main node"
This reverts commit 1366cd228b.

The fwnode_usb_role_switch_get() returns NULL only if no connection is
found, returns ERR_PTR(-EPROBE_DEFER) if connection is found but deferred
probe is needed, or a valid pointer of usb_role_switch.

When switching from a NULL check to IS_ERR_OR_NULL(), usb_role_switch_get()
returns NULL and overwrites the ERR_PTR(-EPROBE_DEFER) returned by
fwnode_usb_role_switch_get(). This causes the deferred probe indication to
be lost, preventing the USB role switch from ever being retrieved.

Fixes: 1366cd228b ("tcpm: allow looking for role_sw device in the main node")
Cc: stable <stable@kernel.org>
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Tested-by: Arnaud Ferraris <arnaud.ferraris@collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://patch.msgid.link/20260309074313.2809867-2-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:22:18 +01:00
Kuen-Han Tsai
ec35c19696 usb: gadget: f_ncm: Fix net_device lifecycle with device_move
The network device outlived its parent gadget device during
disconnection, resulting in dangling sysfs links and null pointer
dereference problems.

A prior attempt to solve this by removing SET_NETDEV_DEV entirely [1]
was reverted due to power management ordering concerns and a NO-CARRIER
regression.

A subsequent attempt to defer net_device allocation to bind [2] broke
1:1 mapping between function instance and network device, making it
impossible for configfs to report the resolved interface name. This
results in a regression where the DHCP server fails on pmOS.

Use device_move to reparent the net_device between the gadget device and
/sys/devices/virtual/ across bind/unbind cycles. This preserves the
network interface across USB reconnection, allowing the DHCP server to
retain their binding.

Introduce gether_attach_gadget()/gether_detach_gadget() helpers and use
__free(detach_gadget) macro to undo attachment on bind failure. The
bind_count ensures device_move executes only on the first bind.

[1] https://lore.kernel.org/lkml/f2a4f9847617a0929d62025748384092e5f35cce.camel@crapouillou.net/
[2] https://lore.kernel.org/linux-usb/795ea759-7eaf-4f78-81f4-01ffbf2d7961@ixit.cz/

Fixes: 40d133d7f5 ("usb: gadget: f_ncm: convert to new function interface with backward compatibility")
Cc: stable <stable@kernel.org>
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260309-f-ncm-revert-v2-7-ea2afbc7d9b2@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:21:19 +01:00
Kuen-Han Tsai
3131c1aff7 Revert "usb: gadget: u_ether: add gether_opts for config caching"
This reverts commit e065c6a7e4.

This commit is being reverted as part of a series-wide revert.

By deferring the net_device allocation to the bind() phase, a single
function instance will spawn multiple network devices if it is symlinked
to multiple USB configurations.

This causes regressions for userspace tools (like the postmarketOS DHCP
daemon) that rely on reading the interface name (e.g., "usb0") from
configfs. Currently, configfs returns the template "usb%d", causing the
userspace network setup to fail.

Crucially, because this patch breaks the 1:1 mapping between the
function instance and the network device, this naming issue cannot
simply be patched. Configfs only exposes a single 'ifname' attribute per
instance, making it impossible to accurately report the actual interface
name when multiple underlying network devices can exist for that single
instance.

All configurations tied to the same function instance are meant to share
a single network device. Revert this change to restore the 1:1 mapping
by allocating the network device at the instance level (alloc_inst).

Reported-by: David Heidelberg <david@ixit.cz>
Closes: https://lore.kernel.org/linux-usb/70b558ea-a12e-4170-9b8e-c951131249af@ixit.cz/
Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable <stable@kernel.org>
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260309-f-ncm-revert-v2-6-ea2afbc7d9b2@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:21:16 +01:00
Kuen-Han Tsai
3549d0ae5f Revert "usb: gadget: u_ether: use <linux/hex.h> header file"
This reverts commit 7a7930c0f9.

This commit is being reverted as part of a series-wide revert.

By deferring the net_device allocation to the bind() phase, a single
function instance will spawn multiple network devices if it is symlinked
to multiple USB configurations.

This causes regressions for userspace tools (like the postmarketOS DHCP
daemon) that rely on reading the interface name (e.g., "usb0") from
configfs. Currently, configfs returns the template "usb%d", causing the
userspace network setup to fail.

Crucially, because this patch breaks the 1:1 mapping between the
function instance and the network device, this naming issue cannot
simply be patched. Configfs only exposes a single 'ifname' attribute per
instance, making it impossible to accurately report the actual interface
name when multiple underlying network devices can exist for that single
instance.

All configurations tied to the same function instance are meant to share
a single network device. Revert this change to restore the 1:1 mapping
by allocating the network device at the instance level (alloc_inst).

Reported-by: David Heidelberg <david@ixit.cz>
Closes: https://lore.kernel.org/linux-usb/70b558ea-a12e-4170-9b8e-c951131249af@ixit.cz/
Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable <stable@kernel.org>
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260309-f-ncm-revert-v2-5-ea2afbc7d9b2@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:21:13 +01:00
Kuen-Han Tsai
46662d3a1a Revert "usb: gadget: u_ether: Add auto-cleanup helper for freeing net_device"
This reverts commit 0c0981126b.

This commit is being reverted as part of a series-wide revert.

By deferring the net_device allocation to the bind() phase, a single
function instance will spawn multiple network devices if it is symlinked
to multiple USB configurations.

This causes regressions for userspace tools (like the postmarketOS DHCP
daemon) that rely on reading the interface name (e.g., "usb0") from
configfs. Currently, configfs returns the template "usb%d", causing the
userspace network setup to fail.

Crucially, because this patch breaks the 1:1 mapping between the
function instance and the network device, this naming issue cannot
simply be patched. Configfs only exposes a single 'ifname' attribute per
instance, making it impossible to accurately report the actual interface
name when multiple underlying network devices can exist for that single
instance.

All configurations tied to the same function instance are meant to share
a single network device. Revert this change to restore the 1:1 mapping
by allocating the network device at the instance level (alloc_inst).

Reported-by: David Heidelberg <david@ixit.cz>
Closes: https://lore.kernel.org/linux-usb/70b558ea-a12e-4170-9b8e-c951131249af@ixit.cz/
Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable <stable@kernel.org>
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260309-f-ncm-revert-v2-4-ea2afbc7d9b2@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:21:11 +01:00
Kuen-Han Tsai
37893bc5de Revert "usb: gadget: f_ncm: align net_device lifecycle with bind/unbind"
This reverts commit 56a512a9b4.

This commit is being reverted as part of a series-wide revert.

By deferring the net_device allocation to the bind() phase, a single
function instance will spawn multiple network devices if it is symlinked
to multiple USB configurations.

This causes regressions for userspace tools (like the postmarketOS DHCP
daemon) that rely on reading the interface name (e.g., "usb0") from
configfs. Currently, configfs returns the template "usb%d", causing the
userspace network setup to fail.

Crucially, because this patch breaks the 1:1 mapping between the
function instance and the network device, this naming issue cannot
simply be patched. Configfs only exposes a single 'ifname' attribute per
instance, making it impossible to accurately report the actual interface
name when multiple underlying network devices can exist for that single
instance.

All configurations tied to the same function instance are meant to share
a single network device. Revert this change to restore the 1:1 mapping
by allocating the network device at the instance level (alloc_inst).

Reported-by: David Heidelberg <david@ixit.cz>
Closes: https://lore.kernel.org/linux-usb/70b558ea-a12e-4170-9b8e-c951131249af@ixit.cz/
Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable <stable@kernel.org>
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260309-f-ncm-revert-v2-3-ea2afbc7d9b2@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:21:07 +01:00
Kuen-Han Tsai
f2524c0e6f Revert "usb: legacy: ncm: Fix NPE in gncm_bind"
This reverts commit fde0634ad9.

This commit is being reverted as part of a series-wide revert.

By deferring the net_device allocation to the bind() phase, a single
function instance will spawn multiple network devices if it is symlinked
to multiple USB configurations.

This causes regressions for userspace tools (like the postmarketOS DHCP
daemon) that rely on reading the interface name (e.g., "usb0") from
configfs. Currently, configfs returns the template "usb%d", causing the
userspace network setup to fail.

Crucially, because this patch breaks the 1:1 mapping between the
function instance and the network device, this naming issue cannot
simply be patched. Configfs only exposes a single 'ifname' attribute per
instance, making it impossible to accurately report the actual interface
name when multiple underlying network devices can exist for that single
instance.

All configurations tied to the same function instance are meant to share
a single network device. Revert this change to restore the 1:1 mapping
by allocating the network device at the instance level (alloc_inst).

Reported-by: David Heidelberg <david@ixit.cz>
Closes: https://lore.kernel.org/linux-usb/70b558ea-a12e-4170-9b8e-c951131249af@ixit.cz/
Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable <stable@kernel.org>
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260309-f-ncm-revert-v2-2-ea2afbc7d9b2@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:21:03 +01:00
Kuen-Han Tsai
11199720fa Revert "usb: gadget: f_ncm: Fix atomic context locking issue"
This reverts commit 0d6c8144ca.

This commit is being reverted as part of a series-wide revert.

By deferring the net_device allocation to the bind() phase, a single
function instance will spawn multiple network devices if it is symlinked
to multiple USB configurations.

This causes regressions for userspace tools (like the postmarketOS DHCP
daemon) that rely on reading the interface name (e.g., "usb0") from
configfs. Currently, configfs returns the template "usb%d", causing the
userspace network setup to fail.

Crucially, because this patch breaks the 1:1 mapping between the
function instance and the network device, this naming issue cannot
simply be patched. Configfs only exposes a single 'ifname' attribute per
instance, making it impossible to accurately report the actual interface
name when multiple underlying network devices can exist for that single
instance.

All configurations tied to the same function instance are meant to share
a single network device. Revert this change to restore the 1:1 mapping
by allocating the network device at the instance level (alloc_inst).

Reported-by: David Heidelberg <david@ixit.cz>
Closes: https://lore.kernel.org/linux-usb/70b558ea-a12e-4170-9b8e-c951131249af@ixit.cz/
Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable <stable@kernel.org>
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260309-f-ncm-revert-v2-1-ea2afbc7d9b2@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:20:59 +01:00
RD Babiera
e8557acfa0 usb: typec: altmode/displayport: set displayport signaling rate in configure message
dp_altmode_configure sets the signaling rate to the current
configuration's rate and then shifts the value to the Select
Configuration bitfield. On the initial configuration, dp->data.conf
is 0 to begin with, so the signaling rate field is never set, which
leads to some DisplayPort Alt Mode partners sending NAK to the
Configure message.

Set the signaling rate to the capabilities supported by both the
port and the port partner. If the cable supports DisplayPort Alt Mode,
then include its capabilities as well.

Fixes: a17fae8fc3 ("usb: typec: Add Displayport Alternate Mode 2.1 Support")
Cc: stable <stable@kernel.org>
Signed-off-by: RD Babiera <rdbabiera@google.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://patch.msgid.link/20260310204106.3939862-2-rdbabiera@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:20:57 +01:00
Heikki Krogerus
17ab4d4078 usb: dwc3: pci: add support for the Intel Nova Lake -H
This patch adds the necessary PCI ID for Intel Nova Lake -H
devices.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: stable <stable@kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://patch.msgid.link/20260309130204.208661-1-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:19:41 +01:00
Christoffer Sandberg
0326ff28d5 usb/core/quirks: Add Huawei ME906S-device to wakeup quirk
Similar to other Huawei LTE modules using this quirk, this version with
another vid/pid suffers from spurious wakeups.

Setting the quirk fixes the issue for this device as well.

Cc: stable <stable@kernel.org>
Signed-off-by: Christoffer Sandberg <cs@tuxedo.de>
Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
Link: https://patch.msgid.link/20260306172817.2098898-1-wse@tuxedocomputers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:19:21 +01:00
Junzhong Pan
56135c0c60 usb: gadget: uvc: fix interval_duration calculation
To correctly convert bInterval as interval_duration:
  interval_duration = 2^(bInterval-1) * frame_interval

Current code uses a wrong left shift operand, computing 2^bInterval
instead of 2^(bInterval-1).

Fixes: 010dc57cb5 ("usb: gadget: uvc: fix interval_duration calculation")
Cc: stable <stable@kernel.org>
Signed-off-by: Junzhong Pan <panjunzhong@linux.spacemit.com>
Reviewed-by: Xu Yang <xu.yang_2@nxp.com>
Link: https://patch.msgid.link/20260306-fix-uvc-interval-v1-1-9a2df6859859@linux.spacemit.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:19:04 +01:00
Mathias Nyman
ae4ff9dead xhci: Fix NULL pointer dereference when reading portli debugfs files
Michal reported and debgged a NULL pointer dereference bug in the
recently added portli debugfs files

Oops is caused when there are more port registers counted in
xhci->max_ports than ports reported by Supported Protocol capabilities.
This is possible if max_ports is more than maximum port number, or
if there are gaps between ports of different speeds the 'Supported
Protocol' capabilities.

In such cases port->rhub will be NULL so we can't reach xhci behind it.
Add an explicit NULL check for this case, and print portli in hex
without dereferencing port->rhub.

Reported-by: Michal Pecio <michal.pecio@gmail.com>
Closes: https://lore.kernel.org/linux-usb/20260304103856.48b785fd.michal.pecio@gmail.com
Fixes: 384c57ec72 ("usb: xhci: Add debugfs support for xHCI Port Link Info (PORTLI) register.")
Cc: stable@vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://patch.msgid.link/20260304223639.3882398-4-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:18:48 +01:00
Dayu Jiang
d6d5febd12 usb: xhci: Prevent interrupt storm on host controller error (HCE)
The xHCI controller reports a Host Controller Error (HCE) in UAS Storage
Device plug/unplug scenarios on Android devices. HCE is checked in
xhci_irq() function and causes an interrupt storm (since the interrupt
isn’t cleared), leading to severe system-level faults.

When the xHC controller reports HCE in the interrupt handler, the driver
only logs a warning and assumes xHC activity will stop as stated in xHCI
specification. An interrupt storm does however continue on some hosts
even after HCE, and only ceases after manually disabling xHC interrupt
and stopping the controller by calling xhci_halt().

Add xhci_halt() to xhci_irq() function where STS_HCE status is checked,
mirroring the existing error handling pattern used for STS_FATAL errors.

This only fixes the interrupt storm. Proper HCE recovery requires resetting
and re-initializing the xHC.

CC: stable@vger.kernel.org
Signed-off-by: Dayu Jiang <jiangdayu@xiaomi.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://patch.msgid.link/20260304223639.3882398-3-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:18:48 +01:00
Zilin Guan
c1c8550e70 usb: xhci: Fix memory leak in xhci_disable_slot()
xhci_alloc_command() allocates a command structure and, when the
second argument is true, also allocates a completion structure.
Currently, the error handling path in xhci_disable_slot() only frees
the command structure using kfree(), causing the completion structure
to leak.

Use xhci_free_command() instead of kfree(). xhci_free_command() correctly
frees both the command structure and the associated completion structure.
Since the command structure is allocated with zero-initialization,
command->in_ctx is NULL and will not be erroneously freed by
xhci_free_command().

This bug was found using an experimental static analysis tool we are
developing. The tool is based on the LLVM framework and is specifically
designed to detect memory management issues. It is currently under
active development and not yet publicly available, but we plan to
open-source it after our research is published.

The bug was originally detected on v6.13-rc1 using our static analysis
tool, and we have verified that the issue persists in the latest mainline
kernel.

We performed build testing on x86_64 with allyesconfig using GCC=11.4.0.
Since triggering these error paths in xhci_disable_slot() requires specific
hardware conditions or abnormal state, we were unable to construct a test
case to reliably trigger these specific error paths at runtime.

Fixes: 7faac1953e ("xhci: avoid race between disable slot command and host runtime suspend")
CC: stable@vger.kernel.org
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://patch.msgid.link/20260304223639.3882398-2-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:18:48 +01:00
Oliver Neukum
8df672bfe3 usb: class: cdc-wdm: fix reordering issue in read code path
Quoting the bug report:

Due to compiler optimization or CPU out-of-order execution, the
desc->length update can be reordered before the memmove. If this
happens, wdm_read() can see the new length and call copy_to_user() on
uninitialized memory. This also violates LKMM data race rules [1].

Fix it by using WRITE_ONCE and memory barriers.

Fixes: afba937e54 ("USB: CDC WDM driver")
Cc: stable <stable@kernel.org>
Signed-off-by: Oliver Neukum <oneukum@suse.com>
Closes: https://lore.kernel.org/linux-usb/CALbr=LbrUZn_cfp7CfR-7Z5wDTHF96qeuM=3fO2m-q4cDrnC4A@mail.gmail.com/
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Reviewed-by: Gui-Dong Han <hanguidong02@gmail.com>
Link: https://patch.msgid.link/20260304130116.1721682-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:18:31 +01:00
Fan Wu
3cbc242b88 usb: renesas_usbhs: fix use-after-free in ISR during device removal
In usbhs_remove(), the driver frees resources (including the pipe array)
while the interrupt handler (usbhs_interrupt) is still registered. If an
interrupt fires after usbhs_pipe_remove() but before the driver is fully
unbound, the ISR may access freed memory, causing a use-after-free.

Fix this by calling devm_free_irq() before freeing resources. This ensures
the interrupt handler is both disabled and synchronized (waits for any
running ISR to complete) before usbhs_pipe_remove() is called.

Fixes: f1407d5c66 ("usb: renesas_usbhs: Add Renesas USBHS common code")
Cc: stable <stable@kernel.org>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Fan Wu <fanwu01@zju.edu.cn>
Link: https://patch.msgid.link/20260303073344.34577-1-fanwu01@zju.edu.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:18:16 +01:00
Marc Zyngier
14ae24cba2 usb: cdc-acm: Restore CAP_BRK functionnality to CH343
The CH343 USB/serial adapter is as buggy as it is popular (very).
One of its quirks is that despite being capable of signalling a
BREAK condition, it doesn't advertise it.

This used to work nonetheless until 66aad7d8d3 ("usb: cdc-acm:
return correct error code on unsupported break") applied some
reasonable restrictions, preventing breaks from being emitted on
devices that do not advertise CAP_BRK.

Add a quirk for this particular device, so that breaks can still
be produced on some of my machines attached to my console server.

Fixes: 66aad7d8d3 ("usb: cdc-acm: return correct error code on unsupported break")
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable <stable@kernel.org>
Cc: Oliver Neukum <oneukum@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Oliver Neukum <oneukum@suse.com>
Link: https://patch.msgid.link/20260301124440.1192752-1-maz@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:17:55 +01:00
Seungjin Bae
8479891d1f usb: gadget: f_mass_storage: Fix potential integer overflow in check_command_size_in_blocks()
The `check_command_size_in_blocks()` function calculates the data size
in bytes by left shifting `common->data_size_from_cmnd` by the block
size (`common->curlun->blkbits`). However, it does not validate whether
this shift operation will cause an integer overflow.

Initially, the block size is set up in `fsg_lun_open()` , and the
`common->data_size_from_cmnd` is set up in `do_scsi_command()`. During
initialization, there is no integer overflow check for the interaction
between two variables.

So if a malicious USB host sends a SCSI READ or WRITE command
requesting a large amount of data (`common->data_size_from_cmnd`), the
left shift operation can wrap around. This results in a truncated data
size, which can bypass boundary checks and potentially lead to memory
corruption or out-of-bounds accesses.

Fix this by using the check_shl_overflow() macro to safely perform the
shift and catch any overflows.

Fixes: 144974e7f9 ("usb: gadget: mass_storage: support multi-luns with different logic block size")
Signed-off-by: Seungjin Bae <eeodqql09@gmail.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://patch.msgid.link/20260228104324.1696455-2-eeodqql09@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:17:44 +01:00
John Keeping
7f58b4148e usb: gadget: f_hid: fix SuperSpeed descriptors
When adding dynamic configuration for bInterval, the value was removed
from the static SuperSpeed endpoint descriptors but was not set from the
configured value in hidg_bind().  Thus at SuperSpeed the interrupt
endpoints have bInterval as zero which is not valid per the USB
specification.

Add the missing setting for SuperSpeed endpoints.

Fixes: ea34925f5b ("usb: gadget: hid: allow dynamic interval configuration via configfs")
Cc: stable <stable@kernel.org>
Signed-off-by: John Keeping <jkeeping@inmusicbrands.com>
Acked-by: Peter Korsgaard <peter@korsgaard.com>
Link: https://patch.msgid.link/20260227111540.431521-1-jkeeping@inmusicbrands.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:17:29 +01:00
Jie Deng
9f6a983cfa usb: core: new quirk to handle devices with zero configurations
Some USB devices incorrectly report bNumConfigurations as 0 in their
device descriptor, which causes the USB core to reject them during
enumeration.
logs:
usb 1-2: device descriptor read/64, error -71
usb 1-2: no configurations
usb 1-2: can't read configurations, error -22

However, these devices actually work correctly when
treated as having a single configuration.

Add a new quirk USB_QUIRK_FORCE_ONE_CONFIG to handle such devices.
When this quirk is set, assume the device has 1 configuration instead
of failing with -EINVAL.

This quirk is applied to the device with VID:PID 5131:2007 which
exhibits this behavior.

Signed-off-by: Jie Deng <dengjie03@kylinos.cn>
Link: https://patch.msgid.link/20260227084931.1527461-1-dengjie03@kylinos.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:17:19 +01:00
Greg Kroah-Hartman
45dba8011e usb: misc: uss720: properly clean up reference in uss720_probe()
If get_1284_register() fails, the usb device reference count is
incorrect and needs to be properly dropped before returning.  That will
happen when the kref is dropped in the call to destroy_priv(), so jump
to that error path instead of returning directly.

Cc: stable <stable@kernel.org>
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026022342-smokiness-stove-d792@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:17:12 +01:00
Alan Stern
1015c27a5e USB: core: Limit the length of unkillable synchronous timeouts
The usb_control_msg(), usb_bulk_msg(), and usb_interrupt_msg() APIs in
usbcore allow unlimited timeout durations.  And since they use
uninterruptible waits, this leaves open the possibility of hanging a
task for an indefinitely long time, with no way to kill it short of
unplugging the target device.

To prevent this sort of problem, enforce a maximum limit on the length
of these unkillable timeouts.  The limit chosen here, somewhat
arbitrarily, is 60 seconds.  On many systems (although not all) this
is short enough to avoid triggering the kernel's hung-task detector.

In addition, clear up the ambiguity of negative timeout values by
treating them the same as 0, i.e., using the maximum allowed timeout.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/linux-usb/3acfe838-6334-4f6d-be7c-4bb01704b33d@rowland.harvard.edu/
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
Link: https://patch.msgid.link/15fc9773-a007-47b0-a703-df89a8cf83dd@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:16:56 +01:00
Alan Stern
7784caa413 USB: usbtmc: Use usb_bulk_msg_killable() with user-specified timeouts
The usbtmc driver accepts timeout values specified by the user in an
ioctl command, and uses these timeouts for some usb_bulk_msg() calls.
Since the user can specify arbitrarily long timeouts and
usb_bulk_msg() uses unkillable waits, call usb_bulk_msg_killable()
instead to avoid the possibility of the user hanging a kernel thread
indefinitely.

Reported-by: syzbot+25ba18e2c5040447585d@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-usb/8e1c7ac5-e076-44b0-84b8-1b34b20f0ae1@suse.com/T/#t
Tested-by: syzbot+25ba18e2c5040447585d@syzkaller.appspotmail.com
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Fixes: 048c6d88a0 ("usb: usbtmc: Add ioctls to set/get usb timeout")
CC: stable@vger.kernel.org
Link: https://patch.msgid.link/81c6fc24-0607-40f1-8c20-5270dab2fad5@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:16:56 +01:00
Alan Stern
416909962e USB: usbcore: Introduce usb_bulk_msg_killable()
The synchronous message API in usbcore (usb_control_msg(),
usb_bulk_msg(), and so on) uses uninterruptible waits.  However,
drivers may call these routines in the context of a user thread, which
means it ought to be possible to at least kill them.

For this reason, introduce a new usb_bulk_msg_killable() function
which behaves the same as usb_bulk_msg() except for using
wait_for_completion_killable_timeout() instead of
wait_for_completion_timeout().  The same can be done later for
usb_control_msg() later on, if it turns out to be needed.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Oliver Neukum <oneukum@suse.com>
Link: https://lore.kernel.org/linux-usb/3acfe838-6334-4f6d-be7c-4bb01704b33d@rowland.harvard.edu/
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
Link: https://patch.msgid.link/248628b4-cc83-4e81-a620-3ce4e0376d41@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:16:56 +01:00
Gabor Juhos
e293015ba7 usb: core: don't power off roothub PHYs if phy_set_mode() fails
Remove the error path from the usb_phy_roothub_set_mode() function.
The code is clearly wrong, because phy_set_mode() calls can't be
balanced with phy_power_off() calls.

Additionally, the usb_phy_roothub_set_mode() function is called only
from usb_add_hcd() before it powers on the PHYs, so powering off those
makes no sense anyway.

Presumably, the code is copy-pasted from the phy_power_on() function
without adjusting the error handling.

Cc: stable@vger.kernel.org # v5.1+
Fixes: b97a313483 ("usb: core: comply to PHY framework")
Signed-off-by: Gabor Juhos <j4g8y7@gmail.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://patch.msgid.link/20260218-usb-phy-poweroff-fix-v1-1-66e6831e860e@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-03-11 16:16:41 +01:00
Hans de Goede
227312b4a6 HID: input: Add HID_BATTERY_QUIRK_DYNAMIC for Elan touchscreens
Elan touchscreens have a HID-battery device for the stylus which is always
there even if there is no stylus.

This is causing upower to report an empty battery for the stylus and some
desktop-environments will show a notification about this, which is quite
annoying.

Because of this the HID-battery is being ignored on all Elan I2c and USB
touchscreens, but this causes there to be no battery reporting for
the stylus at all.

This adds a new HID_BATTERY_QUIRK_DYNAMIC and uses these for the Elan
touchscreens.

This new quirks causes the present value of the battery to start at 0,
which will make userspace ignore it and only sets present to 1 after
receiving a battery input report which only happens when the stylus
gets in range.

Reported-by: ggrundik@gmail.com
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221118
Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-03-11 15:26:47 +01:00
Hans de Goede
487b23afaf HID: input: Drop Asus UX550* touchscreen ignore battery quirks
Drop the Asus UX550* touchscreen ignore battery quirks, there is a blanket
HID_BATTERY_QUIRK_IGNORE for all USB_VENDOR_ID_ELAN USB touchscreens now,
so these are just a duplicate of those.

Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-03-11 15:26:47 +01:00
Chen Ni
53f3a900e9 ASoC: amd: acp3x-rt5682-max9836: Add missing error check for clock acquisition
The acp3x_5682_init() function did not check the return value of
clk_get(), which could lead to dereferencing error pointers in
rt5682_clk_enable().

Fix this by:
1. Changing clk_get() to the device-managed devm_clk_get().
2. Adding proper IS_ERR() checks for both clock acquisitions.

Fixes: 6b8e4e7db3 ("ASoC: amd: Add machine driver for Raven based platform")
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Link: https://patch.msgid.link/20260310024246.2153827-1-nichen@iscas.ac.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-11 13:21:01 +00:00
Long Li
362c490980 xfs: fix integer overflow in bmap intent sort comparator
xfs_bmap_update_diff_items() sorts bmap intents by inode number using
a subtraction of two xfs_ino_t (uint64_t) values, with the result
truncated to int. This is incorrect when two inode numbers differ by
more than INT_MAX (2^31 - 1), which is entirely possible on large XFS
filesystems.

Fix this by replacing the subtraction with cmp_int().

Cc: <stable@vger.kernel.org> # v4.9
Fixes: 9f3afb57d5 ("xfs: implement deferred bmbt map/unmap operations")
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-11 13:21:42 +01:00
Chen-Yu Tsai
743956bb99 spi: dt-bindings: sun6i: Allow Dual SPI and Quad SPI for newer SoCs
Support for Dual SPI and Quad SPI was added to the Linux driver in
commit 0605d9fb41 ("spi: sun6i: add quirk for dual and quad SPI modes
support") and commit 25453d797d ("spi: sun6i: add dual and quad SPI
modes support for R329/D1/R528/T113s").

However the binding was never updated to allow these modes. Allow them
by adding 2 and 4 to the allowed bus widths for the newer variants.

While at it, also add 0 to the allowed bus widths. This signals that
RX or TX is not available, i.e. the MISO or MOSI pin is disconnected.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Signed-off-by: Chen-Yu Tsai <wens@kernel.org>
Link: https://patch.msgid.link/20260302153559.3199783-2-wens@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-11 11:34:10 +00:00
Ben Dooks
393815f576 ACPI: OSL: fix __iomem type on return from acpi_os_map_generic_address()
The pointer returned from acpi_os_map_generic_address() is
tagged with __iomem, so make the rv it is returned to also
of void __iomem * type.

Fixes the following sparse warning:

drivers/acpi/osl.c:1686:20: warning: incorrect type in assignment (different address spaces)
drivers/acpi/osl.c:1686:20:    expected void *rv
drivers/acpi/osl.c:1686:20:    got void [noderef] __iomem *

Fixes: 6915564dc5 ("ACPI: OSL: Change the type of acpi_os_map_generic_address() return value")
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
[ rjw: Subject tweak, added Fixes tag ]
Link: https://patch.msgid.link/20260311105835.463030-1-ben.dooks@codethink.co.uk
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-11 12:30:45 +01:00
Thomas Gleixner
192d852129 sched/mmcid: Avoid full tasklist walks
Chasing vfork()'ed tasks on a CID ownership mode switch requires a full
task list walk, which is obviously expensive on large systems.

Avoid that by keeping a list of tasks using a mm MMCID entity in mm::mm_cid
and walk this list instead. This removes the proven to be flaky counting
logic and avoids a full task list walk in the case of vfork()'ed tasks.

Fixes: fbd0e71dc3 ("sched/mmcid: Provide CID ownership mode fixup functions")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202526.183824481@kernel.org
2026-03-11 12:01:07 +01:00
Thomas Gleixner
7574ac6e49 sched/mmcid: Remove pointless preempt guard
This is a leftover from the early versions of this function where it could
be invoked without mm::mm_cid::lock held.

Remove it and add lockdep asserts instead.

Fixes: 653fda7ae7 ("sched/mmcid: Switch over to the new mechanism")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202526.116363613@kernel.org
2026-03-11 12:01:06 +01:00
Thomas Gleixner
28b5a13950 sched/mmcid: Handle vfork()/CLONE_VM correctly
Matthieu and Jiri reported stalls where a task endlessly loops in
mm_get_cid() when scheduling in.

It turned out that the logic which handles vfork()'ed tasks is broken. It
is invoked when the number of tasks associated to a process is smaller than
the number of MMCID users. It then walks the task list to find the
vfork()'ed task, but accounts all the already processed tasks as well.

If that double processing brings the number of to be handled tasks to 0,
the walk stops and the vfork()'ed task's CID is not fixed up. As a
consequence a subsequent schedule in fails to acquire a (transitional) CID
and the machine stalls.

Cure this by removing the accounting condition and make the fixup always
walk the full task list if it could not find the exact number of users in
the process' thread list.

Fixes: fbd0e71dc3 ("sched/mmcid: Provide CID ownership mode fixup functions")
Closes: https://lore.kernel.org/b24ffcb3-09d5-4e48-9070-0b69bc654281@kernel.org
Reported-by: Matthieu Baerts <matttbe@kernel.org>
Reported-by: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202526.048657665@kernel.org
2026-03-11 12:01:06 +01:00
Thomas Gleixner
b2e48c429e sched/mmcid: Prevent CID stalls due to concurrent forks
A newly forked task is accounted as MMCID user before the task is visible
in the process' thread list and the global task list. This creates the
following problem:

 CPU1			CPU2
 fork()
   sched_mm_cid_fork(tnew1)
     tnew1->mm.mm_cid_users++;
     tnew1->mm_cid.cid = getcid()
-> preemption
			fork()
			  sched_mm_cid_fork(tnew2)
			    tnew2->mm.mm_cid_users++;
                            // Reaches the per CPU threshold
			    mm_cid_fixup_tasks_to_cpus()
			    for_each_other(current, p)
			         ....

As tnew1 is not visible yet, this fails to fix up the already allocated CID
of tnew1. As a consequence a subsequent schedule in might fail to acquire a
(transitional) CID and the machine stalls.

Move the invocation of sched_mm_cid_fork() after the new task becomes
visible in the thread and the task list to prevent this.

This also makes it symmetrical vs. exit() where the task is removed as CID
user before the task is removed from the thread and task lists.

Fixes: fbd0e71dc3 ("sched/mmcid: Provide CID ownership mode fixup functions")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202525.969061974@kernel.org
2026-03-11 12:01:06 +01:00
Greg Kroah-Hartman
a1621e06cc Merge tag 'stratix10_svc_fix_for_v7.0' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux into char-misc-linus
Dinh writes:

firmware: stratix10-svc: add multiple svc clients support
- Add a dedicated thread for each svc client to fix a timeout issue when the svc
  driver is handling multiple clients.

* tag 'stratix10_svc_fix_for_v7.0' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux:
  firmware: stratix10-svc: Add Multi SVC clients support
2026-03-11 11:07:50 +01:00
Steven Rostedt
755a648e78 time/jiffies: Mark jiffies_64_to_clock_t() notrace
The trace_clock_jiffies() function that handles the "uptime" clock for
tracing calls jiffies_64_to_clock_t(). This causes the function tracer to
constantly recurse when the tracing clock is set to "uptime". Mark it
notrace to prevent unnecessary recursion when using the "uptime" clock.

Fixes: 58d4e21e50 ("tracing: Fix wraparound problems in "uptime" trace clock")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260306212403.72270bb2@robin
2026-03-11 10:33:12 +01:00
Raphael Zimmer
b282c43ed1 libceph: Fix potential out-of-bounds access in ceph_handle_auth_reply()
This patch fixes an out-of-bounds access in ceph_handle_auth_reply()
that can be triggered by a message of type CEPH_MSG_AUTH_REPLY. In
ceph_handle_auth_reply(), the value of the payload_len field of such a
message is stored in a variable of type int. A value greater than
INT_MAX leads to an integer overflow and is interpreted as a negative
value. This leads to decrementing the pointer address by this value and
subsequently accessing it because ceph_decode_need() only checks that
the memory access does not exceed the end address of the allocation.

This patch fixes the issue by changing the data type of payload_len to
u32. Additionally, the data type of result_msg_len is changed to u32,
as it is also a variable holding a non-negative length.

Also, an additional layer of sanity checks is introduced, ensuring that
directly after reading it from the message, payload_len and
result_msg_len are not greater than the overall segment length.

BUG: KASAN: slab-out-of-bounds in ceph_handle_auth_reply+0x642/0x7a0 [libceph]
Read of size 4 at addr ffff88811404df14 by task kworker/20:1/262

CPU: 20 UID: 0 PID: 262 Comm: kworker/20:1 Not tainted 6.19.2 #5 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
Workqueue: ceph-msgr ceph_con_workfn [libceph]
Call Trace:
 <TASK>
 dump_stack_lvl+0x76/0xa0
 print_report+0xd1/0x620
 ? __pfx__raw_spin_lock_irqsave+0x10/0x10
 ? kasan_complete_mode_report_info+0x72/0x210
 kasan_report+0xe7/0x130
 ? ceph_handle_auth_reply+0x642/0x7a0 [libceph]
 ? ceph_handle_auth_reply+0x642/0x7a0 [libceph]
 __asan_report_load_n_noabort+0xf/0x20
 ceph_handle_auth_reply+0x642/0x7a0 [libceph]
 mon_dispatch+0x973/0x23d0 [libceph]
 ? apparmor_socket_recvmsg+0x6b/0xa0
 ? __pfx_mon_dispatch+0x10/0x10 [libceph]
 ? __kasan_check_write+0x14/0x30i
 ? mutex_unlock+0x7f/0xd0
 ? __pfx_mutex_unlock+0x10/0x10
 ? __pfx_do_recvmsg+0x10/0x10 [libceph]
 ceph_con_process_message+0x1f1/0x650 [libceph]
 process_message+0x1e/0x450 [libceph]
 ceph_con_v2_try_read+0x2e48/0x6c80 [libceph]
 ? __pfx_ceph_con_v2_try_read+0x10/0x10 [libceph]
 ? save_fpregs_to_fpstate+0xb0/0x230
 ? raw_spin_rq_unlock+0x17/0xa0
 ? finish_task_switch.isra.0+0x13b/0x760
 ? __switch_to+0x385/0xda0
 ? __kasan_check_write+0x14/0x30
 ? mutex_lock+0x8d/0xe0
 ? __pfx_mutex_lock+0x10/0x10
 ceph_con_workfn+0x248/0x10c0 [libceph]
 process_one_work+0x629/0xf80
 ? __kasan_check_write+0x14/0x30
 worker_thread+0x87f/0x1570
 ? __pfx__raw_spin_lock_irqsave+0x10/0x10
 ? __pfx_try_to_wake_up+0x10/0x10
 ? kasan_print_address_stack_frame+0x1f7/0x280
 ? __pfx_worker_thread+0x10/0x10
 kthread+0x396/0x830
 ? __pfx__raw_spin_lock_irq+0x10/0x10
 ? __pfx_kthread+0x10/0x10
 ? __kasan_check_write+0x14/0x30
 ? recalc_sigpending+0x180/0x210
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x3f7/0x610
 ? __pfx_ret_from_fork+0x10/0x10
 ? __switch_to+0x385/0xda0
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1a/0x30
 </TASK>

[ idryomov: replace if statements with ceph_decode_need() for
  payload_len and result_msg_len ]

Cc: stable@vger.kernel.org
Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-03-11 10:18:56 +01:00
Raphael Zimmer
770444611f libceph: Use u32 for non-negative values in ceph_monmap_decode()
This patch fixes unnecessary implicit conversions that change signedness
of blob_len and num_mon in ceph_monmap_decode().
Currently blob_len and num_mon are (signed) int variables. They are used
to hold values that are always non-negative and get assigned in
ceph_decode_32_safe(), which is meant to assign u32 values. Both
variables are subsequently used as unsigned values, and the value of
num_mon is further assigned to monmap->num_mon, which is of type u32.
Therefore, both variables should be of type u32. This is especially
relevant for num_mon. If the value read from the incoming message is
very large, it is interpreted as a negative value, and the check for
num_mon > CEPH_MAX_MON does not catch it. This leads to the attempt to
allocate a very large chunk of memory for monmap, which will most likely
fail. In this case, an unnecessary attempt to allocate memory is
performed, and -ENOMEM is returned instead of -EINVAL.

Cc: stable@vger.kernel.org
Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-03-11 10:18:47 +01:00
Thomas Zimmermann
49973e2a41 drm/loongson: Mark driver as orphaned
The maintainer's email address has been bouncing for months. Mark the
loongson DRM driver as orphaned.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://patch.msgid.link/20260309081503.67123-1-tzimmermann@suse.de
2026-03-11 10:15:36 +01:00
Lianqin Hu
5182e5ec43 ALSA: usb-audio: Add iface reset and delay quirk for SPACETOUCH USB Audio
Setting up the interface when suspended/resumeing fail on this card.
Adding a reset and delay quirk will eliminate this problem.

usb 1-1: New USB device found, idVendor=0666, idProduct=0880
usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
usb 1-1: Product: USB Audio
usb 1-1: Manufacturer: SPACETOUCH
usb 1-1: SerialNumber: 000000000

Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/TYUPR06MB6217ACC80B70BE25D87456B0D247A@TYUPR06MB6217.apcprd06.prod.outlook.com
2026-03-11 09:05:27 +01:00
Daniel Gomez
775af5cbb2 scripts: kconfig: merge_config.sh: fix indentation
Replace spaces with tabs for consistency with the rest of the script.

Fixes: 5fa9b82cbc ("scripts: kconfig: merge_config.sh: refactor from shell/sed/grep to awk")
Signed-off-by: Daniel Gomez <da.gomez@samsung.com>
Link: https://patch.msgid.link/20260310-fixes-merge-config-v1-2-beaeeaded6bd@samsung.com
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-03-11 00:38:06 -07:00
Daniel Gomez
5f47be1b44 scripts: kconfig: merge_config.sh: pass output file as awk variable
The refactoring commit 5fa9b82cbc ("scripts: kconfig:
merge_config.sh: refactor from shell/sed/grep to awk") passes
$TMP_FILE.new as ARGV[3] to awk, using it as both an output destination
and an input file argument. When the base file is empty, nothing is
written to ARGV[3] during processing, so awk fails trying to open it
for reading:

    awk: cmd. line:52: fatal: cannot open file
    `./.tmp.config.grcQin34jb.new' for reading: No such file or directory
    mv: cannot stat './.tmp.config.grcQin34jb.new': No such file or directory

Pass the output path via -v outfile instead and drop the FILENAME ==
ARGV[3] { nextfile }.

Fixes: 5fa9b82cbc ("scripts: kconfig: merge_config.sh: refactor from shell/sed/grep to awk")
Signed-off-by: Daniel Gomez <da.gomez@samsung.com>
Link: https://patch.msgid.link/20260310-fixes-merge-config-v1-1-beaeeaded6bd@samsung.com
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-03-11 00:37:50 -07:00
Linus Torvalds
b29fb8829b Merge tag 'v7.0-rc3-ksmbd-server-fixes' of git://git.samba.org/ksmbd
Pull smb server fixes from Steve French:

 - Fix potential use after free errors

 - Fix refcount leak in smb2 open error path

 - Prevent allowing logging signing or encryption keys

* tag 'v7.0-rc3-ksmbd-server-fixes' of git://git.samba.org/ksmbd:
  ksmbd: Don't log keys in SMB3 signing and encryption key generation
  smb: server: fix use-after-free in smb2_open()
  ksmbd: fix use-after-free in smb_lazy_parent_lease_break_close()
  ksmbd: fix use-after-free by using call_rcu() for oplock_info
  ksmbd: fix use-after-free in proc_show_files due to early rcu_read_unlock
  smb/server: Fix another refcount leak in smb2_open()
2026-03-10 20:30:52 -07:00
Nicolai Buchwitz
908c344d5c net: bcmgenet: fix broken EEE by converting to phylib-managed state
The bcmgenet EEE implementation is broken in several ways.
phy_support_eee() is never called, so the PHY never advertises EEE
and phylib never sets phydev->enable_tx_lpi.  bcmgenet_mac_config()
checks priv->eee.eee_enabled to decide whether to enable the MAC
LPI logic, but that field is never initialised to true, so the MAC
never enters Low Power Idle even when EEE is negotiated - wasting
the power savings EEE is designed to provide.  The only way to get
EEE working at all is a manual 'ethtool --set-eee eth0 eee on' after
every link-up, and even then bcmgenet_get_eee() immediately clobbers
the reported state because phy_ethtool_get_eee() overwrites
eee_enabled and tx_lpi_enabled with the uninitialised PHY eee_cfg
values.  Finally, bcmgenet_mac_config() is only called on link-up,
so EEE is never disabled in hardware on link-down.

Fix all of this by removing the MAC-side EEE state tracking
(priv->eee) and aligning with the pattern used by other non-phylink
MAC drivers such as FEC.

Call phy_support_eee() in bcmgenet_mii_probe() so the PHY advertises
EEE link modes and phylib tracks negotiation state.  Move the EEE
hardware control to bcmgenet_mii_setup(), which is called on every
link event, and drive it directly from phydev->enable_tx_lpi - the
flag phylib sets when EEE is negotiated and the user has not disabled
it.  This enables EEE automatically once the link partner agrees and
disables it cleanly on link-down.

Make bcmgenet_get_eee() and bcmgenet_set_eee() pure passthroughs to
phy_ethtool_get_eee() and phy_ethtool_set_eee(), with the MAC
hardware register read/written for tx_lpi_timer.  Drop struct
ethtool_keee eee from struct bcmgenet_priv.

Fixes: fe0d4fd928 ("net: phy: Keep track of EEE configuration")
Link: https://lore.kernel.org/netdev/d352039f-4cbb-41e6-9aeb-0b4f3941b54c@lunn.ch/
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Tested-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20260310054935.1238594-1-nb@tipi-net.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-10 19:41:13 -07:00
Paul Moses
57885276cc net-shapers: don't free reply skb after genlmsg_reply()
genlmsg_reply() hands the reply skb to netlink, and
netlink_unicast() consumes it on all return paths, whether the
skb is queued successfully or freed on an error path.

net_shaper_nl_get_doit() and net_shaper_nl_cap_get_doit()
currently jump to free_msg after genlmsg_reply() fails and call
nlmsg_free(msg), which can hit the same skb twice.

Return the genlmsg_reply() error directly and keep free_msg
only for pre-reply failures.

Fixes: 4b623f9f0f ("net-shapers: implement NL get operation")
Fixes: 553ea9f1ef ("net: shaper: implement introspection support")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses <p@1g4.org>
Link: https://patch.msgid.link/20260309173450.538026-2-p@1g4.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-10 19:29:09 -07:00
Daniel Golle
f441b489cc net: dsa: mxl862xx: don't set user_mii_bus
The PHY addresses in the MII bus are not equal to the port addresses,
so the bus cannot be assigned as user_mii_bus. Falling back on the
user_mii_bus in case a PHY isn't declared in device tree will result in
using the wrong (in this case: off-by-+1) PHY.
Remove the wrong assignment.

Fixes: 23794bec1c ("net: dsa: add basic initial driver for MxL862xx switches")
Suggested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://patch.msgid.link/0f0df310fd8cab57e0e5e3d0831dd057fd05bcd5.1773103271.git.daniel@makrotopia.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-10 19:18:13 -07:00
Fan Wu
2503d08f8a net: ethernet: arc: emac: quiesce interrupts before requesting IRQ
Normal RX/TX interrupts are enabled later, in arc_emac_open(), so probe
should not see interrupt delivery in the usual case. However, hardware may
still present stale or latched interrupt status left by firmware or the
bootloader.

If probe later unwinds after devm_request_irq() has installed the handler,
such a stale interrupt can still reach arc_emac_intr() during teardown and
race with release of the associated net_device.

Avoid that window by putting the device into a known quiescent state before
requesting the IRQ: disable all EMAC interrupt sources and clear any
pending EMAC interrupt status bits. This keeps the change hardware-focused
and minimal, while preventing spurious IRQ delivery from leftover state.

Fixes: e4f2379db6 ("ethernet/arc/arc_emac - Add new driver")
Cc: stable@vger.kernel.org
Signed-off-by: Fan Wu <fanwu01@zju.edu.cn>
Link: https://patch.msgid.link/20260309132409.584966-1-fanwu01@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-10 19:05:12 -07:00
Jakub Kicinski
28b225282d page_pool: store detach_time as ktime_t to avoid false-negatives
While testing other changes in vng I noticed that
nl_netdev.page_pool_check flakes. This never happens in real CI.

Turns out vng may boot and get to that test in less than a second.
page_pool_detached() records the detach time in seconds, so if
vng is fast enough detach time is set to 0. Other code treats
0 as "not detached". detach_time is only used to report the state
to the user, so it's not a huge deal in practice but let's fix it.
Store the raw ktime_t (nanoseconds) instead. A nanosecond value
of 0 is practically impossible.

Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Fixes: 69cb4952b6 ("net: page_pool: report when page pool was destroyed")
Link: https://patch.msgid.link/20260310003907.3540019-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-10 19:03:34 -07:00
Kevin Hao
881a0263d5 net: macb: Shuffle the tx ring before enabling tx
Quanyang observed that when using an NFS rootfs on an AMD ZynqMp board,
the rootfs may take an extended time to recover after a suspend.
Upon investigation, it was determined that the issue originates from a
problem in the macb driver.

According to the Zynq UltraScale TRM [1], when transmit is disabled,
the transmit buffer queue pointer resets to point to the address
specified by the transmit buffer queue base address register.

In the current implementation, the code merely resets `queue->tx_head`
and `queue->tx_tail` to '0'. This approach presents several issues:

- Packets already queued in the tx ring are silently lost,
  leading to memory leaks since the associated skbs cannot be released.

- Concurrent write access to `queue->tx_head` and `queue->tx_tail` may
  occur from `macb_tx_poll()` or `macb_start_xmit()` when these values
  are reset to '0'.

- The transmission may become stuck on a packet that has already been sent
  out, with its 'TX_USED' bit set, but has not yet been processed. However,
  due to the manipulation of 'queue->tx_head' and 'queue->tx_tail',
  `macb_tx_poll()` incorrectly assumes there are no packets to handle
  because `queue->tx_head == queue->tx_tail`. This issue is only resolved
  when a new packet is placed at this position. This is the root cause of
  the prolonged recovery time observed for the NFS root filesystem.

To resolve this issue, shuffle the tx ring and tx skb array so that
the first unsent packet is positioned at the start of the tx ring.
Additionally, ensure that updates to `queue->tx_head` and
`queue->tx_tail` are properly protected with the appropriate lock.

[1] https://docs.amd.com/v/u/en-US/ug1085-zynq-ultrascale-trm

Fixes: bf9cf80cab ("net: macb: Fix tx/rx malfunction after phy link down and up")
Reported-by: Quanyang Wang <quanyang.wang@windriver.com>
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Cc: stable@vger.kernel.org
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260307-zynqmp-v2-1-6ef98a70e1d0@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-10 17:36:44 -07:00
Alan Borzeszkowski
85b731ad4b spi: intel-pci: Add support for Nova Lake mobile SPI flash
Add Intel Nova Lake PCD-H SPI serial flash PCI ID to the list of
supported devices.

Signed-off-by: Alan Borzeszkowski <alan.borzeszkowski@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Link: https://patch.msgid.link/20260309153703.74282-1-alan.borzeszkowski@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-11 00:07:54 +00:00
Ignat Korchagin
182b9b3d8d MAINTAINERS: update email address for Ignat Korchagin
Since I'm moving from Cloudflare update my email address in the
MAINTAINERS file and add an entry to .mailmap so nothing gets lost.

Link: https://lkml.kernel.org/r/20260309173445.71393-1-ignat@cloudflare.com
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Acked-by: Lukas Wunner <lukas@wunner.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-10 16:01:49 -07:00
Wei Yang
939080834f mm/huge_memory: fix early failure try_to_migrate() when split huge pmd for shared THP
Commit 60fbb14396 ("mm/huge_memory: adjust try_to_migrate_one() and
split_huge_pmd_locked()") return false unconditionally after
split_huge_pmd_locked().  This may fail try_to_migrate() early when
TTU_SPLIT_HUGE_PMD is specified.

The reason is the above commit adjusted try_to_migrate_one() to, when a
PMD-mapped THP entry is found, and TTU_SPLIT_HUGE_PMD is specified (for
example, via unmap_folio()), return false unconditionally.  This breaks
the rmap walk and fail try_to_migrate() early, if this PMD-mapped THP is
mapped in multiple processes.

The user sensible impact of this bug could be:

  * On memory pressure, shrink_folio_list() may split partially mapped
    folio with split_folio_to_list(). Then free unmapped pages without IO.
    If failed, it may not be reclaimed.
  * On memory failure, memory_failure() would call try_to_split_thp_page()
    to split folio contains the bad page. If succeed, the PG_has_hwpoisoned
    bit is only set in the after-split folio contains @split_at. By doing
    so, we limit bad memory. If failed to split, the whole folios is not
    usable.

One way to reproduce:

    Create an anonymous THP range and fork 512 children, so we have a
    THP shared mapped in 513 processes. Then trigger folio split with
    /sys/kernel/debug/split_huge_pages debugfs to split the THP folio to
    order 0.

Without the above commit, we can successfully split to order 0.  With the
above commit, the folio is still a large folio.

And currently there are two core users of TTU_SPLIT_HUGE_PMD:

  * try_to_unmap_one()
  * try_to_migrate_one()

try_to_unmap_one() would restart the rmap walk, so only
try_to_migrate_one() is affected.

We can't simply revert commit 60fbb14396 ("mm/huge_memory: adjust
try_to_migrate_one() and split_huge_pmd_locked()"), since it removed some
duplicated check covered by page_vma_mapped_walk().

This patch fixes this by restart page_vma_mapped_walk() after
split_huge_pmd_locked().  Since we cannot simply return "true" to fix the
problem, as that would affect another case:

    When invoking folio_try_share_anon_rmap_pmd() from
    split_huge_pmd_locked(), the latter can fail and leave a large folio
    mapped through PTEs, in which case we ought to return true from
    try_to_migrate_one(). This might result in unnecessary walking of the
    rmap but is relatively harmless.

Link: https://lkml.kernel.org/r/20260305015006.27343-1-richard.weiyang@gmail.com
Fixes: 60fbb14396 ("mm/huge_memory: adjust try_to_migrate_one() and split_huge_pmd_locked()")
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Tested-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Gavin Guo <gavinguo@igalia.com>
Acked-by: David Hildenbrand (arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-10 16:01:49 -07:00
Dev Jain
29f40594a2 mm/rmap: fix incorrect pte restoration for lazyfree folios
We batch unmap anonymous lazyfree folios by folio_unmap_pte_batch.  If the
batch has a mix of writable and non-writable bits, we may end up setting
the entire batch writable.  Fix this by respecting writable bit during
batching.

Although on a successful unmap of a lazyfree folio, the soft-dirty bit is
lost, preserve it on pte restoration by respecting the bit during
batching, to make the fix consistent w.r.t both writable bit and
soft-dirty bit.

I was able to write the below reproducer and crash the kernel. 
Explanation of reproducer (set 64K mTHP to always):

Fault in a 64K large folio.  Split the VMA at mid-point with
MADV_DONTFORK.  fork() - parent points to the folio with 8 writable ptes
and 8 non-writable ptes.  Merge the VMAs with MADV_DOFORK so that
folio_unmap_pte_batch() can determine all the 16 ptes as a batch.  Do
MADV_FREE on the range to mark the folio as lazyfree.  Write to the memory
to dirty the pte, eventually rmap will dirty the folio.  Then trigger
reclaim, we will hit the pte restoration path, and the kernel will crash
with the trace given below.

The BUG happens at:

	BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);

The code path is asking for anonymous page to be mapped writable into the
pagetable.  The BUG_ON() firing implies that such a writable page has been
mapped into the pagetables of more than one process, which breaks
anonymous memory/CoW semantics.

[   21.134473] kernel BUG at mm/page_table_check.c:118!
[   21.134497] Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
[   21.135917] Modules linked in:
[   21.136085] CPU: 1 UID: 0 PID: 1735 Comm: dup-lazyfree Not tainted 7.0.0-rc1-00116-g018018a17770 #1028 PREEMPT
[   21.136858] Hardware name: linux,dummy-virt (DT)
[   21.137019] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[   21.137308] pc : page_table_check_set+0x28c/0x2a8
[   21.137607] lr : page_table_check_set+0x134/0x2a8
[   21.137885] sp : ffff80008a3b3340
[   21.138124] x29: ffff80008a3b3340 x28: fffffdffc3d14400 x27: ffffd1a55e03d000
[   21.138623] x26: 0040000000000040 x25: ffffd1a55f7dd000 x24: 0000000000000001
[   21.139045] x23: 0000000000000001 x22: 0000000000000001 x21: ffffd1a55f217f30
[   21.139629] x20: 0000000000134521 x19: 0000000000134519 x18: 005c43e000040000
[   21.140027] x17: 0001400000000000 x16: 0001700000000000 x15: 000000000000ffff
[   21.140578] x14: 000000000000000c x13: 005c006000000000 x12: 0000000000000020
[   21.140828] x11: 0000000000000000 x10: 005c000000000000 x9 : ffffd1a55c079ee0
[   21.141077] x8 : 0000000000000001 x7 : 005c03e000040000 x6 : 000000004000ffff
[   21.141490] x5 : ffff00017fffce00 x4 : 0000000000000001 x3 : 0000000000000002
[   21.141741] x2 : 0000000000134510 x1 : 0000000000000000 x0 : ffff0000c08228c0
[   21.141991] Call trace:
[   21.142093]  page_table_check_set+0x28c/0x2a8 (P)
[   21.142265]  __page_table_check_ptes_set+0x144/0x1e8
[   21.142441]  __set_ptes_anysz.constprop.0+0x160/0x1a8
[   21.142766]  contpte_set_ptes+0xe8/0x140
[   21.142907]  try_to_unmap_one+0x10c4/0x10d0
[   21.143177]  rmap_walk_anon+0x100/0x250
[   21.143315]  try_to_unmap+0xa0/0xc8
[   21.143441]  shrink_folio_list+0x59c/0x18a8
[   21.143759]  shrink_lruvec+0x664/0xbf0
[   21.144043]  shrink_node+0x218/0x878
[   21.144285]  __node_reclaim.constprop.0+0x98/0x338
[   21.144763]  user_proactive_reclaim+0x2a4/0x340
[   21.145056]  reclaim_store+0x3c/0x60
[   21.145216]  dev_attr_store+0x20/0x40
[   21.145585]  sysfs_kf_write+0x84/0xa8
[   21.145835]  kernfs_fop_write_iter+0x130/0x1c8
[   21.145994]  vfs_write+0x2b8/0x368
[   21.146119]  ksys_write+0x70/0x110
[   21.146240]  __arm64_sys_write+0x24/0x38
[   21.146380]  invoke_syscall+0x50/0x120
[   21.146513]  el0_svc_common.constprop.0+0x48/0xf8
[   21.146679]  do_el0_svc+0x28/0x40
[   21.146798]  el0_svc+0x34/0x110
[   21.146926]  el0t_64_sync_handler+0xa0/0xe8
[   21.147074]  el0t_64_sync+0x198/0x1a0
[   21.147225] Code: f9400441 b4fff241 17ffff94 d4210000 (d4210000)
[   21.147440] ---[ end trace 0000000000000000 ]---

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include <sys/wait.h>
#include <sched.h>
#include <fcntl.h>

void write_to_reclaim() {
    const char *path = "/sys/devices/system/node/node0/reclaim";
    const char *value = "409600000000";
    int fd = open(path, O_WRONLY);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    if (write(fd, value, sizeof("409600000000") - 1) == -1) {
        perror("write");
        close(fd);
        exit(EXIT_FAILURE);
    }

    printf("Successfully wrote %s to %s\n", value, path);
    close(fd);
}

int main()
{
	char *ptr = mmap((void *)(1UL << 30), 1UL << 16, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if ((unsigned long)ptr != (1UL << 30)) {
		perror("mmap");
		return 1;
	}

	/* a 64K folio gets faulted in */
	memset(ptr, 0, 1UL << 16);

	/* 32K half will not be shared into child */
	if (madvise(ptr, 1UL << 15, MADV_DONTFORK)) {
		perror("madvise madv dontfork");
		return 1;
	}

	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	} else if (pid == 0) {
		sleep(15);
	} else {
		/* merge VMAs. now first half of the 16 ptes are writable, the other half not. */
		if (madvise(ptr, 1UL << 15, MADV_DOFORK)) {
			perror("madvise madv fork");
			return 1;
		}
		if (madvise(ptr, (1UL << 16), MADV_FREE)) {
			perror("madvise madv free");
			return 1;
		}

		/* dirty the large folio */
		(*ptr) += 10;

		write_to_reclaim();
		// sleep(10);
		waitpid(pid, NULL, 0);

	}
}

Link: https://lkml.kernel.org/r/20260303061528.2429162-1-dev.jain@arm.com
Fixes: 354dffd295 ("mm: support batched unmap for lazyfree large folios during reclamation")
Signed-off-by: Dev Jain <dev.jain@arm.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Tested-by: Lance Yang <lance.yang@linux.dev>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-10 16:01:49 -07:00
Chris Down
fae654083b mm/huge_memory: fix use of NULL folio in move_pages_huge_pmd()
move_pages_huge_pmd() handles UFFDIO_MOVE for both normal THPs and huge
zero pages.  For the huge zero page path, src_folio is explicitly set to
NULL, and is used as a sentinel to skip folio operations like lock and
rmap.

In the huge zero page branch, src_folio is NULL, so folio_mk_pmd(NULL,
pgprot) passes NULL through folio_pfn() and page_to_pfn().  With
SPARSEMEM_VMEMMAP this silently produces a bogus PFN, installing a PMD
pointing to non-existent physical memory.  On other memory models it is a
NULL dereference.

Use page_folio(src_page) to obtain the valid huge zero folio from the
page, which was obtained from pmd_page() and remains valid throughout.

After commit d82d09e482 ("mm/huge_memory: mark PMD mappings of the huge
zero folio special"), moved huge zero PMDs must remain special so
vm_normal_page_pmd() continues to treat them as special mappings.

move_pages_huge_pmd() currently reconstructs the destination PMD in the
huge zero page branch, which drops PMD state such as pmd_special() on
architectures with CONFIG_ARCH_HAS_PTE_SPECIAL.  As a result,
vm_normal_page_pmd() can treat the moved huge zero PMD as a normal page
and corrupt its refcount.

Instead of reconstructing the PMD from the folio, derive the destination
entry from src_pmdval after pmdp_huge_clear_flush(), then handle the PMD
metadata the same way move_huge_pmd() does for moved entries by marking it
soft-dirty and clearing uffd-wp.

Link: https://lkml.kernel.org/r/a1e787dd-b911-474d-8570-f37685357d86@lucifer.local
Fixes: e3981db444 ("mm: add folio_mk_pmd()")
Signed-off-by: Chris Down <chris@chrisdown.name>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-10 16:01:49 -07:00
Randy Dunlap
6ffd853b0b build_bug.h: correct function parameters names in kernel-doc
Use the correct function (or macro) names to avoid kernel-doc warnings:

Warning: include/linux/build_bug.h:38 function parameter 'cond' not
 described in 'BUILD_BUG_ON_MSG'
Warning: include/linux/build_bug.h:38 function parameter 'msg' not
 described in 'BUILD_BUG_ON_MSG'
Warning: include/linux/build_bug.h:76 function parameter 'expr' not
 described in 'static_assert'

Link: https://lkml.kernel.org/r/20260302005144.3467019-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-10 16:01:48 -07:00
Thorsten Blum
36f46b0e36 crash_dump: don't log dm-crypt key bytes in read_key_from_user_keying
When debug logging is enabled, read_key_from_user_keying() logs the first
8 bytes of the key payload and partially exposes the dm-crypt key.  Stop
logging any key bytes.

Link: https://lkml.kernel.org/r/20260227230008.858641-2-thorsten.blum@linux.dev
Fixes: 479e58549b ("crash_dump: store dm crypt keys in kdump reserved memory")
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Baoquan He <bhe@redhat.com>
Cc: Coiby Xu <coxu@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-10 16:01:48 -07:00
Ye Bin
88d37abb36 smb/client: only export symbol for 'smb2maperror-test' module
Only export smb2_get_err_map_test smb2_error_map_table_test and
smb2_error_map_num symbol for 'smb2maperror-test' module.

Fixes: 7d0bf050a5 ("smb/client: make SMB2 maperror KUnit tests a separate module")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-10 17:22:04 -05:00
Bharath SM
d78840a6a3 smb: client: fix in-place encryption corruption in SMB2_write()
SMB2_write() places write payload in iov[1..n] as part of rq_iov.
smb3_init_transform_rq() pointer-shares rq_iov, so crypt_message()
encrypts iov[1] in-place, replacing the original plaintext with
ciphertext. On a replayable error, the retry sends the same iov[1]
which now contains ciphertext instead of the original data,
resulting in corruption.

The corruption is most likely to be observed when connections are
unstable, as reconnects trigger write retries that re-send the
already-encrypted data.

This affects SFU mknod, MF symlinks, etc. On kernels before
6.10 (prior to the netfs conversion), sync writes also used
this path and were similarly affected. The async write path
wasn't unaffected as it uses rq_iter which gets deep-copied.

Fix by moving the write payload into rq_iter via iov_iter_kvec(),
so smb3_init_transform_rq() deep-copies it before encryption.

Cc: stable@vger.kernel.org #6.3+
Acked-by: Henrique Carvalho <henrique.carvalho@suse.com>
Acked-by: Shyam Prasad N <sprasad@microsoft.com>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: Bharath SM <bharathsm@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-10 17:22:03 -05:00
Arnd Bergmann
fae11330dc smb: client: fix sbflags initialization
The newly introduced variable is initialized in an #ifdef block
but used outside of it, leading to undefined behavior when
CONFIG_CIFS_ALLOW_INSECURE_LEGACY is disabled:

fs/smb/client/dir.c:417:9: error: variable 'sbflags' is uninitialized when used here [-Werror,-Wuninitialized]
  417 |                                 if (sbflags & CIFS_MOUNT_DYNPERM)
      |                                     ^~~~~~~

Move the initialization into the declaration, the same way as the
other similar function do it.

Fixes: 4fc3a433c1 ("smb: client: use atomic_t for mnt_cifs_flags")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-10 17:22:03 -05:00
Paulo Alcantara
4a7d2729dc smb: client: fix atomic open with O_DIRECT & O_SYNC
When user application requests O_DIRECT|O_SYNC along with O_CREAT on
open(2), CREATE_NO_BUFFER and CREATE_WRITE_THROUGH bits were missed in
CREATE request when performing an atomic open, thus leading to
potentially data integrity issues.

Fix this by setting those missing bits in CREATE request when
O_DIRECT|O_SYNC has been specified in cifs_do_create().

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Henrique Carvalho <henrique.carvalho@suse.com>
Cc: Tom Talpey <tom@talpey.com>
Cc: linux-cifs@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-10 17:21:42 -05:00
Matt Vollrath
e94eaef111 e1000/e1000e: Fix leak in DMA error cleanup
If an error is encountered while mapping TX buffers, the driver should
unmap any buffers already mapped for that skb.

Because count is incremented after a successful mapping, it will always
match the correct number of unmappings needed when dma_error is reached.
Decrementing count before the while loop in dma_error causes an
off-by-one error. If any mapping was successful before an unsuccessful
mapping, exactly one DMA mapping would leak.

In these commits, a faulty while condition caused an infinite loop in
dma_error:
Commit 03b1320dfc ("e1000e: remove use of skb_dma_map from e1000e
driver")
Commit 602c0554d7 ("e1000: remove use of skb_dma_map from e1000 driver")

Commit c1fa347f20 ("e1000/e1000e/igb/igbvf/ixgb/ixgbe: Fix tests of
unsigned in *_tx_map()") fixed the infinite loop, but introduced the
off-by-one error.

This issue may still exist in the igbvf driver, but I did not address it
in this patch.

Fixes: c1fa347f20 ("e1000/e1000e/igb/igbvf/ixgb/ixgbe: Fix tests of unsigned in *_tx_map()")
Assisted-by: Claude:claude-4.6-opus
Signed-off-by: Matt Vollrath <tactii@gmail.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-10 13:02:54 -07:00
Alok Tiwari
e809085f49 i40e: fix src IP mask checks and memcpy argument names in cloud filter
Fix following issues in the IPv4 and IPv6 cloud filter handling logic in
both the add and delete paths:

- The source-IP mask check incorrectly compares mask.src_ip[0] against
  tcf.dst_ip[0]. Update it to compare against tcf.src_ip[0]. This likely
  goes unnoticed because the check is in an "else if" path that only
  executes when dst_ip is not set, most cloud filter use cases focus on
  destination-IP matching, and the buggy condition can accidentally
  evaluate true in some cases.

- memcpy() for the IPv4 source address incorrectly uses
  ARRAY_SIZE(tcf.dst_ip) instead of ARRAY_SIZE(tcf.src_ip), although
  both arrays are the same size.

- The IPv4 memcpy operations used ARRAY_SIZE(tcf.dst_ip) and ARRAY_SIZE
  (tcf.src_ip), Update these to use sizeof(cfilter->ip.v4.dst_ip) and
  sizeof(cfilter->ip.v4.src_ip) to ensure correct and explicit copy size.

- In the IPv6 delete path, memcmp() uses sizeof(src_ip6) when comparing
  dst_ip6 fields. Replace this with sizeof(dst_ip6) to make the intent
  explicit, even though both fields are struct in6_addr.

Fixes: e284fc2804 ("i40e: Add and delete cloud filter")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-10 13:02:45 -07:00
Linus Torvalds
b4f0dd314b Merge tag 'mm-hotfixes-stable-2026-03-09-16-36' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "15 hotfixes. 6 are cc:stable. 14 are for MM.

  Singletons, with one doubleton - please see the changelogs for details"

* tag 'mm-hotfixes-stable-2026-03-09-16-36' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  MAINTAINERS, mailmap: update email address for Lorenzo Stoakes
  mm/mmu_notifier: clean up mmu_notifier.h kernel-doc
  uaccess: correct kernel-doc parameter format
  mm/huge_memory: fix a folio_split() race condition with folio_try_get()
  MAINTAINERS: add co-maintainer and reviewer for SLAB ALLOCATOR
  MAINTAINERS: add RELAY entry
  memcg: fix slab accounting in refill_obj_stock() trylock path
  mm/hugetlb.c: use __pa() instead of virt_to_phys() in early bootmem alloc code
  zram: rename writeback_compressed device attr
  tools/testing: fix testing/vma and testing/radix-tree build
  Revert "ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor()"
  mm/cma: move put_page_testzero() out of VM_WARN_ON in cma_release()
  mm/damon/core: clear walk_control on inactive context in damos_walk()
  mm: memfd_luo: always dirty all folios
  mm: memfd_luo: always make all folios uptodate
2026-03-10 12:47:56 -07:00
Peter Ujfalusi
30e4b2290c ASoC: codecs: rt1011: Use component to get the dapm context in spk_mode_put
The correct helper to use in rt1011_recv_spk_mode_put() to retrieve the
DAPM context is snd_soc_component_to_dapm(), from kcontrol we will
receive NULL pointer.

Closes: https://github.com/thesofproject/linux/issues/5691
Fixes: 5b35bb517f ("ASoC: codecs: rt1011: convert to snd_soc_dapm_xxx()")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://patch.msgid.link/20260310065350.18921-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-10 19:32:56 +00:00
Paul Chaignon
e06e6b8001 selftests/bpf: Fix pkg-config call on static builds
For commit b0dcdcb9ae ("resolve_btfids: Fix linker flags detection"),
I suggested setting HOSTPKG_CONFIG to $PKG_CONFIG when compiling
resolve_btfids, but I forgot the quotes around that variable.

As a result, when running vmtest.sh with static linking, it fails as
follows:

    $ LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh
    [...]
    make: unrecognized option '--static'
    Usage: make [options] [target] ...
    [...]

This worked when I tested it because HOSTPKG_CONFIG didn't have a
default value in the resolve_btfids Makefile, but once it does, the
quotes aren't preserved and it fails on the next make call.

Fixes: b0dcdcb9ae ("resolve_btfids: Fix linker flags detection")
Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Acked-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/abADBwn_ykblpABE@mail.gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-10 12:04:00 -07:00
Sachin Kumar
2321a9596d bpf: Fix constant blinding for PROBE_MEM32 stores
BPF_ST | BPF_PROBE_MEM32 immediate stores are not handled by
bpf_jit_blind_insn(), allowing user-controlled 32-bit immediates to
survive unblinded into JIT-compiled native code when bpf_jit_harden >= 1.

The root cause is that convert_ctx_accesses() rewrites BPF_ST|BPF_MEM
to BPF_ST|BPF_PROBE_MEM32 for arena pointer stores during verification,
before bpf_jit_blind_constants() runs during JIT compilation. The
blinding switch only matches BPF_ST|BPF_MEM (mode 0x60), not
BPF_ST|BPF_PROBE_MEM32 (mode 0xa0). The instruction falls through
unblinded.

Add BPF_ST|BPF_PROBE_MEM32 cases to bpf_jit_blind_insn() alongside the
existing BPF_ST|BPF_MEM cases. The blinding transformation is identical:
load the blinded immediate into BPF_REG_AX via mov+xor, then convert
the immediate store to a register store (BPF_STX).

The rewritten STX instruction must preserve the BPF_PROBE_MEM32 mode so
the architecture JIT emits the correct arena addressing (R12-based on
x86-64). Cannot use the BPF_STX_MEM() macro here because it hardcodes
BPF_MEM mode; construct the instruction directly instead.

Fixes: 6082b6c328 ("bpf: Recognize addr_space_cast instruction in the verifier.")
Reviewed-by: Puranjay Mohan <puranjay@kernel.org>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Sachin Kumar <xcyfun@protonmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/Y6IT5VvNRchPBLI5D7JZHBzZrU9rb0ycRJPJzJSXGj7kJlX8RJwZFSM2YZjcDxoQKABkxt1T8Os2gi23PYyFuQe6KkZGWVyfz8K5afdy9ak=@protonmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-10 11:58:17 -07:00
Alexei Starovoitov
ac72464b10 Merge branch 'bpf-reset-register-id-for-bpf_end-value-tracking'
Yazhou Tang says:

====================
bpf: Reset register ID for BPF_END value tracking

This patchset fixes a register's scalar ID issue for BPF_END operations
reported by Guillaume Laporte. Please see commit log of 1/2 for more details.

Changes v1 => v2:

1. Reset register ID inside scalar_byte_swap() conditionally. (Eduard)

v1: https://lore.kernel.org/bpf/20260303093956.395076-1-tangyazhou@zju.edu.cn/
====================

Link: https://patch.msgid.link/20260304083228.142016-1-tangyazhou@zju.edu.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-10 11:46:41 -07:00
Lizhi Hou
6b13cb8f48 accel/amdxdna: Fix runtime suspend deadlock when there is pending job
The runtime suspend callback drains the running job workqueue before
suspending the device. If a job is still executing and calls
pm_runtime_resume_and_get(), it can deadlock with the runtime suspend
path.

Fix this by moving pm_runtime_resume_and_get() from the job execution
routine to the job submission routine, ensuring the device is resumed
before the job is queued and avoiding the deadlock during runtime
suspend.

Fixes: 063db45183 ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260310180058.336348-1-lizhi.hou@amd.com
2026-03-10 11:46:40 -07:00
Yazhou Tang
ea1989746b selftests/bpf: Add test for BPF_END register ID reset
Add a test case to ensure that BPF_END operations correctly break
register's scalar ID ties.

The test creates a scenario where r1 is a copy of r0, r0 undergoes a
byte swap, and then r0 is checked against a constant.

- Without the fix in the verifier, the bounds learned from r0 are
  incorrectly propagated to r1, making the verifier believe r1 is
  bounded and wrongly allowing subsequent pointer arithmetic.

- With the fix, r1 remains an unbounded scalar, and the verifier
  correctly rejects the arithmetic operation between the frame pointer
  and the unbounded register.

Co-developed-by: Tianci Cao <ziye@zju.edu.cn>
Signed-off-by: Tianci Cao <ziye@zju.edu.cn>
Co-developed-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Yazhou Tang <tangyazhou518@outlook.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260304083228.142016-3-tangyazhou@zju.edu.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-10 11:46:31 -07:00
Yazhou Tang
a3125bc018 bpf: Reset register ID for BPF_END value tracking
When a register undergoes a BPF_END (byte swap) operation, its scalar
value is mutated in-place. If this register previously shared a scalar ID
with another register (e.g., after an `r1 = r0` assignment), this tie must
be broken.

Currently, the verifier misses resetting `dst_reg->id` to 0 for BPF_END.
Consequently, if a conditional jump checks the swapped register, the
verifier incorrectly propagates the learned bounds to the linked register,
leading to false confidence in the linked register's value and potentially
allowing out-of-bounds memory accesses.

Fix this by explicitly resetting `dst_reg->id` to 0 in the BPF_END case
to break the scalar tie, similar to how BPF_NEG handles it via
`__mark_reg_known`.

Fixes: 9d21199842 ("bpf: Add bitwise tracking for BPF_END")
Closes: https://lore.kernel.org/bpf/AMBPR06MB108683CFEB1CB8D9E02FC95ECF17EA@AMBPR06MB10868.eurprd06.prod.outlook.com/
Link: https://lore.kernel.org/bpf/4be25f7442a52244d0dd1abb47bc6750e57984c9.camel@gmail.com/
Reported-by: Guillaume Laporte <glapt.pro@outlook.com>
Co-developed-by: Tianci Cao <ziye@zju.edu.cn>
Signed-off-by: Tianci Cao <ziye@zju.edu.cn>
Co-developed-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Yazhou Tang <tangyazhou518@outlook.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260304083228.142016-2-tangyazhou@zju.edu.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-10 11:46:31 -07:00
Jessica Liu
b330fbfd34 irqchip/riscv-aplic: Register syscore operations only once
Since commit 95a8ddde36 ("irqchip/riscv-aplic: Preserve APLIC
states across suspend/resume"), when multiple NUMA nodes exist
and AIA is not configured as "none", aplic_probe() is called
multiple times. This leads to register_syscore(&aplic_syscore)
being invoked repeatedly, causing the following Oops:

 list_add double add: new=ffffffffb91461f0, prev=ffffffffb91461f0, next=ffffffffb915c408.
 [<ffffffffb7b5c8ca>] __list_add_valid_or_report+0x60/0xc0
 [<ffffffffb7cc3236>] register_syscore+0x3e/0x70
 [<ffffffffb7b8d61c>] aplic_probe+0xc6/0x112

Fix this by registering syscore operations only once, using a static
variable aplic_syscore_registered to track registration.

[ tglx: Trim backtrace properly ]

Fixes: 95a8ddde36 ("irqchip/riscv-aplic: Preserve APLIC states across suspend/resume")
Signed-off-by: Jessica Liu <liu.xuemei1@zte.com.cn>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260310141731145xMwLsyvXl9Gw-m6A4VRYj@zte.com.cn
2026-03-10 18:42:34 +01:00
Jessica Liu
620b6ded72 irqchip/riscv-aplic: Do not clear ACPI dependencies on probe failure
aplic_probe() calls acpi_dev_clear_dependencies() unconditionally at the
end, even when the preceding setup (MSI or direct mode) has failed. This is
incorrect because if the device failed to probe, it should not be
considered as active and should not clear dependencies for other devices
waiting on it.

Fix this by returning immediately when the setup fails, skipping the ACPI
dependency cleanup. Also, explicitly return 0 on success instead of relying
on the value of 'rc' to make the success path clear.

Fixes: 5122e380c2 ("irqchip/riscv-aplic: Add ACPI support")
Signed-off-by: Jessica Liu <liu.xuemei1@zte.com.cn>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260310141600411Fu8H8-GXOOgKISU48Tjgx@zte.com.cn
2026-03-10 18:42:33 +01:00
Tim Kovalenko
c7940c8bf2 gpu: nova-core: fix stack overflow in GSP memory allocation
The `Cmdq::new` function was allocating a `PteArray` struct on the stack
and was causing a stack overflow with 8216 bytes.

Modify the `PteArray` to calculate and write the Page Table Entries
directly into the coherent DMA buffer one-by-one. This reduces the stack
usage quite a lot.

Reported-by: Gary Guo <gary@garyguo.net>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549
Link: https://lore.kernel.org/rust-for-linux/CANiq72mAQxbRJZDnik3Qmd4phvFwPA01O2jwaaXRh_T+2=L-qA@mail.gmail.com/
Fixes: f38b4f105c ("gpu: nova-core: Create initial Gsp")
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Tim Kovalenko <tim.kovalenko@proton.me>
Link: https://patch.msgid.link/20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me
[ * Use PteArray::entry() in LogBuffer::new(),
  * Add TODO comment to use IoView projections once available,
  * Add PTE_ARRAY_SIZE constant to avoid duplication.

    - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-10 18:29:14 +01:00
Yihan Ding
697f514ad9 landlock: Clean up interrupted thread logic in TSYNC
In landlock_restrict_sibling_threads(), when the calling thread is
interrupted while waiting for sibling threads to prepare, it executes
a recovery path.

Previously, this path included a wait_for_completion() call on
all_prepared to prevent a Use-After-Free of the local shared_ctx.
However, this wait is redundant. Exiting the main do-while loop
already leads to a bottom cleanup section that unconditionally waits
for all_finished. Therefore, replacing the wait with a simple break
is safe, prevents UAF, and correctly unblocks the remaining task_works.

Clean up the error path by breaking the loop and updating the
surrounding comments to accurately reflect the state machine.

Suggested-by: Günther Noack <gnoack3000@gmail.com>
Signed-off-by: Yihan Ding <dingyihan@uniontech.com>
Tested-by: Günther Noack <gnoack3000@gmail.com>
Reviewed-by: Günther Noack <gnoack3000@gmail.com>
Link: https://lore.kernel.org/r/20260306021651.744723-3-dingyihan@uniontech.com
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-10 18:22:58 +01:00
Yihan Ding
ff88df67db landlock: Serialize TSYNC thread restriction
syzbot found a deadlock in landlock_restrict_sibling_threads().
When multiple threads concurrently call landlock_restrict_self() with
sibling thread restriction enabled, they can deadlock by mutually
queueing task_works on each other and then blocking in kernel space
(waiting for the other to finish).

Fix this by serializing the TSYNC operations within the same process
using the exec_update_lock. This prevents concurrent invocations
from deadlocking.

We use down_write_trylock() and restart the syscall if the lock
cannot be acquired immediately. This ensures that if a thread fails
to get the lock, it will return to userspace, allowing it to process
any pending TSYNC task_works from the lock holder, and then
transparently restart the syscall.

Fixes: 42fc7e6543 ("landlock: Multithreading support for landlock_restrict_self()")
Reported-by: syzbot+7ea2f5e9dfd468201817@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=7ea2f5e9dfd468201817
Suggested-by: Günther Noack <gnoack3000@gmail.com>
Suggested-by: Tingmao Wang <m@maowtm.org>
Tested-by: Justin Suess <utilityemal77@gmail.com>
Signed-off-by: Yihan Ding <dingyihan@uniontech.com>
Tested-by: Günther Noack <gnoack3000@gmail.com>
Reviewed-by: Günther Noack <gnoack3000@gmail.com>
Link: https://lore.kernel.org/r/20260306021651.744723-2-dingyihan@uniontech.com
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-10 18:22:56 +01:00
Denis Benato
d1afcd7165 HID: asus: add xg mobile 2022 external hardware support
XG mobile station 2022 has a different PID than the 2023 model: add it
that model to hid-asus.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-03-10 18:02:01 +01:00
Muhammad Amirul Asyraf Mohamad Jamian
22fd7f7fed firmware: stratix10-svc: Add Multi SVC clients support
In the current implementation, SVC client drivers such as socfpga-hwmon,
intel_fcs, stratix10-soc, stratix10-rsu each send an SMC command that
triggers a single thread in the stratix10-svc driver. Upon receiving a
callback, the initiating client driver sends a stratix10-svc-done signal,
terminating the thread without waiting for other pending SMC commands to
complete. This leads to a timeout issue in the firmware SVC mailbox service
when multiple client drivers send SMC commands concurrently.

To resolve this issue, a dedicated thread is now created per channel. The
stratix10-svc driver will support up to the number of channels defined by
SVC_NUM_CHANNEL. Thread synchronization is handled using a mutex to prevent
simultaneous issuance of SMC commands by multiple threads.

SVC_NUM_DATA_IN_FIFO is reduced from 32 to 8, since each channel now has
its own dedicated FIFO and the SDM processes commands one at a time.
8 entries per channel is sufficient while keeping the total aggregate
capacity the same (4 channels x 8 = 32 entries).

Additionally, a thread task is now validated before invoking kthread_stop
when the user aborts, ensuring safe termination.

Timeout values have also been adjusted to accommodate the increased load
from concurrent client driver activity.

Fixes: 7ca5ce8965 ("firmware: add Intel Stratix10 service layer driver")
Cc: stable@vger.kernel.org
Signed-off-by: Ang Tien Sung <tien.sung.ang@altera.com>
Signed-off-by: Fong, Yan Kei <yankei.fong@altera.com>
Signed-off-by: Muhammad Amirul Asyraf Mohamad Jamian <muhammad.amirul.asyraf.mohamad.jamian@altera.com>
Link: https://lore.kernel.org/all/20260305093151.2678-1-muhammad.amirul.asyraf.mohamad.jamian@altera.com
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
2026-03-10 11:32:31 -05:00
Petr Oros
fdadbf6e84 iavf: fix incorrect reset handling in callbacks
Three driver callbacks schedule a reset and wait for its completion:
ndo_change_mtu(), ethtool set_ringparam(), and ethtool set_channels().

Waiting for reset in ndo_change_mtu() and set_ringparam() was added by
commit c2ed2403f1 ("iavf: Wait for reset in callbacks which trigger
it") to fix a race condition where adding an interface to bonding
immediately after MTU or ring parameter change failed because the
interface was still in __RESETTING state. The same commit also added
waiting in iavf_set_priv_flags(), which was later removed by commit
53844673d5 ("iavf: kill "legacy-rx" for good").

Waiting in set_channels() was introduced earlier by commit 4e5e6b5d9d
("iavf: Fix return of set the new channel count") to ensure the PF has
enough time to complete the VF reset when changing channel count, and to
return correct error codes to userspace.

Commit ef490bbb22 ("iavf: Add net_shaper_ops support") added
net_shaper_ops to iavf, which required reset_task to use _locked NAPI
variants (napi_enable_locked, napi_disable_locked) that need the netdev
instance lock.

Later, commit 7e4d784f58 ("net: hold netdev instance lock during
rtnetlink operations") and commit 2bcf4772e4 ("net: ethtool: try to
protect all callback with netdev instance lock") started holding the
netdev instance lock during ndo and ethtool callbacks for drivers with
net_shaper_ops.

Finally, commit 120f28a6f3 ("iavf: get rid of the crit lock")
replaced the driver's crit_lock with netdev_lock in reset_task, causing
incorrect behavior: the callback holds netdev_lock and waits for
reset_task, but reset_task needs the same lock:

  Thread 1 (callback)               Thread 2 (reset_task)
  -------------------               ---------------------
  netdev_lock()                     [blocked on workqueue]
  ndo_change_mtu() or ethtool op
    iavf_schedule_reset()
    iavf_wait_for_reset()           iavf_reset_task()
      waiting...                      netdev_lock() <- blocked

This does not strictly deadlock because iavf_wait_for_reset() uses
wait_event_interruptible_timeout() with a 5-second timeout. The wait
eventually times out, the callback returns an error to userspace, and
after the lock is released reset_task completes the reset. This leads to
incorrect behavior: userspace sees an error even though the configuration
change silently takes effect after the timeout.

Fix this by extracting the reset logic from iavf_reset_task() into a new
iavf_reset_step() function that expects netdev_lock to be already held.
The three callbacks now call iavf_reset_step() directly instead of
scheduling the work and waiting, performing the reset synchronously in
the caller's context which already holds netdev_lock. This eliminates
both the incorrect error reporting and the need for
iavf_wait_for_reset(), which is removed along with the now-unused
reset_waitqueue.

The workqueue-based iavf_reset_task() becomes a thin wrapper that
acquires netdev_lock and calls iavf_reset_step(), preserving its use
for PF-initiated resets.

The callbacks may block for several seconds while iavf_reset_step()
polls hardware registers, but this is acceptable since netdev_lock is a
per-device mutex and only serializes operations on the same interface.

v3:
- Remove netif_running() guard from iavf_set_channels(). Unlike
  set_ringparam where descriptor counts are picked up by iavf_open()
  directly, num_req_queues is only consumed during
  iavf_reinit_interrupt_scheme() in the reset path. Skipping the reset
  on a down device would silently discard the channel count change.
- Remove dead reset_waitqueue code (struct field, init, and all
  wake_up calls) since iavf_wait_for_reset() was the only consumer.

Fixes: 120f28a6f3 ("iavf: get rid of the crit lock")
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Petr Oros <poros@redhat.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-10 09:08:31 -07:00
Petr Oros
efc54fb13d iavf: fix PTP use-after-free during reset
Commit 7c01dbfc8a ("iavf: periodically cache PHC time") introduced a
worker to cache PHC time, but failed to stop it during reset or disable.

This creates a race condition where `iavf_reset_task()` or
`iavf_disable_vf()` free adapter resources (AQ) while the worker is still
running. If the worker triggers `iavf_queue_ptp_cmd()` during teardown, it
accesses freed memory/locks, leading to a crash.

Fix this by calling `iavf_ptp_release()` before tearing down the adapter.
This ensures `ptp_clock_unregister()` synchronously cancels the worker and
cleans up the chardev before the backing resources are destroyed.

Fixes: 7c01dbfc8a ("iavf: periodically cache PHC time")
Signed-off-by: Petr Oros <poros@redhat.com>
Reviewed-by: Ivan Vecera <ivecera@redhat.com>
Acked-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-10 09:08:31 -07:00
Nikolay Aleksandrov
bd98c6204d drivers: net: ice: fix devlink parameters get without irdma
If CONFIG_IRDMA isn't enabled but there are ice NICs in the system, the
driver will prevent full devlink dev param show dump because its rdma get
callbacks return ENODEV and stop the dump. For example:
 $ devlink dev param show
 pci/0000:82:00.0:
   name msix_vec_per_pf_max type generic
     values:
       cmode driverinit value 2
   name msix_vec_per_pf_min type generic
     values:
       cmode driverinit value 2
 kernel answers: No such device

Returning EOPNOTSUPP allows the dump to continue so we can see all devices'
devlink parameters.

Fixes: c24a65b6a2 ("iidc/ice/irdma: Update IDC to support multiple consumers")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-10 09:08:31 -07:00
Thorsten Blum
fa655a9ca7 nvme: Annotate struct nvme_dhchap_key with __counted_by
Add the __counted_by() compiler attribute to the flexible array member
'key' to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and
CONFIG_FORTIFY_SOURCE.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-03-10 08:20:29 -07:00
Maurizio Lombardi
0375c81eb2 nvme-core: do not pass empty queue_limits to blk_mq_alloc_queue()
In nvme_alloc_admin_tag_set(), an empty queue_limits struct is
currently allocated on the stack and passed by reference to
blk_mq_alloc_queue().

This is redundant because blk_mq_alloc_queue() already handles
a NULL limits pointer by internally substituting it with a default
empty queue_limits struct.
Remove the unnecessary local variable and pass a NULL value.

Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-03-10 08:20:29 -07:00
Sungwoo Kim
fc71f409b2 nvme-pci: Fix race bug in nvme_poll_irqdisable()
In the following scenario, pdev can be disabled between (1) and (3) by
(2). This sets pdev->msix_enabled = 0. Then, pci_irq_vector() will
return MSI-X IRQ(>15) for (1) whereas return INTx IRQ(<=15) for (2).
This causes IRQ warning because it tries to enable INTx IRQ that has
never been disabled before.

To fix this, save IRQ number into a local variable and ensure
disable_irq() and enable_irq() operate on the same IRQ number.  Even if
pci_free_irq_vectors() frees the IRQ concurrently, disable_irq() and
enable_irq() on a stale IRQ number is still valid and safe, and the
depth accounting reamins balanced.

task 1:
nvme_poll_irqdisable()
  disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)) ...(1)
  enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector))  ...(3)

task 2:
nvme_reset_work()
  nvme_dev_disable()
    pdev->msix_enable = 0;  ...(2)

crash log:

------------[ cut here ]------------
Unbalanced enable for IRQ 10
WARNING: kernel/irq/manage.c:753 at __enable_irq+0x102/0x190 kernel/irq/manage.c:753, CPU#1: kworker/1:0H/26
Modules linked in:
CPU: 1 UID: 0 PID: 26 Comm: kworker/1:0H Not tainted 6.19.0-dirty #9 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
Workqueue: kblockd blk_mq_timeout_work
RIP: 0010:__enable_irq+0x107/0x190 kernel/irq/manage.c:753
Code: ff df 48 89 fa 48 c1 ea 03 0f b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 79 48 8d 3d 2e 7a 3f 05 41 8b 74 24 2c <67> 48 0f b9 3a e8 ef b9 21 00 5b 41 5c 5d e9 46 54 66 03 e8 e1 b9
RSP: 0018:ffffc900001bf550 EFLAGS: 00010046
RAX: 0000000000000007 RBX: 0000000000000000 RCX: ffffffffb20c0e90
RDX: 0000000000000000 RSI: 000000000000000a RDI: ffffffffb74b88f0
RBP: ffffc900001bf560 R08: ffff88800197cf00 R09: 0000000000000001
R10: 0000000000000003 R11: 0000000000000003 R12: ffff8880012a6000
R13: 1ffff92000037eae R14: 000000000000000a R15: 0000000000000293
FS:  0000000000000000(0000) GS:ffff8880b49f7000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555da4a25fa8 CR3: 00000000208e8000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 enable_irq+0x121/0x1e0 kernel/irq/manage.c:797
 nvme_poll_irqdisable+0x162/0x1c0 drivers/nvme/host/pci.c:1494
 nvme_timeout+0x965/0x14b0 drivers/nvme/host/pci.c:1744
 blk_mq_rq_timed_out block/blk-mq.c:1653 [inline]
 blk_mq_handle_expired+0x227/0x2d0 block/blk-mq.c:1721
 bt_iter+0x2fc/0x3a0 block/blk-mq-tag.c:292
 __sbitmap_for_each_set include/linux/sbitmap.h:269 [inline]
 sbitmap_for_each_set include/linux/sbitmap.h:290 [inline]
 bt_for_each block/blk-mq-tag.c:324 [inline]
 blk_mq_queue_tag_busy_iter+0x969/0x1e80 block/blk-mq-tag.c:536
 blk_mq_timeout_work+0x627/0x870 block/blk-mq.c:1763
 process_one_work+0x956/0x1aa0 kernel/workqueue.c:3257
 process_scheduled_works kernel/workqueue.c:3340 [inline]
 worker_thread+0x65c/0xe60 kernel/workqueue.c:3421
 kthread+0x41a/0x930 kernel/kthread.c:463
 ret_from_fork+0x6f8/0x8c0 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
 </TASK>
irq event stamp: 74478
hardirqs last  enabled at (74477): [<ffffffffb5720a9c>] __raw_spin_unlock_irq include/linux/spinlock_api_smp.h:159 [inline]
hardirqs last  enabled at (74477): [<ffffffffb5720a9c>] _raw_spin_unlock_irq+0x2c/0x60 kernel/locking/spinlock.c:202
hardirqs last disabled at (74478): [<ffffffffb57207b5>] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline]
hardirqs last disabled at (74478): [<ffffffffb57207b5>] _raw_spin_lock_irqsave+0x85/0xa0 kernel/locking/spinlock.c:162
softirqs last  enabled at (74304): [<ffffffffb1e9466c>] __do_softirq kernel/softirq.c:656 [inline]
softirqs last  enabled at (74304): [<ffffffffb1e9466c>] invoke_softirq kernel/softirq.c:496 [inline]
softirqs last  enabled at (74304): [<ffffffffb1e9466c>] __irq_exit_rcu+0xdc/0x120 kernel/softirq.c:723
softirqs last disabled at (74287): [<ffffffffb1e9466c>] __do_softirq kernel/softirq.c:656 [inline]
softirqs last disabled at (74287): [<ffffffffb1e9466c>] invoke_softirq kernel/softirq.c:496 [inline]
softirqs last disabled at (74287): [<ffffffffb1e9466c>] __irq_exit_rcu+0xdc/0x120 kernel/softirq.c:723
---[ end trace 0000000000000000 ]---

Fixes: fa059b856a (nvme-pci: Simplify nvme_poll_irqdisable)
Acked-by: Chao Shi <cshi008@fiu.edu>
Acked-by: Weidong Zhu <weizhu@fiu.edu>
Acked-by: Dave Tian <daveti@purdue.edu>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sungwoo Kim <iam@sung-woo.kim>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-03-10 08:20:29 -07:00
Chaitanya Kulkarni
2922e3507f nvmet: move async event work off nvmet-wq
For target nvmet_ctrl_free() flushes ctrl->async_event_work.
If nvmet_ctrl_free() runs on nvmet-wq, the flush re-enters workqueue
completion for the same worker:-

A. Async event work queued on nvmet-wq (prior to disconnect):
  nvmet_execute_async_event()
     queue_work(nvmet_wq, &ctrl->async_event_work)

  nvmet_add_async_event()
     queue_work(nvmet_wq, &ctrl->async_event_work)

B. Full pre-work chain (RDMA CM path):
  nvmet_rdma_cm_handler()
     nvmet_rdma_queue_disconnect()
       __nvmet_rdma_queue_disconnect()
         queue_work(nvmet_wq, &queue->release_work)
           process_one_work()
             lock((wq_completion)nvmet-wq)  <--------- 1st
             nvmet_rdma_release_queue_work()

C. Recursive path (same worker):
  nvmet_rdma_release_queue_work()
     nvmet_rdma_free_queue()
       nvmet_sq_destroy()
         nvmet_ctrl_put()
           nvmet_ctrl_free()
             flush_work(&ctrl->async_event_work)
               __flush_work()
                 touch_wq_lockdep_map()
                 lock((wq_completion)nvmet-wq) <--------- 2nd

Lockdep splat:

  ============================================
  WARNING: possible recursive locking detected
  6.19.0-rc3nvme+ #14 Tainted: G                 N
  --------------------------------------------
  kworker/u192:42/44933 is trying to acquire lock:
  ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90

  but task is already holding lock:
  ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x53e/0x660

  3 locks held by kworker/u192:42/44933:
   #0: ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x53e/0x660
   #1: ffffc9000e6cbe28 ((work_completion)(&queue->release_work)){+.+.}-{0:0}, at: process_one_work+0x1c5/0x660
   #2: ffffffff82d4db60 (rcu_read_lock){....}-{1:3}, at: __flush_work+0x62/0x530

  Workqueue: nvmet-wq nvmet_rdma_release_queue_work [nvmet_rdma]
  Call Trace:
   __flush_work+0x268/0x530
   nvmet_ctrl_free+0x140/0x310 [nvmet]
   nvmet_cq_put+0x74/0x90 [nvmet]
   nvmet_rdma_free_queue+0x23/0xe0 [nvmet_rdma]
   nvmet_rdma_release_queue_work+0x19/0x50 [nvmet_rdma]
   process_one_work+0x206/0x660
   worker_thread+0x184/0x320
   kthread+0x10c/0x240
   ret_from_fork+0x319/0x390

Move async event work to a dedicated nvmet-aen-wq to avoid reentrant
flush on nvmet-wq.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-03-10 08:20:28 -07:00
Sungwoo Kim
b4e78f1427 nvme-pci: Fix slab-out-of-bounds in nvme_dbbuf_set
dev->online_queues is a count incremented in nvme_init_queue. Thus,
valid indices are 0 through dev->online_queues − 1.

This patch fixes the loop condition to ensure the index stays within the
valid range. Index 0 is excluded because it is the admin queue.

KASAN splat:

==================================================================
BUG: KASAN: slab-out-of-bounds in nvme_dbbuf_free drivers/nvme/host/pci.c:377 [inline]
BUG: KASAN: slab-out-of-bounds in nvme_dbbuf_set+0x39c/0x400 drivers/nvme/host/pci.c:404
Read of size 2 at addr ffff88800592a574 by task kworker/u8:5/74

CPU: 0 UID: 0 PID: 74 Comm: kworker/u8:5 Not tainted 6.19.0-dirty #10 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
Workqueue: nvme-reset-wq nvme_reset_work
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0xea/0x150 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xce/0x5d0 mm/kasan/report.c:482
 kasan_report+0xdc/0x110 mm/kasan/report.c:595
 __asan_report_load2_noabort+0x18/0x20 mm/kasan/report_generic.c:379
 nvme_dbbuf_free drivers/nvme/host/pci.c:377 [inline]
 nvme_dbbuf_set+0x39c/0x400 drivers/nvme/host/pci.c:404
 nvme_reset_work+0x36b/0x8c0 drivers/nvme/host/pci.c:3252
 process_one_work+0x956/0x1aa0 kernel/workqueue.c:3257
 process_scheduled_works kernel/workqueue.c:3340 [inline]
 worker_thread+0x65c/0xe60 kernel/workqueue.c:3421
 kthread+0x41a/0x930 kernel/kthread.c:463
 ret_from_fork+0x6f8/0x8c0 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
 </TASK>

Allocated by task 34 on cpu 1 at 4.241550s:
 kasan_save_stack+0x2c/0x60 mm/kasan/common.c:57
 kasan_save_track+0x1c/0x70 mm/kasan/common.c:78
 kasan_save_alloc_info+0x3c/0x50 mm/kasan/generic.c:570
 poison_kmalloc_redzone mm/kasan/common.c:398 [inline]
 __kasan_kmalloc+0xb5/0xc0 mm/kasan/common.c:415
 kasan_kmalloc include/linux/kasan.h:263 [inline]
 __do_kmalloc_node mm/slub.c:5657 [inline]
 __kmalloc_node_noprof+0x2bf/0x8d0 mm/slub.c:5663
 kmalloc_array_node_noprof include/linux/slab.h:1075 [inline]
 nvme_pci_alloc_dev drivers/nvme/host/pci.c:3479 [inline]
 nvme_probe+0x2f1/0x1820 drivers/nvme/host/pci.c:3534
 local_pci_probe+0xef/0x1c0 drivers/pci/pci-driver.c:324
 pci_call_probe drivers/pci/pci-driver.c:392 [inline]
 __pci_device_probe drivers/pci/pci-driver.c:417 [inline]
 pci_device_probe+0x743/0x920 drivers/pci/pci-driver.c:451
 call_driver_probe drivers/base/dd.c:583 [inline]
 really_probe+0x29b/0xb70 drivers/base/dd.c:661
 __driver_probe_device+0x3b0/0x4a0 drivers/base/dd.c:803
 driver_probe_device+0x56/0x1f0 drivers/base/dd.c:833
 __driver_attach_async_helper+0x155/0x340 drivers/base/dd.c:1159
 async_run_entry_fn+0xa6/0x4b0 kernel/async.c:129
 process_one_work+0x956/0x1aa0 kernel/workqueue.c:3257
 process_scheduled_works kernel/workqueue.c:3340 [inline]
 worker_thread+0x65c/0xe60 kernel/workqueue.c:3421
 kthread+0x41a/0x930 kernel/kthread.c:463
 ret_from_fork+0x6f8/0x8c0 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246

The buggy address belongs to the object at ffff88800592a000
 which belongs to the cache kmalloc-2k of size 2048
The buggy address is located 244 bytes to the right of
 allocated 1152-byte region [ffff88800592a000, ffff88800592a480)

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x5928
head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
anon flags: 0xfffffc0000040(head|node=0|zone=1|lastcpupid=0x1fffff)
page_type: f5(slab)
raw: 000fffffc0000040 ffff888001042000 0000000000000000 dead000000000001
raw: 0000000000000000 0000000000080008 00000000f5000000 0000000000000000
head: 000fffffc0000040 ffff888001042000 0000000000000000 dead000000000001
head: 0000000000000000 0000000000080008 00000000f5000000 0000000000000000
head: 000fffffc0000003 ffffea0000164a01 00000000ffffffff 00000000ffffffff
head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff88800592a400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff88800592a480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88800592a500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                                             ^
 ffff88800592a580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88800592a600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================

Fixes: 0f0d2c876c (nvme: free sq/cq dbbuf pointers when dbbuf set fails)
Acked-by: Chao Shi <cshi008@fiu.edu>
Acked-by: Weidong Zhu <weizhu@fiu.edu>
Acked-by: Dave Tian <daveti@purdue.edu>
Signed-off-by: Sungwoo Kim <iam@sung-woo.kim>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-03-10 08:19:53 -07:00
Darrick J. Wong
52a8a1ba88 xfs: fix undersized l_iclog_roundoff values
If the superblock doesn't list a log stripe unit, we set the incore log
roundoff value to 512.  This leads to corrupt logs and unmountable
filesystems in generic/617 on a disk with 4k physical sectors...

XFS (sda1): Mounting V5 Filesystem ff3121ca-26e6-4b77-b742-aaff9a449e1c
XFS (sda1): Torn write (CRC failure) detected at log block 0x318e. Truncating head block from 0x3197.
XFS (sda1): failed to locate log tail
XFS (sda1): log mount/recovery failed: error -74
XFS (sda1): log mount failed
XFS (sda1): Mounting V5 Filesystem ff3121ca-26e6-4b77-b742-aaff9a449e1c
XFS (sda1): Ending clean mount

...on the current xfsprogs for-next which has a broken mkfs.  xfs_info
shows this...

meta-data=/dev/sda1              isize=512    agcount=4, agsize=644992 blks
         =                       sectsz=4096  attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=1
         =                       reflink=1    bigtime=1 inobtcount=1 nrext64=1
         =                       exchange=1   metadir=1
data     =                       bsize=4096   blocks=2579968, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1, parent=1
log      =internal log           bsize=4096   blocks=16384, version=2
         =                       sectsz=4096  sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
         =                       rgcount=0    rgsize=268435456 extents
         =                       zoned=0      start=0 reserved=0

...observe that the log section has sectsz=4096 sunit=0, which means
that the roundoff factor is 512, not 4096 as you'd expect.  We should
fix mkfs not to generate broken filesystems, but anyone can fuzz the
ondisk superblock so we should be more cautious.  I think the inadequate
logic predates commit a6a65fef5e, but that's clearly going to
require a different backport.

Cc: stable@vger.kernel.org # v5.14
Fixes: a6a65fef5e ("xfs: log stripe roundoff is a property of the log")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-10 16:19:31 +01:00
Ian Rogers
c7c92f76f9 perf annotate loongarch: Fix off-by-one bug in outside check
A copy-paste of a similar issue fixed by Peter Collingbourne in:
https://lore.kernel.org/linux-perf-users/20260304190613.2507582-1-pcc@google.com/

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Bill Wendling <morbo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Justin Stitt <justinstitt@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <nick.desaulniers+lkml@gmail.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-10 12:06:57 -03:00
Rafael J. Wysocki
d557640e4c sched: idle: Make skipping governor callbacks more consistent
If the cpuidle governor .select() callback is skipped because there
is only one idle state in the cpuidle driver, the .reflect() callback
should be skipped as well, at least for consistency (if not for
correctness), so do it.

Fixes: e5c9ffc6ae ("cpuidle: Skip governor when only one idle state is available")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Reviewed-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://patch.msgid.link/12857700.O9o76ZdvQC@rafael.j.wysocki
2026-03-10 16:03:02 +01:00
Stefan Haberland
4c527c7e03 s390/dasd: Copy detected format information to secondary device
During online processing for a DASD device an IO operation is started to
determine the format of the device. CDL format contains specifically
sized blocks at the beginning of the disk.

For a PPRC secondary device no real IO operation is possible therefore
this IO request can not be started and this step is skipped for online
processing of secondary devices. This is generally fine since the
secondary is a copy of the primary device.

In case of an additional partition detection that is run after a swap
operation the format information is needed to properly drive partition
detection IO.

Currently the information is not passed leading to IO errors during
partition detection and a wrongly detected partition table which in turn
might lead to data corruption on the disk with the wrong partition table.

Fix by passing the format information from primary to secondary device.

Fixes: 413862caad ("s390/dasd: add copy pair swap capability")
Cc: stable@vger.kernel.org #6.1
Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Acked-by: Eduard Shishkin <edward6@linux.ibm.com>
Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
Link: https://patch.msgid.link/20260310142330.4080106-3-sth@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-10 08:58:38 -06:00
Stefan Haberland
40e9cd4ae8 s390/dasd: Move quiesce state with pprc swap
Quiesce and resume is a mechanism to suspend operations on DASD devices.
In the context of a controlled copy pair swap operation, the quiesce
operation is usually issued before the actual swap and a resume
afterwards.

During the swap operation, the underlying device is exchanged. Therefore,
the quiesce flag must be moved to the secondary device to ensure a
consistent quiesce state after the swap.

The secondary device itself cannot be suspended separately because there
is no separate block device representation for it.

Fixes: 413862caad ("s390/dasd: add copy pair swap capability")
Cc: stable@vger.kernel.org #6.1
Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
Link: https://patch.msgid.link/20260310142330.4080106-2-sth@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-10 08:58:38 -06:00
Chen Ni
be34705aa5 perf ftrace: Fix hashmap__new() error checking
The hashmap__new() function never returns NULL, it returns error
pointers. Fix the error checking to match.

Additionally, set ftrace->profile_hash to NULL on error, and return the
exact error code from hashmap__new().

Fixes: 0f223813ed ("perf ftrace: Add 'profile' command")
Suggested-by: Ian Rogers <irogers@google.com>
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-10 11:53:27 -03:00
Peng Fan
21b3fb7dc1 regulator: pca9450: Correct probed name for PCA9452
An incorrect device name was logged for PCA9452 because the dev_info()
ternary omitted PCA9452 and fell through to "pca9450bc". Introduce a
type_name and set it per device type so the probed message matches the
actual PMIC. While here, make the PCA9451A case explicit.

No functional changes.

Fixes: 017b76fb8e ("regulator: pca9450: Add PMIC pca9452 support")
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://patch.msgid.link/20260310-pca9450-irq-v1-2-36adf52c2c55@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-10 14:52:42 +00:00
Peng Fan
5d0efaf47e regulator: pca9450: Correct interrupt type
Kernel warning on i.MX8MP-EVK when doing module test:
irq: type mismatch, failed to map hwirq-3 for gpio@30200000!

Per PCA945[X] specification: The IRQ_B pin is pulled low when any unmasked
interrupt bit status is changed and it is released high once application
processor read INT1 register.

So the interrupt should be configured as IRQF_TRIGGER_LOW, not
IRQF_TRIGGER_FALLING.

Fixes: 0935ff5f1f ("regulator: pca9450: add pca9450 pmic driver")
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://patch.msgid.link/20260310-pca9450-irq-v1-1-36adf52c2c55@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-10 14:52:42 +00:00
Mark Brown
e53c0e99fd spi: cadence-qspi: Fix requesting of APB and AHB clocks on JH7110
The move of the AHB and APB clocks from a JH7110 specific quirk to the
main clock init dropped the specification of the clock names to request
for the AHB and APB clocks, resulting in the clock framework requesting
a clock with a NULL name three times.  On most platforms where the
clocks are physically the same or some are always on this makes no
difference but the reason we had the specific quirk for JH7110 is that
it does actually have separate, controllable clocks.  Update the new
code to request the AHB and APB clocks by name to restore the original
behaviour on JH7110.

Fixes: 324ecc7788 ("spi: cadence-qspi: Kill cqspi_jh7110_clk_init")
Reported-by: Ron Economos <re@w6rz.net>
Closes: https://lore.kernel.org/r/a3ca5e9b-7446-497e-8df2-7ef2b42a02e9@w6rz.net
Tested-by: Ron Economos <re@w6rz.net>
Link: https://patch.msgid.link/20260307-spi-cadence-qspi-fix-jh7110-v1-1-c9f37b8c58b1@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20260307-spi-cadence-qspi-fix-jh7110-v1-1-c9f37b8c58b1@kernel.org
2026-03-10 14:32:38 +00:00
Paolo Abeni
73aefba4e2 Merge tag 'linux-can-fixes-for-7.0-20260310' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can
Marc Kleine-Budde says:

====================
pull-request: can 2026-03-10

this is a pull request of 2 patches for net/main.

Haibo Chen's patch fixes the maximum allowed bit rate error, which was
broken in v6.19.

Wenyuan Li contributes a patch for the hi311x driver that adds missing
error checking in the caller of the hi3110_power_enable() function,
hi3110_open().

linux-can-fixes-for-7.0-20260310

* tag 'linux-can-fixes-for-7.0-20260310' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can:
  can: hi311x: hi3110_open(): add check for hi3110_power_enable() return value
  can: dev: keep the max bitrate error at 5%
====================

Link: https://patch.msgid.link/20260310103547.2299403-1-mkl@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 15:13:55 +01:00
Rafael J. Wysocki
6ab3532b4c ACPI: video: Switch over to auxiliary bus type
Commit 02c057ddef ("ACPI: video: Convert the driver to a platform one")
switched over the ACPI video bus driver from an ACPI driver to a platform
driver, but that change introduced an unwanted and unexpected side effect.
Namely, on some systems, the ACPI device object of the ACPI video bus
device is an ACPI companion of multiple platform devices and, after
adding video_device_ids[] as an acpi_match_table to the acpi_video_bus
platform driver, all of those devices started to match that driver and
its probe callback is invoked for all of them (it fails, but it leaves
a confusing message in the log).  Moreover, the MODULE_DEVICE_TABLE()
of the ACPI video driver module matches all of the devices sharing the
ACPI companion with the ACPI video bus device.

To address this, make the core ACPI device enumeration code create an
auxiliary device for the ACPI video bus device object instead of a
platform device and switch over the ACPI video bus driver (once more)
to an auxiliary driver.

Auxiliary driver generally is a better match for ACPI video bus than
platform driver, among other things because the ACPI video bus device
does not require any resources to be allocated for it during
enumeration.  It also allows the ACPI video bus driver to stop abusing
device matching based on ACPI device IDs and it allows a special case
to be dropped from acpi_create_platform_device() because that function
need not worry about the ACPI video bus device any more.

Fixes: 02c057ddef ("ACPI: video: Convert the driver to a platform one")
Reported-by: Pratap Nirujogi <pratap.nirujogi@amd.com>
Closes: https://lore.kernel.org/linux-acpi/007e3390-6b2b-457e-83c7-c794c5952018@amd.com/
Tested-by: Pratap Nirujogi <pratap.nirujogi@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
[ rjw: Added AUXILIARY_BUS selection to CONFIG_ACPI to fix build issue ]
[ rjw: Fixed error path in acpi_create_video_bus_device() ]
Link: https://patch.msgid.link/5986516.DvuYhMxLoT@rafael.j.wysocki
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-03-10 15:08:21 +01:00
Chen Ni
bf29cb3641 perf annotate: Fix hashmap__new() error checking
The hashmap__new() function never returns NULL, it returns error
pointers. Fix the error checking to match.

Additionally, set src->samples to NULL to prevent any later code from
accidentally using the error pointer.

Fixes: d3e7cad6f3 ("perf annotate: Add a hashmap for symbol histogram")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tianyou Li <tianyou.li@intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-10 10:19:44 -03:00
Yuan Tan
329f0b9b48 netfilter: xt_IDLETIMER: reject rev0 reuse of ALARM timer labels
IDLETIMER revision 0 rules reuse existing timers by label and always call
mod_timer() on timer->timer.

If the label was created first by revision 1 with XT_IDLETIMER_ALARM,
the object uses alarm timer semantics and timer->timer is never initialized.
Reusing that object from revision 0 causes mod_timer() on an uninitialized
timer_list, triggering debugobjects warnings and possible panic when
panic_on_warn=1.

Fix this by rejecting revision 0 rule insertion when an existing timer with
the same label is of ALARM type.

Fixes: 68983a354a ("netfilter: xtables: Add snapshot of hardidletimer target")
Co-developed-by: Yifan Wu <yifanwucs@gmail.com>
Signed-off-by: Yifan Wu <yifanwucs@gmail.com>
Co-developed-by: Juefei Pu <tomapufckgml@gmail.com>
Signed-off-by: Juefei Pu <tomapufckgml@gmail.com>
Signed-off-by: Yuan Tan <tanyuan98@outlook.com>
Signed-off-by: Xin Liu <dstsmallbird@foxmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-10 14:10:43 +01:00
Hyunwoo Kim
6dcee8496d netfilter: nfnetlink_cthelper: fix OOB read in nfnl_cthelper_dump_table()
nfnl_cthelper_dump_table() has a 'goto restart' that jumps to a label
inside the for loop body.  When the "last" helper saved in cb->args[1]
is deleted between dump rounds, every entry fails the (cur != last)
check, so cb->args[1] is never cleared.  The for loop finishes with
cb->args[0] == nf_ct_helper_hsize, and the 'goto restart' jumps back
into the loop body bypassing the bounds check, causing an 8-byte
out-of-bounds read on nf_ct_helper_hash[nf_ct_helper_hsize].

The 'goto restart' block was meant to re-traverse the current bucket
when "last" is no longer found, but it was placed after the for loop
instead of inside it.  Move the block into the for loop body so that
the restart only occurs while cb->args[0] is still within bounds.

 BUG: KASAN: slab-out-of-bounds in nfnl_cthelper_dump_table+0x9f/0x1b0
 Read of size 8 at addr ffff888104ca3000 by task poc_cthelper/131
 Call Trace:
  nfnl_cthelper_dump_table+0x9f/0x1b0
  netlink_dump+0x333/0x880
  netlink_recvmsg+0x3e2/0x4b0
  sock_recvmsg+0xde/0xf0
  __sys_recvfrom+0x150/0x200
  __x64_sys_recvfrom+0x76/0x90
  do_syscall_64+0xc3/0x6e0

 Allocated by task 1:
  __kvmalloc_node_noprof+0x21b/0x700
  nf_ct_alloc_hashtable+0x65/0xd0
  nf_conntrack_helper_init+0x21/0x60
  nf_conntrack_init_start+0x18d/0x300
  nf_conntrack_standalone_init+0x12/0xc0

Fixes: 12f7a50533 ("netfilter: add user-space connection tracking helper infrastructure")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-10 14:10:42 +01:00
Hyunwoo Kim
f1ba83755d netfilter: nfnetlink_queue: fix entry leak in bridge verdict error path
nfqnl_recv_verdict() calls find_dequeue_entry() to remove the queue
entry from the queue data structures, taking ownership of the entry.
For PF_BRIDGE packets, it then calls nfqa_parse_bridge() to parse VLAN
attributes.  If nfqa_parse_bridge() returns an error (e.g. NFQA_VLAN
present but NFQA_VLAN_TCI missing), the function returns immediately
without freeing the dequeued entry or its sk_buff.

This leaks the nf_queue_entry, its associated sk_buff, and all held
references (net_device refcounts, struct net refcount).  Repeated
triggering exhausts kernel memory.

Fix this by dropping the entry via nfqnl_reinject() with NF_DROP verdict
on the error path, consistent with other error handling in this file.

Fixes: 8d45ff22f1 ("netfilter: bridge: nf queue verdict to use NFQA_VLAN and NFQA_L2HDR")
Reviewed-by: David Dull <monderasdor@gmail.com>
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-10 14:10:42 +01:00
David Dull
cfe770220a netfilter: x_tables: guard option walkers against 1-byte tail reads
When the last byte of options is a non-single-byte option kind, walkers
that advance with i += op[i + 1] ? : 1 can read op[i + 1] past the end
of the option area.

Add an explicit i == optlen - 1 check before dereferencing op[i + 1]
in xt_tcpudp and xt_dccp option walkers.

Fixes: 2e4e6a17af ("[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables")
Signed-off-by: David Dull <monderasdor@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-10 14:10:42 +01:00
Jenny Guanni Qu
d6d8cd2db2 netfilter: nft_set_pipapo: fix stack out-of-bounds read in pipapo_drop()
pipapo_drop() passes rulemap[i + 1].n to pipapo_unmap() as the
to_offset argument on every iteration, including the last one where
i == m->field_count - 1. This reads one element past the end of the
stack-allocated rulemap array (declared as rulemap[NFT_PIPAPO_MAX_FIELDS]
with NFT_PIPAPO_MAX_FIELDS == 16).

Although pipapo_unmap() returns early when is_last is true without
using the to_offset value, the argument is evaluated at the call site
before the function body executes, making this a genuine out-of-bounds
stack read confirmed by KASAN:

  BUG: KASAN: stack-out-of-bounds in pipapo_drop+0x50c/0x57c [nf_tables]
  Read of size 4 at addr ffff8000810e71a4

  This frame has 1 object:
   [32, 160) 'rulemap'

  The buggy address is at offset 164 -- exactly 4 bytes past the end
  of the rulemap array.

Pass 0 instead of rulemap[i + 1].n on the last iteration to avoid
the out-of-bounds read.

Fixes: 3c4287f620 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Signed-off-by: Jenny Guanni Qu <qguanni@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-10 14:10:42 +01:00
Florian Westphal
7cb9a23d7a netfilter: nf_tables: always walk all pending catchall elements
During transaction processing we might have more than one catchall element:
1 live catchall element and 1 pending element that is coming as part of the
new batch.

If the map holding the catchall elements is also going away, its
required to toggle all catchall elements and not just the first viable
candidate.

Otherwise, we get:
 WARNING: ./include/net/netfilter/nf_tables.h:1281 at nft_data_release+0xb7/0xe0 [nf_tables], CPU#2: nft/1404
 RIP: 0010:nft_data_release+0xb7/0xe0 [nf_tables]
 [..]
 __nft_set_elem_destroy+0x106/0x380 [nf_tables]
 nf_tables_abort_release+0x348/0x8d0 [nf_tables]
 nf_tables_abort+0xcf2/0x3ac0 [nf_tables]
 nfnetlink_rcv_batch+0x9c9/0x20e0 [..]

Fixes: 628bd3e49c ("netfilter: nf_tables: drop map element references from preparation phase")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-10 14:10:42 +01:00
Phil Sutter
b7cdc5a97d netfilter: nf_tables: Fix for duplicate device in netdev hooks
When handling NETDEV_REGISTER notification, duplicate device
registration must be avoided since the device may have been added by
nft_netdev_hook_alloc() already when creating the hook.

Suggested-by: Florian Westphal <fw@strlen.de>
Reported-by: syzbot+bb9127e278fa198e110c@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=bb9127e278fa198e110c
Fixes: a331b78a55 ("netfilter: nf_tables: Respect NETDEV_REGISTER events")
Tested-by: Helen Koike <koike@igalia.com>
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-10 14:10:42 +01:00
James Clark
e3741935a3 perf cs-etm: Sync coresight-pmu.h header with the kernel sources
Update the header to pull in the changes from commit 3285c471d0
("coresight: Remove misleading definitions").

Signed-off-by: James Clark <james.clark@linaro.org>
Requested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Leo Yan <leo.yan@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/557db631-aef8-43b1-9f45-fae75910ccb4@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-10 09:52:00 -03:00
James Clark
3c3b41e591 perf cs-etm: Finish removal of ETM_OPT_*
These #defines have been removed from the kernel headers in favour of
the string based PMU format attributes. Usages were previously removed
from the recording side of cs-etm in Perf. Finish the removal by
removing usages from the decode side too.

It's a straight replacement of the old #defines with the new register
bit definitions. Except cs_etm__setup_timeless_decoding() which wasn't
looking at the saved metadata and was instead hard coding an access to
'attr.config'. This was vulnerable to the same issue of .config being
moved to .config2 etc that the original removal of ETM_OPT_* tried to
fix. So fix that too.

Signed-off-by: James Clark <james.clark@linaro.org>
Tested-by: Leo Yan <leo.yan@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-10 09:50:44 -03:00
Andrzej Kacprowski
59bdbabcca accel/ivpu: Remove boot params address setting via MMIO register
The NPU 60XX uses the default boot params location specified
in the firmware image header, consistent with earlier generations.
Remove the unnecessary MMIO register write, freeing the AON register
for future use.

Fixes: 44e4c88951 ("accel/ivpu: Implement warm boot flow for NPU6 and unify boot handling")
Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Reviewed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Link: https://patch.msgid.link/20260305142226.194995-1-andrzej.kacprowski@linux.intel.com
(cherry picked from commit 81e62e7bf8b9309bf0febdf00940818f98bc23d8)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
2026-03-10 13:34:31 +01:00
Weiming Shi
6f1a9140ec net: add xmit recursion limit to tunnel xmit functions
Tunnel xmit functions (iptunnel_xmit, ip6tunnel_xmit) lack their own
recursion limit. When a bond device in broadcast mode has GRE tap
interfaces as slaves, and those GRE tunnels route back through the
bond, multicast/broadcast traffic triggers infinite recursion between
bond_xmit_broadcast() and ip_tunnel_xmit()/ip6_tnl_xmit(), causing
kernel stack overflow.

The existing XMIT_RECURSION_LIMIT (8) in the no-qdisc path is not
sufficient because tunnel recursion involves route lookups and full IP
output, consuming much more stack per level. Use a lower limit of 4
(IP_TUNNEL_RECURSION_LIMIT) to prevent overflow.

Add recursion detection using dev_xmit_recursion helpers directly in
iptunnel_xmit() and ip6tunnel_xmit() to cover all IPv4/IPv6 tunnel
paths including UDP encapsulated tunnels (VXLAN, Geneve, etc.).

Move dev_xmit_recursion helpers from net/core/dev.h to public header
include/linux/netdevice.h so they can be used by tunnel code.

 BUG: KASAN: stack-out-of-bounds in blake2s.constprop.0+0xe7/0x160
 Write of size 32 at addr ffff88810033fed0 by task kworker/0:1/11
 Workqueue: mld mld_ifc_work
 Call Trace:
  <TASK>
  __build_flow_key.constprop.0 (net/ipv4/route.c:515)
  ip_rt_update_pmtu (net/ipv4/route.c:1073)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:84)
  ip_tunnel_xmit (net/ipv4/ip_tunnel.c:847)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  ip_finish_output2 (net/ipv4/ip_output.c:237)
  ip_output (net/ipv4/ip_output.c:438)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:86)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  ip_finish_output2 (net/ipv4/ip_output.c:237)
  ip_output (net/ipv4/ip_output.c:438)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:86)
  ip_tunnel_xmit (net/ipv4/ip_tunnel.c:847)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  mld_sendpack
  mld_ifc_work
  process_one_work
  worker_thread
  </TASK>

Fixes: 745e20f1b6 ("net: add a recursion limit in xmit path")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260306160133.3852900-2-bestswngs@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 13:30:30 +01:00
Dongsheng Yang
a5f930eebd MAINTAINERS: update email address of Dongsheng Yang
My easystack email will be unreachable soon, update my email address to
linux.dev one.

Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-03-10 12:16:01 +01:00
Ilya Dryomov
c4c22b846e libceph: reject preamble if control segment is empty
While head_onwire_len() has a branch to handle ctrl_len == 0 case,
prepare_read_control() always sets up a kvec for the CRC meaning that
a non-empty control segment is effectively assumed.  All frames that
clients deal with meet that assumption, so let's make it official and
treat the preamble with an empty control segment as malformed.

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
2026-03-10 12:16:00 +01:00
Ilya Dryomov
a5a3737050 libceph: admit message frames only in CEPH_CON_S_OPEN state
Similar checks are performed for all control frames, but an early check
for message frames was missing.  process_message() is already set up to
terminate the loop in case the state changes while con->ops->dispatch()
handler is being executed.

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
2026-03-10 12:15:46 +01:00
Ilya Dryomov
69fb5d91bb libceph: prevent potential out-of-bounds reads in process_message_header()
If the message frame is (maliciously) corrupted in a way that the
length of the control segment ends up being less than the size of the
message header or a different frame is made to look like a message
frame, out-of-bounds reads may ensue in process_message_header().

Perform an explicit bounds check before decoding the message header.

Cc: stable@vger.kernel.org
Reported-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
2026-03-10 12:15:36 +01:00
Paolo Abeni
3228835877 Merge branch 'amd-xgbe-rx-adaptation-and-phy-handling-fixes'
Raju Rangoju says:

====================
amd-xgbe: RX adaptation and PHY handling fixes

This series fixes several issues in the amd-xgbe driver related to RX
adaptation and PHY handling in 10GBASE-KR mode, particularly when
auto-negotiation is disabled.

Patch 1 fixes link status handling during RX adaptation by correctly
reading the latched link status bit so transient link drops are
detected without losing the current state.

Patch 2 prevents CRC errors that can occur when performing RX
adaptation with auto-negotiation turned off. The driver now stops
TX/RX before re-triggering RX adaptation and only re-enables traffic
once adaptation completes and the link is confirmed up, ensuring
packets are not corrupted during the adaptation window.

Patch 3 restores the intended ordering of PHY reset relative to
phy_start(), making sure PHY settings are reset before the PHY is
started instead of afterwards.
====================

Link: https://patch.msgid.link/20260306111629.1515676-1-Raju.Rangoju@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 12:07:09 +01:00
Raju Rangoju
a8ba129af4 amd-xgbe: reset PHY settings before starting PHY
commit f93505f357 ("amd-xgbe: let the MAC manage PHY PM") moved
xgbe_phy_reset() from xgbe_open() to xgbe_start(), placing it after
phy_start(). As a result, the PHY settings were being reset after the
PHY had already started.

Reorder the calls so that the PHY settings are reset before
phy_start() is invoked.

Fixes: f93505f357 ("amd-xgbe: let the MAC manage PHY PM")
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20260306111629.1515676-4-Raju.Rangoju@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 12:07:07 +01:00
Raju Rangoju
27a4dd0c70 amd-xgbe: prevent CRC errors during RX adaptation with AN disabled
When operating in 10GBASE-KR mode with auto-negotiation disabled and RX
adaptation enabled, CRC errors can occur during the RX adaptation
process. This happens because the driver continues transmitting and
receiving packets while adaptation is in progress.

Fix this by stopping TX/RX immediately when the link goes down and RX
adaptation needs to be re-triggered, and only re-enabling TX/RX after
adaptation completes and the link is confirmed up. Introduce a flag to
track whether TX/RX was disabled for adaptation so it can be restored
correctly.

This prevents packets from being transmitted or received during the RX
adaptation window and avoids CRC errors from corrupted frames.

The flag tracking the data path state is synchronized with hardware
state in xgbe_start() to prevent stale state after device restarts.
This ensures that after a restart cycle (where xgbe_stop disables
TX/RX and xgbe_start re-enables them), the flag correctly reflects
that the data path is active.

Fixes: 4f3b20bfbb ("amd-xgbe: add support for rx-adaptation")
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20260306111629.1515676-3-Raju.Rangoju@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 12:07:07 +01:00
Raju Rangoju
6485cb96be amd-xgbe: fix link status handling in xgbe_rx_adaptation
The link status bit is latched low to allow detection of momentary
link drops. If the status indicates that the link is already down,
read it again to obtain the current state.

Fixes: 4f3b20bfbb ("amd-xgbe: add support for rx-adaptation")
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20260306111629.1515676-2-Raju.Rangoju@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 12:07:06 +01:00
Shashank Balaji
8cc7dd77a1 x86/apic: Disable x2apic on resume if the kernel expects so
When resuming from s2ram, firmware may re-enable x2apic mode, which may have
been disabled by the kernel during boot either because it doesn't support IRQ
remapping or for other reasons. This causes the kernel to continue using the
xapic interface, while the hardware is in x2apic mode, which causes hangs.
This happens on defconfig + bare metal + s2ram.

Fix this in lapic_resume() by disabling x2apic if the kernel expects it to be
disabled, i.e. when x2apic_mode = 0.

The ACPI v6.6 spec, Section 16.3 [1] says firmware restores either the
pre-sleep configuration or initial boot configuration for each CPU, including
MSR state:

  When executing from the power-on reset vector as a result of waking from an
  S2 or S3 sleep state, the platform firmware performs only the hardware
  initialization required to restore the system to either the state the
  platform was in prior to the initial operating system boot, or to the
  pre-sleep configuration state. In multiprocessor systems, non-boot
  processors should be placed in the same state as prior to the initial
  operating system boot.

  (further ahead)

  If this is an S2 or S3 wake, then the platform runtime firmware restores
  minimum context of the system before jumping to the waking vector. This
  includes:

	CPU configuration. Platform runtime firmware restores the pre-sleep
	configuration or initial boot configuration of each CPU (MSR, MTRR,
	firmware update, SMBase, and so on). Interrupts must be disabled (for
	IA-32 processors, disabled by CLI instruction).

	(and other things)

So at least as per the spec, re-enablement of x2apic by the firmware is
allowed if "x2apic on" is a part of the initial boot configuration.

  [1] https://uefi.org/specs/ACPI/6.6/16_Waking_and_Sleeping.html#initialization

  [ bp: Massage. ]

Fixes: 6e1cb38a2a ("x64, x2apic/intr-remap: add x2apic support, including enabling interrupt-remapping")
Co-developed-by: Rahul Bukte <rahul.bukte@sony.com>
Signed-off-by: Rahul Bukte <rahul.bukte@sony.com>
Signed-off-by: Shashank Balaji <shashank.mahadasyam@sony.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260306-x2apic-fix-v2-1-bee99c12efa3@sony.com
2026-03-10 11:57:56 +01:00
Chengfeng Ye
7d86aa41c0 mctp: route: hold key->lock in mctp_flow_prepare_output()
mctp_flow_prepare_output() checks key->dev and may call
mctp_dev_set_key(), but it does not hold key->lock while doing so.

mctp_dev_set_key() and mctp_dev_release_key() are annotated with
__must_hold(&key->lock), so key->dev access is intended to be
serialized by key->lock. The mctp_sendmsg() transmit path reaches
mctp_flow_prepare_output() via mctp_local_output() -> mctp_dst_output()
without holding key->lock, so the check-and-set sequence is racy.

Example interleaving:

  CPU0                                  CPU1
  ----                                  ----
  mctp_flow_prepare_output(key, devA)
    if (!key->dev)  // sees NULL
                                        mctp_flow_prepare_output(
                                            key, devB)
                                          if (!key->dev)  // still NULL
                                          mctp_dev_set_key(devB, key)
                                            mctp_dev_hold(devB)
                                            key->dev = devB
    mctp_dev_set_key(devA, key)
      mctp_dev_hold(devA)
      key->dev = devA   // overwrites devB

Now both devA and devB references were acquired, but only the final
key->dev value is tracked for release. One reference can be lost,
causing a resource leak as mctp_dev_release_key() would only decrease
the reference on one dev.

Fix by taking key->lock around the key->dev check and
mctp_dev_set_key() call.

Fixes: 67737c4572 ("mctp: Pass flow data & flow release events to drivers")
Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
Link: https://patch.msgid.link/20260306031402.857224-1-dg573847474@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 11:38:36 +01:00
Jiayuan Chen
950803f725 bonding: fix type confusion in bond_setup_by_slave()
kernel BUG at net/core/skbuff.c:2306!
Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
RIP: 0010:pskb_expand_head+0xa08/0xfe0 net/core/skbuff.c:2306
RSP: 0018:ffffc90004aff760 EFLAGS: 00010293
RAX: 0000000000000000 RBX: ffff88807e3c8780 RCX: ffffffff89593e0e
RDX: ffff88807b7c4900 RSI: ffffffff89594747 RDI: ffff88807b7c4900
RBP: 0000000000000820 R08: 0000000000000005 R09: 0000000000000000
R10: 00000000961a63e0 R11: 0000000000000000 R12: ffff88807e3c8780
R13: 00000000961a6560 R14: dffffc0000000000 R15: 00000000961a63e0
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fe1a0ed8df0 CR3: 000000002d816000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 ipgre_header+0xdd/0x540 net/ipv4/ip_gre.c:900
 dev_hard_header include/linux/netdevice.h:3439 [inline]
 packet_snd net/packet/af_packet.c:3028 [inline]
 packet_sendmsg+0x3ae5/0x53c0 net/packet/af_packet.c:3108
 sock_sendmsg_nosec net/socket.c:727 [inline]
 __sock_sendmsg net/socket.c:742 [inline]
 ____sys_sendmsg+0xa54/0xc30 net/socket.c:2592
 ___sys_sendmsg+0x190/0x1e0 net/socket.c:2646
 __sys_sendmsg+0x170/0x220 net/socket.c:2678
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0x106/0xf80 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fe1a0e6c1a9

When a non-Ethernet device (e.g. GRE tunnel) is enslaved to a bond,
bond_setup_by_slave() directly copies the slave's header_ops to the
bond device:

    bond_dev->header_ops = slave_dev->header_ops;

This causes a type confusion when dev_hard_header() is later called
on the bond device. Functions like ipgre_header(), ip6gre_header(),all use
netdev_priv(dev) to access their device-specific private data. When
called with the bond device, netdev_priv() returns the bond's private
data (struct bonding) instead of the expected type (e.g. struct
ip_tunnel), leading to garbage values being read and kernel crashes.

Fix this by introducing bond_header_ops with wrapper functions that
delegate to the active slave's header_ops using the slave's own
device. This ensures netdev_priv() in the slave's header functions
always receives the correct device.

The fix is placed in the bonding driver rather than individual device
drivers, as the root cause is bond blindly inheriting header_ops from
the slave without considering that these callbacks expect a specific
netdev_priv() layout.

The type confusion can be observed by adding a printk in
ipgre_header() and running the following commands:

    ip link add dummy0 type dummy
    ip addr add 10.0.0.1/24 dev dummy0
    ip link set dummy0 up
    ip link add gre1 type gre local 10.0.0.1
    ip link add bond1 type bond mode active-backup
    ip link set gre1 master bond1
    ip link set gre1 up
    ip link set bond1 up
    ip addr add fe80::1/64 dev bond1

Fixes: 1284cd3a2b ("bonding: two small fixes for IPoIB support")
Suggested-by: Jay Vosburgh <jv@jvosburgh.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260306021508.222062-1-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 11:33:58 +01:00
Wenyuan Li
47bba09b14 can: hi311x: hi3110_open(): add check for hi3110_power_enable() return value
In hi3110_open(), the return value of hi3110_power_enable() is not checked.
If power enable fails, the device may not function correctly, while the
driver still returns success.

Add a check for the return value and propagate the error accordingly.

Signed-off-by: Wenyuan Li <2063309626@qq.com>
Link: https://patch.msgid.link/tencent_B5E2E7528BB28AA8A2A56E16C49BD58B8B07@qq.com
Fixes: 57e83fb9b7 ("can: hi311x: Add Holt HI-311x CAN driver")
[mkl: adjust subject, commit message and jump label]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-10 11:12:52 +01:00
Haibo Chen
1eea46908c can: dev: keep the max bitrate error at 5%
Commit b360a13d44 ("can: dev: print bitrate error with two decimal
digits") changed calculation of the bit rate error from on-tenth of a
percent to on-hundredth of a percent, but forgot to adjust the scale of the
CAN_CALC_MAX_ERROR constant.

Keeping the existing logic unchanged: Only when the bitrate error exceeds
5% should an error be returned. Otherwise, simply output a warning log.

Fixes: b360a13d44 ("can: dev: print bitrate error with two decimal digits")
Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://patch.msgid.link/20260306-can-fix-v1-1-ac526cec6777@nxp.com
Cc: stable@kernel.org
[mkl: improve commit message]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-10 11:12:52 +01:00
Haiyue Wang
e3f5e0f22c mctp: i2c: fix skb memory leak in receive path
When 'midev->allow_rx' is false, the newly allocated skb isn't consumed
by netif_rx(), it needs to free the skb directly.

Fixes: f5b8abf9fc ("mctp i2c: MCTP I2C binding driver")
Signed-off-by: Haiyue Wang <haiyuewa@163.com>
Link: https://patch.msgid.link/20260305143240.97592-1-haiyuewa@163.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 11:10:55 +01:00
Harry Yoo
8dafa9f590 mm/slab: fix an incorrect check in obj_exts_alloc_size()
obj_exts_alloc_size() prevents recursive allocation of slabobj_ext
array from the same cache, to avoid creating slabs that are never freed.

There is one mistake that returns the original size when memory
allocation profiling is disabled. The assumption was that
memcg-triggered slabobj_ext allocation is always served from
KMALLOC_CGROUP type. But this is wrong [1]: when the caller specifies
both __GFP_RECLAIMABLE and __GFP_ACCOUNT with SLUB_TINY enabled, the
allocation is served from normal kmalloc. This is because kmalloc_type()
prioritizes __GFP_RECLAIMABLE over __GFP_ACCOUNT, and SLUB_TINY aliases
KMALLOC_RECLAIM with KMALLOC_NORMAL.

As a result, the recursion guard is bypassed and the problematic slabs
can be created. Fix this by removing the mem_alloc_profiling_enabled()
check entirely. The remaining is_kmalloc_normal() check is still
sufficient to detect whether the cache is of KMALLOC_NORMAL type and
avoid bumping the size if it's not.

Without SLUB_TINY, no functional change intended.
With SLUB_TINY, allocations with __GFP_ACCOUNT|__GFP_RECLAIMABLE
now allocate a larger array if the sizes equal.

Reported-by: Zw Tang <shicenci@gmail.com>
Fixes: 280ea9c315 ("mm/slab: avoid allocating slabobj_ext array from its own slab")
Closes: https://lore.kernel.org/linux-mm/CAPHJ_VKuMKSke8b11AZQw1PTSFN4n2C0gFxC6xGOG0ZLHgPmnA@mail.gmail.com [1]
Cc: stable@vger.kernel.org
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Link: https://patch.msgid.link/20260309072219.22653-1-harry.yoo@oracle.com
Tested-by: Zw Tang <shicenci@gmail.com>
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-03-10 11:02:54 +01:00
Marc Zyngier
a79f7b4aeb KVM: arm64: pkvm: Don't reprobe for ICH_VTR_EL2.TDS on CPU hotplug
Hotplugging a CPU off and back on fails with pKVM, as we try to
probe for ICH_VTR_EL2.TDS. In a non-VHE setup, this is achieved
by using an EL2 stub helper. However, the stubs are out of reach
once pKVM has deprivileged the kernel. The CPU never boots.

Since pKVM doesn't allow late onlining of CPUs, we can detect
that protected mode is enforced early on, and return the current
state of the capability.

Fixes: 2a28810cbb ("KVM: arm64: GICv3: Detect and work around the lack of ICV_DIR_EL1 trapping")
Reported-by: Vincent Donnefort <vdonnefort@google.com>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://patch.msgid.link/20260310085433.3936742-1-maz@kernel.org
Cc: stable@vger.kernel.org
2026-03-10 09:48:45 +00:00
Paolo Abeni
fdfd103aec Merge branch 'net-enetc-fix-fallback-phy-address-handling-and-do-not-skip-setting-for-addr-0'
Wei Fang says:

====================
net: enetc: fix fallback PHY address handling and do not skip setting for addr 0

There are two potential issues when PHY address 0 is used on the board,
see the commit messages of the patches for more details.

v1: https://lore.kernel.org/imx/20260303103047.228005-1-wei.fang@nxp.com/
====================

Link: https://patch.msgid.link/20260305031211.904812-1-wei.fang@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 10:36:48 +01:00
Wei Fang
dbe17e7783 net: enetc: do not skip setting LaBCR[MDIO_PHYAD_PRTAD] for addr 0
Given that some platforms may use PHY address 0 (I suppose the PHY may
not treat address 0 as a broadcast address or default response address).
It is possible for some boards to connect multiple PHYs to the same
ENETC MAC, for example:

  - a PHY with a non-zero address connects to ENETC MAC through SGMII
    interface (selected via DTS_A)
  - a PHY with address 0 connects to ENETC MAC through RGMII interface
    (selected via DTS_B)

For the case where the ENETC port MDIO is used to manage the PHY, when
switching from DTS_A to DTS_B via soft reboot, LaBCR[MDIO_PHYAD_PRTAD]
must be updated to 0 because the NETCMIX block is not reset during soft
reboot. However, the current driver explicitly skips configuring address
0, causing LaBCR[MDIO_PHYAD_PRTAD] to retain its old value.

Therefore, remove the special-case skip of PHY address 0 so that valid
configurations using address 0 are properly supported.

Fixes: 6633df05f3 ("net: enetc: set the external PHY address in IERB for port MDIO usage")
Fixes: 50bfd9c06f ("net: enetc: set external PHY address in IERB for i.MX94 ENETC")
Reviewed-by: Clark Wang <xiaoning.wang@nxp.com>
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Link: https://patch.msgid.link/20260305031211.904812-3-wei.fang@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 10:36:46 +01:00
Wei Fang
246953f33e net: enetc: fix incorrect fallback PHY address handling
The current netc_get_phy_addr() implementation falls back to PHY address
0 when the "mdio" node or the PHY child node is missing. On i.MX95, this
causes failures when a real PHY is actually assigned address 0 and is
managed through the EMDIO interface. Because the bit 0 of phy_mask will
be set, leading imx95_enetc_mdio_phyaddr_config() to return an error, and
the netc_blk_ctrl driver probe subsequently fails. Fix this by returning
-ENODEV when neither an "mdio" node nor any PHY node is present, it means
that ENETC port MDIO is not used to manage the PHY, so there is no need
to configure LaBCR[MDIO_PHYAD_PRTAD].

Reported-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Closes: https://lore.kernel.org/all/7825188.GXAFRqVoOG@steina-w
Fixes: 6633df05f3 ("net: enetc: set the external PHY address in IERB for port MDIO usage")
Reviewed-by: Clark Wang <xiaoning.wang@nxp.com>
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Link: https://patch.msgid.link/20260305031211.904812-2-wei.fang@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-10 10:36:46 +01:00
Dmitry Torokhov
fb22bb9701 pinctrl: renesas: rza1: Normalize return value of gpio_get()
The GPIO .get() callback is expected to return 0 or 1 (or a negative
error code).  Ensure that the value returned by rza1_gpio_get() is
normalized to the [0, 1] range.

Fixes: 86ef402d80 ("gpiolib: sanitize the return value of gpio_chip::get()")
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/aZYnyl-Nf4S1U2yj@google.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-10 10:33:47 +01:00
Neil Armstrong
2634475324 pinctrl: qcom: spmi-gpio: implement .get_direction()
GPIO controller driver should typically implement the .get_direction()
callback as GPIOLIB internals may try to use it to determine the state
of a pin. Since introduction of shared proxy, it prints a warning splat
when using a shared spmi gpio.

The implementation is not easy because the controller supports enabling
the input and output logic at the same time, so we aligns on the
behaviour of the .get() operation and return -EINVAL in other
situations.

Fixes: eadff30244 ("pinctrl: Qualcomm SPMI PMIC GPIO pin controller driver")
Fixes: d7b5f5cc5e ("pinctrl: qcom: spmi-gpio: Add support for GPIO LV/MV subtype")
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-03-10 10:21:52 +01:00
Long Li
186ac39b8a xfs: ensure dquot item is deleted from AIL only after log shutdown
In xfs_qm_dqflush(), when a dquot flush fails due to corruption
(the out_abort error path), the original code removed the dquot log
item from the AIL before calling xfs_force_shutdown(). This ordering
introduces a subtle race condition that can lead to data loss after
a crash.

The AIL tracks the oldest dirty metadata in the journal. The position
of the tail item in the AIL determines the log tail LSN, which is the
oldest LSN that must be preserved for crash recovery. When an item is
removed from the AIL, the log tail can advance past the LSN of that item.

The race window is as follows: if the dquot item happens to be at
the tail of the log, removing it from the AIL allows the log tail
to advance. If a concurrent log write is sampling the tail LSN at
the same time and subsequently writes a complete checkpoint (i.e.,
one containing a commit record) to disk before the shutdown takes
effect, the journal will no longer protect the dquot's last
modification. On the next mount, log recovery will not replay the
dquot changes, even though they were never written back to disk,
resulting in silent data loss.

Fix this by calling xfs_force_shutdown() before xfs_trans_ail_delete()
in the out_abort path. Once the log is shut down, no new log writes
can complete with an updated tail LSN, making it safe to remove the
dquot item from the AIL.

Cc: stable@vger.kernel.org
Fixes: b707fffda6 ("xfs: abort consistently on dquot flush failure")
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-10 09:40:38 +01:00
Long Li
f1d77b863b xfs: remove redundant set null for ip->i_itemp
ip->i_itemp has been set null in xfs_inode_item_destroy(), so there is
no need set it null again in xfs_inode_free_callback().

Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-10 09:39:53 +01:00
Takashi Iwai
df1d8abf36 ALSA: usb-audio: Check endpoint numbers at parsing Scarlett2 mixer interfaces
The Scarlett2 mixer quirk in USB-audio driver may hit a NULL
dereference when a malformed USB descriptor is passed, since it
assumes the presence of an endpoint in the parsed interface in
scarlett2_find_fc_interface(), as reported by fuzzer.

For avoiding the NULL dereference, just add the sanity check of
bNumEndpoints and skip the invalid interface.

Reported-by: syzbot+8f29539ef9a1c8334f42@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/69acbbe1.050a0220.310d8.0001.GAE@google.com
Reported-by: syzbot+ae893a8901067fde2741@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/69acf72a.050a0220.310d8.0004.GAE@google.com
Cc: <stable@vger.kernel.org>
Link: https://patch.msgid.link/20260309104632.141895-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-10 09:33:57 +01:00
Arun R Murthy
335b237d90 drm/i915/dp: Read ALPM caps after DPCD init
For eDP read the ALPM DPCD caps after DPCD initalization and just before
the PSR init.

v2: Move intel_alpm_init to intel_edp_init_dpcd (Jouni)
v3: Add Fixes with commit-id (Jouni)
v4: Separated the alpm dpcd read caps from alpm_init and moved to
intel_edp_init_dpcd.
v5: Read alpm_caps always for eDP irrespective of the eDP version (Jouni)
v6: replace drm_dp_dpcd_readb with drm_dp_dpcd_read_byte (Jouni)

Fixes: 15438b3259 ("drm/i915/alpm: Add compute config for lobf")
Signed-off-by: Arun R Murthy <arun.r.murthy@intel.com>
Reviewed-by: Animesh Manna <animesh.manna@intel.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patch.msgid.link/20260304072157.1123283-1-arun.r.murthy@intel.com
(cherry picked from commit 88442ba208dd5d3405de3f5000cf5b2c86876ae3)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
2026-03-10 08:26:33 +00:00
Jouni Högander
5923a6e045 drm/i915/psr: Write DSC parameters on Selective Update in ET mode
There are slice row per frame and pic height parameters in DSC that needs
to be configured on every Selective Update in Early Transport mode. Use
helper provided by DSC code to configure these on Selective Update when in
Early Transport mode. Also fill crtc_state->psr2_su_area with full frame
area on full frame update for DSC calculation.

v2: move psr2_su_area under skip_sel_fetch_set_loop label

Bspec: 68927, 71709
Fixes: 467e4e061c ("drm/i915/psr: Enable psr2 early transport as possible")
Cc: <stable@vger.kernel.org> # v6.9+
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Link: https://patch.msgid.link/20260304113011.626542-5-jouni.hogander@intel.com
(cherry picked from commit 3140af2fab505a4cd47d516284529bf1585628be)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
2026-03-10 08:26:33 +00:00
Jouni Högander
bb5f1cd101 drm/i915/dsc: Add helper for writing DSC Selective Update ET parameters
There are slice row per frame and pic height configuration in DSC Selective
Update Parameter Set 1 register. Add helper for configuring these.

v2:
  - Add WARN_ON_ONCE if vdsc instances per pipe > 2
  - instead of checking vdsc instances per pipe being > 1 check == 2

Bspec: 71709
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Link: https://patch.msgid.link/20260304113011.626542-4-jouni.hogander@intel.com
(cherry picked from commit c8698d61aeb3f70fe33761ee9d3d0e131b5bc2eb)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
[tursulin: fixup forward declaration conflict]
2026-03-10 08:26:12 +00:00
Jouni Högander
c2c79c6d5b drm/i915/dsc: Add Selective Update register definitions
Add definitions for DSC_SU_PARAMETER_SET_0_DSC0 and
DSC_SU_PARAMETER_SET_0_DSC1 registers. These are for Selective Update Early
Transport configuration.

Bspec: 71709
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Link: https://patch.msgid.link/20260304113011.626542-3-jouni.hogander@intel.com
(cherry picked from commit 24f96d903daf3dcf8fafe84d3d22b80ef47ba493)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
2026-03-10 08:22:10 +00:00
Jouni Högander
1be2fca84f drm/i915/psr: Repeat Selective Update area alignment
Currently we are aligning Selective Update area to cover cursor fully if
needed only once. It may happen that cursor is in Selective Update area
after pipe alignment and after that covering cursor plane only
partially. Fix this by looping alignment as long as alignment isn't needed
anymore.

v2:
  - do not unecessarily loop if cursor was already fully covered
  - rename aligned as su_area_changed

Fixes: 1bff93b8bc ("drm/i915/psr: Extend SU area to cover cursor fully if needed")
Cc: <stable@vger.kernel.org> # v6.9+
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Link: https://patch.msgid.link/20260304113011.626542-2-jouni.hogander@intel.com
(cherry picked from commit 681e12440d8b110350a5709101169f319e10ccbb)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
2026-03-10 08:11:34 +00:00
Janusz Krzysztofik
029ae06743 drm/i915: Fix potential overflow of shmem scatterlist length
When a scatterlists table of a GEM shmem object of size 4 GB or more is
populated with pages allocated from a folio, unsigned int .length
attribute of a scatterlist may get overflowed if total byte length of
pages allocated to that single scatterlist happens to reach or cross the
4GB limit.  As a consequence, users of the object may suffer from hitting
unexpected, premature end of the object's backing pages.

[278.780187] ------------[ cut here ]------------
[278.780377] WARNING: CPU: 1 PID: 2326 at drivers/gpu/drm/i915/i915_mm.c:55 remap_sg+0x199/0x1d0 [i915]
...
[278.780654] CPU: 1 UID: 0 PID: 2326 Comm: gem_mmap_offset Tainted: G S   U              6.17.0-rc1-CI_DRM_16981-ged823aaa0607+ #1 PREEMPT(voluntary)
[278.780656] Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER
[278.780658] Hardware name: Intel Corporation Meteor Lake Client Platform/MTL-P LP5x T3 RVP, BIOS MTLPFWI1.R00.3471.D91.2401310918 01/31/2024
[278.780659] RIP: 0010:remap_sg+0x199/0x1d0 [i915]
...
[278.780786] Call Trace:
[278.780787]  <TASK>
[278.780788]  ? __apply_to_page_range+0x3e6/0x910
[278.780795]  ? __pfx_remap_sg+0x10/0x10 [i915]
[278.780906]  apply_to_page_range+0x14/0x30
[278.780908]  remap_io_sg+0x14d/0x260 [i915]
[278.781013]  vm_fault_cpu+0xd2/0x330 [i915]
[278.781137]  __do_fault+0x3a/0x1b0
[278.781140]  do_fault+0x322/0x640
[278.781143]  __handle_mm_fault+0x938/0xfd0
[278.781150]  handle_mm_fault+0x12c/0x300
[278.781152]  ? lock_mm_and_find_vma+0x4b/0x760
[278.781155]  do_user_addr_fault+0x2d6/0x8e0
[278.781160]  exc_page_fault+0x96/0x2c0
[278.781165]  asm_exc_page_fault+0x27/0x30
...

That issue was apprehended by the author of a change that introduced it,
and potential risk even annotated with a comment, but then never addressed.

When adding folio pages to a scatterlist table, take care of byte length
of any single scatterlist not exceeding max_segment.

Fixes: 0b62af28f2 ("i915: convert shmem_sg_free_table() to use a folio_batch")
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14809
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: stable@vger.kernel.org # v6.5+
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/20260224094944.2447913-2-janusz.krzysztofik@linux.intel.com
(cherry picked from commit 06249b4e691a75694c014a61708c007fb5755f60)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
2026-03-10 08:11:31 +00:00
Ville Syrjälä
237aab5496 drm/i915/vrr: Configure VRR timings after enabling TRANS_DDI_FUNC_CTL
Apparently ICL may hang with an MCE if we write TRANS_VRR_VMAX/FLIPLINE
before enabling TRANS_DDI_FUNC_CTL.

Personally I was only able to reproduce a hang (on an Dell XPS 7390
2-in-1) with an external display connected via a dock using a dodgy
type-C cable that made the link training fail. After the failed
link training the machine would hang. TGL seemed immune to the
problem for whatever reason.

BSpec does tell us to configure VRR after enabling TRANS_DDI_FUNC_CTL
as well. The DMC firmware also does the VRR restore in two stages:
- first stage seems to be unconditional and includes TRANS_VRR_CTL
  and a few other VRR registers, among other things
- second stage is conditional on the DDI being enabled,
  and includes TRANS_DDI_FUNC_CTL and TRANS_VRR_VMAX/VMIN/FLIPLINE,
  among other things

So let's reorder the steps to match to avoid the hang, and
toss in an extra WARN to make sure we don't screw this up later.

BSpec: 22243
Cc: stable@vger.kernel.org
Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reported-by: Benjamin Tissoires <bentiss@kernel.org>
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15777
Tested-by: Benjamin Tissoires <bentiss@kernel.org>
Fixes: dda7dcd9da ("drm/i915/vrr: Use fixed timings for platforms that support VRR")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patch.msgid.link/20260303095414.4331-1-ville.syrjala@linux.intel.com
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
(cherry picked from commit 93f3a267c3dd4d811b224bb9e179a10d81456a74)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
2026-03-10 08:11:29 +00:00
Pavan Chebbi
0d9a60a061 bnxt_en: Fix RSS table size check when changing ethtool channels
When changing channels, the current check in bnxt_set_channels()
is not checking for non-default RSS contexts when the RSS table size
changes. The current check for IFF_RXFH_CONFIGURED is only sufficient
for the default RSS context. Expand the check to include the presence
of any non-default RSS contexts.

Allowing such change will result in incorrect configuration of the
context's RSS table when the table size changes.

Fixes: b3d0083caf ("bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh()")
Reported-by: Björn Töpel <bjorn@kernel.org>
Link: https://lore.kernel.org/netdev/20260303181535.2671734-1-bjorn@kernel.org/
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260306225854.3575672-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 19:49:47 -07:00
Jakub Kicinski
183f682591 Merge branch 'net-usb-lan78xx-accumulated-bug-fixes'
Oleksij Rempel says:

====================
net: usb: lan78xx: accumulated bug fixes

This series contains a collection of standalone bug fixes for the
Microchip LAN78xx driver, addressing packet handling, TX statistics,
invalid register accesses, and a kernel warning during disconnect.
====================

Link: https://patch.msgid.link/20260305143429.530909-1-o.rempel@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 19:48:39 -07:00
Oleksij Rempel
312c816c6b net: usb: lan78xx: fix WARN in __netif_napi_del_locked on disconnect
Remove redundant netif_napi_del() call from disconnect path.

A WARN may be triggered in __netif_napi_del_locked() during USB device
disconnect:

  WARNING: CPU: 0 PID: 11 at net/core/dev.c:7417 __netif_napi_del_locked+0x2b4/0x350

This happens because netif_napi_del() is called in the disconnect path while
NAPI is still enabled. However, it is not necessary to call netif_napi_del()
explicitly, since unregister_netdev() will handle NAPI teardown automatically
and safely. Removing the redundant call avoids triggering the warning.

Full trace:
 lan78xx 1-1:1.0 enu1: Failed to read register index 0x000000c4. ret = -ENODEV
 lan78xx 1-1:1.0 enu1: Failed to set MAC down with error -ENODEV
 lan78xx 1-1:1.0 enu1: Link is Down
 lan78xx 1-1:1.0 enu1: Failed to read register index 0x00000120. ret = -ENODEV
 ------------[ cut here ]------------
 WARNING: CPU: 0 PID: 11 at net/core/dev.c:7417 __netif_napi_del_locked+0x2b4/0x350
 Modules linked in: flexcan can_dev fuse
 CPU: 0 UID: 0 PID: 11 Comm: kworker/0:1 Not tainted 6.16.0-rc2-00624-ge926949dab03 #9 PREEMPT
 Hardware name: SKOV IMX8MP CPU revC - bd500 (DT)
 Workqueue: usb_hub_wq hub_event
 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
 pc : __netif_napi_del_locked+0x2b4/0x350
 lr : __netif_napi_del_locked+0x7c/0x350
 sp : ffffffc085b673c0
 x29: ffffffc085b673c0 x28: ffffff800b7f2000 x27: ffffff800b7f20d8
 x26: ffffff80110bcf58 x25: ffffff80110bd978 x24: 1ffffff0022179eb
 x23: ffffff80110bc000 x22: ffffff800b7f5000 x21: ffffff80110bc000
 x20: ffffff80110bcf38 x19: ffffff80110bcf28 x18: dfffffc000000000
 x17: ffffffc081578940 x16: ffffffc08284cee0 x15: 0000000000000028
 x14: 0000000000000006 x13: 0000000000040000 x12: ffffffb0022179e8
 x11: 1ffffff0022179e7 x10: ffffffb0022179e7 x9 : dfffffc000000000
 x8 : 0000004ffdde8619 x7 : ffffff80110bcf3f x6 : 0000000000000001
 x5 : ffffff80110bcf38 x4 : ffffff80110bcf38 x3 : 0000000000000000
 x2 : 0000000000000000 x1 : 1ffffff0022179e7 x0 : 0000000000000000
 Call trace:
  __netif_napi_del_locked+0x2b4/0x350 (P)
  lan78xx_disconnect+0xf4/0x360
  usb_unbind_interface+0x158/0x718
  device_remove+0x100/0x150
  device_release_driver_internal+0x308/0x478
  device_release_driver+0x1c/0x30
  bus_remove_device+0x1a8/0x368
  device_del+0x2e0/0x7b0
  usb_disable_device+0x244/0x540
  usb_disconnect+0x220/0x758
  hub_event+0x105c/0x35e0
  process_one_work+0x760/0x17b0
  worker_thread+0x768/0xce8
  kthread+0x3bc/0x690
  ret_from_fork+0x10/0x20
 irq event stamp: 211604
 hardirqs last  enabled at (211603): [<ffffffc0828cc9ec>] _raw_spin_unlock_irqrestore+0x84/0x98
 hardirqs last disabled at (211604): [<ffffffc0828a9a84>] el1_dbg+0x24/0x80
 softirqs last  enabled at (211296): [<ffffffc080095f10>] handle_softirqs+0x820/0xbc8
 softirqs last disabled at (210993): [<ffffffc080010288>] __do_softirq+0x18/0x20
 ---[ end trace 0000000000000000 ]---
 lan78xx 1-1:1.0 enu1: failed to kill vid 0081/0

Fixes: e110bc8258 ("net: usb: lan78xx: Convert to PHYLINK for improved PHY and MAC management")
Cc: stable@vger.kernel.org
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/20260305143429.530909-5-o.rempel@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 19:48:33 -07:00
Oleksij Rempel
d9cc0e440f net: usb: lan78xx: skip LTM configuration for LAN7850
Do not configure Latency Tolerance Messaging (LTM) on USB 2.0 hardware.

The LAN7850 is a High-Speed (USB 2.0) only device and does not support
SuperSpeed features like LTM. Currently, the driver unconditionally
attempts to configure LTM registers during initialization. On the
LAN7850, these registers do not exist, resulting in writes to invalid
or undocumented memory space.

This issue was identified during a port to the regmap API with strict
register validation enabled. While no functional issues or crashes have
been observed from these invalid writes, bypassing LTM initialization
on the LAN7850 ensures the driver strictly adheres to the hardware's
valid register map.

Fixes: 55d7de9de6 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Cc: stable@vger.kernel.org
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/20260305143429.530909-4-o.rempel@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 19:48:33 -07:00
Oleksij Rempel
50988747c3 net: usb: lan78xx: fix TX byte statistics for small packets
Account for hardware auto-padding in TX byte counters to reflect actual
wire traffic.

The LAN7850 hardware automatically pads undersized frames to the minimum
Ethernet frame length (ETH_ZLEN, 60 bytes). However, the driver tracks
the network statistics based on the unpadded socket buffer length. This
results in the tx_bytes counter under-reporting the actual physical
bytes placed on the Ethernet wire for small packets (like short ARP or
ICMP requests).

Use max_t() to ensure the transmission statistics accurately account for
the hardware-generated padding.

Fixes: d383216a7e ("lan78xx: Introduce Tx URB processing improvements")
Cc: stable@vger.kernel.org
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/20260305143429.530909-3-o.rempel@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 19:48:33 -07:00
Oleksij Rempel
e4f774a0cc net: usb: lan78xx: fix silent drop of packets with checksum errors
Do not drop packets with checksum errors at the USB driver level;
pass them to the network stack.

Previously, the driver dropped all packets where the 'Receive Error
Detected' (RED) bit was set, regardless of the specific error type. This
caused packets with only IP or TCP/UDP checksum errors to be dropped
before reaching the kernel, preventing the network stack from accounting
for them or performing software fallback.

Add a mask for hard hardware errors to safely drop genuinely corrupt
frames, while allowing checksum-errored frames to pass with their
ip_summed field explicitly set to CHECKSUM_NONE.

Fixes: 55d7de9de6 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Cc: stable@vger.kernel.org
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/20260305143429.530909-2-o.rempel@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 19:48:33 -07:00
Eric Dumazet
7a85d370bb MAINTAINERS: include/net/tc_wrapper.h belongs to TC subsystem
include/net/tc_wrapper.h changes should be reviewed by TC maintainers.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260307120607.3504191-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 18:59:28 -07:00
Mehul Rao
b2662e7593 net: nexthop: fix percpu use-after-free in remove_nh_grp_entry
When removing a nexthop from a group, remove_nh_grp_entry() publishes
the new group via rcu_assign_pointer() then immediately frees the
removed entry's percpu stats with free_percpu(). However, the
synchronize_net() grace period in the caller remove_nexthop_from_groups()
runs after the free. RCU readers that entered before the publish still
see the old group and can dereference the freed stats via
nh_grp_entry_stats_inc() -> get_cpu_ptr(nhge->stats), causing a
use-after-free on percpu memory.

Fix by deferring the free_percpu() until after synchronize_net() in the
caller. Removed entries are chained via nh_list onto a local deferred
free list. After the grace period completes and all RCU readers have
finished, the percpu stats are safely freed.

Fixes: f4676ea74b ("net: nexthop: Add nexthop group entry stats")
Cc: stable@vger.kernel.org
Signed-off-by: Mehul Rao <mehulrao@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20260306233821.196789-1-mehulrao@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 18:48:26 -07:00
Shuangpeng Bai
288598d80a serial: caif: hold tty->link reference in ldisc_open and ser_release
A reproducer triggers a KASAN slab-use-after-free in pty_write_room()
when caif_serial's TX path calls tty_write_room(). The faulting access
is on tty->link->port.

Hold an extra kref on tty->link for the lifetime of the caif_serial line
discipline: get it in ldisc_open() and drop it in ser_release(), and
also drop it on the ldisc_open() error path.

With this change applied, the reproducer no longer triggers the UAF in
my testing.

Link: https://gist.github.com/shuangpengbai/c898debad6bdf170a84be7e6b3d8707f
Link: https://lore.kernel.org/netdev/20260301220525.1546355-1-shuangpeng.kernel@gmail.com
Fixes: e31d5a0594 ("caif: tty's are kref objects so take a reference")
Signed-off-by: Shuangpeng Bai <shuangpeng.kernel@gmail.com>
Reviewed-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Link: https://patch.msgid.link/20260306034006.3395740-1-shuangpeng.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 18:47:55 -07:00
Álvaro Fernández Rojas
87d1268521 net: sfp: improve Huawei MA5671a fixup
With the current sfp_fixup_ignore_tx_fault() fixup we ignore the TX_FAULT
signal, but we also need to apply sfp_fixup_ignore_los() in order to be
able to communicate with the module even if the fiber isn't connected for
configuration purposes.
This is needed for all the MA5671a firmwares, excluding the FS modded
firmware.

Fixes: 2069624dac ("net: sfp: Add tx-fault workaround for Huawei MA5671A SFP ONT")
Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20260306125139.213637-1-noltari@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-09 18:46:46 -07:00
Sen Wang
4185b95f8a ASoC: simple-card-utils: fix graph_util_is_ports0() for DT overlays
graph_util_is_ports0() identifies DPCM front-end (ports@0) vs back-end
(ports@1) by calling of_get_child_by_name() to find the first "ports"
child and comparing pointers. This relies on child iteration order
matching DTS source order.

When the DPCM topology comes from a DT overlay, __of_attach_node()
inserts new children at the head of the sibling list, reversing the
order. of_get_child_by_name() then returns ports@1 instead of ports@0,
causing all front-end links to be classified as back-ends. The card
registers with no PCM devices.

Fix this by matching the unit address directly from the node name
instead of relying on sibling order.

Fixes: 9293925245 ("ASoC: simple-card-utils: add asoc_graph_is_ports0()")
Signed-off-by: Sen Wang <sen@ti.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://patch.msgid.link/20260309042109.2576612-1-sen@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-10 01:02:43 +00:00
Mark Brown
634672f9d6 ASoC: tegra: Add Tegra238 sound card support
Merge series from "Sheetal ." <sheetal@nvidia.com>:

Add Tegra238 sound card support in the Tegra audio graph card driver,
as Tegra238 requires different PLLA and PLLA_OUT0 clock rates compared
to other Tegra platforms.
2026-03-10 00:57:27 +00:00
John Johansen
8e135b8aee apparmor: fix race between freeing data and fs accessing it
AppArmor was putting the reference to i_private data on its end after
removing the original entry from the file system. However the inode
can aand does live beyond that point and it is possible that some of
the fs call back functions will be invoked after the reference has
been put, which results in a race between freeing the data and
accessing it through the fs.

While the rawdata/loaddata is the most likely candidate to fail the
race, as it has the fewest references. If properly crafted it might be
possible to trigger a race for the other types stored in i_private.

Fix this by moving the put of i_private referenced data to the correct
place which is during inode eviction.

Fixes: c961ee5f21 ("apparmor: convert from securityfs to apparmorfs for policy ns files")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Maxime Bélair <maxime.belair@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:44 -07:00
John Johansen
a0b7091c4d apparmor: fix race on rawdata dereference
There is a race condition that leads to a use-after-free situation:
because the rawdata inodes are not refcounted, an attacker can start
open()ing one of the rawdata files, and at the same time remove the
last reference to this rawdata (by removing the corresponding profile,
for example), which frees its struct aa_loaddata; as a result, when
seq_rawdata_open() is reached, i_private is a dangling pointer and
freed memory is accessed.

The rawdata inodes weren't refcounted to avoid a circular refcount and
were supposed to be held by the profile rawdata reference.  However
during profile removal there is a window where the vfs and profile
destruction race, resulting in the use after free.

Fix this by moving to a double refcount scheme. Where the profile
refcount on rawdata is used to break the circular dependency. Allowing
for freeing of the rawdata once all inode references to the rawdata
are put.

Fixes: 5d5182cae4 ("apparmor: move to per loaddata files, instead of replicating in profiles")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Maxime Bélair <maxime.belair@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:44 -07:00
John Johansen
39440b1375 apparmor: fix differential encoding verification
Differential encoding allows loops to be created if it is abused. To
prevent this the unpack should verify that a diff-encode chain
terminates.

Unfortunately the differential encode verification had two bugs.

1. it conflated states that had gone through check and already been
   marked, with states that were currently being checked and marked.
   This means that loops in the current chain being verified are treated
   as a chain that has already been verified.

2. the order bailout on already checked states compared current chain
   check iterators j,k instead of using the outer loop iterator i.
   Meaning a step backwards in states in the current chain verification
   was being mistaken for moving to an already verified state.

Move to a double mark scheme where already verified states get a
different mark, than the current chain being kept. This enables us
to also drop the backwards verification check that was the cause of
the second error as any already verified state is already marked.

Fixes: 031dcc8f4e ("apparmor: dfa add support for state differential encoding")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:43 -07:00
John Johansen
6601e13e82 apparmor: fix unprivileged local user can do privileged policy management
An unprivileged local user can load, replace, and remove profiles by
opening the apparmorfs interfaces, via a confused deputy attack, by
passing the opened fd to a privileged process, and getting the
privileged process to write to the interface.

This does require a privileged target that can be manipulated to do
the write for the unprivileged process, but once such access is
achieved full policy management is possible and all the possible
implications that implies: removing confinement, DoS of system or
target applications by denying all execution, by-passing the
unprivileged user namespace restriction, to exploiting kernel bugs for
a local privilege escalation.

The policy management interface can not have its permissions simply
changed from 0666 to 0600 because non-root processes need to be able
to load policy to different policy namespaces.

Instead ensure the task writing the interface has privileges that
are a subset of the task that opened the interface. This is already
done via policy for confined processes, but unconfined can delegate
access to the opened fd, by-passing the usual policy check.

Fixes: b7fd2c0340 ("apparmor: add per policy ns .load, .replace, .remove interface files")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:43 -07:00
John Johansen
5df0c44e8f apparmor: Fix double free of ns_name in aa_replace_profiles()
if ns_name is NULL after
1071         error = aa_unpack(udata, &lh, &ns_name);

and if ent->ns_name contains an ns_name in
1089                 } else if (ent->ns_name) {

then ns_name is assigned the ent->ns_name
1095                         ns_name = ent->ns_name;

however ent->ns_name is freed at
1262                 aa_load_ent_free(ent);

and then again when freeing ns_name at
1270         kfree(ns_name);

Fix this by NULLing out ent->ns_name after it is transferred to ns_name

Fixes: 145a0ef21c ("apparmor: fix blob compression when ns is forced on a policy load
")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:43 -07:00
Massimiliano Pellizzer
d352873bbe apparmor: fix missing bounds check on DEFAULT table in verify_dfa()
The verify_dfa() function only checks DEFAULT_TABLE bounds when the state
is not differentially encoded.

When the verification loop traverses the differential encoding chain,
it reads k = DEFAULT_TABLE[j] and uses k as an array index without
validation. A malformed DFA with DEFAULT_TABLE[j] >= state_count,
therefore, causes both out-of-bounds reads and writes.

[   57.179855] ==================================================================
[   57.180549] BUG: KASAN: slab-out-of-bounds in verify_dfa+0x59a/0x660
[   57.180904] Read of size 4 at addr ffff888100eadec4 by task su/993

[   57.181554] CPU: 1 UID: 0 PID: 993 Comm: su Not tainted 6.19.0-rc7-next-20260127 #1 PREEMPT(lazy)
[   57.181558] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   57.181563] Call Trace:
[   57.181572]  <TASK>
[   57.181577]  dump_stack_lvl+0x5e/0x80
[   57.181596]  print_report+0xc8/0x270
[   57.181605]  ? verify_dfa+0x59a/0x660
[   57.181608]  kasan_report+0x118/0x150
[   57.181620]  ? verify_dfa+0x59a/0x660
[   57.181623]  verify_dfa+0x59a/0x660
[   57.181627]  aa_dfa_unpack+0x1610/0x1740
[   57.181629]  ? __kmalloc_cache_noprof+0x1d0/0x470
[   57.181640]  unpack_pdb+0x86d/0x46b0
[   57.181647]  ? srso_alias_return_thunk+0x5/0xfbef5
[   57.181653]  ? srso_alias_return_thunk+0x5/0xfbef5
[   57.181656]  ? aa_unpack_nameX+0x1a8/0x300
[   57.181659]  aa_unpack+0x20b0/0x4c30
[   57.181662]  ? srso_alias_return_thunk+0x5/0xfbef5
[   57.181664]  ? stack_depot_save_flags+0x33/0x700
[   57.181681]  ? kasan_save_track+0x4f/0x80
[   57.181683]  ? kasan_save_track+0x3e/0x80
[   57.181686]  ? __kasan_kmalloc+0x93/0xb0
[   57.181688]  ? __kvmalloc_node_noprof+0x44a/0x780
[   57.181693]  ? aa_simple_write_to_buffer+0x54/0x130
[   57.181697]  ? policy_update+0x154/0x330
[   57.181704]  aa_replace_profiles+0x15a/0x1dd0
[   57.181707]  ? srso_alias_return_thunk+0x5/0xfbef5
[   57.181710]  ? __kvmalloc_node_noprof+0x44a/0x780
[   57.181712]  ? aa_loaddata_alloc+0x77/0x140
[   57.181715]  ? srso_alias_return_thunk+0x5/0xfbef5
[   57.181717]  ? _copy_from_user+0x2a/0x70
[   57.181730]  policy_update+0x17a/0x330
[   57.181733]  profile_replace+0x153/0x1a0
[   57.181735]  ? rw_verify_area+0x93/0x2d0
[   57.181740]  vfs_write+0x235/0xab0
[   57.181745]  ksys_write+0xb0/0x170
[   57.181748]  do_syscall_64+0x8e/0x660
[   57.181762]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   57.181765] RIP: 0033:0x7f6192792eb2

Remove the MATCH_FLAG_DIFF_ENCODE condition to validate all DEFAULT_TABLE
entries unconditionally.

Fixes: 031dcc8f4e ("apparmor: dfa add support for state differential encoding")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: Massimiliano Pellizzer <massimiliano.pellizzer@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:43 -07:00
Massimiliano Pellizzer
8756b68eda apparmor: fix side-effect bug in match_char() macro usage
The match_char() macro evaluates its character parameter multiple
times when traversing differential encoding chains. When invoked
with *str++, the string pointer advances on each iteration of the
inner do-while loop, causing the DFA to check different characters
at each iteration and therefore skip input characters.
This results in out-of-bounds reads when the pointer advances past
the input buffer boundary.

[   94.984676] ==================================================================
[   94.985301] BUG: KASAN: slab-out-of-bounds in aa_dfa_match+0x5ae/0x760
[   94.985655] Read of size 1 at addr ffff888100342000 by task file/976

[   94.986319] CPU: 7 UID: 1000 PID: 976 Comm: file Not tainted 6.19.0-rc7-next-20260127 #1 PREEMPT(lazy)
[   94.986322] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   94.986329] Call Trace:
[   94.986341]  <TASK>
[   94.986347]  dump_stack_lvl+0x5e/0x80
[   94.986374]  print_report+0xc8/0x270
[   94.986384]  ? aa_dfa_match+0x5ae/0x760
[   94.986388]  kasan_report+0x118/0x150
[   94.986401]  ? aa_dfa_match+0x5ae/0x760
[   94.986405]  aa_dfa_match+0x5ae/0x760
[   94.986408]  __aa_path_perm+0x131/0x400
[   94.986418]  aa_path_perm+0x219/0x2f0
[   94.986424]  apparmor_file_open+0x345/0x570
[   94.986431]  security_file_open+0x5c/0x140
[   94.986442]  do_dentry_open+0x2f6/0x1120
[   94.986450]  vfs_open+0x38/0x2b0
[   94.986453]  ? may_open+0x1e2/0x2b0
[   94.986466]  path_openat+0x231b/0x2b30
[   94.986469]  ? __x64_sys_openat+0xf8/0x130
[   94.986477]  do_file_open+0x19d/0x360
[   94.986487]  do_sys_openat2+0x98/0x100
[   94.986491]  __x64_sys_openat+0xf8/0x130
[   94.986499]  do_syscall_64+0x8e/0x660
[   94.986515]  ? count_memcg_events+0x15f/0x3c0
[   94.986526]  ? srso_alias_return_thunk+0x5/0xfbef5
[   94.986540]  ? handle_mm_fault+0x1639/0x1ef0
[   94.986551]  ? vma_start_read+0xf0/0x320
[   94.986558]  ? srso_alias_return_thunk+0x5/0xfbef5
[   94.986561]  ? srso_alias_return_thunk+0x5/0xfbef5
[   94.986563]  ? fpregs_assert_state_consistent+0x50/0xe0
[   94.986572]  ? srso_alias_return_thunk+0x5/0xfbef5
[   94.986574]  ? arch_exit_to_user_mode_prepare+0x9/0xb0
[   94.986587]  ? srso_alias_return_thunk+0x5/0xfbef5
[   94.986588]  ? irqentry_exit+0x3c/0x590
[   94.986595]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   94.986597] RIP: 0033:0x7fda4a79c3ea

Fix by extracting the character value before invoking match_char,
ensuring single evaluation per outer loop.

Fixes: 074c1cd798 ("apparmor: dfa move character match into a macro")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: Massimiliano Pellizzer <massimiliano.pellizzer@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:43 -07:00
John Johansen
3060394149 apparmor: fix: limit the number of levels of policy namespaces
Currently the number of policy namespaces is not bounded relying on
the user namespace limit. However policy namespaces aren't strictly
tied to user namespaces and it is possible to create them and nest
them arbitrarily deep which can be used to exhaust system resource.

Hard cap policy namespaces to the same depth as user namespaces.

Fixes: c88d4c7b04 ("AppArmor: core policy routines")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Reviewed-by: Ryan Lee <ryan.lee@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:43 -07:00
Massimiliano Pellizzer
ab09264660 apparmor: replace recursive profile removal with iterative approach
The profile removal code uses recursion when removing nested profiles,
which can lead to kernel stack exhaustion and system crashes.

Reproducer:
  $ pf='a'; for ((i=0; i<1024; i++)); do
      echo -e "profile $pf { \n }" | apparmor_parser -K -a;
      pf="$pf//x";
  done
  $ echo -n a > /sys/kernel/security/apparmor/.remove

Replace the recursive __aa_profile_list_release() approach with an
iterative approach in __remove_profile(). The function repeatedly
finds and removes leaf profiles until the entire subtree is removed,
maintaining the same removal semantic without recursion.

Fixes: c88d4c7b04 ("AppArmor: core policy routines")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: Massimiliano Pellizzer <massimiliano.pellizzer@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:42 -07:00
Massimiliano Pellizzer
e38c55d9f8 apparmor: fix memory leak in verify_header
The function sets `*ns = NULL` on every call, leaking the namespace
string allocated in previous iterations when multiple profiles are
unpacked. This also breaks namespace consistency checking since *ns
is always NULL when the comparison is made.

Remove the incorrect assignment.
The caller (aa_unpack) initializes *ns to NULL once before the loop,
which is sufficient.

Fixes: dd51c84857 ("apparmor: provide base for multiple profiles to be replaced at once")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: Massimiliano Pellizzer <massimiliano.pellizzer@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:42 -07:00
Massimiliano Pellizzer
9063d7e261 apparmor: validate DFA start states are in bounds in unpack_pdb
Start states are read from untrusted data and used as indexes into the
DFA state tables. The aa_dfa_next() function call in unpack_pdb() will
access dfa->tables[YYTD_ID_BASE][start], and if the start state exceeds
the number of states in the DFA, this results in an out-of-bound read.

==================================================================
 BUG: KASAN: slab-out-of-bounds in aa_dfa_next+0x2a1/0x360
 Read of size 4 at addr ffff88811956fb90 by task su/1097
 ...

Reject policies with out-of-bounds start states during unpacking
to prevent the issue.

Fixes: ad5ff3db53 ("AppArmor: Add ability to load extended policy")
Reported-by: Qualys Security Advisory <qsa@qualys.com>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Reviewed-by: Cengiz Can <cengiz.can@canonical.com>
Signed-off-by: Massimiliano Pellizzer <massimiliano.pellizzer@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
2026-03-09 16:05:42 -07:00
matteo.cotifava
95bc5c2255 ASoC: soc-core: flush delayed work before removing DAIs and widgets
When a sound card is unbound while a PCM stream is open, a
use-after-free can occur in snd_soc_dapm_stream_event(), called from
the close_delayed_work workqueue handler.

During unbind, snd_soc_unbind_card() flushes delayed work and then
calls soc_cleanup_card_resources(). Inside cleanup,
snd_card_disconnect_sync() releases all PCM file descriptors, and
the resulting PCM close path can call snd_soc_dapm_stream_stop()
which schedules new delayed work with a pmdown_time timer delay.
Since this happens after the flush in snd_soc_unbind_card(), the
new work is not caught. soc_remove_link_components() then frees
DAPM widgets before this work fires, leading to the use-after-free.

The existing flush in soc_free_pcm_runtime() also cannot help as it
runs after soc_remove_link_components() has already freed the widgets.

Add a flush in soc_cleanup_card_resources() after
snd_card_disconnect_sync() (after which no new PCM closes can
schedule further delayed work) and before soc_remove_link_dais()
and soc_remove_link_components() (which tear down the structures the
delayed work accesses).

Fixes: e894efef9a ("ASoC: core: add support to card rebind")
Signed-off-by: Matteo Cotifava <cotifavamatteo@gmail.com>
Link: https://patch.msgid.link/20260309215412.545628-3-cotifavamatteo@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 22:22:41 +00:00
matteo.cotifava
3c99c9f0ed ASoC: soc-core: drop delayed_work_pending() check before flush
The delayed_work_pending() check before flush_delayed_work() in
soc_free_pcm_runtime() is unnecessary and racy. flush_delayed_work()
is safe to call unconditionally - it is a no-op when no work is
pending. Remove the check.

The original check was added by commit 9c9b652034 ("ASoC: core:
only flush inited work during free") but delayed_work_pending()
followed by flush_delayed_work() has a time-of-check/time-of-use
window where work can become pending between the two calls.

Fixes: 9c9b652034 ("ASoC: core: only flush inited work during free")
Signed-off-by: Matteo Cotifava <cotifavamatteo@gmail.com>
Link: https://patch.msgid.link/20260309215412.545628-2-cotifavamatteo@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 22:22:40 +00:00
Luca Ceresoli
d0d7277469 drm/bridge: ti-sn65dsi83: halve horizontal syncs for dual LVDS output
Dual LVDS output (available on the SN65DSI84) requires HSYNC_PULSE_WIDTH
and HORIZONTAL_BACK_PORCH to be divided by two with respect to the values
used for single LVDS output.

While not clearly stated in the datasheet, this is needed according to the
DSI Tuner [0] output. It also makes sense intuitively because in dual LVDS
output two pixels at a time are output and so the output clock is half of
the pixel clock.

Some dual-LVDS panels refuse to show any picture without this fix.

Divide by two HORIZONTAL_FRONT_PORCH too, even though this register is used
only for test pattern generation which is not currently implemented by this
driver.

[0] https://www.ti.com/tool/DSI-TUNER

Fixes: ceb515ba29 ("drm/bridge: ti-sn65dsi83: Add TI SN65DSI83 and SN65DSI84 driver")
Cc: stable@vger.kernel.org
Reviewed-by: Marek Vasut <marek.vasut@mailbox.org>
Link: https://patch.msgid.link/20260226-ti-sn65dsi83-dual-lvds-fixes-and-test-pattern-v1-2-2e15f5a9a6a0@bootlin.com
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
2026-03-09 22:32:22 +01:00
Luca Ceresoli
2f22702dc0 drm/bridge: ti-sn65dsi83: fix CHA_DSI_CLK_RANGE rounding
The DSI frequency must be in the range:

  (CHA_DSI_CLK_RANGE * 5 MHz) <= DSI freq < ((CHA_DSI_CLK_RANGE + 1) * 5 MHz)

So the register value should point to the lower range value, but
DIV_ROUND_UP() rounds the division to the higher range value, resulting in
an excess of 1 (unless the frequency is an exact multiple of 5 MHz).

For example for a 437100000 MHz clock CHA_DSI_CLK_RANGE should be 87 (0x57):

  (87 * 5 = 435) <= 437.1 < (88 * 5 = 440)

but current code returns 88 (0x58).

Fix the computation by removing the DIV_ROUND_UP().

Fixes: ceb515ba29 ("drm/bridge: ti-sn65dsi83: Add TI SN65DSI83 and SN65DSI84 driver")
Cc: stable@vger.kernel.org
Reviewed-by: Marek Vasut <marek.vasut@mailbox.org>
Link: https://patch.msgid.link/20260226-ti-sn65dsi83-dual-lvds-fixes-and-test-pattern-v1-1-2e15f5a9a6a0@bootlin.com
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
2026-03-09 22:32:05 +01:00
Cheng-Yang Chou
652a3017c4 crypto: arm64/aes-neonbs - Move key expansion off the stack
aesbs_setkey() and aesbs_cbc_ctr_setkey() allocate struct crypto_aes_ctx
on the stack. On arm64, the kernel-mode NEON context is also stored on
the stack, causing the combined frame size to exceed 1024 bytes and
triggering -Wframe-larger-than= warnings.

Allocate struct crypto_aes_ctx on the heap instead and use
kfree_sensitive() to ensure the key material is zeroed on free.
Use a goto-based cleanup path to ensure kfree_sensitive() is always
called.

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Fixes: 4fa617cc68 ("arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack")
Link: https://lore.kernel.org/r/20260306064254.2079274-1-yphbchou0911@gmail.com
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-09 13:46:11 -07:00
Weizhao Ouyang
3b4a3a00de scripts: kconfig: merge_config.sh: fix unexpected operator warning
Fix a warning for:

$ ./scripts/kconfig/merge_config.sh .config extra.config
Using .config as base
Merging extra.config
./scripts/kconfig/merge_config.sh: 384: [: false: unexpected operator

The shellcheck report is also attached:

if [ "$STRICT" == "true" ] && [ "$STRICT_MODE_VIOLATED" == "true" ]; then
               ^-- SC3014 (warning): In POSIX sh, == in place of = is undefined.
                                                        ^-- SC3014 (warning): In POSIX sh, == in place of = is undefined.

Fixes: dfc97e1c5d ("scripts: kconfig: merge_config.sh: use awk in checks too")
Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
Reviewed-by: Mikko Rapeli <mikko.rapeli@linaro.org>
Link: https://patch.msgid.link/20260309121505.40454-1-o451686892@gmail.com
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-03-09 13:43:10 -07:00
Charles Mirabile
a76e30c247 kbuild: Delete .builtin-dtbs.S when running make clean
The makefile tries to delete a file named ".builtin-dtb.S" but the file
created by scripts/Makefile.vmlinux is actually called ".builtin-dtbs.S".

Fixes: 654102df2a ("kbuild: add generic support for built-in boot DTBs")
Cc: stable@vger.kernel.org
Signed-off-by: Charles Mirabile <cmirabil@redhat.com>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Link: https://patch.msgid.link/20260308044338.181403-1-cmirabil@redhat.com
[nathan: Small commit message adjustments]
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-03-09 13:41:56 -07:00
Jens Axboe
785d4625d3 io_uring/bpf_filter: use bpf_prog_run_pin_on_cpu() to prevent migration
Since the caller, __io_uring_run_bpf_filters(), doesn't prevent
migration, it should use the migration disabling variant for running
the BPF program.

Fixes: d42eb05e60 ("io_uring: add support for BPF filtering for opcode restrictions")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-09 14:20:14 -06:00
Felix Gu
111e286337 spi: rockchip-sfc: Fix double-free in remove() callback
The driver uses devm_spi_register_controller() for registration, which
automatically unregisters the controller via devm cleanup when the
device is removed. The manual call to spi_unregister_controller() in
the remove() callback can lead to a double-free.

And to make sure controller is unregistered before DMA buffer is
unmapped, switch to use spi_register_controller() in probe().

Fixes: 8011709906 ("spi: rockchip-sfc: Support pm ops")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260310-sfc-v2-1-67fab04b097f@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 19:10:21 +00:00
Benoît Sevens
2f1763f629 HID: wacom: fix out-of-bounds read in wacom_intuos_bt_irq
The wacom_intuos_bt_irq() function processes Bluetooth HID reports
without sufficient bounds checking. A maliciously crafted short report
can trigger an out-of-bounds read when copying data into the wacom
structure.

Specifically, report 0x03 requires at least 22 bytes to safely read
the processed data and battery status, while report 0x04 (which
falls through to 0x03) requires 32 bytes.

Add explicit length checks for these report IDs and log a warning if
a short report is received.

Signed-off-by: Benoît Sevens <bsevens@google.com>
Reviewed-by: Jason Gerecke <jason.gerecke@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-03-09 19:34:12 +01:00
Rafael J. Wysocki
06c2a67e90 Merge tag 'linux-cpupower-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux
Pull cpupower utility updates for 7.0-rc4 from Shuah Khan:

"linux-cpupower-7.0-rc4

 - Adds support for setting EPP via systemd service
 - Fixes swapped power/energy unit labels
 - Adds intel_pstate turbo boost support for Intel platforms"

* tag 'linux-cpupower-7.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux:
  cpupower: Add intel_pstate turbo boost support for Intel platforms
  cpupower: Add support for setting EPP via systemd service
  cpupower: fix swapped power/energy unit labels
2026-03-09 18:41:20 +01:00
Ryota Sakamoto
8b8f1d5e35 kunit: Add documentation of --list_suites
Commit 60f3ada4174f ("kunit: Add --list_suites to show suites") introduced
the --list_suites option to kunit.py, but the update to the corresponding
run_wrapper documentation was omitted.

Add the missing description for --list_suites to keep the documentation in
sync with the tool's supported arguments.

Fixes: 60f3ada4174f ("kunit: Add --list_suites to show suites")
Signed-off-by: Ryota Sakamoto <sakamo.ryota@gmail.com>
Reviewed-by: David Gow <david@davidgow.net>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-03-09 10:46:02 -06:00
zhidao su
2fcfe5951e sched_ext: Use WRITE_ONCE() for the write side of scx_enable helper pointer
scx_enable() uses double-checked locking to lazily initialize a static
kthread_worker pointer. The fast path reads helper locklessly:

    if (!READ_ONCE(helper)) {          // lockless read -- no helper_mutex

The write side initializes helper under helper_mutex, but previously
used a plain assignment:

        helper = kthread_run_worker(0, "scx_enable_helper");
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                 plain write -- KCSAN data race with READ_ONCE() above

Since READ_ONCE() on the fast path and the plain write on the
initialization path access the same variable without a common lock,
they constitute a data race. KCSAN requires that all sides of a
lock-free access use READ_ONCE()/WRITE_ONCE() consistently.

Use a temporary variable to stage the result of kthread_run_worker(),
and only WRITE_ONCE() into helper after confirming the pointer is
valid. This avoids a window where a concurrent caller on the fast path
could observe an ERR pointer via READ_ONCE(helper) before the error
check completes.

Fixes: b06ccbabe2 ("sched_ext: Fix starvation of scx_enable() under fair-class saturation")
Signed-off-by: zhidao su <suzhidao@xiaomi.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-09 06:08:26 -10:00
Josh Poimboeuf
9a73f085dc objtool: Fix another stack overflow in validate_branch()
The insn state is getting saved on the stack twice for each recursive
iteration.  No need for that, once is enough.

Fixes the following reported stack overflow:

  drivers/scsi/qla2xxx/qla_dbg.o: error: SIGSEGV: objtool stack overflow!
  Segmentation fault

Fixes: 70589843b3 ("objtool: Add option to trace function validation")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Closes: https://lore.kernel.org/90956545-2066-46e3-b547-10c884582eb0@app.fastmail.com
Link: https://patch.msgid.link/8b97f62d083457f3b0a29a424275f7957dd3372f.1772821683.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-09 08:45:13 -07:00
Josh Poimboeuf
7fdaa640c8 objtool: Handle Clang RSP musical chairs
For no apparent reason (possibly related to CONFIG_KMSAN), Clang can
randomly pass the value of RSP to other registers and then back again to
RSP.  Handle that accordingly.

Fixes the following warnings:

  drivers/input/misc/uinput.o: warning: objtool: uinput_str_to_user+0x165: undefined stack state
  drivers/input/misc/uinput.o: warning: objtool: uinput_str_to_user+0x165: unknown CFA base reg -1

Reported-by: Arnd Bergmann <arnd@arndb.de>
Closes: https://lore.kernel.org/90956545-2066-46e3-b547-10c884582eb0@app.fastmail.com
Link: https://patch.msgid.link/240e6a172cc73292499334a3724d02ccb3247fc7.1772818491.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-09 08:45:10 -07:00
Jann Horn
3306a589e5 io_uring/register: fix comment about task_no_new_privs
The actual code is right, but the comment is the wrong way around.

Fixes: ed82f35b92 ("io_uring: allow registration of per-task restrictions")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-09 08:40:18 -06:00
Ira Weiny
a8aec14230 nvdimm/bus: Fix potential use after free in asynchronous initialization
Dingisoul with KASAN reports a use after free if device_add() fails in
nd_async_device_register().

Commit b6eae0f61d ("libnvdimm: Hold reference on parent while
scheduling async init") correctly added a reference on the parent device
to be held until asynchronous initialization was complete.  However, if
device_add() results in an allocation failure the ref count of the
device drops to 0 prior to the parent pointer being accessed.  Thus
resulting in use after free.

The bug bot AI correctly identified the fix.  Save a reference to the
parent pointer to be used to drop the parent reference regardless of the
outcome of device_add().

Reported-by: Dingisoul <dingiso.kernel@gmail.com>
Closes: http://lore.kernel.org/8855544b-be9e-4153-aa55-0bc328b13733@gmail.com
Fixes: b6eae0f61d ("libnvdimm: Hold reference on parent while scheduling async init")
Cc: stable@vger.kernel.org
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260306-fix-uaf-async-init-v1-1-a28fd7526723@intel.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
2026-03-09 09:38:22 -05:00
Sheetal
1bc17c6719 ASoC: tegra: Add support for Tegra238 soundcard
Tegra238 platforms use different clock rates for plla and
plla_out0 clocks. Add Tegra238 support in the Tegra
sound card driver to apply specific clock configurations.

Signed-off-by: Aditya Bavanari <abavanari@nvidia.com>
Signed-off-by: Sheetal <sheetal@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Link: https://patch.msgid.link/20260303100249.3214529-3-sheetal@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 14:17:50 +00:00
Fernando Fernandez Mancera
0b352f83ca xfrm: iptfs: fix skb_put() panic on non-linear skb during reassembly
In iptfs_reassem_cont(), IP-TFS attempts to append data to the new inner
packet 'newskb' that is being reassembled. First a zero-copy approach is
tried if it succeeds then newskb becomes non-linear.

When a subsequent fragment in the same datagram does not meet the
fast-path conditions, a memory copy is performed. It calls skb_put() to
append the data and as newskb is non-linear it triggers
SKB_LINEAR_ASSERT check.

 Oops: invalid opcode: 0000 [#1] SMP NOPTI
 [...]
 RIP: 0010:skb_put+0x3c/0x40
 [...]
 Call Trace:
  <IRQ>
  iptfs_reassem_cont+0x1ab/0x5e0 [xfrm_iptfs]
  iptfs_input_ordered+0x2af/0x380 [xfrm_iptfs]
  iptfs_input+0x122/0x3e0 [xfrm_iptfs]
  xfrm_input+0x91e/0x1a50
  xfrm4_esp_rcv+0x3a/0x110
  ip_protocol_deliver_rcu+0x1d7/0x1f0
  ip_local_deliver_finish+0xbe/0x1e0
  __netif_receive_skb_core.constprop.0+0xb56/0x1120
  __netif_receive_skb_list_core+0x133/0x2b0
  netif_receive_skb_list_internal+0x1ff/0x3f0
  napi_complete_done+0x81/0x220
  virtnet_poll+0x9d6/0x116e [virtio_net]
  __napi_poll.constprop.0+0x2b/0x270
  net_rx_action+0x162/0x360
  handle_softirqs+0xdc/0x510
  __irq_exit_rcu+0xe7/0x110
  irq_exit_rcu+0xe/0x20
  common_interrupt+0x85/0xa0
  </IRQ>
  <TASK>

Fix this by checking if the skb is non-linear. If it is, linearize it by
calling skb_linearize(). As the initial allocation of newskb originally
reserved enough tailroom for the entire reassembled packet we do not
need to check if we have enough tailroom or extend it.

Fixes: 5f2b6a9095 ("xfrm: iptfs: add skb-fragment sharing code")
Reported-by: Hao Long <me@imlonghao.com>
Closes: https://lore.kernel.org/netdev/DGRCO9SL0T5U.JTINSHJQ9KPK@imlonghao.com/
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-09 15:14:46 +01:00
Ming Lei
155a3bedcc ublk: don't clear GD_SUPPRESS_PART_SCAN for unprivileged daemons
When UBLK_F_NO_AUTO_PART_SCAN is set, GD_SUPPRESS_PART_SCAN is cleared
unconditionally, including for unprivileged daemons. Keep it consistent
with the code block for setting GD_SUPPRESS_PART_SCAN by not clearing
it for unprivileged daemons.

In reality this isn't a problem because ioctl(BLKRRPART) requires
CAP_SYS_ADMIN, but it is more reliable to not clear the bit.

Cc: Alexander Atanasov <alex@zazolabs.com>
Fixes: 8443e2087e ("ublk: add UBLK_F_NO_AUTO_PART_SCAN feature flag")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-09 07:37:36 -06:00
Felix Gu
ad0e9ac2d5 spi: atcspi200: Fix double-free in atcspi_configure_dma()
The driver uses devm_dma_request_chan() which registers automatic cleanup
via devm_add_action_or_reset(). Calling dma_release_channel() manually on
the RX channel when TX channel request fails causes a double-free when
the devm cleanup runs.

Remove the unnecessary manual cleanup and simplify the error handling
since devm will properly release channels on probe failure or driver
detach.

Fixes: 34e3815ea4 ("spi: atcspi200: Add ATCSPI200 SPI controller driver")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260305-atcspi2000-v1-1-eafe08dcca60@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 12:29:30 +00:00
Felix Gu
b20b437666 spi: amlogic: spifc-a4: Fix DMA mapping error handling
Fix three bugs in aml_sfc_dma_buffer_setup() error paths:
1. Unnecessary goto: When the first DMA mapping (sfc->daddr) fails,
   nothing needs cleanup. Use direct return instead of goto.
2. Double-unmap bug: When info DMA mapping failed, the code would
   unmap sfc->daddr inline, then fall through to out_map_data which
   would unmap it again, causing a double-unmap.
3. Wrong unmap size: The out_map_info label used datalen instead of
   infolen when unmapping sfc->iaddr, which could lead to incorrect
   DMA sync behavior.

Fixes: 4670db6f32 ("spi: amlogic: add driver for Amlogic SPI Flash Controller")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260306-spifc-a4-v1-1-f22c9965f64a@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 12:29:28 +00:00
Mika Westerberg
d800d0bb20 dt-bindings: i2c: dw: Update maintainer
Jarkko does now work for Intel anymore and since I'm currently
maintaining this driver, update my contact information here to make sure
patches get Cc'd to me as well.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> (internally)
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
2026-03-09 13:02:58 +01:00
Hristo Venev
081a0b78ef ceph: do not skip the first folio of the next object in writeback
When `ceph_process_folio_batch` encounters a folio past the end of the
current object, it should leave it in the batch so that it is picked up
in the next iteration.

Removing the folio from the batch means that it does not get written
back and remains dirty instead. This makes `fsync()` silently skip some
of the data, delays capability release, and breaks coherence with
`O_DIRECT`.

The link below contains instructions for reproducing the bug.

Cc: stable@vger.kernel.org
Fixes: ce80b76dd3 ("ceph: introduce ceph_process_folio_batch() method")
Link: https://tracker.ceph.com/issues/75156
Signed-off-by: Hristo Venev <hristo@venev.name>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-03-09 12:34:40 +01:00
Max Kellermann
040d159a45 ceph: fix memory leaks in ceph_mdsc_build_path()
Add __putname() calls to error code paths that did not free the "path"
pointer obtained by __getname().  If ownership of this pointer is not
passed to the caller via path_info.path, the function must free it
before returning.

Cc: stable@vger.kernel.org
Fixes: 3fd945a79e ("ceph: encode encrypted name in ceph_mdsc_build_path and dentry release")
Fixes: 550f7ca98e ("ceph: give up on paths longer than PATH_MAX")
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-03-09 12:34:40 +01:00
Max Kellermann
43323a5934 ceph: add a bunch of missing ceph_path_info initializers
ceph_mdsc_build_path() must be called with a zero-initialized
ceph_path_info parameter, or else the following
ceph_mdsc_free_path_info() may crash.

Example crash (on Linux 6.18.12):

  virt_to_cache: Object is not a Slab page!
  WARNING: CPU: 184 PID: 2871736 at mm/slub.c:6732 kmem_cache_free+0x316/0x400
  [...]
  Call Trace:
   [...]
   ceph_open+0x13d/0x3e0
   do_dentry_open+0x134/0x480
   vfs_open+0x2a/0xe0
   path_openat+0x9a3/0x1160
  [...]
  cache_from_obj: Wrong slab cache. names_cache but object is from ceph_inode_info
  WARNING: CPU: 184 PID: 2871736 at mm/slub.c:6746 kmem_cache_free+0x2dd/0x400
  [...]
  kernel BUG at mm/slub.c:634!
  Oops: invalid opcode: 0000 [#1] SMP NOPTI
  RIP: 0010:__slab_free+0x1a4/0x350

Some of the ceph_mdsc_build_path() callers had initializers, but
others had not, even though they were all added by commit 15f519e9f8
("ceph: fix race condition validating r_parent before applying state").
The ones without initializer are suspectible to random crashes.  (I can
imagine it could even be possible to exploit this bug to elevate
privileges.)

Unfortunately, these Ceph functions are undocumented and its semantics
can only be derived from the code.  I see that ceph_mdsc_build_path()
initializes the structure only on success, but not on error.

Calling ceph_mdsc_free_path_info() after a failed
ceph_mdsc_build_path() call does not even make sense, but that's what
all callers do, and for it to be safe, the structure must be
zero-initialized.  The least intrusive approach to fix this is
therefore to add initializers everywhere.

Cc: stable@vger.kernel.org
Fixes: 15f519e9f8 ("ceph: fix race condition validating r_parent before applying state")
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-03-09 12:34:40 +01:00
Max Kellermann
ce0123cbb4 ceph: fix i_nlink underrun during async unlink
During async unlink, we drop the `i_nlink` counter before we receive
the completion (that will eventually update the `i_nlink`) because "we
assume that the unlink will succeed".  That is not a bad idea, but it
races against deletions by other clients (or against the completion of
our own unlink) and can lead to an underrun which emits a WARNING like
this one:

 WARNING: CPU: 85 PID: 25093 at fs/inode.c:407 drop_nlink+0x50/0x68
 Modules linked in:
 CPU: 85 UID: 3221252029 PID: 25093 Comm: php-cgi8.1 Not tainted 6.14.11-cm4all1-ampere #655
 Hardware name: Supermicro ARS-110M-NR/R12SPD-A, BIOS 1.1b 10/17/2023
 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
 pc : drop_nlink+0x50/0x68
 lr : ceph_unlink+0x6c4/0x720
 sp : ffff80012173bc90
 x29: ffff80012173bc90 x28: ffff086d0a45aaf8 x27: ffff0871d0eb5680
 x26: ffff087f2a64a718 x25: 0000020000000180 x24: 0000000061c88647
 x23: 0000000000000002 x22: ffff07ff9236d800 x21: 0000000000001203
 x20: ffff07ff9237b000 x19: ffff088b8296afc0 x18: 00000000f3c93365
 x17: 0000000000070000 x16: ffff08faffcbdfe8 x15: ffff08faffcbdfec
 x14: 0000000000000000 x13: 45445f65645f3037 x12: 34385f6369706f74
 x11: 0000a2653104bb20 x10: ffffd85f26d73290 x9 : ffffd85f25664f94
 x8 : 00000000000000c0 x7 : 0000000000000000 x6 : 0000000000000002
 x5 : 0000000000000081 x4 : 0000000000000481 x3 : 0000000000000000
 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff08727d3f91e8
 Call trace:
  drop_nlink+0x50/0x68 (P)
  vfs_unlink+0xb0/0x2e8
  do_unlinkat+0x204/0x288
  __arm64_sys_unlinkat+0x3c/0x80
  invoke_syscall.constprop.0+0x54/0xe8
  do_el0_svc+0xa4/0xc8
  el0_svc+0x18/0x58
  el0t_64_sync_handler+0x104/0x130
  el0t_64_sync+0x154/0x158

In ceph_unlink(), a call to ceph_mdsc_submit_request() submits the
CEPH_MDS_OP_UNLINK to the MDS, but does not wait for completion.

Meanwhile, between this call and the following drop_nlink() call, a
worker thread may process a CEPH_CAP_OP_IMPORT, CEPH_CAP_OP_GRANT or
just a CEPH_MSG_CLIENT_REPLY (the latter of which could be our own
completion).  These will lead to a set_nlink() call, updating the
`i_nlink` counter to the value received from the MDS.  If that new
`i_nlink` value happens to be zero, it is illegal to decrement it
further.  But that is exactly what ceph_unlink() will do then.

The WARNING can be reproduced this way:

1. Force async unlink; only the async code path is affected.  Having
   no real clue about Ceph internals, I was unable to find out why the
   MDS wouldn't give me the "Fxr" capabilities, so I patched
   get_caps_for_async_unlink() to always succeed.

   (Note that the WARNING dump above was found on an unpatched kernel,
   without this kludge - this is not a theoretical bug.)

2. Add a sleep call after ceph_mdsc_submit_request() so the unlink
   completion gets handled by a worker thread before drop_nlink() is
   called.  This guarantees that the `i_nlink` is already zero before
   drop_nlink() runs.

The solution is to skip the counter decrement when it is already zero,
but doing so without a lock is still racy (TOCTOU).  Since
ceph_fill_inode() and handle_cap_grant() both hold the
`ceph_inode_info.i_ceph_lock` spinlock while set_nlink() runs, this
seems like the proper lock to protect the `i_nlink` updates.

I found prior art in NFS and SMB (using `inode.i_lock`) and AFS (using
`afs_vnode.cb_lock`).  All three have the zero check as well.

Cc: stable@vger.kernel.org
Fixes: 2ccb45462a ("ceph: perform asynchronous unlink if we have sufficient caps")
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-03-09 12:34:40 +01:00
Tuo Li
e1c9866173 dmaengine: idxd: fix possible wrong descriptor completion in llist_abort_desc()
At the end of this function, d is the traversal cursor of flist, but the
code completes found instead. This can lead to issues such as NULL pointer
dereferences, double completion, or descriptor leaks.

Fix this by completing d instead of found in the final
list_for_each_entry_safe() loop.

Fixes: aa8d18becc ("dmaengine: idxd: add callback support for iaa crypto")
Signed-off-by: Tuo Li <islituo@gmail.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260106032428.162445-1-islituo@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-09 12:27:11 +01:00
Deepanshu Kartikey
e9075e420a netfs: Fix NULL pointer dereference in netfs_unbuffered_write() on retry
When a write subrequest is marked NETFS_SREQ_NEED_RETRY, the retry path
in netfs_unbuffered_write() unconditionally calls stream->prepare_write()
without checking if it is NULL.

Filesystems such as 9P do not set the prepare_write operation, so
stream->prepare_write remains NULL. When get_user_pages() fails with
-EFAULT and the subrequest is flagged for retry, this results in a NULL
pointer dereference at fs/netfs/direct_write.c:189.

Fix this by mirroring the pattern already used in write_retry.c: if
stream->prepare_write is NULL, skip renegotiation and directly reissue
the subrequest via netfs_reissue_write(), which handles iterator reset,
IN_PROGRESS flag, stats update and reissue internally.

Fixes: a0b4c7a491 ("netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence")
Reported-by: syzbot+7227db0fbac9f348dba0@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=7227db0fbac9f348dba0
Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
Link: https://patch.msgid.link/20260307043947.347092-1-kartikey406@gmail.com
Tested-by: syzbot+7227db0fbac9f348dba0@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-09 10:21:56 +01:00
Deepanshu Kartikey
67e467a11f netfs: Fix kernel BUG in netfs_limit_iter() for ITER_KVEC iterators
When a process crashes and the kernel writes a core dump to a 9P
filesystem, __kernel_write() creates an ITER_KVEC iterator. This
iterator reaches netfs_limit_iter() via netfs_unbuffered_write(), which
only handles ITER_FOLIOQ, ITER_BVEC and ITER_XARRAY iterator types,
hitting the BUG() for any other type.

Fix this by adding netfs_limit_kvec() following the same pattern as
netfs_limit_bvec(), since both kvec and bvec are simple segment arrays
with pointer and length fields. Dispatch it from netfs_limit_iter() when
the iterator type is ITER_KVEC.

Fixes: cae932d3ae ("netfs: Add func to calculate pagecount/size-limited span of an iterator")
Reported-by: syzbot+9c058f0d63475adc97fd@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=9c058f0d63475adc97fd
Tested-by: syzbot+9c058f0d63475adc97fd@syzkaller.appspotmail.com
Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
Link: https://patch.msgid.link/20260307090041.359870-1-kartikey406@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-09 10:17:00 +01:00
Uzair Mughal
542127f652 ALSA: hda/realtek: Add headset jack quirk for Thinkpad X390
The Lenovo ThinkPad X390 (ALC257 codec, subsystem ID 0x17aa2288)
does not report headset button press events. Headphone insertion is
detected (SW_HEADPHONE_INSERT), but pressing the inline microphone
button on a headset produces no input events.

Add a SND_PCI_QUIRK entry that maps this subsystem ID to
ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, which enables
headset jack button detection through alc_fixup_headset_jack()
and ThinkPad ACPI integration. This is the same fixup used by
similar ThinkPad models (P1 Gen 3, X1 Extreme Gen 3).

Signed-off-by: Uzair Mughal <contact@uzair.is-a.dev>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20260307012906.20093-1-contact@uzair.is-a.dev
2026-03-09 10:02:46 +01:00
Liucheng Lu
178dd118c0 ALSA: hda/realtek: add HP Laptop 14s-dr5xxx mute LED quirk
HP Laptop 14s-dr5xxx with ALC236 codec does not handle the toggling of
the mute LED.
This patch adds a quirk entry for subsystem ID 0x8a1f using
ALC236_FIXUP_HP_MUTE_LED_COEFBIT2 fixup, enabling correct mute LED
behavior.

Signed-off-by: Liucheng Lu <luliucheng100@outlook.com>
Link: https://patch.msgid.link/PAVPR03MB9774F3FCE9CCD181C585281AE37BA@PAVPR03MB9774.eurprd03.prod.outlook.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-09 09:58:39 +01:00
Zhang Heng
0d3429f121 ALSA: hda/realtek: add quirk for ASUS UM6702RC
The sound card of this machine cannot adjust the volume, it can only
be 0 or 100%. The reason is that the DAC with pin 0x17 is connected
to 0x06. Testing found that connecting 0x02 can fix this problem.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=220356
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260306123317.575346-1-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-09 09:57:10 +01:00
Mehul Rao
9b1dbd69ba ALSA: pcm: fix use-after-free on linked stream runtime in snd_pcm_drain()
In the drain loop, the local variable 'runtime' is reassigned to a
linked stream's runtime (runtime = s->runtime at line 2157).  After
releasing the stream lock at line 2169, the code accesses
runtime->no_period_wakeup, runtime->rate, and runtime->buffer_size
(lines 2170-2178) — all referencing the linked stream's runtime without
any lock or refcount protecting its lifetime.

A concurrent close() on the linked stream's fd triggers
snd_pcm_release_substream() → snd_pcm_drop() → pcm_release_private()
→ snd_pcm_unlink() → snd_pcm_detach_substream() → kfree(runtime).
No synchronization prevents kfree(runtime) from completing while the
drain path dereferences the stale pointer.

Fix by caching the needed runtime fields (no_period_wakeup, rate,
buffer_size) into local variables while still holding the stream lock,
and using the cached values after the lock is released.

Fixes: f2b3614cef ("ALSA: PCM - Don't check DMA time-out too shortly")
Cc: stable@vger.kernel.org
Signed-off-by: Mehul Rao <mehulrao@gmail.com>
Link: https://patch.msgid.link/20260305193508.311096-1-mehulrao@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-09 09:56:09 +01:00
Zhang Heng
56fbbe096a ALSA: hda/realtek: Add quirk for Gigabyte Technology to fix headphone
The BIOS of this machine has set 0x19 to mic, which needs to be set
to headphone pin in order to work properly.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=220814
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/b55f6ebe-7449-49f7-ae85-00d2ba1e7af0@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-09 09:54:32 +01:00
Alexander Stein
e0adbf74e2 dmaengine: xilinx: xdma: Fix regmap init error handling
devm_regmap_init_mmio returns an ERR_PTR() upon error, not NULL.
Fix the error check and also fix the error message. Use the error code
from ERR_PTR() instead of the wrong value in ret.

Fixes: 17ce252266 ("dmaengine: xilinx: xdma: Add xilinx xdma driver")
Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20251014061309.283468-1-alexander.stein@ew.tq-group.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-09 08:21:22 +01:00
LUO Haowen
3f63297ff6 dmaengine: dw-edma: Fix multiple times setting of the CYCLE_STATE and CYCLE_BIT bits for HDMA.
Others have submitted this issue (https://lore.kernel.org/dmaengine/
20240722030405.3385-1-zhengdongxiong@gxmicro.cn/),
but it has not been fixed yet. Therefore, more supplementary information
is provided here.

As mentioned in the "PCS-CCS-CB-TCB" Producer-Consumer Synchronization of
"DesignWare Cores PCI Express Controller Databook, version 6.00a":

1. The Consumer CYCLE_STATE (CCS) bit in the register only needs to be
initialized once; the value will update automatically to be
~CYCLE_BIT (CB) in the next chunk.
2. The Consumer CYCLE_BIT bit in the register is loaded from the LL
element and tested against CCS. When CB = CCS, the data transfer is
executed. Otherwise not.

The current logic sets customer (HDMA) CS and CB bits to 1 in each chunk
while setting the producer (software) CB of odd chunks to 0 and even
chunks to 1 in the linked list. This is leading to a mismatch between
the producer CB and consumer CS bits.

This issue can be reproduced by setting the transmission data size to
exceed one chunk. By the way, in the EDMA using the same "PCS-CCS-CB-TCB"
mechanism, the CS bit is only initialized once and this issue was not
found. Refer to
drivers/dma/dw-edma/dw-edma-v0-core.c:dw_edma_v0_core_start.

So fix this issue by initializing the CYCLE_STATE and CYCLE_BIT bits
only once.

Fixes: e74c39573d ("dmaengine: dw-edma: Add support for native HDMA")
Signed-off-by: LUO Haowen <luo-hw@foxmail.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/tencent_CB11AA9F3920C1911AF7477A9BD8EFE0AD05@qq.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-03-09 08:15:54 +01:00
Thorsten Blum
441336115d ksmbd: Don't log keys in SMB3 signing and encryption key generation
When KSMBD_DEBUG_AUTH logging is enabled, generate_smb3signingkey() and
generate_smb3encryptionkey() log the session, signing, encryption, and
decryption key bytes. Remove the logs to avoid exposing credentials.

Fixes: e2f34481b2 ("cifsd: add server-side procedures for SMB3")
Cc: stable@vger.kernel.org
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-08 21:28:39 -05:00
Marios Makassikis
1e689a5617 smb: server: fix use-after-free in smb2_open()
The opinfo pointer obtained via rcu_dereference(fp->f_opinfo) is
dereferenced after rcu_read_unlock(), creating a use-after-free
window.

Cc: stable@vger.kernel.org
Signed-off-by: Marios Makassikis <mmakassikis@freebox.fr>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-08 21:28:39 -05:00
Namjae Jeon
eac3361e3d ksmbd: fix use-after-free in smb_lazy_parent_lease_break_close()
opinfo pointer obtained via rcu_dereference(fp->f_opinfo) is being
accessed after rcu_read_unlock() has been called. This creates a
race condition where the memory could be freed by a concurrent
writer between the unlock and the subsequent pointer dereferences
(opinfo->is_lease, etc.), leading to a use-after-free.

Fixes: 5fb282ba4f ("ksmbd: fix possible null-deref in smb_lazy_parent_lease_break_close")
Cc: stable@vger.kernel.org
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-08 21:28:39 -05:00
Namjae Jeon
1dfd062caa ksmbd: fix use-after-free by using call_rcu() for oplock_info
ksmbd currently frees oplock_info immediately using kfree(), even
though it is accessed under RCU read-side critical sections in places
like opinfo_get() and proc_show_files().

Since there is no RCU grace period delay between nullifying the pointer
and freeing the memory, a reader can still access oplock_info
structure after it has been freed. This can leads to a use-after-free
especially in opinfo_get() where atomic_inc_not_zero() is called on
already freed memory.

Fix this by switching to deferred freeing using call_rcu().

Fixes: 18b4fac5ef ("ksmbd: fix use-after-free in smb_break_all_levII_oplock()")
Cc: stable@vger.kernel.org
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-08 21:28:39 -05:00
Ali Khaledi
40955015fa ksmbd: fix use-after-free in proc_show_files due to early rcu_read_unlock
The opinfo pointer obtained via rcu_dereference(fp->f_opinfo) is
dereferenced after rcu_read_unlock(), creating a use-after-free
window. A concurrent opinfo_put() can free the opinfo between the
unlock and the subsequent access to opinfo->is_lease,
opinfo->o_lease->state, and opinfo->level.

Fix this by deferring rcu_read_unlock() until after all opinfo
field accesses are complete. The values needed (const_names, count,
level) are copied into local variables under the RCU read lock,
and the potentially-sleeping seq_printf calls happen after the
lock is released.

Found by AI-assisted code review (Claude Opus 4.6, Anthropic)
in collaboration with Ali Khaledi.

Cc: stable@vger.kernel.org
Fixes: b38f99c121 ("ksmbd: add procfs interface for runtime monitoring and statistics")
Signed-off-by: Ali Khaledi <ali.khaledi1989@gmail.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-08 21:28:39 -05:00
Guenter Roeck
c15e7c62fe smb/server: Fix another refcount leak in smb2_open()
If ksmbd_override_fsids() fails, we jump to err_out2. At that point, fp is
NULL because it hasn't been assigned dh_info.fp yet, so ksmbd_fd_put(work,
fp) will not be called. However, dh_info.fp was already inserted into the
session file table by ksmbd_reopen_durable_fd(), so it will leak in the
session file table until the session is closed.

Move fp = dh_info.fp; ahead of the ksmbd_override_fsids() check to fix the
problem.

Found by an experimental AI code review agent at Google.

Fixes: c8efcc7861 ("ksmbd: add support for durable handles v1/v2")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-08 21:28:38 -05:00
Richard Fitzgerald
facfdef64d firmware: cs_dsp: Fix fragmentation regression in firmware download
Use vmalloc() instead of kmalloc(..., GFP_DMA) to alloc the temporary
buffer for firmware download blobs. This avoids the problem that a
heavily fragmented system cannot allocate enough physically-contiguous
memory for a large blob.

The redundant alloc buffer mechanism was removed in commit 900baa6e7b
("firmware: cs_dsp: Remove redundant download buffer allocator").
While doing that I was overly focused on the possibility of the
underlying bus requiring DMA-safe memory. So I used GFP_DMA kmalloc()s.
I failed to notice that the code I was removing used vmalloc().
This creates a regression.

Way back in 2014 the problem of fragmentation with kmalloc()s was fixed
by commit cdcd7f7287 ("ASoC: wm_adsp: Use vmalloc to allocate firmware
download buffer").

Although we don't need physically-contiguous memory, we don't know if the
bus needs some particular alignment of the buffers. Since the change in
2014, the firmware download has always used whatever alignment vmalloc()
returns. To avoid introducing a new problem, the temporary buffer is still
used, to keep the same alignment of pointers passed to regmap_raw_write().

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Fixes: 900baa6e7b ("firmware: cs_dsp: Remove redundant download buffer allocator")
Link: https://patch.msgid.link/20260304141250.1578597-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 00:16:52 +00:00
Ravi Hothi
d6db827b43 ASoC: qcom: qdsp6: Fix q6apm remove ordering during ADSP stop and start
During ADSP stop and start, the kernel crashes due to the order in which
ASoC components are removed.

On ADSP stop, the q6apm-audio .remove callback unloads topology and removes
PCM runtimes during ASoC teardown. This deletes the RTDs that contain the
q6apm DAI components before their removal pass runs, leaving those
components still linked to the card and causing crashes on the next rebind.

Fix this by ensuring that all dependent (child) components are removed
first, and the q6apm component is removed last.

[   48.105720] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0
[   48.114763] Mem abort info:
[   48.117650]   ESR = 0x0000000096000004
[   48.121526]   EC = 0x25: DABT (current EL), IL = 32 bits
[   48.127010]   SET = 0, FnV = 0
[   48.130172]   EA = 0, S1PTW = 0
[   48.133415]   FSC = 0x04: level 0 translation fault
[   48.138446] Data abort info:
[   48.141422]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
[   48.147079]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[   48.152354]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[   48.157859] user pgtable: 4k pages, 48-bit VAs, pgdp=00000001173cf000
[   48.164517] [00000000000000d0] pgd=0000000000000000, p4d=0000000000000000
[   48.171530] Internal error: Oops: 0000000096000004 [#1]  SMP
[   48.177348] Modules linked in: q6prm_clocks q6apm_lpass_dais q6apm_dai snd_q6dsp_common q6prm snd_q6apm 8021q garp mrp stp llc snd_soc_hdmi_codec apr pdr_interface phy_qcom_edp fastrpc qcom_pd_mapper rpmsg_ctrl qrtr_smd rpmsg_char qcom_pdr_msg qcom_iris v4l2_mem2mem videobuf2_dma_contig ath11k_pci msm ubwc_config at24 ath11k videobuf2_memops mac80211 ocmem videobuf2_v4l2 libarc4 drm_gpuvm mhi qrtr videodev drm_exec snd_soc_sc8280xp gpu_sched videobuf2_common nvmem_qcom_spmi_sdam snd_soc_qcom_sdw drm_dp_aux_bus qcom_q6v5_pas qcom_spmi_temp_alarm snd_soc_qcom_common rtc_pm8xxx qcom_pon drm_display_helper cec qcom_pil_info qcom_stats soundwire_bus drm_client_lib mc dispcc0_sa8775p videocc_sa8775p qcom_q6v5 camcc_sa8775p snd_soc_dmic phy_qcom_sgmii_eth snd_soc_max98357a i2c_qcom_geni snd_soc_core dwmac_qcom_ethqos llcc_qcom icc_bwmon qcom_sysmon snd_compress qcom_refgen_regulator coresight_stm stmmac_platform snd_pcm_dmaengine qcom_common coresight_tmc stmmac coresight_replicator qcom_glink_smem coresight_cti stm_core
[   48.177444]  coresight_funnel snd_pcm ufs_qcom phy_qcom_qmp_usb gpi phy_qcom_snps_femto_v2 coresight phy_qcom_qmp_ufs qcom_wdt gpucc_sa8775p pcs_xpcs mdt_loader qcom_ice icc_osm_l3 qmi_helpers snd_timer snd soundcore display_connector qcom_rng nvmem_reboot_mode drm_kms_helper phy_qcom_qmp_pcie sha256 cfg80211 rfkill socinfo fuse drm backlight ipv6
[   48.301059] CPU: 2 UID: 0 PID: 293 Comm: kworker/u32:2 Not tainted 6.19.0-rc6-dirty #10 PREEMPT
[   48.310081] Hardware name: Qualcomm Technologies, Inc. Lemans EVK (DT)
[   48.316782] Workqueue: pdr_notifier_wq pdr_notifier_work [pdr_interface]
[   48.323672] pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   48.330825] pc : mutex_lock+0xc/0x54
[   48.334514] lr : soc_dapm_shutdown_dapm+0x44/0x174 [snd_soc_core]
[   48.340794] sp : ffff800084ddb7b0
[   48.344207] x29: ffff800084ddb7b0 x28: ffff00009cd9cf30 x27: ffff00009cd9cc00
[   48.351544] x26: ffff000099610190 x25: ffffa31d2f19c810 x24: ffffa31d2f185098
[   48.358869] x23: ffff800084ddb7f8 x22: 0000000000000000 x21: 00000000000000d0
[   48.366198] x20: ffff00009ba6c338 x19: ffff00009ba6c338 x18: 00000000ffffffff
[   48.373528] x17: 000000040044ffff x16: ffffa31d4ae6dca8 x15: 072007740775076f
[   48.380853] x14: 0765076d07690774 x13: 00313a323a656369 x12: 767265733a637673
[   48.388182] x11: 00000000000003f9 x10: ffffa31d4c7dea98 x9 : 0000000000000001
[   48.395519] x8 : ffff00009a2aadc0 x7 : 0000000000000003 x6 : 0000000000000000
[   48.402854] x5 : 0000000000000000 x4 : 0000000000000028 x3 : ffff000ef397a698
[   48.410180] x2 : ffff00009a2aadc0 x1 : 0000000000000000 x0 : 00000000000000d0
[   48.417506] Call trace:
[   48.420025]  mutex_lock+0xc/0x54 (P)
[   48.423712]  snd_soc_dapm_shutdown+0x44/0xbc [snd_soc_core]
[   48.429447]  soc_cleanup_card_resources+0x30/0x2c0 [snd_soc_core]
[   48.435719]  snd_soc_bind_card+0x4dc/0xcc0 [snd_soc_core]
[   48.441278]  snd_soc_add_component+0x27c/0x2c8 [snd_soc_core]
[   48.447192]  snd_soc_register_component+0x9c/0xf4 [snd_soc_core]
[   48.453371]  devm_snd_soc_register_component+0x64/0xc4 [snd_soc_core]
[   48.459994]  apm_probe+0xb4/0x110 [snd_q6apm]
[   48.464479]  apr_device_probe+0x24/0x40 [apr]
[   48.468964]  really_probe+0xbc/0x298
[   48.472651]  __driver_probe_device+0x78/0x12c
[   48.477132]  driver_probe_device+0x40/0x160
[   48.481435]  __device_attach_driver+0xb8/0x134
[   48.486011]  bus_for_each_drv+0x80/0xdc
[   48.489964]  __device_attach+0xa8/0x1b0
[   48.493916]  device_initial_probe+0x50/0x54
[   48.498219]  bus_probe_device+0x38/0xa0
[   48.502170]  device_add+0x590/0x760
[   48.505761]  device_register+0x20/0x30
[   48.509623]  of_register_apr_devices+0x1d8/0x318 [apr]
[   48.514905]  apr_pd_status+0x2c/0x54 [apr]
[   48.519114]  pdr_notifier_work+0x8c/0xe0 [pdr_interface]
[   48.524570]  process_one_work+0x150/0x294
[   48.528692]  worker_thread+0x2d8/0x3d8
[   48.532551]  kthread+0x130/0x204
[   48.535874]  ret_from_fork+0x10/0x20
[   48.539559] Code: d65f03c0 d5384102 d503201f d2800001 (c8e17c02)
[   48.545823] ---[ end trace 0000000000000000 ]---

Fixes: 5477518b8a ("ASoC: qdsp6: audioreach: add q6apm support")
Cc: stable@vger.kernel.org
Signed-off-by: Ravi Hothi <ravi.hothi@oss.qualcomm.com>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
Link: https://patch.msgid.link/20260227144534.278568-1-ravi.hothi@oss.qualcomm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-09 00:16:51 +00:00
Linus Torvalds
1f318b96cc Linux 7.0-rc3 2026-03-08 16:56:54 -07:00
Shenghao Yang
7149be786d drm/gud: fix NULL crtc dereference on display disable
gud_plane_atomic_update() currently handles both crtc state and
framebuffer updates - the complexity has led to a few accidental
NULL pointer dereferences.

Commit dc2d5ddb19 ("drm/gud: fix NULL fb and crtc dereferences
on USB disconnect") [1] fixed an earlier dereference but planes
can also be disabled in non-hotplug paths (e.g. display disables
via the desktop environment). The drm_dev_enter() call would not
cause an early return in those and subsequently oops on
dereferencing crtc:

BUG: kernel NULL pointer dereference, address: 00000000000005c8
CPU: 6 UID: 1000 PID: 3473 Comm: kwin_wayland Not tainted 6.18.2-200.vanilla.gud.fc42.x86_64 #1 PREEMPT(lazy)
RIP: 0010:gud_plane_atomic_update+0x148/0x470 [gud]
 <TASK>
 drm_atomic_helper_commit_planes+0x28e/0x310
 drm_atomic_helper_commit_tail+0x2a/0x70
 commit_tail+0xf1/0x150
 drm_atomic_helper_commit+0x13c/0x180
 drm_atomic_commit+0xb1/0xe0
info ? __pfx___drm_printfn_info+0x10/0x10
 drm_mode_atomic_ioctl+0x70f/0x7c0
 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10
 drm_ioctl_kernel+0xae/0x100
 drm_ioctl+0x2a8/0x550
 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10
 __x64_sys_ioctl+0x97/0xe0
 do_syscall_64+0x7e/0x7f0
 ? __ct_user_enter+0x56/0xd0
 ? do_syscall_64+0x158/0x7f0
 ? __ct_user_enter+0x56/0xd0
 ? do_syscall_64+0x158/0x7f0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Split out crtc handling from gud_plane_atomic_update() into
atomic_enable() and atomic_disable() functions to delegate
crtc state transitioning work to the DRM helpers.

To preserve the gud state commit sequence [2], switch to
the runtime PM version of drm_atomic_helper_commit_tail() which
ensures that crtcs are enabled (hence sending the
GUD_REQ_SET_CONTROLLER_ENABLE and GUD_REQ_SET_DISPLAY_ENABLE
requests) before a framebuffer update is sent.

[1] https://lore.kernel.org/all/20251231055039.44266-1-me@shenghaoyang.info/
[2] https://github.com/notro/gud/wiki/GUD-Protocol#display-state

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/r/202601142159.0v8ilfVs-lkp@intel.com/
Fixes: 73cfd166e0 ("drm/gud: Replace simple display pipe with DRM atomic helpers")
Cc: <stable@vger.kernel.org> # 6.19.x
Cc: <stable@vger.kernel.org> # 6.18.x
Signed-off-by: Shenghao Yang <me@shenghaoyang.info>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Ruben Wauters <rubenru09@aol.com>
Signed-off-by: Ruben Wauters <rubenru09@aol.com>
Link: https://patch.msgid.link/20260222054551.80864-1-me@shenghaoyang.info
2026-03-08 20:20:30 +00:00
Linus Torvalds
fc9f248d8c Merge tag 'efi-fixes-for-v7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi
Pull EFI fix from Ard Biesheuvel:
 "Fix for the x86 EFI workaround keeping boot services code and data
  regions reserved until after SetVirtualAddressMap() completes:
  deferred struct page initialization may result in some of this memory
  being lost permanently"

* tag 'efi-fixes-for-v7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi:
  x86/efi: defer freeing of boot services memory
2026-03-08 12:13:09 -07:00
Yonatan Nachum
ef3b06742c RDMA/efa: Fix use of completion ctx after free
On admin queue completion handling, if the admin command completed with
error we print data from the completion context. The issue is that we
already freed the completion context in polling/interrupts handler which
means we print data from context in an unknown state (it might be
already used again).
Change the admin submission flow so alloc/dealloc of the context will be
symmetric and dealloc will be called after any potential use of the
context.

Fixes: 68fb9f3e31 ("RDMA/efa: Remove redundant NULL pointer check of CQE")
Reviewed-by: Daniel Kranzdorf <dkkranzd@amazon.com>
Reviewed-by: Michael Margolin <mrgolin@amazon.com>
Signed-off-by: Yonatan Nachum <ynachum@amazon.com>
Link: https://patch.msgid.link/20260308165350.18219-1-ynachum@amazon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-08 14:46:42 -04:00
Linus Torvalds
014441d1e4 Merge tag 'i2c-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
Pull i2c fix from Wolfram Sang:
 "A revert for the i801 driver restoring old locking behaviour"

* tag 'i2c-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux:
  i2c: i801: Revert "i2c: i801: replace acpi_lock with I2C bus lock"
2026-03-08 10:17:05 -07:00
Geert Uytterhoeven
4e701b47c3 firmware: arm_scmi: Spelling s/mulit/multi/, s/currenly/currently/
Fix misspellings of "multi" and "currently".

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Message-Id: <6735401861e0c2f3e5e680533cd6f71c4d6fd5eb.1771581270.git.geert+renesas@glider.be>
Signed-off-by: Sudeep Holla <sudeep.holla@kernel.org>
2026-03-08 13:00:15 +00:00
Linus Torvalds
c23719abc3 Merge tag 'x86-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:

 - Fix SEV guest boot failures in certain circumstances, due to
   very early code relying on a BSS-zeroed variable that isn't
   actually zeroed yet an may contain non-zero bootup values

   Move the variable into the .data section go gain even earlier
   zeroing

 - Expose & allow the IBPB-on-Entry feature on SNP guests, which
   was not properly exposed to guests due to initial implementational
   caution

 - Fix O= build failure when CONFIG_EFI_SBAT_FILE is using relative
   file paths

 - Fix the various SNC (Sub-NUMA Clustering) topology enumeration
   bugs/artifacts (sched-domain build errors mostly).

   SNC enumeration data got more complicated with Granite Rapids X
   (GNR) and Clearwater Forest X (CWF), which exposed these bugs
   and made their effects more serious

 - Also use the now sane(r) SNC code to fix resctrl SNC detection bugs

 - Work around a historic libgcc unwinder bug in the vdso32 sigreturn
   code (again), which regressed during an overly aggressive recent
   cleanup of DWARF annotations

* tag 'x86-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry/vdso32: Work around libgcc unwinder bug
  x86/resctrl: Fix SNC detection
  x86/topo: Fix SNC topology mess
  x86/topo: Replace x86_has_numa_in_package
  x86/topo: Add topology_num_nodes_per_package()
  x86/numa: Store extra copy of numa_nodes_parsed
  x86/boot: Handle relative CONFIG_EFI_SBAT_FILE file paths
  x86/sev: Allow IBPB-on-Entry feature for SNP guests
  x86/boot/sev: Move SEV decompressor variables into the .data section
2026-03-07 17:12:06 -08:00
Linus Torvalds
6ff1020c2f Merge tag 'timers-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fix from Ingo Molnar:
 "Make clock_adjtime() syscall timex validation slightly more permissive
  for auxiliary clocks, to not reject syscalls based on the status field
  that do not try to modify the status field.

  This makes the ABI behavior in clock_adjtime() consistent with
  CLOCK_REALTIME"

* tag 'timers-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  timekeeping: Fix timex status validation for auxiliary clocks
2026-03-07 17:09:15 -08:00
Linus Torvalds
b1b9a9d0b5 Merge tag 'sched-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fix from Ingo Molnar:
 "Fix a DL scheduler bug that may corrupt internal metrics during PI and
  setscheduler() syscalls, resulting in kernel warnings and misbehavior.

  Found during stress-testing"

* tag 'sched-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/deadline: Fix missing ENQUEUE_REPLENISH during PI de-boosting
2026-03-07 17:07:13 -08:00
David Lechner
46d8a07b4a drm/sitronix/st7586: fix bad pixel data due to byte swap
Correctly set dbi->write_memory_bpw for the ST7586 driver. This driver
is for a monochrome display that has an unusual data format, so the
default value set in mipi_dbi_spi_init() is not correct simply because
this controller is non-standard.

Previously, we were using dbi->swap_bytes to make the same sort of
workaround, but it was removed in the same commit that added
dbi->write_memory_bpw, so we need to use the latter now to have the
correct behavior.

This fixes every 3 columns of pixels being swapped on the display. There
are 3 pixels per byte, so the byte swap caused this effect.

Fixes: df3fb27a74 ("drm/mipi-dbi: Make bits per word configurable for pixel transfers")
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20260228-drm-mipi-dbi-fix-st7586-byte-swap-v1-1-e78f6c24cd28@baylibre.com
2026-03-07 17:14:09 -06:00
Eric Dumazet
1954c4f012 eventpoll: Convert epoll_put_uevent() to scoped user access
Saves two function calls, and one stac/clac pair.

stac/clac is rather expensive on older cpus like Zen 2.

A synthetic network stress test gives a ~1.5% increase of pps
on AMD Zen 2.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kuniyuki Iwashima <kuniyu@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-03-07 15:03:14 -08:00
Gary Guo
4da879a0d3 rust: dma: use pointer projection infra for dma_{read,write} macro
Current `dma_read!`, `dma_write!` macros also use a custom
`addr_of!()`-based implementation for projecting pointers, which has
soundness issue as it relies on absence of `Deref` implementation on types.
It also has a soundness issue where it does not protect against unaligned
fields (when `#[repr(packed)]` is used) so it can generate misaligned
accesses.

This commit migrates them to use the general pointer projection
infrastructure, which handles these cases correctly.

As part of migration, the macro is updated to have an improved surface
syntax. The current macro have

    dma_read!(a.b.c[d].e.f)

to mean `a.b.c` is a DMA coherent allocation and it should project into it
with `[d].e.f` and do a read, which is confusing as it makes the indexing
operator integral to the macro (so it will break if you have an array of
`CoherentAllocation`, for example).

This also is problematic as we would like to generalize
`CoherentAllocation` from just slices to arbitrary types.

Make the macro expects `dma_read!(path.to.dma, .path.inside.dma)` as the
canonical syntax. The index operator is no longer special and is just one
type of projection (in additional to field projection). Similarly, make
`dma_write!(path.to.dma, .path.inside.dma, value)` become the canonical
syntax for writing.

Another issue of the current macro is that it is always fallible. This
makes sense with existing design of `CoherentAllocation`, but once we
support fixed size arrays with `CoherentAllocation`, it is desirable to
have the ability to perform infallible indexing as well, e.g. doing a `[0]`
index of `[Foo; 2]` is okay and can be checked at build-time, so forcing
falliblity is non-ideal. To capture this, the macro is changed to use
`[idx]` as infallible projection and `[idx]?` as fallible index projection
(those syntax are part of the general projection infra). A benefit of this
is that while individual indexing operation may fail, the overall
read/write operation is not fallible.

Fixes: ad2907b4e3 ("rust: add dma coherent allocator abstraction")
Reviewed-by: Benno Lossin <lossin@kernel.org>
Signed-off-by: Gary Guo <gary@garyguo.net>
Link: https://patch.msgid.link/20260302164239.284084-4-gary@kernel.org
[ Capitalize safety comments; slightly improve wording in doc-comments.
  - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-07 23:06:20 +01:00
Gary Guo
f41941aab3 rust: ptr: add projection infrastructure
Add a generic infrastructure for performing field and index projections on
raw pointers. This will form the basis of performing I/O projections.

Pointers manipulations are intentionally using the safe wrapping variants
instead of the unsafe variants, as the latter requires pointers to be
inside an allocation which is not necessarily true for I/O pointers.

This projection macro protects against rogue `Deref` implementation, which
can causes the projected pointer to be outside the bounds of starting
pointer. This is extremely unlikely and Rust has a lint to catch this, but
is unsoundness regardless. The protection works by inducing type inference
ambiguity when `Deref` is implemented.

This projection macro also stops projecting into unaligned fields (i.e.
fields of `#[repr(packed)]` structs), as misaligned pointers require
special handling. This is implemented by attempting to create reference to
projected field inside a `if false` block. Despite being unreachable, Rust
still checks that they're not unaligned fields.

The projection macro supports both fallible and infallible index
projections. These are described in detail inside the documentation.

Signed-off-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Benno Lossin <lossin@kernel.org>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://patch.msgid.link/20260302164239.284084-3-gary@kernel.org
[ * Add intro-doc links where possible,
  * Fix typos and slightly improve wording, e.g. "as documentation
    describes" -> "as the documentation of [`Self::proj`] describes",
  * Add an empty line between regular and safety comments, before
    examples, and between logically independent comments,
  * Capitalize various safety comments.

    - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-07 23:06:17 +01:00
Linus Torvalds
3b5d535c63 Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI fixes from James Bottomley:
 "Two core changes and the rest in drivers, one core change to quirk the
  behaviour of the Iomega Zip drive and one to fix a hang caused by tag
  reallocation problems, which has mostly been seen by the iscsi client.

  Note the latter fixes the problem but still has a slight sysfs memory
  leak, so will be amended in the next pull request (once we've run the
  fix for the fix through our testing)"

* tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi:
  scsi: target: Fix recursive locking in __configfs_open_file()
  scsi: devinfo: Add BLIST_SKIP_IO_HINTS for Iomega ZIP
  scsi: mpi3mr: Clear reset history on ready and recheck state after timeout
  scsi: core: Fix refcount leak for tagset_refcnt
2026-03-07 14:04:50 -08:00
Gary Guo
08da98f18f rust: ptr: add KnownSize trait to support DST size info extraction
Add a `KnownSize` trait which is used obtain a size from a raw pointer's
metadata. This makes it possible to obtain size information on a raw slice
pointer. This is similar to Rust `core::mem::size_of_val_raw` which is not
yet stable.

Signed-off-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Benno Lossin <lossin@kernel.org>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://patch.msgid.link/20260302164239.284084-2-gary@kernel.org
[ Fix wording in doc-comment. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-07 23:02:45 +01:00
Marc Zyngier
6da5e537f5 KVM: arm64: vgic: Pick EOIcount deactivations from AP-list tail
Valentine reports that their guests fail to boot correctly, losing
interrupts, and indicates that the wrong interrupt gets deactivated.

What happens here is that if the maintenance interrupt is slow enough
to kick us out of the guest, extra interrupts can be activated from
the LRs. We then exit and proceed to handle EOIcount deactivations,
picking active interrupts from the AP list. But we start from the
top of the list, potentially deactivating interrupts that were in
the LRs, while EOIcount only denotes deactivation of interrupts that
are not present in an LR.

Solve this by tracking the last interrupt that made it in the LRs,
and start the EOIcount deactivation walk *after* that interrupt.
Since this only makes sense while the vcpu is loaded, stash this
in the per-CPU host state.

Huge thanks to Valentine for doing all the detective work and
providing an initial patch.

Fixes: 3cfd59f81e ("KVM: arm64: GICv3: Handle LR overflow when EOImode==0")
Fixes: 281c6c06e2 ("KVM: arm64: GICv2: Handle LR overflow when EOImode==0")
Reported-by: Valentine Burley <valentine.burley@collabora.com>
Tested-by: Valentine Burley <valentine.burley@collabora.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20260307115955.369455-1-valentine.burley@collabora.com
Link: https://patch.msgid.link/20260307191151.3781182-1-maz@kernel.org
Cc: stable@vger.kernel.org
2026-03-07 21:45:58 +00:00
Harald Freudenberger
598bbefa80 s390/zcrypt: Enable AUTOSEL_DOM for CCA serialnr sysfs attribute
The serialnr sysfs attribute for CCA cards when queried always
used the default domain for sending the request down to the card.
If for any reason exactly this default domain is disabled then
the attribute code fails to retrieve the CCA info and the sysfs
entry shows an empty string. Works as designed but the serial
number is a card attribute and thus it does not matter which
domain is used for the query. So if there are other domains on
this card available, these could be used.

So extend the code to use AUTOSEL_DOM for the domain value to
address any online domain within the card for querying the cca
info and thus show the serialnr as long as there is one domain
usable regardless of the default domain setting.

Fixes: 8f291ebf32 ("s390/zcrypt: enable card/domain autoselect on ep11 cprbs")
Suggested-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-07 22:41:10 +01:00
Heiko Carstens
75aa996ea6 s390: Revert "s390/irq/idle: Remove psw bits early"
This reverts commit d8b5cf9c63.

Mikhail Zaslonko reported that linux-next doesn't boot anymore [2]. Reason
for this is recent change [2] was supposed to slightly optimize the irq
entry/exit path by removing some psw bits early in case of an idle exit.

This however is incorrect since irqentry_exit() requires the correct old
psw state at irq entry. Otherwise the embedded regs_irqs_disabled() will
not provide the correct result.

With linux-next and HRTIMER_REARM_DEFERRED this leads to the observed boot
problems, however the commit is broken in any case.

Revert the commit which introduced this.

Thanks to Peter Zijlstra for pointing out that this is a bug in the s390
entry code.

Fixes: d8b5cf9c63 ("s390/irq/idle: Remove psw bits early") [1]
Reported-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Closes: https://lore.kernel.org/r/af549a19-db99-4b16-8511-bf315177a13e@linux.ibm.com/ [2]
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Acked-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Tested-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Link: https://lore.kernel.org/r/20260306111919.362559-1-hca@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-07 22:41:10 +01:00
Linus Torvalds
fb07430e6f Merge tag 'fbdev-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev
Pull fbdev fix from Helge Deller:
 "Silence build error in au1100fb driver found by kernel test robot"

* tag 'fbdev-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev:
  fbdev: au1100fb: Fix build on MIPS64
2026-03-07 13:21:43 -08:00
Linus Torvalds
6deccafcb4 Merge tag 'parisc-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
Pull parisc fixes from Helge Deller:
 "While testing Sasha Levin's 'kallsyms: embed source file:line info in
  kernel stack traces' patch series, which increases the typical kernel
  image size, I found some issues with the parisc initial kernel mapping
  which may prevent the kernel to boot.

  The three small patches here fix this"

* tag 'parisc-for-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: Fix initial page table creation for boot
  parisc: Check kernel mapping earlier at bootup
  parisc: Increase initial mapping to 64 MB with KALLSYMS
2026-03-07 12:38:16 -08:00
Linus Torvalds
8b7f4cd3ac Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:

 - Fix u32/s32 bounds when ranges cross min/max boundary (Eduard
   Zingerman)

 - Fix precision backtracking with linked registers (Eduard Zingerman)

 - Fix linker flags detection for resolve_btfids (Ihor Solodrai)

 - Fix race in update_ftrace_direct_add/del (Jiri Olsa)

 - Fix UAF in bpf_trampoline_link_cgroup_shim (Lang Xu)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  resolve_btfids: Fix linker flags detection
  selftests/bpf: add reproducer for spurious precision propagation through calls
  bpf: collect only live registers in linked regs
  Revert "selftests/bpf: Update reg_bound range refinement logic"
  selftests/bpf: test refining u32/s32 bounds when ranges cross min/max boundary
  bpf: Fix u32/s32 bounds when ranges cross min/max boundary
  bpf: Fix a UAF issue in bpf_trampoline_link_cgroup_shim
  ftrace: Add missing ftrace_lock to update_ftrace_direct_add/del
2026-03-07 12:20:37 -08:00
Linus Torvalds
03dcad79ee Merge tag 'rcu-fixes.v7.0-20260307a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux
Pull RCU selftest fixes from Boqun Feng:
 "Fix a regression in RCU torture test pre-defined scenarios caused by
  commit 7dadeaa6e8 ("sched: Further restrict the preemption modes")
  which limits PREEMPT_NONE to architectures that do not support
  preemption at all and PREEMPT_VOLUNTARY to those architectures that do
  not yet have PREEMPT_LAZY support.

  Since major architectures (e.g. x86 and arm64) no longer support
  CONFIG_PREEMPT_NONE and CONFIG_PREEMPT_VOLUNTARY, using them in
  rcutorture, rcuscale, refscale, and scftorture pre-defined scenarios
  causes config checking errors.

  Switch these kconfigs to PREEMPT_LAZY"

* tag 'rcu-fixes.v7.0-20260307a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux:
  scftorture: Update due to x86 not supporting none/voluntary preemption
  refscale: Update due to x86 not supporting none/voluntary preemption
  rcuscale: Update due to x86 not supporting none/voluntary preemption
  rcutorture: Update due to x86 not supporting none/voluntary preemption
2026-03-07 11:56:55 -08:00
Cristian Marussi
555317d610 firmware: arm_scmi: Fix NULL dereference on notify error path
Since commit b5daf93b80 ("firmware: arm_scmi: Avoid notifier
registration for unsupported events") the call chains leading to the helper
__scmi_event_handler_get_ops expect an ERR_PTR to be returned on failure to
get an handler for the requested event key, while the current helper can
still return a NULL when no handler could be found or created.

Fix by forcing an ERR_PTR return value when the handler reference is NULL.

Fixes: b5daf93b80 ("firmware: arm_scmi: Avoid notifier registration for unsupported events")
Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
Message-Id: <20260305131011.541444-1-cristian.marussi@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@kernel.org>
2026-03-07 19:49:52 +00:00
Felix Gu
879c001afb firmware: arm_scpi: Fix device_node reference leak in probe path
A device_node reference obtained from the device tree is not released
on all error paths in the arm_scpi probe path. Specifically, a node
returned by of_parse_phandle() could be leaked when the probe failed
after the node was acquired. The probe function returns early and
the shmem reference is not released.

Use __free(device_node) scope-based cleanup to automatically release
the reference when the variable goes out of scope.

Fixes: ed7ecb8839 ("firmware: arm_scpi: Add compatibility checks for shmem node")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Message-Id: <20260121-arm_scpi_2-v2-1-702d7fa84acb@gmail.com>
Signed-off-by: Sudeep Holla <sudeep.holla@kernel.org>
2026-03-07 19:49:52 +00:00
Linus Torvalds
aed0af05a8 Merge tag 'trace-v7.0-rc2-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing fixes from Steven Rostedt:

 - Fix possible NULL pointer dereference in trace_data_alloc()

   On the trace_data_alloc() error path, it can call trigger_data_free()
   with a NULL pointer. This used to be a kfree() but was changed to
   trigger_data_free() to clean up any partial initialization. The issue
   is that trigger_data_free() does not expect a NULL pointer. Have
   trigger_data_free() return safely on NULL pointer.

 - Fix multiple events on the command line and bootconfig

   If multiple events are enabled on the command line separately and not
   grouped, only the last event gets enabled. That is:

      trace_event=sched_switch trace_event=sched_waking

   will only enable sched_waking whereas:

      trace_event=sched_switch,sched_waking

   will enable both.

   The bootconfig makes it even worse as the second way is the more
   common method.

   The issue is that a temporary buffer is used to store the events to
   enable later in boot. Each time the cmdline callback is called, it
   overwrites what was previously there.

   Have the callback append the next value (delimited by a comma) if the
   temporary buffer already has content.

 - Fix command line trace_buffer_size if >= 2G

   The logic to allocate the trace buffer uses "int" for the size
   parameter in the command line code causing overflow issues if more
   that 2G is specified.

* tag 'trace-v7.0-rc2-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  tracing: Fix trace_buf_size= cmdline parameter with sizes >= 2G
  tracing: Fix enabling multiple events on the kernel command line and bootconfig
  tracing: Add NULL pointer check to trigger_data_free()
2026-03-07 09:50:54 -08:00
Ihor Solodrai
b0dcdcb9ae resolve_btfids: Fix linker flags detection
The "|| echo -lzstd" default makes zstd an unconditional link
dependency of resolve_btfids. On systems where libzstd-dev is not
installed and pkg-config fails, the linker fails:

  ld: cannot find -lzstd: No such file or directory

libzstd is a transitive dependency of libelf, so the -lzstd flag is
strictly necessary only for static builds [1].

Remove ZSTD_LIBS variable, and instead set LIBELF_LIBS depending on
whether the build is static or not. Use $(HOSTPKG_CONFIG) as primary
source of the flags list.

Also add a default value for HOSTPKG_CONFIG in case it's not built via
the toplevel Makefile. Pass it from selftests/bpf too.

[1] https://lore.kernel.org/bpf/4ff82800-2daa-4b9f-95a9-6f512859ee70@linux.dev/

Reported-by: BPF CI Bot (Claude Opus 4.6) <bot+bpf-ci@kernel.org>
Reported-by: Vitaly Chikunov <vt@altlinux.org>
Closes: https://lore.kernel.org/bpf/aaWqMcK-2AQw5dx8@altlinux.org/
Fixes: 4021848a90 ("selftests/bpf: Pass through build flags to bpftool and resolve_btfids")
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Reviewed-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/20260305014730.3123382-1-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-07 08:51:51 -08:00
Linus Torvalds
7b6e48df88 Merge tag 'hwmon-for-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
Pull hwmon fixes from Guenter Roeck:

 - Fix initialization commands for AHT20

 - Correct a malformed email address (emc1403)

 - Check the it87_lock() return value

 - Fix inverted polarity (max6639)

 - Fix overflows, underflows, sign extension, and other problems in
   macsmc

 - Fix stack overflow in debugfs read (pmbus/q54sj108a2)

 - Drop support for SMARC-sAM67 (discontinued and never released to
   market)

* tag 'hwmon-for-v7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging:
  hwmon: (pmbus/q54sj108a2) fix stack overflow in debugfs read
  hwmon: (max6639) fix inverted polarity
  dt-bindings: hwmon: sl28cpld: Drop sa67mcu compatible
  hwmon: (it87) Check the it87_lock() return value
  Revert "hwmon: add SMARC-sAM67 support"
  hwmon: (aht10) Fix initialization commands for AHT20
  hwmon: (emc1403) correct a malformed email address
  hwmon: (macsmc) Fix overflows, underflows, and sign extension
  hwmon: (macsmc) Fix regressions in Apple Silicon SMC hwmon driver
2026-03-07 08:39:59 -08:00
Linus Torvalds
e33aafac04 Merge tag 'driver-core-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core
Pull driver core fix from Danilo Krummrich:

 - Revert "driver core: enforce device_lock for driver_match_device()":

   When a device is already present in the system and a driver is
   registered on the same bus, we iterate over all devices registered on
   this bus to see if one of them matches. If we come across an already
   bound one where the corresponding driver crashed while holding the
   device lock (e.g. in probe()) we can't make any progress anymore.

   Thus, revert and clarify that an implementer of struct bus_type must
   not expect match() to be called with the device lock held.

* tag 'driver-core-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core:
  Revert "driver core: enforce device_lock for driver_match_device()"
2026-03-07 08:16:48 -08:00
Xingui Yang
8ddc0c2691 scsi: hisi_sas: Fix NULL pointer exception during user_scan()
user_scan() invokes updated sas_user_scan() for channel 0, and if
successful, iteratively scans remaining channels (1 to shost->max_channel)
via scsi_scan_host_selected() in commit 37c4e72b06 ("scsi: Fix
sas_user_scan() to handle wildcard and multi-channel scans"). However,
hisi_sas supports only one channel, and the current value of max_channel is
1. sas_user_scan() for channel 1 will trigger the following NULL pointer
exception:

[  441.554662] Unable to handle kernel NULL pointer dereference at virtual address 00000000000008b0
[  441.554699] Mem abort info:
[  441.554710]   ESR = 0x0000000096000004
[  441.554718]   EC = 0x25: DABT (current EL), IL = 32 bits
[  441.554723]   SET = 0, FnV = 0
[  441.554726]   EA = 0, S1PTW = 0
[  441.554730]   FSC = 0x04: level 0 translation fault
[  441.554735] Data abort info:
[  441.554737]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
[  441.554742]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[  441.554747]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[  441.554752] user pgtable: 4k pages, 48-bit VAs, pgdp=00000828377a6000
[  441.554757] [00000000000008b0] pgd=0000000000000000, p4d=0000000000000000
[  441.554769] Internal error: Oops: 0000000096000004 [#1]  SMP
[  441.629589] Modules linked in: arm_spe_pmu arm_smmuv3_pmu tpm_tis_spi hisi_uncore_sllc_pmu hisi_uncore_pa_pmu hisi_uncore_l3c_pmu hisi_uncore_hha_pmu hisi_uncore_ddrc_pmu hisi_uncore_cpa_pmu hns3_pmu hisi_ptt hisi_pcie_pmu tpm_tis_core spidev spi_hisi_sfc_v3xx hisi_uncore_pmu spi_dw_mmio fuse hclge hclge_common hisi_sec2 hisi_hpre hisi_zip hisi_qm hns3 hisi_sas_v3_hw sm3_ce sbsa_gwdt hnae3 hisi_sas_main uacce hisi_dma i2c_hisi dm_mirror dm_region_hash dm_log dm_mod
[  441.670819] CPU: 46 UID: 0 PID: 6994 Comm: bash Kdump: loaded Not tainted 7.0.0-rc2+ #84 PREEMPT
[  441.691327] pstate: 81400009 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[  441.698277] pc : sas_find_dev_by_rphy+0x44/0x118
[  441.702896] lr : sas_find_dev_by_rphy+0x3c/0x118
[  441.707502] sp : ffff80009abbba40
[  441.710805] x29: ffff80009abbba40 x28: ffff082819a40008 x27: ffff082810c37c08
[  441.717930] x26: ffff082810c37c28 x25: ffff082819a40290 x24: ffff082810c37c00
[  441.725054] x23: 0000000000000000 x22: 0000000000000001 x21: ffff082819a40000
[  441.732179] x20: ffff082819a40290 x19: 0000000000000000 x18: 0000000000000020
[  441.739304] x17: 0000000000000000 x16: ffffb5dad6bda690 x15: 00000000ffffffff
[  441.746428] x14: ffff082814c3b26c x13: 00000000ffffffff x12: ffff082814c3b26a
[  441.753553] x11: 00000000000000c0 x10: 000000000000003a x9 : ffffb5dad5ea94f4
[  441.760678] x8 : 000000000000003a x7 : ffff80009abbbab0 x6 : 0000000000000030
[  441.767802] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
[  441.774926] x2 : ffff08280f35a300 x1 : ffffb5dad7127180 x0 : 0000000000000000
[  441.782053] Call trace:
[  441.784488]  sas_find_dev_by_rphy+0x44/0x118 (P)
[  441.789095]  sas_target_alloc+0x24/0xb0
[  441.792920]  scsi_alloc_target+0x290/0x330
[  441.797010]  __scsi_scan_target+0x88/0x258
[  441.801096]  scsi_scan_channel+0x74/0xb8
[  441.805008]  scsi_scan_host_selected+0x170/0x188
[  441.809615]  sas_user_scan+0xfc/0x148
[  441.813267]  store_scan+0x10c/0x180
[  441.816743]  dev_attr_store+0x20/0x40
[  441.820398]  sysfs_kf_write+0x84/0xa8
[  441.824054]  kernfs_fop_write_iter+0x130/0x1c8
[  441.828487]  vfs_write+0x2c0/0x370
[  441.831880]  ksys_write+0x74/0x118
[  441.835271]  __arm64_sys_write+0x24/0x38
[  441.839182]  invoke_syscall+0x50/0x120
[  441.842919]  el0_svc_common.constprop.0+0xc8/0xf0
[  441.847611]  do_el0_svc+0x24/0x38
[  441.850913]  el0_svc+0x38/0x158
[  441.854043]  el0t_64_sync_handler+0xa0/0xe8
[  441.858214]  el0t_64_sync+0x1ac/0x1b0
[  441.861865] Code: aa1303e0 97ff70a8 34ffff80 d10a4273 (f9445a75)
[  441.867946] ---[ end trace 0000000000000000 ]---

Therefore, set max_channel to 0.

Fixes: e21fe3a526 ("scsi: hisi_sas: add initialisation for v3 pci-based controller")
Signed-off-by: Xingui Yang <yangxingui@huawei.com>
Signed-off-by: Yihang Li <liyihang9@huawei.com>
Link: https://patch.msgid.link/20260305064039.4096775-1-liyihang9@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-07 11:16:34 -05:00
Vladimir Riabchun
c0b7da13a0 scsi: qla2xxx: Completely fix fcport double free
In qla24xx_els_dcmd_iocb() sp->free is set to qla2x00_els_dcmd_sp_free().
When an error happens, this function is called by qla2x00_sp_release(),
when kref_put() releases the first and the last reference.

qla2x00_els_dcmd_sp_free() frees fcport by calling qla2x00_free_fcport().
Doing it one more time after kref_put() is a bad idea.

Fixes: 82f522ae0d ("scsi: qla2xxx: Fix double free of fcport")
Fixes: 4895009c4b ("scsi: qla2xxx: Prevent command send on chip reset")
Signed-off-by: Vladimir Riabchun <ferr.lambarginio@gmail.com>
Signed-off-by: Farhat Abbas <fabbas@cloudlinux.com>
Link: https://patch.msgid.link/aYsDln9NFQQsPDgg@vova-pc
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-07 11:12:46 -05:00
Wang Shuaiwei
b0bd84c392 scsi: ufs: core: Fix SError in ufshcd_rtc_work() during UFS suspend
In __ufshcd_wl_suspend(), cancel_delayed_work_sync() is called to cancel
the UFS RTC work, but it is placed after ufshcd_vops_suspend(hba, pm_op,
POST_CHANGE). This creates a race condition where ufshcd_rtc_work() can
still be running while ufshcd_vops_suspend() is executing. When
UFSHCD_CAP_CLK_GATING is not supported, the condition
!hba->clk_gating.active_reqs is always true, causing ufshcd_update_rtc()
to be executed. Since ufshcd_vops_suspend() typically performs clock
gating operations, executing ufshcd_update_rtc() at that moment triggers
an SError. The kernel panic trace is as follows:

Kernel panic - not syncing: Asynchronous SError Interrupt
Call trace:
 dump_backtrace+0xec/0x128
 show_stack+0x18/0x28
 dump_stack_lvl+0x40/0xa0
 dump_stack+0x18/0x24
 panic+0x148/0x374
 nmi_panic+0x3c/0x8c
 arm64_serror_panic+0x64/0x8c
 do_serror+0xc4/0xc8
 el1h_64_error_handler+0x34/0x4c
 el1h_64_error+0x68/0x6c
 el1_interrupt+0x20/0x58
 el1h_64_irq_handler+0x18/0x24
 el1h_64_irq+0x68/0x6c
 ktime_get+0xc4/0x12c
 ufshcd_mcq_sq_stop+0x4c/0xec
 ufshcd_mcq_sq_cleanup+0x64/0x1dc
 ufshcd_clear_cmd+0x38/0x134
 ufshcd_issue_dev_cmd+0x298/0x4d0
 ufshcd_exec_dev_cmd+0x1a4/0x1c4
 ufshcd_query_attr+0xbc/0x19c
 ufshcd_rtc_work+0x10c/0x1c8
 process_scheduled_works+0x1c4/0x45c
 worker_thread+0x32c/0x3e8
 kthread+0x120/0x1d8
 ret_from_fork+0x10/0x20

Fix this by moving cancel_delayed_work_sync() before the call to
ufshcd_vops_suspend(hba, pm_op, PRE_CHANGE), ensuring the UFS RTC work is
fully completed or cancelled at that point.

Cc: Bean Huo <beanhuo@iokpp.de>
Fixes: 6bf999e0eb ("scsi: ufs: core: Add UFS RTC support")
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Wang Shuaiwei <wangshuaiwei1@xiaomi.com>
Link: https://patch.msgid.link/20260307035128.3419687-1-wangshuaiwei1@xiaomi.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-07 11:08:39 -05:00
Junxiao Bi
4ce7ada40c scsi: core: Fix error handling for scsi_alloc_sdev()
After scsi_sysfs_device_initialize() was called, error paths must call
__scsi_remove_device().

Fixes: 1ac22c8eae ("scsi: core: Fix refcount leak for tagset_refcnt")
Cc: stable@vger.kernel.org
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260304164603.51528-1-junxiao.bi@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-03-07 10:55:31 -05:00
Linus Torvalds
0f912c8917 Merge tag 'for-linus-7.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen fixes from Juergen Gross:

 - a cleanup of arch/x86/kernel/head_64.S removing the pre-built page
   tables for Xen guests

 - a small comment update

 - another cleanup for Xen PVH guests mode

 - fix an issue with Xen PV-devices backed by driver domains

* tag 'for-linus-7.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/xenbus: better handle backend crash
  xenbus: add xenbus_device parameter to xenbus_read_driver_state()
  x86/PVH: Use boot params to pass RSDP address in start_info page
  x86/xen: update outdated comment
  xen/acpi-processor: fix _CST detection using undersized evaluation buffer
  x86/xen: Build identity mapping page tables dynamically for XENPV
2026-03-07 07:44:32 -08:00
Tejun Heo
57ccf5ccdc sched_ext: Fix enqueue_task_scx() truncation of upper enqueue flags
enqueue_task_scx() takes int enq_flags from the sched_class interface.
SCX enqueue flags starting at bit 32 (SCX_ENQ_PREEMPT and above) are
silently truncated when passed through activate_task(). extra_enq_flags
was added as a workaround - storing high bits in rq->scx.extra_enq_flags
and OR-ing them back in enqueue_task_scx(). However, the OR target is
still the int parameter, so the high bits are lost anyway.

The current impact is limited as the only affected flag is SCX_ENQ_PREEMPT
which is informational to the BPF scheduler - its loss means the scheduler
doesn't know about preemption but doesn't cause incorrect behavior.

Fix by renaming the int parameter to core_enq_flags and introducing a
u64 enq_flags local that merges both sources. All downstream functions
already take u64 enq_flags.

Fixes: f0e1a0643a ("sched_ext: Implement BPF extensible scheduler class")
Cc: stable@vger.kernel.org # v6.12+
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-07 04:53:32 -10:00
Nam Cao
35e4f2a17e powerpc/pseries: Correct MSI allocation tracking
The per-device MSI allocation calculation in pseries_irq_domain_alloc()
is clearly wrong. It can still happen to work when nr_irqs is 1.

Correct it.

Fixes: c0215e2d72 ("powerpc/pseries: Fix MSI-X allocation failure when quota is exceeded")
Cc: stable@vger.kernel.org
Signed-off-by: Nam Cao <namcao@linutronix.de>
Reviewed-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
[maddy: Fixed Nilay's reviewed-by tag]
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260302003948.1452016-1-namcao@linutronix.de
2026-03-07 16:02:28 +05:30
J. Neuschäfer
6373a2b5c8 powerpc: dts: mpc83xx: Add unit addresses to /memory
This fixes dtschema warnings such as the following:

  arch/powerpc/boot/dts/mpc8315erdb.dtb: /: memory: False schema
  does not allow {'device_type': ['memory'], 'reg': [[0, 134217728]]}

Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303-mpc83xx-cleanup-v2-5-187d3a13effa@posteo.net
2026-03-07 16:02:27 +05:30
J. Neuschäfer
fde54f1a4d powerpc: dts: mpc8315erdb: Add missing #cells properties to SPI bus
These properties are required by the spi-controller binding.

Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303-mpc83xx-cleanup-v2-4-187d3a13effa@posteo.net
2026-03-07 16:02:27 +05:30
J. Neuschäfer
31618e0e21 powerpc: dts: mpc8315erdb: Rename LED nodes to comply with schema
The leds-gpio.yaml schema requires that GPIO LED nodes contain "led",
and preferably start with "led-"

Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303-mpc83xx-cleanup-v2-3-187d3a13effa@posteo.net
2026-03-07 16:02:27 +05:30
J. Neuschäfer
4f43974781 powerpc: dts: mpc8315erdb: Use IRQ_TYPE_* macros
This increases readability, because "0x8" isn't very descriptive.

mpc8315erdb.dtb remains identical after this patch.

Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303-mpc83xx-cleanup-v2-2-187d3a13effa@posteo.net
2026-03-07 16:02:27 +05:30
J. Neuschäfer
38ce944d47 powerpc: dts: mpc8313erdb: Use IRQ_TYPE_* macros
This increases readability, because "0x8" isn't very descriptive.

mpc8313erdb.dtb remains identical after this patch.

Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303-mpc83xx-cleanup-v2-1-187d3a13effa@posteo.net
2026-03-07 16:02:27 +05:30
J. Neuschäfer
691417ffe7 powerpc: 83xx: km83xx: Fix keymile vendor prefix
When kmeter.c was refactored into km83xx.c in 2011, the "keymile" vendor
prefix was changed to upper-case "Keymile". The devicetree at
arch/powerpc/boot/dts/kmeter1.dts never underwent the same change,
suggesting that this was simply a mistake.

Fixes: 93e2b95c81 ("powerpc/83xx: rename and update kmeter1")
Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Reviewed-by: Heiko Schocher <hs@nabladev.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303-keymile-v1-1-463a11e71702@posteo.net
2026-03-07 16:02:27 +05:30
J. Neuschäfer
202d23eecc dt-bindings: powerpc: Add Freescale/NXP MPC83xx SoCs
Add a new binding for MPC83xx platforms, describing the board compatible
strings used in currently existing device trees.

Note that the SoC bus is called immr@... in many existing devicetrees,
but this contradicts the simple-bus binding.

Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303-ppcyaml-soc-v5-1-2982d5a857bc@posteo.net
2026-03-07 16:02:26 +05:30
Hari Bathini
01b6ac7272 powerpc64/bpf: fix kfunc call support
Commit 61688a82e0 ("powerpc/bpf: enable kfunc call") inadvertently
enabled kfunc call support for 32-bit powerpc but that support will
not be possible until ABI mismatch between 32-bit powerpc and eBPF is
handled in 32-bit powerpc JIT code. Till then, advertise support only
for 64-bit powerpc. Also, in powerpc ABI, caller needs to extend the
arguments properly based on signedness. The JIT code is responsible
for handling this explicitly for kfunc calls as verifier can't handle
this for each architecture-specific ABI needs. But this was not taken
care of while kfunc call support was enabled for powerpc. Fix it by
handling this with bpf_jit_find_kfunc_model() and using zero_extend()
& sign_extend() helper functions.

Fixes: 61688a82e0 ("powerpc/bpf: enable kfunc call")
Cc: stable@vger.kernel.org
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303181031.390073-7-hbathini@linux.ibm.com
2026-03-07 16:02:26 +05:30
Hari Bathini
51b8de4b3d powerpc64/bpf: fix handling of BPF stack in exception callback
Exception callback reuses the stack frame of exception boundary. When
exception boundary and exception callback programs have different BPF
stack depth, the current stack unwind in exception callback will fail.
Adjust the stack frame size of exception callback, in its prologue,
if its BPF stack depth is different from that of exception boundary.

Reported-by: bot+bpf-ci@kernel.org
Closes: https://lore.kernel.org/bpf/2a310e86a59eb4c44c3ac9e5647814469d9c955580c9c0f1b3d9ca4a44717a34@mail.kernel.org/
Fixes: 11d45eee9f ("powerpc64/bpf: Additional NVR handling for bpf_throw")
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303181031.390073-6-hbathini@linux.ibm.com
2026-03-07 16:02:26 +05:30
Hari Bathini
2d347d10f8 powerpc64/bpf: remove BPF redzone protection in trampoline stack
Since bpf2bpf tailcall support is enabled for 64-bit powerpc with
kernel commit 2ed2d8f6fb ("powerpc64/bpf: Support tailcalls with
subprogs"), 'tailcalls/tailcall_bpf2bpf_hierarchy_fexit' BPF selftest
is triggering "corrupted stack end detected inside scheduler" with the
config option CONFIG_SCHED_STACK_END_CHECK enabled. While reviewing
the stack layout for BPF trampoline, observed that the dummy frame is
trying to protect the redzone of BPF program. This is because tail
call info and NVRs save area are in redzone at the time of tailcall
as the current BPF program stack frame is teared down before the
tailcall. But saving this redzone in the dummy frame of trampoline
is unnecessary because of the follow reasons:

  1) Firstly, trampoline can be attached to BPF entry/main program
     or subprog. But prologue part of the BPF entry/main program,
     where the trampoline attachpoint is, is skipped during tailcall.
     So, protecting the redzone does not arise when the trampoline is
     not even triggered in this scenario.
  2) In case of subprog, the caller's stackframe is already setup
     and the subprog's stackframe is yet to be setup. So, nothing
     on the redzone to be protected.

Also, using dummy frame in BPF trampoline, wastes critically scarce
kernel stack space, especially in tailcall sequence, for marginal
benefit in stack unwinding. So, drop setting up the dummy frame.
Instead, save return address in bpf trampoline frame and use it as
appropriate. Pruning this unnecessary stack usage mitigates the
likelihood of stack overflow in scenarios where bpf2bpf tailcalls
and fexit programs are mixed.

Reported-by: Saket Kumar Bhaskar <skb99@linux.ibm.com>
Fixes: 2ed2d8f6fb ("powerpc64/bpf: Support tailcalls with subprogs")
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303181031.390073-5-hbathini@linux.ibm.com
2026-03-07 16:02:26 +05:30
Hari Bathini
3727d6ec13 powerpc64/bpf: use consistent tailcall offset in trampoline
Ideally, the offset used to load the tail call info field and to find
the pass by reference address for tail call field should be the same.
But while setting up the tail call info in the trampoline, this was
not followed. This can be misleading and can lead to unpredictable
results if and when bpf_has_stack_frame() ends up returning true
for trampoline frame. Since commit 15513beeb6 ("powerpc64/bpf:
Moving tail_call_cnt to bottom of frame") and commit 2ed2d8f6fb
("powerpc64/bpf: Support tailcalls with subprogs") ensured tail call
field is at the bottom of the stack frame for BPF programs as well as
BPF trampoline, avoid relying on bpf_jit_stack_tailcallinfo_offset()
and bpf_has_stack_frame() for trampoline frame and always calculate
tail call field offset with reference to older frame.

Fixes: 2ed2d8f6fb ("powerpc64/bpf: Support tailcalls with subprogs")
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303181031.390073-4-hbathini@linux.ibm.com
2026-03-07 16:02:26 +05:30
Hari Bathini
157820264a powerpc64/bpf: fix the address returned by bpf_get_func_ip
bpf_get_func_ip() helper function returns the address of the traced
function. It relies on the IP address stored at ctx - 16 by the bpf
trampoline. On 64-bit powerpc, this address is recovered from LR
accounting for OOL trampoline. But the address stored here was off
by 4-bytes. Ensure the address is the actual start of the traced
function.

Reported-by: Abhishek Dubey <adubey@linux.ibm.com>
Fixes: d243b62b7b ("powerpc64/bpf: Add support for bpf trampolines")
Cc: stable@vger.kernel.org
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303181031.390073-3-hbathini@linux.ibm.com
2026-03-07 16:02:26 +05:30
Hari Bathini
521bd39d9d powerpc64/bpf: do not increment tailcall count when prog is NULL
Do not increment tailcall count, if tailcall did not succeed due to
missing BPF program.

Fixes: ce0761419f ("powerpc/bpf: Implement support for tail calls")
Cc: stable@vger.kernel.org
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303181031.390073-2-hbathini@linux.ibm.com
2026-03-07 16:02:26 +05:30
Hari Bathini
db54c28702 powerpc64/ftrace: workaround clang recording GEP in __patchable_function_entries
Support for -fpatchable-function-entry on ppc64le was added in Clang
with [1]. However, when no prefix NOPs are specified - as is the case
with CONFIG_PPC_FTRACE_OUT_OF_LINE - the first NOP is emitted at LEP,
but Clang records the Global Entry Point (GEP) unlike GCC which does
record the Local Entry Point (LEP). Issue [2] has been raised to align
Clang's behavior with GCC. As a temporary workaround to ensure ftrace
initialization works as expected with Clang, derive the LEP using
ppc_function_entry() for kernel symbols and by looking for the below
module GEP sequence for module addresses, until [2] is resolved:

	ld	r2, -8(r12)
	add	r2, r2, r12

[1] https://github.com/llvm/llvm-project/pull/151569
[2] https://github.com/llvm/llvm-project/issues/163706

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260127084926.34497-4-hbathini@linux.ibm.com
2026-03-07 16:02:25 +05:30
Hari Bathini
875612a774 powerpc64/ftrace: fix OOL stub count with clang
The total number of out-of-line (OOL) stubs required for function
tracing is determined using the following command:

    $(OBJDUMP) -r -j __patchable_function_entries vmlinux.o

While this works correctly with GNU objdump, llvm-objdump does not
list the expected relocation records for this section. Fix this by
using the -d option and counting R_PPC64_ADDR64 relocation entries.
This works as desired with both objdump and llvm-objdump.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260127084926.34497-3-hbathini@linux.ibm.com
2026-03-07 16:02:25 +05:30
Hari Bathini
73cdf24e81 powerpc64: make clang cross-build friendly
ARCH_USING_PATCHABLE_FUNCTION_ENTRY depends on toolchain support for
-fpatchable-function-entry option. The current script that checks
for this support only handles GCC. Rename the script and extend it
to detect support for -fpatchable-function-entry with Clang as well,
allowing clean cross-compilation with Clang toolchains.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260127084926.34497-2-hbathini@linux.ibm.com
2026-03-07 16:02:25 +05:30
Alexei Starovoitov
325d1ba3ca Merge branch 'bpf-fix-precision-backtracking-bug-with-linked-registers'
Eduard Zingerman says:

====================
bpf: Fix precision backtracking bug with linked registers

Emil Tsalapatis reported a verifier bug hit by the scx_lavd sched_ext
scheduler. The essential part of the verifier log looks as follows:

  436: ...
  // checkpoint hit for 438: (1d) if r7 == r8 goto ...
  frame 3: propagating r2,r7,r8
  frame 2: propagating r6
  mark_precise: frame3: last_idx ...
  mark_precise: frame3: regs=r2,r7,r8 stack= before 436: ...
  mark_precise: frame3: regs=r2,r7 stack= before 435: ...
  mark_precise: frame3: regs=r2,r7 stack= before 434: (85) call bpf_trace_vprintk#177
  verifier bug: backtracking call unexpected regs 84

The log complains that registers r2 and r7 are tracked as precise
while processing the bpf_trace_vprintk() call in precision backtracking.
This can't be right, as r2 is reset by the call and there is nothing
to backtrack it to. The precision propagation is triggered when
a checkpoint is hit at instruction 438, r2 is dead at that instruction.

This happens because of the following sequence of events:
- Instruction 438 is first reached with registers r2 and r7 having
  the same id via a path that does not call bpf_trace_vprintk():
  - Checkpoint is created at 438.
  - The jump at 438 is predicted, hence r7 and registers linked to it
    (r2) are propagated as precise, marking r2 and r7 precise in the
    checkpoint.
- Instruction 438 is reached a second time with r2 undefined and via
  a path that calls bpf_trace_vprintk():
  - Checkpoint is hit.
  - propagate_precision() picks registers r2 and r7 and propagates
    precision marks for those up to the helper call.

The root cause is the fact that states_equal() and
propagate_precision() assume that the precision flag can't be set for a
dead register (as computed by compute_live_registers()).
However, this is not the case when linked registers are at play.
Fix this by accounting for live register flags in
collect_linked_regs().
---
====================

Link: https://patch.msgid.link/20260306-linked-regs-and-propagate-precision-v1-0-18e859be570d@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-06 21:50:05 -08:00
Eduard Zingerman
223ffb6a3d selftests/bpf: add reproducer for spurious precision propagation through calls
Add a test for the scenario described in the previous commit:
an iterator loop with two paths where one ties r2/r7 via
shared scalar id and skips a call, while the other goes
through the call. Precision marks from the linked registers
get spuriously propagated to the call path via
propagate_precision(), hitting "backtracking call unexpected
regs" in backtrack_insn().

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260306-linked-regs-and-propagate-precision-v1-2-18e859be570d@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-06 21:50:05 -08:00
Eduard Zingerman
2658a1720a bpf: collect only live registers in linked regs
Fix an inconsistency between func_states_equal() and
collect_linked_regs():
- regsafe() uses check_ids() to verify that cached and current states
  have identical register id mapping.
- func_states_equal() calls regsafe() only for registers computed as
  live by compute_live_registers().
- clean_live_states() is supposed to remove dead registers from cached
  states, but it can skip states belonging to an iterator-based loop.
- collect_linked_regs() collects all registers sharing the same id,
  ignoring the marks computed by compute_live_registers().
  Linked registers are stored in the state's jump history.
- backtrack_insn() marks all linked registers for an instruction
  as precise whenever one of the linked registers is precise.

The above might lead to a scenario:
- There is an instruction I with register rY known to be dead at I.
- Instruction I is reached via two paths: first A, then B.
- On path A:
  - There is an id link between registers rX and rY.
  - Checkpoint C is created at I.
  - Linked register set {rX, rY} is saved to the jump history.
  - rX is marked as precise at I, causing both rX and rY
    to be marked precise at C.
- On path B:
  - There is no id link between registers rX and rY,
    otherwise register states are sub-states of those in C.
  - Because rY is dead at I, check_ids() returns true.
  - Current state is considered equal to checkpoint C,
    propagate_precision() propagates spurious precision
    mark for register rY along the path B.
  - Depending on a program, this might hit verifier_bug()
    in the backtrack_insn(), e.g. if rY ∈  [r1..r5]
    and backtrack_insn() spots a function call.

The reproducer program is in the next patch.
This was hit by sched_ext scx_lavd scheduler code.

Changes in tests:
- verifier_scalar_ids.c selftests need modification to preserve
  some registers as live for __msg() checks.
- exceptions_assert.c adjusted to match changes in the verifier log,
  R0 is dead after conditional instruction and thus does not get
  range.
- precise.c adjusted to match changes in the verifier log, register r9
  is dead after comparison and it's range is not important for test.

Reported-by: Emil Tsalapatis <emil@etsalapatis.com>
Fixes: 0fb3cf6110 ("bpf: use register liveness information for func_states_equal")
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260306-linked-regs-and-propagate-precision-v1-1-18e859be570d@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-06 21:49:40 -08:00
Linus Torvalds
4ae12d8bd9 Merge tag 'kbuild-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux
Pull Kbuild fixes from Nathan Chancellor:

 - Split out .modinfo section from ELF_DETAILS macro, as that macro may
   be used in other areas that expect to discard .modinfo, breaking
   certain image layouts

 - Adjust genksyms parser to handle optional attributes in certain
   declarations, necessary after commit 07919126ec ("netfilter:
   annotate NAT helper hook pointers with __rcu")

 - Include resolve_btfids in external module build created by
   scripts/package/install-extmod-build when it may be run on external
   modules

 - Avoid removing objtool binary with 'make clean', as it is required
   for external module builds

* tag 'kbuild-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux:
  kbuild: Leave objtool binary around with 'make clean'
  kbuild: install-extmod-build: Package resolve_btfids if necessary
  genksyms: Fix parsing a declarator with a preceding attribute
  kbuild: Split .modinfo out from ELF_DETAILS
2026-03-06 20:27:13 -08:00
Linus Torvalds
591d8796b2 Merge tag 's390-7.0-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 fixes from Vasily Gorbik:

 - Fix stackleak and xor lib inline asm, constraints and clobbers to
   prevent miscompilations and incomplete stack poisoning

* tag 's390-7.0-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/stackleak: Fix __stackleak_poison() inline assembly constraint
  s390/xor: Improve inline assembly constraints
  s390/xor: Fix xor_xc_2() inline assembly constraints
  s390/xor: Fix xor_xc_5() inline assembly
2026-03-06 20:20:17 -08:00
Linus Torvalds
4660e168c6 Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Will Deacon:
 "The main changes are a fix to the way in which we manage the access
  flag setting for mappings using the contiguous bit and a fix for a
  hang on the kexec/hibernation path.

  Summary:

   - Fix kexec/hibernation hang due to bogus read-only mappings

   - Fix sparse warnings in our cmpxchg() implementation

   - Prevent runtime-const being used in modules, just like x86

   - Fix broken elision of access flag modifications for contiguous
     entries on systems without support for hardware updates

   - Fix a broken SVE selftest that was testing the wrong instruction"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  selftest/arm64: Fix sve2p1_sigill() to hwcap test
  arm64: contpte: fix set_access_flags() no-op check for SMMU/ATS faults
  arm64: make runtime const not usable by modules
  arm64: mm: Add PTE_DIRTY back to PAGE_KERNEL* to fix kexec/hibernation
  arm64: Silence sparse warnings caused by the type casting in (cmp)xchg
2026-03-06 19:57:03 -08:00
Calvin Owens
d008ba8be8 tracing: Fix trace_buf_size= cmdline parameter with sizes >= 2G
Some of the sizing logic through tracer_alloc_buffers() uses int
internally, causing unexpected behavior if the user passes a value that
does not fit in an int (on my x86 machine, the result is uselessly tiny
buffers).

Fix by plumbing the parameter's real type (unsigned long) through to the
ring buffer allocation functions, which already use unsigned long.

It has always been possible to create larger ring buffers via the sysfs
interface: this only affects the cmdline parameter.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/bff42a4288aada08bdf74da3f5b67a2c28b761f8.1772852067.git.calvin@wbinvd.org
Fixes: 73c5162aa3 ("tracing: keep ring buffer to minimum size till used")
Signed-off-by: Calvin Owens <calvin@wbinvd.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-06 22:25:53 -05:00
Jakub Kicinski
c113d5e326 Merge branch 'net-spacemit-a-few-error-handling-fixes'
Vivian Wang says:

====================
net: spacemit: A few error handling fixes

Recently a user reported a supposed UAF/double-free in this driver.
It turned out to be a false positive (ugh) from a bug with riscv's
kfence_protect_page() [1], but it did also prompt me to review the
driver code yet again. These are some fixes for error handling problems
that I've found.

[1]: https://lore.kernel.org/r/20260303-handle-kfence-protect-spurious-fault-v2-0-f80d8354d79d@iscas.ac.cn/
====================

Link: https://patch.msgid.link/20260305-k1-ethernet-more-fixes-v2-0-e4e434d65055@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 18:58:36 -08:00
Vivian Wang
86292155be net: spacemit: Fix error handling in emac_tx_mem_map()
The DMA mappings were leaked on mapping error. Free them with the
existing emac_free_tx_buf() function.

Fixes: bfec6d7f20 ("net: spacemit: Add K1 Ethernet MAC")
Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
Link: https://patch.msgid.link/20260305-k1-ethernet-more-fixes-v2-2-e4e434d65055@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 18:58:34 -08:00
Vivian Wang
3aa1417803 net: spacemit: Fix error handling in emac_alloc_rx_desc_buffers()
Even if we get a dma_mapping_error() while mapping an RX buffer, we
should still update rx_ring->head to ensure that the buffers we were
able to allocate and map are used. Fix this by breaking out to the
existing code after the loop, analogous to the existing handling for skb
allocation failure.

Fixes: bfec6d7f20 ("net: spacemit: Add K1 Ethernet MAC")
Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
Link: https://patch.msgid.link/20260305-k1-ethernet-more-fixes-v2-1-e4e434d65055@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 18:58:34 -08:00
Alexei Starovoitov
6895e1d7c3 Merge branch 'bpf-fix-u32-s32-bounds-when-ranges-cross-min-max-boundary'
Eduard Zingerman says:

====================
bpf: Fix u32/s32 bounds when ranges cross min/max boundary

Cover the following cases in range refinement logic for 32-bit ranges:
- s32 range crosses U32_MAX/0 boundary, positive part of the s32 range
  overlaps with u32 range.
- s32 range crosses U32_MAX/0 boundary, negative part of the s32 range
  overlaps with u32 range.

These cases are already handled for 64-bit range refinement.

Without the fix the test in patch 2 is rejected by the verifier.
The test was reduced from sched-ext program.

Changelog:
- v2 -> v3:
  - Reverted da653de268 (Paul)
  - Removed !BPF_F_TEST_REG_INVARIANTS flag from
    crossing_32_bit_signed_boundary_2() (Paul)
- v1 -> v2:
  - Extended commit message and comments (Emil)
  - Targeting 'bpf' tree instead of bpf-next (Alexei)

v1: https://lore.kernel.org/bpf/9a23fbacdc6d33ec8fcb3f6988395b5129f75369.camel@gmail.com/T
v2: https://lore.kernel.org/bpf/20260305-bpf-32-bit-range-overflow-v2-0-7169206a3041@gmail.com/
---
====================

Link: https://patch.msgid.link/20260306-bpf-32-bit-range-overflow-v3-0-f7f67e060a6b@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-06 18:16:17 -08:00
Eduard Zingerman
d87c9305a8 Revert "selftests/bpf: Update reg_bound range refinement logic"
This reverts commit da653de268.
Removed logic is now covered by range_refine_in_halves()
which handles both 32-bit and 64-bit refinements.

Suggested-by: Paul Chaignon <paul.chaignon@gmail.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260306-bpf-32-bit-range-overflow-v3-3-f7f67e060a6b@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-06 18:16:17 -08:00
Eduard Zingerman
f81fdfd167 selftests/bpf: test refining u32/s32 bounds when ranges cross min/max boundary
Two test cases for signed/unsigned 32-bit bounds refinement
when s32 range crosses the sign boundary:
- s32 range [S32_MIN..1] overlapping with u32 range [3..U32_MAX],
  s32 range tail before sign boundary overlaps with u32 range.
- s32 range [-3..5] overlapping with u32 range [0..S32_MIN+3],
  s32 range head after the sign boundary overlaps with u32 range.

This covers both branches added in the __reg32_deduce_bounds().

Also, crossing_32_bit_signed_boundary_2() no longer triggers invariant
violations.

Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Reviewed-by: Paul Chaignon <paul.chaignon@gmail.com>
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260306-bpf-32-bit-range-overflow-v3-2-f7f67e060a6b@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-06 18:16:17 -08:00
Eduard Zingerman
fbc7aef517 bpf: Fix u32/s32 bounds when ranges cross min/max boundary
Same as in __reg64_deduce_bounds(), refine s32/u32 ranges
in __reg32_deduce_bounds() in the following situations:

- s32 range crosses U32_MAX/0 boundary, positive part of the s32 range
  overlaps with u32 range:

  0                                                   U32_MAX
  |  [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]              |
  |----------------------------|----------------------------|
  |xxxxx s32 range xxxxxxxxx]                       [xxxxxxx|
  0                     S32_MAX S32_MIN                    -1

- s32 range crosses U32_MAX/0 boundary, negative part of the s32 range
  overlaps with u32 range:

  0                                                   U32_MAX
  |              [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]  |
  |----------------------------|----------------------------|
  |xxxxxxxxx]                       [xxxxxxxxxxxx s32 range |
  0                     S32_MAX S32_MIN                    -1

- No refinement if ranges overlap in two intervals.

This helps for e.g. consider the following program:

   call %[bpf_get_prandom_u32];
   w0 &= 0xffffffff;
   if w0 < 0x3 goto 1f;    // on fall-through u32 range [3..U32_MAX]
   if w0 s> 0x1 goto 1f;   // on fall-through s32 range [S32_MIN..1]
   if w0 s< 0x0 goto 1f;   // range can be narrowed to  [S32_MIN..-1]
   r10 = 0;
1: ...;

The reg_bounds.c selftest is updated to incorporate identical logic,
refinement based on non-overflowing range halves:

  ((x ∩ [0, smax]) ∩ (y ∩ [0, smax])) ∪
  ((x ∩ [smin,-1]) ∩ (y ∩ [smin,-1]))

Reported-by: Andrea Righi <arighi@nvidia.com>
Reported-by: Emil Tsalapatis <emil@etsalapatis.com>
Closes: https://lore.kernel.org/bpf/aakqucg4vcujVwif@gpd4/T/
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260306-bpf-32-bit-range-overflow-v3-1-f7f67e060a6b@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-06 18:16:06 -08:00
Miaoqian Lin
4245a79003 rxrpc, afs: Fix missing error pointer check after rxrpc_kernel_lookup_peer()
rxrpc_kernel_lookup_peer() can also return error pointers in addition to
NULL, so just checking for NULL is not sufficient.

Fix this by:

 (1) Changing rxrpc_kernel_lookup_peer() to return -ENOMEM rather than NULL
     on allocation failure.

 (2) Making the callers in afs use IS_ERR() and PTR_ERR() to pass on the
     error code returned.

Fixes: 72904d7b9b ("rxrpc, afs: Allow afs to pin rxrpc_peer objects")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Co-developed-by: David Howells <dhowells@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/368272.1772713861@warthog.procyon.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:49:52 -08:00
Jakub Kicinski
7a4d74676c Merge branch 'further-sja1105-phylink-link-replay-fixups'
Vladimir Oltean says:

====================
Further SJA1105 phylink link replay fixups

While I was playing around with the subsystem knowledge in Chris Mason's
review-prompts to see what LLMs would have needed to catch the bug
behind commit bfd264fbbb ("net: dsa: sja1105: protect link replay
helpers against NULL phylink instance"), it flagged another issue
instead, which IMO is valid. This is being fixed in patch 2/2.
Patch 1/2 is preparatory reordering for that.

I haven't noticed any physical issues, it only has to do with the
soundness of the new call path introduced in January in commit
0b2edc531e ("net: dsa: sja1105: let phylink help with the replay of
link callbacks").
====================

Link: https://patch.msgid.link/20260304220900.3865120-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:48:03 -08:00
Vladimir Oltean
ce2da643f0 net: dsa: sja1105: ensure phylink_replay_link_end() will not be missed
Most errors that can occur in sja1105_static_config_reload() are fatal
(example: fail to communicate with hardware), but not all are.

For example, sja1105_static_config_upload() -> kcalloc() may fail, and
if that happens, we have called phylink_replay_link_begin() but never
phylink_replay_link_end().

Under that circumstance, all port phylink instances are left in a state
where the resolver is stopped with the PHYLINK_DISABLE_REPLAY bit set.
We have effectively disabled link management with no way to recover from
this condition.

Avoid that situation by ensuring phylink_replay_link_begin() is always
paired with phylink_replay_link_end(), regardless of whether we faced
any errors during switch reset, configuration reload and general state
reload.

Fixes: 0b2edc531e ("net: dsa: sja1105: let phylink help with the replay of link callbacks")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260304220900.3865120-3-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:48:01 -08:00
Vladimir Oltean
976703cae7 net: dsa: sja1105: reorder sja1105_reload_cbs() and phylink_replay_link_end()
Move phylink_replay_link_end() as the last locked operation under
sja1105_static_config_reload(). The purpose is to be able to goto
this step from the error path of intermediate steps (we must call
phylink_replay_link_end()).

sja1105_reload_cbs() notably does not depend on port states or link
speeds. See commit 954ad9bf13 ("net: dsa: sja1105: fix bandwidth
discrepancy between tc-cbs software and offload") which has discussed
this issue specifically.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260304220900.3865120-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:48:01 -08:00
Weiming Shi
0cc0c2e661 net/sched: teql: fix NULL pointer dereference in iptunnel_xmit on TEQL slave xmit
teql_master_xmit() calls netdev_start_xmit(skb, slave) to transmit
through slave devices, but does not update skb->dev to the slave device
beforehand.

When a gretap tunnel is a TEQL slave, the transmit path reaches
iptunnel_xmit() which saves dev = skb->dev (still pointing to teql0
master) and later calls iptunnel_xmit_stats(dev, pkt_len). This
function does:

    get_cpu_ptr(dev->tstats)

Since teql_master_setup() does not set dev->pcpu_stat_type to
NETDEV_PCPU_STAT_TSTATS, the core network stack never allocates tstats
for teql0, so dev->tstats is NULL. get_cpu_ptr(NULL) computes
NULL + __per_cpu_offset[cpu], resulting in a page fault.

 BUG: unable to handle page fault for address: ffff8880e6659018
 #PF: supervisor write access in kernel mode
 #PF: error_code(0x0002) - not-present page
 PGD 68bc067 P4D 68bc067 PUD 0
 Oops: Oops: 0002 [#1] SMP KASAN PTI
 RIP: 0010:iptunnel_xmit (./include/net/ip_tunnels.h:664 net/ipv4/ip_tunnel_core.c:89)
 Call Trace:
  <TASK>
  ip_tunnel_xmit (net/ipv4/ip_tunnel.c:847)
  __gre_xmit (net/ipv4/ip_gre.c:478)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  teql_master_xmit (net/sched/sch_teql.c:319)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  neigh_direct_output (net/core/neighbour.c:1660)
  ip_finish_output2 (net/ipv4/ip_output.c:237)
  __ip_finish_output.part.0 (net/ipv4/ip_output.c:315)
  ip_mc_output (net/ipv4/ip_output.c:369)
  ip_send_skb (net/ipv4/ip_output.c:1508)
  udp_send_skb (net/ipv4/udp.c:1195)
  udp_sendmsg (net/ipv4/udp.c:1485)
  inet_sendmsg (net/ipv4/af_inet.c:859)
  __sys_sendto (net/socket.c:2206)

Fix this by setting skb->dev = slave before calling
netdev_start_xmit(), so that tunnel xmit functions see the correct
slave device with properly allocated tstats.

Fixes: 039f50629b ("ip_tunnel: Move stats update to iptunnel_xmit()")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260304044216.3517851-3-bestswngs@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:45:37 -08:00
Jian Zhang
5c3398a542 net: ncsi: fix skb leak in error paths
Early return paths in NCSI RX and AEN handlers fail to release
the received skb, resulting in a memory leak.

Specifically, ncsi_aen_handler() returns on invalid AEN packets
without consuming the skb. Similarly, ncsi_rcv_rsp() exits early
when failing to resolve the NCSI device, response handler, or
request, leaving the skb unfreed.

CC: stable@vger.kernel.org
Fixes: 7a82ecf4cf ("net/ncsi: NCSI AEN packet handler")
Fixes: 138635cc27 ("net/ncsi: NCSI response packet handler")
Signed-off-by: Jian Zhang <zhangjian.3032@bytedance.com>
Link: https://patch.msgid.link/20260305060656.3357250-1-zhangjian.3032@bytedance.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:34:48 -08:00
Jakub Kicinski
63f428cb94 Merge branch 'mlx5-misc-fixes-2026-03-05'
Tariq Toukan says:

====================
mlx5 misc fixes 2026-03-05

This patchset provides misc bug fixes from the team to the mlx5
core and Eth drivers.
====================

Link: https://patch.msgid.link/20260305142634.1813208-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:26:04 -08:00
Dragos Tatulea
a6413e6f6c net/mlx5e: RX, Fix XDP multi-buf frag counting for legacy RQ
XDP multi-buf programs can modify the layout of the XDP buffer when the
program calls bpf_xdp_pull_data() or bpf_xdp_adjust_tail(). The
referenced commit in the fixes tag corrected the assumption in the mlx5
driver that the XDP buffer layout doesn't change during a program
execution. However, this fix introduced another issue: the dropped
fragments still need to be counted on the driver side to avoid page
fragment reference counting issues.

Such issue can be observed with the
test_xdp_native_adjst_tail_shrnk_data selftest when using a payload of
3600 and shrinking by 256 bytes (an upcoming selftest patch): the last
fragment gets released by the XDP code but doesn't get tracked by the
driver. This results in a negative pp_ref_count during page release and
the following splat:

  WARNING: include/net/page_pool/helpers.h:297 at mlx5e_page_release_fragmented.isra.0+0x4a/0x50 [mlx5_core], CPU#12: ip/3137
  Modules linked in: [...]
  CPU: 12 UID: 0 PID: 3137 Comm: ip Not tainted 6.19.0-rc3+ #12 NONE
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
  RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x4a/0x50 [mlx5_core]
  [...]
  Call Trace:
   <TASK>
   mlx5e_dealloc_rx_wqe+0xcb/0x1a0 [mlx5_core]
   mlx5e_free_rx_descs+0x7f/0x110 [mlx5_core]
   mlx5e_close_rq+0x50/0x60 [mlx5_core]
   mlx5e_close_queues+0x36/0x2c0 [mlx5_core]
   mlx5e_close_channel+0x1c/0x50 [mlx5_core]
   mlx5e_close_channels+0x45/0x80 [mlx5_core]
   mlx5e_safe_switch_params+0x1a5/0x230 [mlx5_core]
   mlx5e_change_mtu+0xf3/0x2f0 [mlx5_core]
   netif_set_mtu_ext+0xf1/0x230
   do_setlink.isra.0+0x219/0x1180
   rtnl_newlink+0x79f/0xb60
   rtnetlink_rcv_msg+0x213/0x3a0
   netlink_rcv_skb+0x48/0xf0
   netlink_unicast+0x24a/0x350
   netlink_sendmsg+0x1ee/0x410
   __sock_sendmsg+0x38/0x60
   ____sys_sendmsg+0x232/0x280
   ___sys_sendmsg+0x78/0xb0
   __sys_sendmsg+0x5f/0xb0
   [...]
   do_syscall_64+0x57/0xc50

This patch fixes the issue by doing page frag counting on all the
original XDP buffer fragments for all relevant XDP actions (XDP_TX ,
XDP_REDIRECT and XDP_PASS). This is basically reverting to the original
counting before the commit in the fixes tag.

As frag_page is still pointing to the original tail, the nr_frags
parameter to xdp_update_skb_frags_info() needs to be calculated
in a different way to reflect the new nr_frags.

Fixes: afd5ba577c ("net/mlx5e: RX, Fix generating skb from non-linear xdp_buff for legacy RQ")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Amery Hung <ameryhung@gmail.com>
Link: https://patch.msgid.link/20260305142634.1813208-6-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:25:39 -08:00
Dragos Tatulea
db25c42c2e net/mlx5e: RX, Fix XDP multi-buf frag counting for striding RQ
XDP multi-buf programs can modify the layout of the XDP buffer when the
program calls bpf_xdp_pull_data() or bpf_xdp_adjust_tail(). The
referenced commit in the fixes tag corrected the assumption in the mlx5
driver that the XDP buffer layout doesn't change during a program
execution. However, this fix introduced another issue: the dropped
fragments still need to be counted on the driver side to avoid page
fragment reference counting issues.

The issue was discovered by the drivers/net/xdp.py selftest,
more specifically the test_xdp_native_tx_mb:
- The mlx5 driver allocates a page_pool page and initializes it with
  a frag counter of 64 (pp_ref_count=64) and the internal frag counter
  to 0.
- The test sends one packet with no payload.
- On RX (mlx5e_skb_from_cqe_mpwrq_nonlinear()), mlx5 configures the XDP
  buffer with the packet data starting in the first fragment which is the
  page mentioned above.
- The XDP program runs and calls bpf_xdp_pull_data() which moves the
  header into the linear part of the XDP buffer. As the packet doesn't
  contain more data, the program drops the tail fragment since it no
  longer contains any payload (pp_ref_count=63).
- mlx5 device skips counting this fragment. Internal frag counter
  remains 0.
- mlx5 releases all 64 fragments of the page but page pp_ref_count is
  63 => negative reference counting error.

Resulting splat during the test:

  WARNING: CPU: 0 PID: 188225 at ./include/net/page_pool/helpers.h:297 mlx5e_page_release_fragmented.isra.0+0xbd/0xe0 [mlx5_core]
  Modules linked in: [...]
  CPU: 0 UID: 0 PID: 188225 Comm: ip Not tainted 6.18.0-rc7_for_upstream_min_debug_2025_12_08_11_44 #1 NONE
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:mlx5e_page_release_fragmented.isra.0+0xbd/0xe0 [mlx5_core]
  [...]
  Call Trace:
   <TASK>
   mlx5e_free_rx_mpwqe+0x20a/0x250 [mlx5_core]
   mlx5e_dealloc_rx_mpwqe+0x37/0xb0 [mlx5_core]
   mlx5e_free_rx_descs+0x11a/0x170 [mlx5_core]
   mlx5e_close_rq+0x78/0xa0 [mlx5_core]
   mlx5e_close_queues+0x46/0x2a0 [mlx5_core]
   mlx5e_close_channel+0x24/0x90 [mlx5_core]
   mlx5e_close_channels+0x5d/0xf0 [mlx5_core]
   mlx5e_safe_switch_params+0x2ec/0x380 [mlx5_core]
   mlx5e_change_mtu+0x11d/0x490 [mlx5_core]
   mlx5e_change_nic_mtu+0x19/0x30 [mlx5_core]
   netif_set_mtu_ext+0xfc/0x240
   do_setlink.isra.0+0x226/0x1100
   rtnl_newlink+0x7a9/0xba0
   rtnetlink_rcv_msg+0x220/0x3c0
   netlink_rcv_skb+0x4b/0xf0
   netlink_unicast+0x255/0x380
   netlink_sendmsg+0x1f3/0x420
   __sock_sendmsg+0x38/0x60
   ____sys_sendmsg+0x1e8/0x240
   ___sys_sendmsg+0x7c/0xb0
   [...]
   __sys_sendmsg+0x5f/0xb0
   do_syscall_64+0x55/0xc70

The problem applies for XDP_PASS as well which is handled in a different
code path in the driver.

This patch fixes the issue by doing page frag counting on all the
original XDP buffer fragments for all relevant XDP actions (XDP_TX ,
XDP_REDIRECT and XDP_PASS). This is basically reverting to the original
counting before the commit in the fixes tag.

As frag_page is still pointing to the original tail, the nr_frags
parameter to xdp_update_skb_frags_info() needs to be calculated
in a different way to reflect the new nr_frags.

Fixes: 87bcef158a ("net/mlx5e: RX, Fix generating skb from non-linear xdp_buff for striding RQ")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Cc: Amery Hung <ameryhung@gmail.com>
Reviewed-by: Nimrod Oren <noren@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260305142634.1813208-5-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:25:39 -08:00
Gal Pressman
1633111d69 net/mlx5e: Fix DMA FIFO desync on error CQE SQ recovery
In case of a TX error CQE, a recovery flow is triggered,
mlx5e_reset_txqsq_cc_pc() resets dma_fifo_cc to 0 but not dma_fifo_pc,
desyncing the DMA FIFO producer and consumer.

After recovery, the producer pushes new DMA entries at the old
dma_fifo_pc, while the consumer reads from position 0.
This causes us to unmap stale DMA addresses from before the recovery.

The DMA FIFO is a purely software construct with no HW counterpart.
At the point of reset, all WQEs have been flushed so dma_fifo_cc is
already equal to dma_fifo_pc. There is no need to reset either counter,
similar to how skb_fifo pc/cc are untouched.

Remove the 'dma_fifo_cc = 0' reset.

This fixes the following WARNING:
    WARNING: CPU: 0 PID: 0 at drivers/iommu/dma-iommu.c:1240 iommu_dma_unmap_page+0x79/0x90
    Modules linked in: mlx5_vdpa vringh vdpa bonding mlx5_ib mlx5_vfio_pci ipip mlx5_fwctl tunnel4 mlx5_core ib_ipoib geneve ip6_gre ip_gre gre nf_tables ip6_tunnel rdma_ucm ib_uverbs ib_umad vfio_pci vfio_pci_core act_mirred act_skbedit act_vlan vhost_net vhost tap ip6table_mangle ip6table_nat ip6table_filter ip6_tables iptable_mangle cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress vhost_iotlb iptable_raw tunnel6 vfio_iommu_type1 vfio openvswitch nsh rpcsec_gss_krb5 auth_rpcgss oid_registry xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat nf_nat xt_addrtype br_netfilter overlay zram zsmalloc rpcrdma ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm ib_core fuse [last unloaded: nf_tables]
    CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.13.0-rc5_for_upstream_min_debug_2024_12_30_21_33 #1
    Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
    RIP: 0010:iommu_dma_unmap_page+0x79/0x90
    Code: 2b 4d 3b 21 72 26 4d 3b 61 08 73 20 49 89 d8 44 89 f9 5b 4c 89 f2 4c 89 e6 48 89 ef 5d 41 5c 41 5d 41 5e 41 5f e9 c7 ae 9e ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 66 2e 0f 1f 84 00 00 00 00
    Call Trace:
     <IRQ>
     ? __warn+0x7d/0x110
     ? iommu_dma_unmap_page+0x79/0x90
     ? report_bug+0x16d/0x180
     ? handle_bug+0x4f/0x90
     ? exc_invalid_op+0x14/0x70
     ? asm_exc_invalid_op+0x16/0x20
     ? iommu_dma_unmap_page+0x79/0x90
     ? iommu_dma_unmap_page+0x2e/0x90
     dma_unmap_page_attrs+0x10d/0x1b0
     mlx5e_tx_wi_dma_unmap+0xbe/0x120 [mlx5_core]
     mlx5e_poll_tx_cq+0x16d/0x690 [mlx5_core]
     mlx5e_napi_poll+0x8b/0xac0 [mlx5_core]
     __napi_poll+0x24/0x190
     net_rx_action+0x32a/0x3b0
     ? mlx5_eq_comp_int+0x7e/0x270 [mlx5_core]
     ? notifier_call_chain+0x35/0xa0
     handle_softirqs+0xc9/0x270
     irq_exit_rcu+0x71/0xd0
     common_interrupt+0x7f/0xa0
     </IRQ>
     <TASK>
     asm_common_interrupt+0x22/0x40

Fixes: db75373c91 ("net/mlx5e: Recover Send Queue (SQ) from error state")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260305142634.1813208-4-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:25:39 -08:00
Carolina Jubran
76324e4041 net/mlx5: Fix peer miss rules host disabled checks
The check on mlx5_esw_host_functions_enabled(esw->dev) for adding VF
peer miss rules is incorrect. These rules match traffic from peer's VFs,
so the local device's host function status is irrelevant. Remove this
check to ensure peer VF traffic is properly handled regardless of local
host configuration.

Also fix the PF peer miss rule deletion to be symmetric with the add
path, so only attempt to delete the rule if it was actually created.

Fixes: 520369ef43 ("net/mlx5: Support disabling host PFs")
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260305142634.1813208-3-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:25:39 -08:00
Patrisious Haddad
24b2795f96 net/mlx5: Fix crash when moving to switchdev mode
When moving to switchdev mode when the device doesn't support IPsec,
we try to clean up the IPsec resources anyway which causes the crash
below, fix that by correctly checking for IPsec support before trying
to clean up its resources.

[27642.515799] WARNING: arch/x86/mm/fault.c:1276 at
do_user_addr_fault+0x18a/0x680, CPU#4: devlink/6490
[27642.517159] Modules linked in: xt_conntrack xt_MASQUERADE
ip6table_nat ip6table_filter ip6_tables iptable_nat nf_nat xt_addrtype
rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_fwctl nfnetlink
zram zsmalloc mlx5_ib fuse rpcrdma rdma_ucm ib_uverbs ib_iser libiscsi
scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_core
ib_core
[27642.521358] CPU: 4 UID: 0 PID: 6490 Comm: devlink Not tainted
6.19.0-rc5_for_upstream_min_debug_2026_01_14_16_47 #1 NONE
[27642.522923] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[27642.524528] RIP: 0010:do_user_addr_fault+0x18a/0x680
[27642.525362] Code: ff 0f 84 75 03 00 00 48 89 ee 4c 89 e7 e8 5e b9 22
00 49 89 c0 48 85 c0 0f 84 a8 02 00 00 f7 c3 60 80 00 00 74 22 31 c9 eb
   ae <0f> 0b 48 83 c4 10 48 89 ea 48 89 de 4c 89 f7 5b 5d 41 5c 41 5d
41
[27642.528166] RSP: 0018:ffff88810770f6b8 EFLAGS: 00010046
[27642.529038] RAX: 0000000000000000 RBX: 0000000000000002 RCX:
ffff88810b980f00
[27642.530158] RDX: 00000000000000a0 RSI: 0000000000000002 RDI:
ffff88810770f728
[27642.531270] RBP: 00000000000000a0 R08: 0000000000000000 R09:
0000000000000000
[27642.532383] R10: 0000000000000000 R11: 0000000000000000 R12:
ffff888103f3c4c0
[27642.533499] R13: 0000000000000000 R14: ffff88810770f728 R15:
0000000000000000
[27642.534614] FS:  00007f197c741740(0000) GS:ffff88856a94c000(0000)
knlGS:0000000000000000
[27642.535915] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[27642.536858] CR2: 00000000000000a0 CR3: 000000011334c003 CR4:
0000000000172eb0
[27642.537982] Call Trace:
[27642.538466]  <TASK>
[27642.538907]  exc_page_fault+0x76/0x140
[27642.539583]  asm_exc_page_fault+0x22/0x30
[27642.540282] RIP: 0010:_raw_spin_lock_irqsave+0x10/0x30
[27642.541134] Code: 07 85 c0 75 11 ba ff 00 00 00 f0 0f b1 17 75 06 b8
01 00 00 00 c3 31 c0 c3 90 0f 1f 44 00 00 53 9c 5b fa 31 c0 ba 01 00 00
   00 <f0> 0f b1 17 75 05 48 89 d8 5b c3 89 c6 e8 7e 02 00 00 48 89 d8
      5b
[27642.543936] RSP: 0018:ffff88810770f7d8 EFLAGS: 00010046
[27642.544803] RAX: 0000000000000000 RBX: 0000000000000202 RCX:
ffff888113ad96d8
[27642.545916] RDX: 0000000000000001 RSI: ffff88810770f818 RDI:
00000000000000a0
[27642.547027] RBP: 0000000000000098 R08: 0000000000000400 R09:
ffff88810b980f00
[27642.548140] R10: 0000000000000001 R11: ffff888101845a80 R12:
00000000000000a8
[27642.549263] R13: ffffffffa02a9060 R14: 00000000000000a0 R15:
ffff8881130d8a40
[27642.550379]  complete_all+0x20/0x90
[27642.551010]  mlx5e_ipsec_disable_events+0xb6/0xf0 [mlx5_core]
[27642.552022]  mlx5e_nic_disable+0x12d/0x220 [mlx5_core]
[27642.552929]  mlx5e_detach_netdev+0x66/0xf0 [mlx5_core]
[27642.553822]  mlx5e_netdev_change_profile+0x5b/0x120 [mlx5_core]
[27642.554821]  mlx5e_vport_rep_load+0x419/0x590 [mlx5_core]
[27642.555757]  ? xa_load+0x53/0x90
[27642.556361]  __esw_offloads_load_rep+0x54/0x70 [mlx5_core]
[27642.557328]  mlx5_esw_offloads_rep_load+0x45/0xd0 [mlx5_core]
[27642.558320]  esw_offloads_enable+0xb4b/0xc90 [mlx5_core]
[27642.559247]  mlx5_eswitch_enable_locked+0x34e/0x4f0 [mlx5_core]
[27642.560257]  ? mlx5_rescan_drivers_locked+0x222/0x2d0 [mlx5_core]
[27642.561284]  mlx5_devlink_eswitch_mode_set+0x5ac/0x9c0 [mlx5_core]
[27642.562334]  ? devlink_rate_set_ops_supported+0x21/0x3a0
[27642.563220]  devlink_nl_eswitch_set_doit+0x67/0xe0
[27642.564026]  genl_family_rcv_msg_doit+0xe0/0x130
[27642.564816]  genl_rcv_msg+0x183/0x290
[27642.565466]  ? __devlink_nl_pre_doit.isra.0+0x160/0x160
[27642.566329]  ? devlink_nl_eswitch_get_doit+0x290/0x290
[27642.567181]  ? devlink_nl_pre_doit_parent_dev_optional+0x20/0x20
[27642.568147]  ? genl_family_rcv_msg_dumpit+0xf0/0xf0
[27642.568966]  netlink_rcv_skb+0x4b/0xf0
[27642.569629]  genl_rcv+0x24/0x40
[27642.570215]  netlink_unicast+0x255/0x380
[27642.570901]  ? __alloc_skb+0xfa/0x1e0
[27642.571560]  netlink_sendmsg+0x1f3/0x420
[27642.572249]  __sock_sendmsg+0x38/0x60
[27642.572911]  __sys_sendto+0x119/0x180
[27642.573561]  ? __sys_recvmsg+0x5c/0xb0
[27642.574227]  __x64_sys_sendto+0x20/0x30
[27642.574904]  do_syscall_64+0x55/0xc10
[27642.575554]  entry_SYSCALL_64_after_hwframe+0x4b/0x53
[27642.576391] RIP: 0033:0x7f197c85e807
[27642.577050] Code: c7 c0 ff ff ff ff eb be 66 2e 0f 1f 84 00 00 00 00
00 90 f3 0f 1e fa 80 3d 45 08 0d 00 00 41 89 ca 74 10 b8 2c 00 00 00 0f
   05 <48> 3d 00 f0 ff ff 77 69 c3 55 48 89 e5 53 48 83 ec 38 44 89 4d
      d0
[27642.579846] RSP: 002b:00007ffebd4e2248 EFLAGS: 00000202 ORIG_RAX:
000000000000002c
[27642.581082] RAX: ffffffffffffffda RBX: 000055cfcd9cd2a0 RCX:
00007f197c85e807
[27642.582200] RDX: 0000000000000038 RSI: 000055cfcd9cd490 RDI:
0000000000000003
[27642.583320] RBP: 00007ffebd4e2290 R08: 00007f197c942200 R09:
000000000000000c
[27642.584437] R10: 0000000000000000 R11: 0000000000000202 R12:
0000000000000000
[27642.585555] R13: 000055cfcd9cd490 R14: 00007ffebd4e45d1 R15:
000055cfcd9cd2a0
[27642.586671]  </TASK>
[27642.587121] ---[ end trace 0000000000000000 ]---
[27642.587910] BUG: kernel NULL pointer dereference, address:
00000000000000a0

Fixes: 664f76be38 ("net/mlx5: Fix IPsec cleanup over MPV device")
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260305142634.1813208-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:25:38 -08:00
Cosmin Ratiu
aed763abf0 net/mlx5: Fix deadlock between devlink lock and esw->wq
esw->work_queue executes esw_functions_changed_event_handler ->
esw_vfs_changed_event_handler and acquires the devlink lock.

.eswitch_mode_set (acquires devlink lock in devlink_nl_pre_doit) ->
mlx5_devlink_eswitch_mode_set -> mlx5_eswitch_disable_locked ->
mlx5_eswitch_event_handler_unregister -> flush_workqueue deadlocks
when esw_vfs_changed_event_handler executes.

Fix that by no longer flushing the work to avoid the deadlock, and using
a generation counter to keep track of work relevance. This avoids an old
handler manipulating an esw that has undergone one or more mode changes:
- the counter is incremented in mlx5_eswitch_event_handler_unregister.
- the counter is read and passed to the ephemeral mlx5_host_work struct.
- the work handler takes the devlink lock and bails out if the current
  generation is different than the one it was scheduled to operate on.
- mlx5_eswitch_cleanup does the final draining before destroying the wq.

No longer flushing the workqueue has the side effect of maybe no longer
cancelling pending vport_change_handler work items, but that's ok since
those are disabled elsewhere:
- mlx5_eswitch_disable_locked disables the vport eq notifier.
- mlx5_esw_vport_disable disarms the HW EQ notification and marks
  vport->enabled under state_lock to false to prevent pending vport
  handler from doing anything.
- mlx5_eswitch_cleanup destroys the workqueue and makes sure all events
  are disabled/finished.

Fixes: f1bc646c9a ("net/mlx5: Use devl_ API in mlx5_esw_offloads_devlink_port_register")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260305081019.1811100-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 17:24:28 -08:00
Laurent Vivier
55f854dd5b qmi_wwan: allow max_mtu above hard_mtu to control rx_urb_size
Commit c7159e960f ("usbnet: limit max_mtu based on device's hard_mtu")
capped net->max_mtu to the device's hard_mtu in usbnet_probe(). While
this correctly prevents oversized packets on standard USB network
devices, it breaks the qmi_wwan driver.

qmi_wwan relies on userspace (e.g. ModemManager) setting a large MTU on
the wwan0 interface to configure rx_urb_size via usbnet_change_mtu().
QMI modems negotiate USB transfer sizes of 16,383 or 32,767 bytes, and
the USB receive buffers must be sized accordingly. With max_mtu capped
to hard_mtu (~1500 bytes), userspace can no longer raise the MTU, the
receive buffers remain small, and download speeds drop from >300 Mbps
to ~0.8 Mbps.

Introduce a FLAG_NOMAXMTU driver flag that allows individual usbnet
drivers to opt out of the max_mtu cap. Set this flag in qmi_wwan's
driver_info structures to restore the previous behavior for QMI devices,
while keeping the safety fix in place for all other usbnet drivers.

Fixes: c7159e960f ("usbnet: limit max_mtu based on device's hard_mtu")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/lkml/CAPh3n803k8JcBPV5qEzUB-oKzWkAs-D5CU7z=Vd_nLRCr5ZqQg@mail.gmail.com/
Reported-by: Koen Vandeputte <koen.vandeputte@citymesh.com>
Tested-by: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Link: https://patch.msgid.link/20260304134338.1785002-1-lvivier@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 16:31:41 -08:00
Jakub Kicinski
03910cdc21 Merge branch 'bond-fix-2-link-state-issues'
Hangbin Liu says:

====================
bond: fix 2 link state issues

This patch set fixes two bonding link state issues:

1. Broadcast mode incorrectly sets usable_slaves, causing updelay to be ignored
2. BOND_LINK_FAIL and BOND_LINK_BACK are treated as invalid states, generating
   confusing error messages

Here is the reproducer:

```
ip netns add ns
ip -n ns link add bond0 type bond mode 3 miimon 100 updelay 200 downdelay 200
ip -n ns link add type veth
ip -n ns link add type veth
ip -n ns link set veth1 up
ip -n ns link set veth3 up
ip -n ns link set veth0 master bond0
ip -n ns link set veth2 master bond0
ip -n ns link set bond0 up
sleep 1
ip -n ns link set veth3 down
sleep 1
ip -n ns link set veth3 up
sleep 1
dmesg | tail
```
====================

Link: https://patch.msgid.link/20260304-b4-bond_updelay-v1-0-f72eb2e454d0@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 16:25:19 -08:00
Hangbin Liu
3348be7978 bonding: handle BOND_LINK_FAIL, BOND_LINK_BACK as valid link states
Before the fixed commit, we check slave->new_link during commit
state, which values are only BOND_LINK_{NOCHANGE, UP, DOWN}. After
the commit, we start using slave->link_new_state, which state also could
be BOND_LINK_{FAIL, BACK}.

For example, when we set updelay/downdelay, after a failover,
the slave->link_new_state could be set to BOND_LINK_{FAIL, BACK} in
bond_miimon_inspect(). And later in bond_miimon_commit(), it will treat
it as invalid and print an error, which would cause confusion for users.

[  106.440254] bond0: (slave veth2): link status down for interface, disabling it in 200 ms
[  106.440265] bond0: (slave veth2): invalid new link 1 on slave
[  106.648276] bond0: (slave veth2): link status definitely down, disabling slave
[  107.480271] bond0: (slave veth2): link status up, enabling it in 200 ms
[  107.480288] bond0: (slave veth2): invalid new link 3 on slave
[  107.688302] bond0: (slave veth2): link status definitely up, 10000 Mbps full duplex

Let's handle BOND_LINK_{FAIL, BACK} as valid link states.

Fixes: 1899bb3251 ("bonding: fix state transition issue in link monitoring")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://patch.msgid.link/20260304-b4-bond_updelay-v1-2-f72eb2e454d0@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 16:25:16 -08:00
Hangbin Liu
45fc134bcf bonding: do not set usable_slaves for broadcast mode
After commit e0caeb24f5 ("net: bonding: update the slave array for broadcast mode"),
broadcast mode will also set all_slaves and usable_slaves during
bond_enslave(). But if we also set updelay, during enslave, the
slave init state will be BOND_LINK_BACK. And later
bond_update_slave_arr() will alloc usable_slaves but add nothing.
This will cause bond_miimon_inspect() to have ignore_updelay
always true. So the updelay will be always ignored. e.g.

[    6.498368] bond0: (slave veth2): link status definitely down, disabling slave
[    7.536371] bond0: (slave veth2): link status up, enabling it in 0 ms
[    7.536402] bond0: (slave veth2): link status definitely up, 10000 Mbps full duplex

To fix it, we can either always call bond_update_slave_arr() on every
place when link changes. Or, let's just not set usable_slaves for
broadcast mode.

Fixes: e0caeb24f5 ("net: bonding: update the slave array for broadcast mode")
Reported-by: Liang Li <liali@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://patch.msgid.link/20260304-b4-bond_updelay-v1-1-f72eb2e454d0@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 16:25:16 -08:00
Linus Torvalds
e0c505cb76 Merge tag 'v7.0-rc2-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6
Pull smb client fixes from Steve French:

 - Fix potential oops on open failure

 - Fix unmount to better free deferred closes

 - Use proper constant-time MAC comparison function

 - Two buffer allocation size fixes

 - Two minor cleanups

 - make SMB2 kunit tests a distinct module

* tag 'v7.0-rc2-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6:
  smb: client: fix oops due to uninitialised var in smb2_unlink()
  cifs: open files should not hold ref on superblock
  smb: client: Compare MACs in constant time
  smb/client: remove unused SMB311_posix_query_info()
  smb/client: fix buffer size for smb311_posix_qinfo in SMB311_posix_query_info()
  smb/client: fix buffer size for smb311_posix_qinfo in smb2_compound_op()
  smb: update some doc references
  smb/client: make SMB2 maperror KUnit tests a separate module
2026-03-06 16:07:22 -08:00
Johan Hovold
224a0d284c net: mctp: fix device leak on probe failure
Driver core holds a reference to the USB interface and its parent USB
device while the interface is bound to a driver and there is no need to
take additional references unless the structures are needed after
disconnect.

This driver takes a reference to the USB device during probe but does
not to release it on probe failures.

Drop the redundant device reference to fix the leak, reduce cargo
culting, make it easier to spot drivers where an extra reference is
needed, and reduce the risk of further memory leaks.

Fixes: 0791c0327a ("net: mctp: Add MCTP USB transport driver")
Cc: stable@vger.kernel.org	# 6.15
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Jeremy Kerr <jk@codeconstruct.com.au>
Link: https://patch.msgid.link/20260305104549.16110-1-johan@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 15:38:21 -08:00
Sebastian Andrzej Siewior
a72f73c4dd cgroup: Don't expose dead tasks in cgroup
Once a task exits it has its state set to TASK_DEAD and then it is
removed from the cgroup it belonged to. The last step happens on the task
gets out of its last schedule() invocation and is delayed on PREEMPT_RT
due to locking constraints.

As a result it is possible to receive a pid via waitpid() of a task
which is still listed in cgroup.procs for the cgroup it belonged
to. This is something that systemd does not expect and as a result it
waits for its exit until a time out occurs.
This can also be reproduced on !PREEMPT_RT kernel with a significant
delay in do_exit() after exit_notify().

Hide the task from the output which have PF_EXITING set which is done
before the parent is notified. Keeping zombies with live threads
shouldn't break anything (suggested by Tejun).

Reported-by: Bert Karwatzki <spasswolf@web.de>
Closes: https://lore.kernel.org/all/20260219164648.3014-1-spasswolf@web.de/
Tested-by: Bert Karwatzki <spasswolf@web.de>
Fixes: 9311e6c29b ("cgroup: Fix sleeping from invalid context warning on PREEMPT_RT")
Cc: stable@vger.kernel.org # v6.19+
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-06 12:43:25 -10:00
Cheng-Yang Chou
2a0596d516 sched_ext: Documentation: Update sched-ext.rst
- Remove CONFIG_PAHOLE_HAS_BTF_TAG from required config list
- Document ext_idle.c as the built-in idle CPU selection policy
- Add descriptions for example schedulers in tools/sched_ext/

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-06 12:40:27 -10:00
Mario Limonciello
72ecb1dae7 drm/amd: Fix a few more NULL pointer dereference in device cleanup
I found a few more paths that cleanup fails due to a NULL version pointer
on unsupported hardware.

Add NULL checks as applicable.

Fixes: 39fc2bc4da ("drm/amdgpu: Protect GPU register accesses in powergated state in some paths")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit f5a05f8414fc10f307eb965f303580c7778f8dd2)
Cc: stable@vger.kernel.org
2026-03-06 17:19:14 -05:00
Yang Wang
a6571045cf drm/amdgpu: fix gpu idle power consumption issue for gfx v12
Older versions of the MES firmware may cause abnormal GPU power consumption.
When performing inference tasks on the GPU (e.g., with Ollama using ROCm),
the GPU may show abnormal power consumption in idle state and incorrect GPU load information.
This issue has been fixed in firmware version 0x8b and newer.

Closes: https://github.com/ROCm/ROCm/issues/5706
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 4e22a5fe6ea6e0b057e7f246df4ac3ff8bfbc46a)
2026-03-06 17:11:46 -05:00
Cristian Ciocaltea
52289ce48e drm/amdgpu: Fix kernel-doc comments for some LUT properties
The following members of struct amdgpu_mode_info do not have valid
references in the related kernel-doc sections:

 - plane_shaper_lut_property
 - plane_shaper_lut_size_property,
 - plane_lut3d_size_property

Correct all affected comment blocks.

Fixes: f545d82479 ("drm/amd/display: add plane shaper LUT and TF driver-specific properties")
Fixes: 671994e3bf ("drm/amd/display: add plane 3D LUT driver-specific properties")
Reviewed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ec5708d6e547f7efe2f009073bfa98dbc4c5c2ac)
2026-03-06 17:11:15 -05:00
Mario Limonciello
062ea905ff drm/amd: Fix NULL pointer dereference in device cleanup
When GPU initialization fails due to an unsupported HW block
IP blocks may have a NULL version pointer. During cleanup in
amdgpu_device_fini_hw, the code calls amdgpu_device_set_pg_state and
amdgpu_device_set_cg_state which iterate over all IP blocks and access
adev->ip_blocks[i].version without NULL checks, leading to a kernel
NULL pointer dereference.

Add NULL checks for adev->ip_blocks[i].version in both
amdgpu_device_set_cg_state and amdgpu_device_set_pg_state to prevent
dereferencing NULL pointers during GPU teardown when initialization has
failed.

Fixes: 39fc2bc4da ("drm/amdgpu: Protect GPU register accesses in powergated state in some paths")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit b7ac77468cda92eecae560b05f62f997a12fe2f2)
Cc: stable@vger.kernel.org
2026-03-06 17:10:40 -05:00
Yang Wang
9d4837a261 drm/amd/pm: add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v14
add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v14.0.2/14.0.3

Fixes: 9710b84e2a ("drm/amd/pm: add overdrive support on smu v14.0.2/3")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5018
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 1b5cf07d80bb16d1593579ccdb23f08ea4262c14)
2026-03-06 17:10:26 -05:00
Yang Wang
cb47c882c3 drm/amd/pm: add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v13
add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v13.0.0/13.0.7

Fixes: cfffd980bf ("drm/amd/pm: add zero RPM OD setting support for SMU13")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5018
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 576a10797b607ee9e4068218daf367b481564120)
2026-03-06 17:10:13 -05:00
Andrei-Alexandru Tachici
3b1679e086 tracing: Fix enabling multiple events on the kernel command line and bootconfig
Multiple events can be enabled on the kernel command line via a comma
separator. But if the are specified one at a time, then only the last
event is enabled. This is because the event names are saved in a temporary
buffer, and each call by the init cmdline code will reset that buffer.

This also affects names in the boot config file, as it may call the
callback multiple times with an example of:

  kernel.trace_event = ":mod:rproc_qcom_common", ":mod:qrtr", ":mod:qcom_aoss"

Change the cmdline callback function to append a comma and the next value
if the temporary buffer already has content.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20260302-trace-events-allow-multiple-modules-v1-1-ce4436e37fb8@oss.qualcomm.com
Signed-off-by: Andrei-Alexandru Tachici <andrei-alexandru.tachici@oss.qualcomm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-06 16:54:34 -05:00
Linus Torvalds
325a118c12 Merge tag 'pci-v7.0-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
Pull pci fixes from Bjorn Helgaas:

 - Initialize msi_addr_mask for OF-created PCI devices to fix sparc and
   powerpc probe regressions (Nilay Shroff)

 - Orphan the Altera PCIe controller driver (Dave Hansen)

* tag 'pci-v7.0-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci:
  MAINTAINERS: Orphan Altera PCIe controller driver
  sparc/PCI: Initialize msi_addr_mask for OF-created PCI devices
  powerpc/pci: Initialize msi_addr_mask for OF-created PCI devices
2026-03-06 13:37:52 -08:00
Linus Torvalds
dfb3142844 Merge tag 'drm-fixes-2026-03-07' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "Weekly fixes pull.

  There is one mm fix in here for a HMM livelock triggered by the xe
  driver tests. Otherwise it's a pretty wide range of fixes across the
  board, ttm UAF regression fix, amdgpu fixes, nouveau doesn't crash my
  laptop anymore fix, and a fair bit of misc.

  Seems about right for rc3.

  mm:
   - mm: Fix a hmm_range_fault() livelock / starvation problem

  pagemap:
   - Revert "drm/pagemap: Disable device-to-device migration"

  ttm:
   - fix function return breaking reclaim
   - fix build failure on PREEMPT_RT
   - fix bo->resource UAF

  dma-buf:
   - include ioctl.h in uapi header

  sched:
   - fix kernel doc warning

  amdgpu:
   - LUT fixes
   - VCN5 fix
   - Dispclk fix
   - SMU 13.x fix
   - Fix race in VM acquire
   - PSP 15.x fix
   - UserQ fix

  amdxdna:
   - fix invalid payload for failed command
   - fix NULL ptr dereference
   - fix major fw version check
   - avoid inconsistent fw state on error

  i915/display:
   - Fix for Lenovo T14 G7 display not refreshing

  xe:
   - Do not preempt fence signaling CS instructions
   - Some leak and finalization fixes
   - Workaround fix

  nouveau:
   - avoid runtime suspend oops when using dp aux

  panthor:
   - fix gem_sync argument ordering

  solomon:
   - fix incorrect display output

  renesas:
   - fix DSI divider programming

  ethosu:
   - fix job submit error clean-up refcount
   - fix NPU_OP_ELEMENTWISE validation
   - handle possible underflows in IFM size calcs"

* tag 'drm-fixes-2026-03-07' of https://gitlab.freedesktop.org/drm/kernel: (38 commits)
  accel: ethosu: Handle possible underflow in IFM size calculations
  accel: ethosu: Fix NPU_OP_ELEMENTWISE validation with scalar
  accel: ethosu: Fix job submit error clean-up refcount underflows
  accel/amdxdna: Split mailbox channel create function
  drm/panthor: Correct the order of arguments passed to gem_sync
  Revert "drm/syncobj: Fix handle <-> fd ioctls with dirty stack"
  drm/ttm: Fix bo resource use-after-free
  nouveau/dpcd: return EBUSY for aux xfer if the device is asleep
  accel/amdxdna: Fix major version check on NPU1 platform
  drm/amdgpu/userq: refcount userqueues to avoid any race conditions
  drm/amdgpu/userq: Consolidate wait ioctl exit path
  drm/amdgpu/psp: Use Indirect access address for GFX to PSP mailbox
  drm/amdgpu: Fix use-after-free race in VM acquire
  drm/amd/pm: remove invalid gpu_metrics.energy_accumulator on smu v13.0.x
  drm/xe: Fix memory leak in xe_vm_madvise_ioctl
  drm/xe/reg_sr: Fix leak on xa_store failure
  drm/xe/xe2_hpg: Correct implementation of Wa_16025250150
  drm/xe/gsc: Fix GSC proxy cleanup on early initialization failure
  Revert "drm/pagemap: Disable device-to-device migration"
  drm/i915/psr: Fix for Panel Replay X granularity DPCD register handling
  ...
2026-03-06 13:29:12 -08:00
Dave Hansen
0e16181704 MAINTAINERS: Remove bouncing T7XX reviewer
This reviewer's email no longer works. Remove it from MAINTAINERS.

Cc: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
Cc: Liu Haijun <haijun.liu@mediatek.com>
Cc: Ricardo Martinez <ricardo.martinez@linux.intel.com>
Cc: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Loic Poulain <loic.poulain@oss.qualcomm.com>
Link: https://patch.msgid.link/20260305172806.3118806-1-dave.hansen@linux.intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-06 13:18:59 -08:00
Linus Torvalds
3593e678f5 Merge tag 'linux_kselftest-kunit-fixes-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
Pull kunit fixes from Shuah Khan:

 - Fix rust warnings when CONFIG_PRINTK is disabled

 - Reduce stack usage in kunit_run_tests() to fix warnings when
   CONFIG_FRAME_WARN is set to a relatively low value

 - Update email address for David Gow

 - Copy caller args in kunit tool in run_kernel to prevent mutation

* tag 'linux_kselftest-kunit-fixes-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest:
  kunit: reduce stack usage in kunit_run_tests()
  kunit: tool: copy caller args in run_kernel to prevent mutation
  rust: kunit: fix warning when !CONFIG_PRINTK
  MAINTAINERS: Update email address for David Gow
2026-03-06 12:34:49 -08:00
Pengyu Luo
e4eb11b34d drm/msm/dsi: fix pclk rate calculation for bonded dsi
Recently, we round up new_hdisplay once at most, for bonded dsi, we
may need twice, since they are independent links, we should round up
each half separately. This also aligns with the hdisplay we program
later in dsi_timing_setup()

Example:
	full_hdisplay = 1904, dsc_bpp = 8, bpc = 8
	new_full_hdisplay = DIV_ROUND_UP(1904 * 8, 8 * 3) = 635

if we use half display
	new_half_hdisplay = DIV_ROUND_UP(952 * 8, 8 * 3) = 318
	new_full_display = 636

Fixes: 7c9e4a554d ("drm/msm/dsi: Reduce pclk rate for compression")
Signed-off-by: Pengyu Luo <mitltlatltl@gmail.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/709716/
Link: https://lore.kernel.org/r/20260306163255.215456-1-mitltlatltl@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-03-06 20:41:08 +02:00
Linus Torvalds
651690480a Merge tag 'spi-fix-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
Pull spi fix from Mark Brown:
 "One device specific fix here, it was possible we might end up trying
  to dereference an invalid pointer while reporting a transfer timeout
  on DesignWare controllers"

* tag 'spi-fix-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi:
  spi: spi-dw-dma: fix print error log when wait finish transaction
2026-03-06 10:33:32 -08:00
Linus Torvalds
5abeba615c Merge tag 'regulator-fix-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
Pull regulator fixes from Mark Brown:
 "A couple of small, driver specific fixes which might not even have
  much impact if you have the affected devices depending on your setup"

* tag 'regulator-fix-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator:
  regulator: pf9453: Respect IRQ trigger settings from firmware
  regulator: mt6363: Fix incorrect and redundant IRQ disposal in probe
2026-03-06 10:27:45 -08:00
Linus Torvalds
d249037ac4 Merge tag 'sound-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
Pull sound fixes from Takashi Iwai:
 "Again a collection of device-specific fixes. Most of changes are
  fairly small device-specific quirks of fixes for HD- and USB-audio,
  ASoC Intel, AMD, fsl, Cirrus and co.

  The only large LOC is for plumbing ASoC ACP driver to add the Cirrus
  Logic codec support, so this one is also just adding some tables"

* tag 'sound-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound: (21 commits)
  ALSA: us122l: drop redundant interface references
  ASoC: amd: yc: Add DMI quirk for ASUS EXPERTBOOK PM1503CDA
  ASoC: dt-bindings: renesas,rz-ssi: Document RZ/G3L SoC
  ASoC: SDCA: Add allocation failure check for Entity name
  ALSA: hda/senary: Ensure EAPD is enabled during init
  ALSA: hda/senary: Use codec->core.afg for GPIO access
  ALSA: doc: usb-audio: Add doc for QUIRK_FLAG_SKIP_IFACE_SETUP
  ASoC: dt-bindings: tegra: Add compatible for Tegra238 sound card
  ALSA: hda/hdmi: Add Tegra238 HDA codec device ID
  ASoC: cs35l56: Suppress pointless warning about number of GPIO pulls
  ASoC: amd: acp: Add ACP6.3 match entries for Cirrus Logic parts
  ASoC: Intel: sof_sdw: Add quirk for Alienware Area 51 (2025) 0CCD SKU
  ASoC: rt1321: fix DMIC ch2/3 mask issue
  ASoC: cs35l56: Only patch ASP registers if the DAI is part of a DAIlink
  ASoC: fsl_easrc: Fix event generation in fsl_easrc_iec958_set_reg()
  ASoC: fsl_easrc: Fix event generation in fsl_easrc_iec958_put_bits()
  ALSA: firewire: dice: Fix printf warning with W=1
  ALSA: hda/tas2781: A workaround solution to lower-vol issue among lower calibrated-impedance micro-speaker on TAS2781
  ALSA: hda/realtek: Add quirk for HP Pavilion 15-eh1xxx to enable mute LED
  ALSA: usb-audio: Add iface reset and delay quirk for AB13X USB Audio
  ...
2026-03-06 10:06:04 -08:00
Guenter Roeck
457965c13f tracing: Add NULL pointer check to trigger_data_free()
If trigger_data_alloc() fails and returns NULL, event_hist_trigger_parse()
jumps to the out_free error path. While kfree() safely handles a NULL
pointer, trigger_data_free() does not. This causes a NULL pointer
dereference in trigger_data_free() when evaluating
data->cmd_ops->set_filter.

Fix the problem by adding a NULL pointer check to trigger_data_free().

The problem was found by an experimental code review agent based on
gemini-3.1-pro while reviewing backports into v6.18.y.

Cc: Miaoqian Lin <linmq006@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://patch.msgid.link/20260305193339.2810953-1-linux@roeck-us.net
Fixes: 0550069cc2 ("tracing: Properly process error handling in event_hist_trigger_parse()")
Assisted-by: Gemini:gemini-3.1-pro
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-06 13:04:30 -05:00
Linus Torvalds
48976c0eba Merge tag 'hid-for-linus-2026030601' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid
Pull HID fixes from Benjamin Tissoires:

 - fix a few memory leaks (Günther Noack)

 - fix potential kernel crashes in cmedia, creative-sb0540 and zydacron
   (Greg Kroah-Hartman)

 - fix NULL pointer dereference in pidff (Tomasz Pakuła)

 - fix battery reporting for Apple Magic Trackpad 2 (Julius Lehmann)

 - mcp2221 proper handling of failed read operation (Romain Sioen)

 - various device quirks / device ID additions

* tag 'hid-for-linus-2026030601' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid:
  HID: mcp2221: cancel last I2C command on read error
  HID: asus: add xg mobile 2023 external hardware support
  HID: multitouch: Keep latency normal on deactivate for reactivation gesture
  HID: apple: Add EPOMAKER TH87 to the non-apple keyboards list
  HID: intel-ish-hid: ipc: Add Nova Lake-H/S PCI device IDs
  selftests: hid: tests: test_wacom_generic: add tests for display devices and opaque devices
  HID: multitouch: new class MT_CLS_EGALAX_P80H84
  HID: magicmouse: fix battery reporting for Apple Magic Trackpad 2
  HID: pidff: Fix condition effect bit clearing
  HID: Add HID_CLAIMED_INPUT guards in raw_event callbacks missing them
  HID: asus: avoid memory leak in asus_report_fixup()
  HID: magicmouse: avoid memory leak in magicmouse_report_fixup()
  HID: apple: avoid memory leak in apple_report_fixup()
  HID: Document memory allocation properties of report_fixup()
2026-03-06 10:00:58 -08:00
Linus Torvalds
54de8b835b Merge tag 'platform-drivers-x86-v7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86
Pull x86 platform driver fixes from Ilpo Järvinen:

 - alienware-wmi-wmax: Add G-Mode support to m18 laptops

 - asus-armoury: Add support for FA401UM, G733QS, GX650RX

 - dell-wmi-sysman: Don't hex dump plaintext password data

 - hp-bioscfg: Support large number of enumeration attributes

 - hp-wmi: Add support for Omen 14-fb1xxx, 16-xd0xxx, 16-wf0xxx, and
   Victus-d0xxx

 - int3472: Handle GPIO type 0x10 (DOVDD)

 - intel-hid:
     - Add Dell 14 & 16 Plus 2-in-1 to dmi_vgbs_allow_list
     - Enable 5-button array on ThinkPad X1 Fold 16 Gen 1

 - mellanox: mlxreg: Fix kernel-doc warnings

 - oxpec: Add support for OneXPlayer X1 Air, X1z, APEX, and Aokzoe A2
   Pro

 - redmi-wmi: Add more Fn hotkey mappings

 - thinkpad_acpi: Fix errors reading battery thresholds

 - touchscreen_dmi: Add quirk for y-inverted Goodix touchscreen on SUPI
   S10

 - uniwill-laptop:
     - FN lock/super key lock attributes rename
     - Fix crash on unexpected battery event
     - A special key combination can alter FN lock status so mark it
       volatile
     - Handle FN lock event

* tag 'platform-drivers-x86-v7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86: (27 commits)
  platform/x86: dell-wmi-sysman: Don't hex dump plaintext password data
  platform_data/mlxreg: mlxreg.h: fix all kernel-doc warnings
  platform/x86: asus-armoury: add support for FA401UM
  platform/x86: asus-armoury: add support for GX650RX
  platform/x86: hp-bioscfg: Support allocations of larger data
  platform/x86: oxpec: Add support for Aokzoe A2 Pro
  platform/x86: oxpec: Add support for OneXPlayer X1 Air
  platform/x86: oxpec: Add support for OneXPlayer X1z
  platform/x86: oxpec: Add support for OneXPlayer APEX
  platform/x86: uniwill-laptop: Handle FN lock event
  platform/x86: uniwill-laptop: Mark FN lock status as being volatile
  platform/x86: uniwill-laptop: Fix crash on unexpected battery event
  platform/x86: uniwill-laptop: Rename FN lock and super key lock attrs
  platform/x86: redmi-wmi: Add more hotkey mappings
  platform/x86: alienware-wmi-wmax: Add G-Mode support to m18 laptops
  platform/x86: hp-wmi: add Omen 14-fb1xxx (board 8E41) support
  platform/x86: dell-wmi: Add audio/mic mute key codes
  platform/x86: hp-wmi: Add Victus 16-d0xxx support
  platform/x86: intel-hid: Enable 5-button array on ThinkPad X1 Fold 16 Gen 1
  platform/x86: int3472: Handle GPIO type 0x10 (DOVDD)
  ...
2026-03-06 09:48:50 -08:00
Linus Torvalds
9a881ea3da Merge tag 'slab-for-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab fixes from Vlastimil Babka:

 - Fix for slab->stride truncation on 64k page systems due to short
   type. It was not due to races and lack of barriers in the end. (Harry
   Yoo)

 - Fix for severe performance regression due to unnecessary sheaf refill
   restrictions exposed by mempool allocation strategy. (Vlastimil
   Babka)

 - Stable fix for potential silent percpu sheaf flushing failures on
   PREEMPT_RT. (Vlastimil Babka)

* tag 'slab-for-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab: change stride type from unsigned short to unsigned int
  mm/slab: allow sheaf refill if blocking is not allowed
  slab: distinguish lock and trylock for sheaf_flush_main()
2026-03-06 09:22:51 -08:00
Linus Torvalds
345dfaaf9f Merge tag 'pmdomain-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm
Pull pmdomain fixes from Ulf Hansson:

 - rockchip: Fix PD_VCODEC for RK3588

 - bcm: Fix broken reset status read for bcm2835

* tag 'pmdomain-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm:
  pmdomain: rockchip: Fix PD_VCODEC for RK3588
  pmdomain: bcm: bcm2835-power: Fix broken reset status read
2026-03-06 09:16:39 -08:00
Linus Torvalds
0b2758f48f Require (reasonably) normal mappings for MADV_DOFORK
This came up as a result of the tracing fix pull request, and commit
e39bb9e02b ("tracing: Fix WARN_ON in tracing_buffers_mmap_close") in
particular.

The use of MADV_DOFORK confused the ring buffer mapping reference
counting just because it was unexpected, since the mapping was
originally done with VM_DONTCOPY.

The tracing code may well be the only case of this (and fixed it all by
just using the mmap open callback to unconfuse itself), but it's just
strange that we allow MADV_DOFORK on special mappings where the kernel
has set the "don't copy this" bit.

The code already disallowed it for VM_IO mappings (going back to the
original commit f822566165: "madvise MADV_DONTFORK/MADV_DOFORK"), so
just extend it to any of the VM_SPECIAL cases (which includes
VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP in addition to VM_IO).

We could also allow MADV_DOFORK only on mappings that had been marked
DONTFORK by the user.  But that would require us to track that
(presumably with another VM_xyz bit), so let's just do this trivial and
straightforward modifications.

If anybody notices, Lorenzo will be boarding Flying Pig Airlines.

Suggested-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Link: https://lore.kernel.org/all/a8907468-d7e9-4727-af28-66d905093230@kernel.org/
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-03-06 09:10:36 -08:00
Waiman Long
ca174c705d cgroup/cpuset: Call rebuild_sched_domains() directly in hotplug
Besides deferring the call to housekeeping_update(), commit 6df415aa46
("cgroup/cpuset: Defer housekeeping_update() calls from CPU hotplug
to workqueue") also defers the rebuild_sched_domains() call to
the workqueue. So a new offline CPU may still be in a sched domain
or new online CPU not showing up in the sched domains for a short
transition period. That could be a problem in some corner cases and
can be the cause of a reported test failure[1]. Fix it by calling
rebuild_sched_domains_cpuslocked() directly in hotplug as before. If
isolated partition invalidation or recreation is being done, the
housekeeping_update() call to update the housekeeping cpumasks will
still be deferred to a workqueue.

In commit 3bfe479671 ("cgroup/cpuset: Move
housekeeping_update()/rebuild_sched_domains() together"),
housekeeping_update() is called before rebuild_sched_domains() because
it needs to access the HK_TYPE_DOMAIN housekeeping cpumask. That is now
changed to use the static HK_TYPE_DOMAIN_BOOT cpumask as HK_TYPE_DOMAIN
cpumask is now changeable at run time.  As a result, we can move the
rebuild_sched_domains() call before housekeeping_update() with
the slight advantage that it will be done in the same cpus_read_lock
critical section without the possibility of interference by a concurrent
cpu hot add/remove operation.

As it doesn't make sense to acquire cpuset_mutex/cpuset_top_mutex after
calling housekeeping_update() and immediately release them again, move
the cpuset_full_unlock() operation inside update_hk_sched_domains()
and rename it to cpuset_update_sd_hk_unlock() to signify that it will
release the full set of locks.

[1] https://lore.kernel.org/lkml/1a89aceb-48db-4edd-a730-b445e41221fe@nvidia.com

Fixes: 6df415aa46 ("cgroup/cpuset: Defer housekeeping_update() calls from CPU hotplug to workqueue")
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Chen Ridong <chenridong@huaweicloud.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-06 06:58:25 -10:00
David Carlier
1dde502587 sched_ext: Use READ_ONCE() for scx_slice_bypass_us in scx_bypass()
Commit 0927780c90 ("sched_ext: Use READ_ONCE() for lock-free reads
of module param variables") annotated the plain reads of
scx_slice_bypass_us and scx_bypass_lb_intv_us in bypass_lb_cpu(), but
missed a third site in scx_bypass():

  WRITE_ONCE(scx_slice_dfl, scx_slice_bypass_us * NSEC_PER_USEC);

scx_slice_bypass_us is a module parameter writable via sysfs in
process context through set_slice_us() -> param_set_uint_minmax(),
which performs a plain store without holding bypass_lock. scx_bypass()
reads the variable under bypass_lock, but since the writer does not
take that lock, the two accesses are concurrent.

WRITE_ONCE() only applies volatile semantics to the store of
scx_slice_dfl -- the val expression containing scx_slice_bypass_us is
evaluated as a plain read, providing no protection against concurrent
writes.

Wrap the read with READ_ONCE() to complete the annotation started by
commit 0927780c90 and make the access KCSAN-clean, consistent with
the existing READ_ONCE(scx_slice_bypass_us) in bypass_lb_cpu().

Signed-off-by: David Carlier <devnexen@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-06 06:57:23 -10:00
Linus Torvalds
617f5e9fad Merge tag 'v7.0-p2' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto fixes from Herbert Xu:

 - Fix use-after-free in ccp

 - Fix bug when SEV is disabled in ccp

 - Fix tfm_count leak in atmel-sha204a

* tag 'v7.0-p2' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: atmel-sha204a - Fix OOM ->tfm_count leak
  crypto: ccp - Fix use-after-free on error path
  crypto: ccp - allow callers to use HV-Fixed page API when SEV is disabled
2026-03-06 08:44:20 -08:00
Linus Torvalds
723b5c93aa Merge tag 'ata-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
Pull ata fixes from Niklas Cassel:

 - Fix a problem where the deferred non-NCQ command would incorrectly
   get completed as a failed command, if there was another command that
   timed out. Found by Gemini (Guenter)

 - The deferred non-NCQ command work is only supposed to run after the
   last NCQ command finishes. However, because the work was never
   canceled on error (e.g. a timeout), the work could incorrectly run
   when commands were still in flight. Found by syzbot (me)

 - Add a quirk to make sure that QEMU harddrives can potentially use up
   to 32 MiB I/Os (Pedro)

 - Add a quirk to disable LPM on Seagate ST1000DM010-2EP102 (Maximilian)

* tag 'ata-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux:
  ata: libata-eh: Fix detection of deferred qc timeouts
  ata: libata-core: Add BRIDGE_OK quirk for QEMU drives
  ata: libata: cancel pending work after clearing deferred_qc
  ata: libata-core: Disable LPM on ST1000DM010-2EP102
2026-03-06 08:41:20 -08:00
Breno Leitao
98c790b100 workqueue: Rename show_cpu_pool{s,}_hog{s,}() to reflect broadened scope
show_cpu_pool_hog() and show_cpu_pools_hogs() no longer only dump CPU
hogs — since commit 8823eaef45 ("workqueue: Show all busy workers in
stall diagnostics"), they dump every in-flight worker in the pool's
busy_hash.

Rename them to show_cpu_pool_busy_workers() and
show_cpu_pools_busy_workers() to accurately describe what they do.

Also fix the pr_info() message to say "stalled worker pools" instead of
"stalled CPU-bound worker pools", since sleeping/blocked workers are now
included.

No functional change.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-06 06:38:16 -10:00
Linus Torvalds
a028739a43 Merge tag 'block-7.0-20260305' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - Improve quirk visibility and configurability (Maurizio)
      - Fix runtime user modification to queue setup (Keith)
      - Fix multipath leak on try_module_get failure (Keith)
      - Ignore ambiguous spec definitions for better atomics support
        (John)
      - Fix admin queue leak on controller reset (Ming)
      - Fix large allocation in persistent reservation read keys
        (Sungwoo Kim)
      - Fix fcloop callback handling (Justin)
      - Securely free DHCHAP secrets (Daniel)
      - Various cleanups and typo fixes (John, Wilfred)

 - Avoid a circular lock dependency issue in the sysfs nr_requests or
   scheduler store handling

 - Fix a circular lock dependency with the pcpu mutex and the queue
   freeze lock

 - Cleanup for bio_copy_kern(), using __bio_add_page() rather than the
   bio_add_page(), as adding a page here cannot fail. The exiting code
   had broken cleanup for the error condition, so make it clear that the
   error condition cannot happen

 - Fix for a __this_cpu_read() in preemptible context splat

* tag 'block-7.0-20260305' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  block: use trylock to avoid lockdep circular dependency in sysfs
  nvme: fix memory allocation in nvme_pr_read_keys()
  block: use __bio_add_page in bio_copy_kern
  block: break pcpu_alloc_mutex dependency on freeze_lock
  blktrace: fix __this_cpu_read/write in preemptible context
  nvme-multipath: fix leak on try_module_get failure
  nvmet-fcloop: Check remoteport port_state before calling done callback
  nvme-pci: do not try to add queue maps at runtime
  nvme-pci: cap queue creation to used queues
  nvme-pci: ensure we're polling a polled queue
  nvme: fix memory leak in quirks_param_set()
  nvme: correct comment about nvme_ns_remove()
  nvme: stop setting namespace gendisk device driver data
  nvme: add support for dynamic quirk configuration via module parameter
  nvme: fix admin queue leak on controller reset
  nvme-fabrics: use kfree_sensitive() for DHCHAP secrets
  nvme: stop using AWUPF
  nvme: expose active quirks in sysfs
  nvme/host: fixup some typos
2026-03-06 08:36:18 -08:00
Christian Loehle
8ce8d0524c sched_ext: Documentation: Mention scheduling class precedence
Mention the scheduling class precedence of fair and sched_ext to
clear up how sched_ext partial mode works.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-06 06:32:19 -10:00
Linus Torvalds
3ad66a34cc Merge tag 'io_uring-7.0-20260305' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:

 - Fix a typo in the mock_file help text

 - Fix a comment regarding IORING_SETUP_TASKRUN_FLAG in the
   io_uring.h UAPI header

 - Use READ_ONCE() for reading refill queue entries

 - Reject SEND_VECTORIZED for fixed buffer sends, as it isn't
   implemented. Currently this flag is silently ignored

   This is in preparation for making these work, but first we
   need a fixup so that older kernels will correctly reject them

 - Ensure "0" means default for the rx page size

* tag 'io_uring-7.0-20260305' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/zcrx: use READ_ONCE with user shared RQEs
  io_uring/mock: Fix typo in help text
  io_uring/net: reject SEND_VECTORIZED when unsupported
  io_uring: correct comment for IORING_SETUP_TASKRUN_FLAG
  io_uring/zcrx: don't set rx_page_size when not requested
2026-03-06 08:31:36 -08:00
Yeoreum Yun
a4e8473b77 firmware: arm_ffa: Remove vm_id argument in ffa_rxtx_unmap()
According to the FF-A specification (DEN0077, v1.1, §13.7), when
FFA_RXTX_UNMAP is invoked from any instance other than non-secure
physical, the w1 register must be zero (MBZ). If a non-zero value is
supplied in this context, the SPMC must return FFA_INVALID_PARAMETER.

The Arm FF-A driver operates exclusively as a guest or non-secure
physical instance where the partition ID is always zero and is not
invoked from a hypervisor context where w1 carries a VM ID. In this
execution model, the partition ID observed by the driver is always zero,
and passing a VM ID is unnecessary and potentially invalid.

Remove the vm_id parameter from ffa_rxtx_unmap() and ensure that the
SMC call is issued with w1 implicitly zeroed, as required by the
specification. This prevents invalid parameter errors and aligns the
implementation with the defined FF-A ABI behavior.

Fixes: 3bbfe98710 ("firmware: arm_ffa: Add initial Arm FFA driver support")
Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
Message-Id: <20260304120953.847671-1-yeoreum.yun@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@kernel.org>
2026-03-06 16:26:08 +00:00
Christian Loehle
7fe44c4388 bpf: drop kthread_exit from noreturn_deny
kthread_exit became a macro to do_exit in commit 28aaa9c399
("kthread: consolidate kthread exit paths to prevent use-after-free"),
so there is no kthread_exit function BTF ID to resolve. Remove it from
noreturn_deny to avoid resolve_btfids unresolved symbol warnings.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-03-06 08:25:54 -08:00
Josh Poimboeuf
1fd1dc4172 objtool: Fix ERROR_INSN() error message
Confusingly, ERROR_INSN() shows "warning:" instead of "error:".  Fix that.

Link: https://patch.msgid.link/c4fe793bb3d23fac2c636b2511059af1158410e2.1772681234.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-06 07:53:37 -08:00
Josh Poimboeuf
356e4b2f5b objtool: Fix data alignment in elf_add_data()
Any data added to a section needs to be aligned in accordance with the
section's sh_addralign value.  Particularly strings added to a .str1.8
section.  Otherwise you may get some funky strings.

Fixes: dd590d4d57 ("objtool/klp: Introduce klp diff subcommand for diffing object files")
Link: https://patch.msgid.link/d962fc0ca24fa0825cca8dad71932dccdd9312a9.1772681234.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-06 07:53:36 -08:00
HONG Yifan
3223404910 objtool: Use HOSTCFLAGS for HAVE_XXHASH test
Previously, HAVE_XXHASH is tested by invoking HOSTCC without HOSTCFLAGS.

Consider the following scenario:

- The host machine has libxxhash installed
- We build the kernel with HOSTCFLAGS containing a --sysroot that does
  not have xxhash.h (for hermetic builds)

In this case, HAVE_XXHASH is set to y, but when it builds objtool with
HOSTCFLAGS, because the --sysroot does not contain xxhash.h, the
following error is raised:

<...>/common/tools/objtool/include/objtool/checksum_types.h:12:10: fatal error: 'xxhash.h' file not found
   12 | #include <xxhash.h>
      |          ^~~~~~~~~~

To resolve the error, we test HAVE_XXHASH by invoking HOSTCC with
HOSTCFLAGS.

Signed-off-by: HONG Yifan <elsk@google.com>
Reviewed-by: Carlos Llamas <cmllamas@google.com>
Link: https://patch.msgid.link/20260303010340.306164-1-elsk@google.com
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-06 07:48:41 -08:00
Josh Poimboeuf
11c2adcd1f objtool/klp: Avoid NULL pointer dereference when printing code symbol name
Fix a hypothetical NULL pointer defereference of the 'code_sym'
variable.  In theory this should never happen.

Reviewed-and-tested-by: Song Liu <song@kernel.org>
Link: https://patch.msgid.link/64116517bc93851a98fe366ea0a4d807f4c70aab.1770759954.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-06 07:47:11 -08:00
Josh Poimboeuf
e476bb277c objtool/klp: Disable unsupported pr_debug() usage
Instead of erroring out on unsupported pr_debug() (e.g., when patching a
module), issue a warning and make it inert, similar to how unsupported
tracepoints are currently handled.

Reviewed-and-tested-by: Song Liu <song@kernel.org>
Link: https://patch.msgid.link/3a7db3a5b7d4abf9b2534803a74e2e7231322738.1770759954.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-06 07:47:11 -08:00
Josh Poimboeuf
f9fb44b0ec objtool/klp: Fix detection of corrupt static branch/call entries
Patching a function which references a static key living in a kernel
module is unsupported due to ordering issues inherent to late module
patching:

  1) Load a livepatch module which has a __jump_table entry which needs
     a klp reloc to reference static key K which lives in module M.

  2) The __jump_table klp reloc does *not* get resolved because module M
     is not yet loaded.

  3) jump_label_add_module() corrupts memory (or causes a panic) when
     dereferencing the uninitialized pointer to key K.

validate_special_section_klp_reloc() intends to prevent that from ever
happening by catching it at build time.  However, it incorrectly assumes
the special section entry's reloc symbol references have already been
converted from section symbols to object symbols, causing the validation
to miss corruption in extracted static branch/call table entries.

Make sure the references have been properly converted before doing the
validation.

Fixes: dd590d4d57 ("objtool/klp: Introduce klp diff subcommand for diffing object files")
Reported-by: Song Liu <song@kernel.org>
Reviewed-and-tested-by: Song Liu <song@kernel.org>
Link: https://patch.msgid.link/124ad747b751df0df1725eff89de8332e3fb26d6.1770759954.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2026-03-06 07:47:10 -08:00
Keith Busch
93d0fcdddc cxl/acpi: Fix CXL_ACPI and CXL_PMEM Kconfig tristate mismatch
Commit e7e222ad73 ("cxl: Move devm_cxl_add_nvdimm_bridge() to
cxl_pmem.ko") moves devm_cxl_add_nvdimm_bridge() into the cxl_pmem file,
which has independent config compile options for built-in or module. The
call from cxl_acpi_probe() is guarded by IS_ENABLED(CONFIG_CXL_PMEM),
which evaluates to true for both =y and =m.

When CONFIG_CXL_PMEM=m, a built-in cxl_acpi attempts to reference a
symbol exported by a module, which fails to link. CXL_PMEM cannot simply
be promoted to =y in this configuration because it depends on LIBNVDIMM,
which may itself be =m.

Add a Kconfig dependency to prevent CXL_ACPI from being built-in when
CXL_PMEM is a module. This contrains CXL_ACPI to =m when CXL_PMEM=m,
while still allowing CXL_ACPI to be freely configured when CXL_PMEM is
either built-in or disabled.

[ dj: Fix up commit reference formatting. ]

Fixes: e7e222ad73 ("cxl: Move devm_cxl_add_nvdimm_bridge() to cxl_pmem.ko")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260305204057.1516948-1-kbusch@meta.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-03-06 08:08:53 -07:00
Cosmin Tanislav
ebe7561e9b pinctrl: renesas: rzt2h: Fix invalid wait context
The rzt2h_gpio_get_direction() function is called from
gpiod_get_direction(), which ends up being used within the __setup_irq()
call stack when requesting an interrupt.

__setup_irq() holds a raw_spinlock_t with IRQs disabled, which creates
an atomic context. spinlock_t cannot be used within atomic context
when PREEMPT_RT is enabled, since it may become a sleeping lock.

An "[ BUG: Invalid wait context ]" splat is observed when running with
CONFIG_PROVE_LOCKING enabled, describing exactly the aforementioned call
stack.

__setup_irq() needs to hold a raw_spinlock_t with IRQs disabled to
serialize access against a concurrent hard interrupt.

Switch to raw_spinlock_t to fix this.

Fixes: 829dde3369 ("pinctrl: renesas: rzt2h: Add GPIO IRQ chip to handle interrupts")
Signed-off-by: Cosmin Tanislav <cosmin-gabriel.tanislav.xa@renesas.com>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260205103930.666051-1-cosmin-gabriel.tanislav.xa@renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:28:34 +01:00
Felix Gu
e825c79ef9 pinctrl: renesas: rzt2h: Fix device node leak in rzt2h_gpio_register()
When calling of_parse_phandle_with_fixed_args(), the caller is
responsible for calling of_node_put() to release the device node
reference.

In rzt2h_gpio_register(), the driver fails to call of_node_put() to
release the reference in of_args.np, which causes a memory leak.

Add the missing of_node_put() call to fix the leak.

Fixes: 34d4d09307 ("pinctrl: renesas: Add support for RZ/T2H")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260127-rzt2h-v1-1-86472e7421b8@gmail.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:28:34 +01:00
Geert Uytterhoeven
85c2601e2c arm64: dts: renesas: r8a78000: Fix out-of-range SPI interrupt numbers
SPI interrupts are in the range 0-987.  Extended SPI interrupts should
use GIC_ESPI, instead of abusing GIC_SPI with a manual offset of 4064.

Fixes: 63500d12cf ("arm64: dts: renesas: Add R8A78000 SoC support")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/1f9dd274720ea1b66617a5dd84f76c3efc829dc8.1772641415.git.geert+renesas@glider.be
2026-03-06 13:15:21 +01:00
Claudiu Beznea
6dcbb6f070 arm64: dts: renesas: rzg3s-smarc-som: Set bypass for Versa3 PLL2
The default settings for the Versa3 device on the Renesas RZ/G3S SMARC
SoM board have PLL2 disabled. PLL2 was later enabled together with audio
support, as it is required to support both 44.1 kHz and 48 kHz audio.

With PLL2 enabled, it was observed that Linux occasionally either hangs
during boot (the last log message being related to the I2C probe) or
randomly crashes. This was mainly reproducible on cold boots. During
debugging, it was also noticed that the Unicode replacement character (�)
sometimes appears on the serial console. Further investigation traced this
to the configuration applied through the Versa3 register at offset 0x1c,
which controls PLL enablement.

The appearance of the Unicode replacement character suggested an issue
with the SoC reference clock. The RZ/G3S reference clock is provided by
the Versa3 clock generator (REF output).

After checking with the Renesas Versa3 hardware team, it was found that
this is related to the PLL2 lock bit being set through the
renesas,settings DT property.

The PLL lock bit must be set to avoid unstable clock output from the PLL.
However, due to the Versa3 hardware design, when a PLL lock bit is set,
all outputs (including the REF clock) are temporarily disabled until the
configured PLLs become stable.

As an alternative, the bypass bit can be used. This does not interrupt the
PLL2 output or any other Versa3 outputs, but it may result in temporary
instability on PLL2 output while the configuration is applied. Since PLL2
feeds only the audio path and audio is not used during early boot, this is
acceptable and does not affect system boot.

Drop the PLL2 lock bit and set the bypass bit instead.

This has been tested with more than 1000 cold boots.

Fixes: a94253232b ("arm64: dts: renesas: rzg3s-smarc-som: Add versa3 clock generator node")
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260302135703.162601-1-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:15:02 +01:00
Lad Prabhakar
f459672cf3 arm64: dts: renesas: r9a09g087: Fix CPG register region sizes
The CPG register regions were incorrectly sized.  Update them to match
the actual hardware specification:
  - First region (0x80280000): 0x1000 -> 0x10000 (64kiB)
  - Second region (0x81280000): 0x9000 -> 0x10000 (64kiB)

Fixes: 4b3d31f0b8 ("arm64: dts: renesas: Add initial SoC DTSI for the RZ/N2H SoC")
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260213131742.3606334-3-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:15:02 +01:00
Lad Prabhakar
b12985ceca arm64: dts: renesas: r9a09g077: Fix CPG register region sizes
The CPG register regions were incorrectly sized.  Update them to match
the actual hardware specification:
  - First region (0x80280000): 0x1000 -> 0x10000 (64kiB)
  - Second region (0x81280000): 0x9000 -> 0x10000 (64kiB)

Fixes: d17b34744f ("arm64: dts: renesas: Add initial support for the Renesas RZ/T2H SoC")
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260213131742.3606334-2-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:15:02 +01:00
Fabrizio Castro
a3f34651de arm64: dts: renesas: r9a09g057: Remove wdt{0,2,3} nodes
The HW user manual for the Renesas RZ/V2H(P) SoC (a.k.a r9a09g057)
states that only WDT1 is supposed to be accessed by the CA55 cores.
WDT0 is supposed to be used by the CM33 core, WDT2 is supposed
to be used by the CR8 core 0, and WDT3 is supposed to be used
by the CR8 core 1.

Remove wdt{0,2,3} from the SoC specific device tree to make it
compliant with the specification from the HW manual.

This change is harmless as there are currently no users of the
wdt{0,2,3} device tree nodes, only the wdt1 node is actually used.

Fixes: 095105496e ("arm64: dts: renesas: r9a09g057: Add WDT0-WDT3 nodes")
Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260203124247.7320-3-fabrizio.castro.jz@renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:15:02 +01:00
Lad Prabhakar
5c03465ecf arm64: dts: renesas: rzv2-evk-cn15-sd: Add ramp delay for SD0 regulator
Set an appropriate ramp delay for the SD0 I/O voltage regulator in the
CN15 SD overlay to make UHS-I voltage switching reliable during card
initialization.

This issue was observed on the RZ/V2H EVK, while the same UHS-I cards
worked on the RZ/V2N EVK without problems. Adding the ramp delay makes
the behavior consistent and avoids SD init timeouts.

Before this change SD0 could fail with:

    mmc0: error -110 whilst initialising SD card

With the delay in place UHS-I cards enumerate correctly:

    mmc0: new UHS-I speed SDR104 SDXC card at address aaaa
    mmcblk0: mmc0:aaaa SR64G 59.5 GiB
     mmcblk0: p1

Fixes: 3d6c2bc762 ("arm64: dts: renesas: Add CN15 eMMC and SD overlays for RZ/V2H and RZ/V2N EVKs")
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260123225957.1007089-5-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:15:02 +01:00
Lad Prabhakar
bb70589b67 arm64: dts: renesas: rzt2h-n2h-evk: Add ramp delay for SD0 card regulator
Add a ramp delay of 60 uV/us to the vqmmc_sdhi0 voltage regulator to
fix UHS-I SD card detection failures.

Measurements on CN78 pin 4 showed the actual voltage ramp time to be
21.86ms when switching between 3.3V and 1.8V. A 25ms ramp delay has
been configured to provide adequate margin. The calculation is based
on the voltage delta of 1.5V (3.3V - 1.8V):
  1500000 uV / 60 uV/us = 25000 us (25ms)

Prior to this patch, UHS-I cards failed to initialize with:

    mmc0: error -110 whilst initialising SD card

After this patch, UHS-I cards are properly detected on SD0:

    mmc0: new UHS-I speed SDR104 SDXC card at address aaaa
    mmcblk0: mmc0:aaaa SR64G 59.5 GiB

Fixes: d065453e5e ("arm64: dts: renesas: rzt2h-rzn2h-evk: Enable SD card slot")
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/20260123225957.1007089-2-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
2026-03-06 13:15:02 +01:00
Yifan Wu
d87c828daa selftest/arm64: Fix sve2p1_sigill() to hwcap test
The FEAT_SVE2p1 is indicated by ID_AA64ZFR0_EL1.SVEver. However,
the BFADD requires the FEAT_SVE_B16B16, which is indicated by
ID_AA64ZFR0_EL1.B16B16. This could cause the test to incorrectly
fail on a CPU that supports FEAT_SVE2.1 but not FEAT_SVE_B16B16.

LD1Q Gather load quadwords which is decoded from SVE encodings and
implied by FEAT_SVE2p1.

Fixes: c5195b027d ("kselftest/arm64: Add SVE 2.1 to hwcap test")
Signed-off-by: Yifan Wu <wuyifan50@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-06 11:54:26 +00:00
Peddolla Harshavardhan Reddy
6dccbc9f3e wifi: cfg80211: cancel pmsr_free_wk in cfg80211_pmsr_wdev_down
When the nl80211 socket that originated a PMSR request is
closed, cfg80211_release_pmsr() sets the request's nl_portid
to zero and schedules pmsr_free_wk to process the abort
asynchronously. If the interface is concurrently torn down
before that work runs, cfg80211_pmsr_wdev_down() calls
cfg80211_pmsr_process_abort() directly. However, the already-
scheduled pmsr_free_wk work item remains pending and may run
after the interface has been removed from the driver. This
could cause the driver's abort_pmsr callback to operate on a
torn-down interface, leading to undefined behavior and
potential crashes.

Cancel pmsr_free_wk synchronously in cfg80211_pmsr_wdev_down()
before calling cfg80211_pmsr_process_abort(). This ensures any
pending or in-progress work is drained before interface teardown
proceeds, preventing the work from invoking the driver abort
callback after the interface is gone.

Fixes: 9bb7e0f24e ("cfg80211: add peer measurement with FTM initiator API")
Signed-off-by: Peddolla Harshavardhan Reddy <peddolla.reddy@oss.qualcomm.com>
Link: https://patch.msgid.link/20260305160712.1263829-3-peddolla.reddy@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-06 12:41:59 +01:00
Claudio Imbrenda
f303406efd KVM: s390: Fix a deadlock
In some scenarios, a deadlock can happen, involving _do_shadow_pte().

Convert all usages of pgste_get_lock() to pgste_get_trylock() in
_do_shadow_pte() and return -EAGAIN. All callers can already deal with
-EAGAIN being returned.

Fixes: e38c884df9 ("KVM: s390: Switch to new gmap")
Tested-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Christoph Schlameuss <schlameuss@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
2026-03-06 12:41:28 +01:00
Piotr Jaroszynski
97c5550b76 arm64: contpte: fix set_access_flags() no-op check for SMMU/ATS faults
contpte_ptep_set_access_flags() compared the gathered ptep_get() value
against the requested entry to detect no-ops. ptep_get() ORs AF/dirty
from all sub-PTEs in the CONT block, so a dirty sibling can make the
target appear already-dirty. When the gathered value matches entry, the
function returns 0 even though the target sub-PTE still has PTE_RDONLY
set in hardware.

For a CPU with FEAT_HAFDBS this gathered view is fine, since hardware may
set AF/dirty on any sub-PTE and CPU TLB behavior is effectively gathered
across the CONT range. But page-table walkers that evaluate each
descriptor individually (e.g. a CPU without DBM support, or an SMMU
without HTTU, or with HA/HD disabled in CD.TCR) can keep faulting on the
unchanged target sub-PTE, causing an infinite fault loop.

Gathering can therefore cause false no-ops when only a sibling has been
updated:
 - write faults: target still has PTE_RDONLY (needs PTE_RDONLY cleared)
 - read faults:  target still lacks PTE_AF

Fix by checking each sub-PTE against the requested AF/dirty/write state
(the same bits consumed by __ptep_set_access_flags()), using raw
per-PTE values rather than the gathered ptep_get() view, before
returning no-op. Keep using the raw target PTE for the write-bit unfold
decision.

Per Arm ARM (DDI 0487) D8.7.1 ("The Contiguous bit"), any sub-PTE in a CONT
range may become the effective cached translation and software must
maintain consistent attributes across the range.

Fixes: 4602e5757b ("arm64/mm: wire up PTE_CONT for user mappings")
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: stable@vger.kernel.org
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: James Houghton <jthoughton@google.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Piotr Jaroszynski <pjaroszynski@nvidia.com>
Acked-by: Balbir Singh <balbirs@nvidia.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-06 11:25:59 +00:00
Mehul Rao
25966fc097 ublk: fix NULL pointer dereference in ublk_ctrl_set_size()
ublk_ctrl_set_size() unconditionally dereferences ub->ub_disk via
set_capacity_and_notify() without checking if it is NULL.

ub->ub_disk is NULL before UBLK_CMD_START_DEV completes (it is only
assigned in ublk_ctrl_start_dev()) and after UBLK_CMD_STOP_DEV runs
(ublk_detach_disk() sets it to NULL). Since the UBLK_CMD_UPDATE_SIZE
handler performs no state validation, a user can trigger a NULL pointer
dereference by sending UPDATE_SIZE to a device that has been added but
not yet started, or one that has been stopped.

Fix this by checking ub->ub_disk under ub->mutex before dereferencing
it, and returning -ENODEV if the disk is not available.

Fixes: 98b995660b ("ublk: Add UBLK_U_CMD_UPDATE_SIZE")
Cc: stable@vger.kernel.org
Signed-off-by: Mehul Rao <mehulrao@gmail.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-06 04:25:44 -07:00
Zenghui Yu (Huawei)
3599c714c0 KVM: arm64: Remove the redundant ISB in __kvm_at_s1e2()
We already have an ISB in __kvm_at() to make the address translation result
visible to subsequent reads of PAR_EL1. Remove the redundant one right
after it.

Signed-off-by: Zenghui Yu (Huawei) <zenghui.yu@linux.dev>
Link: https://patch.msgid.link/20260306074422.47694-1-zenghui.yu@linux.dev
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-06 10:42:21 +00:00
Fuad Tabba
244acf1976 KVM: arm64: Fix vma_shift staleness on nested hwpoison path
When user_mem_abort() handles a nested stage-2 fault, it truncates
vma_pagesize to respect the guest's mapping size. However, the local
variable vma_shift is never updated to match this new size.

If the underlying host page turns out to be hardware poisoned,
kvm_send_hwpoison_signal() is called with the original, larger
vma_shift instead of the actual mapping size. This signals incorrect
poison boundaries to userspace and breaks hugepage memory poison
containment for nested VMs.

Update vma_shift to match the truncated vma_pagesize when operating
on behalf of a nested hypervisor.

Fixes: fd276e71d1 ("KVM: arm64: nv: Handle shadow stage 2 page faults")
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://patch.msgid.link/20260304162222.836152-3-tabba@google.com
[maz: simplified vma_shift assignment from the original patch]
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-06 10:42:02 +00:00
Helge Deller
8475d8fe21 parisc: Fix initial page table creation for boot
The KERNEL_INITIAL_ORDER value defines the initial size (usually 32 or
64 MB) of the page table during bootup. Up until now the whole area was
initialized with PTE entries, but there was no check if we filled too
many entries.  Change the code to fill up with so many entries that the
"_end" symbol can be reached by the kernel, but not more entries than
actually fit into the initial PTE tables.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # v6.0+
2026-03-06 11:33:13 +01:00
Helge Deller
17c144f110 parisc: Check kernel mapping earlier at bootup
The check if the initial mapping is sufficient needs to happen much
earlier during bootup. Move this test directly to the start_parisc()
function and use native PDC iodc functions to print the warning, because
panic() and printk() are not functional yet.

This fixes boot when enabling various KALLSYSMS options which need
much more space.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # v6.0+
2026-03-06 11:33:13 +01:00
Helge Deller
8e732934fb parisc: Increase initial mapping to 64 MB with KALLSYMS
The 32MB initial kernel mapping can become too small when CONFIG_KALLSYMS
is used. Increase the mapping to 64 MB in this case.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # v6.0+
2026-03-06 11:33:13 +01:00
Kuniyuki Iwashima
b94ae8e0d5 wifi: mac80211: Fix static_branch_dec() underflow for aql_disable.
syzbot reported static_branch_dec() underflow in aql_enable_write(). [0]

The problem is that aql_enable_write() does not serialise concurrent
write()s to the debugfs.

aql_enable_write() checks static_key_false(&aql_disable.key) and
later calls static_branch_inc() or static_branch_dec(), but the
state may change between the two calls.

aql_disable does not need to track inc/dec.

Let's use static_branch_enable() and static_branch_disable().

[0]:
val == 0
WARNING: kernel/jump_label.c:311 at __static_key_slow_dec_cpuslocked.part.0+0x107/0x120 kernel/jump_label.c:311, CPU#0: syz.1.3155/20288
Modules linked in:
CPU: 0 UID: 0 PID: 20288 Comm: syz.1.3155 Tainted: G     U       L      syzkaller #0 PREEMPT(full)
Tainted: [U]=USER, [L]=SOFTLOCKUP
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/24/2026
RIP: 0010:__static_key_slow_dec_cpuslocked.part.0+0x107/0x120 kernel/jump_label.c:311
Code: f2 c9 ff 5b 5d c3 cc cc cc cc e8 54 f2 c9 ff 48 89 df e8 ac f9 ff ff eb ad e8 45 f2 c9 ff 90 0f 0b 90 eb a2 e8 3a f2 c9 ff 90 <0f> 0b 90 eb 97 48 89 df e8 5c 4b 33 00 e9 36 ff ff ff 0f 1f 80 00
RSP: 0018:ffffc9000b9f7c10 EFLAGS: 00010293
RAX: 0000000000000000 RBX: ffffffff9b3e5d40 RCX: ffffffff823c57b4
RDX: ffff8880285a0000 RSI: ffffffff823c5846 RDI: ffff8880285a0000
RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000a
R13: 1ffff9200173ef88 R14: 0000000000000001 R15: ffffc9000b9f7e98
FS:  00007f530dd726c0(0000) GS:ffff8881245e3000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000001140 CR3: 000000007cc4a000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 __static_key_slow_dec_cpuslocked kernel/jump_label.c:297 [inline]
 __static_key_slow_dec kernel/jump_label.c:321 [inline]
 static_key_slow_dec+0x7c/0xc0 kernel/jump_label.c:336
 aql_enable_write+0x2b2/0x310 net/mac80211/debugfs.c:343
 short_proxy_write+0x133/0x1a0 fs/debugfs/file.c:383
 vfs_write+0x2aa/0x1070 fs/read_write.c:684
 ksys_pwrite64 fs/read_write.c:793 [inline]
 __do_sys_pwrite64 fs/read_write.c:801 [inline]
 __se_sys_pwrite64 fs/read_write.c:798 [inline]
 __x64_sys_pwrite64+0x1eb/0x250 fs/read_write.c:798
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xc9/0xf80 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f530cf9aeb9
Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f530dd72028 EFLAGS: 00000246 ORIG_RAX: 0000000000000012
RAX: ffffffffffffffda RBX: 00007f530d215fa0 RCX: 00007f530cf9aeb9
RDX: 0000000000000003 RSI: 0000000000000000 RDI: 0000000000000010
RBP: 00007f530d008c1f R08: 0000000000000000 R09: 0000000000000000
R10: 4200000000000005 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f530d216038 R14: 00007f530d215fa0 R15: 00007ffde89fb978
 </TASK>

Fixes: e908435e40 ("mac80211: introduce aql_enable node in debugfs")
Reported-by: syzbot+feb9ce36a95341bb47a4@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69a8979e.a70a0220.b118c.0025.GAE@google.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260306072405.3649474-1-kuniyu@google.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-06 11:09:31 +01:00
Felix Fietkau
672e5229e1 mac80211: fix crash in ieee80211_chan_bw_change for AP_VLAN stations
ieee80211_chan_bw_change() iterates all stations and accesses
link->reserved.oper via sta->sdata->link[link_id]. For stations on
AP_VLAN interfaces (e.g. 4addr WDS clients), sta->sdata points to
the VLAN sdata, whose link never participates in chanctx reservations.
This leaves link->reserved.oper zero-initialized with chan == NULL,
causing a NULL pointer dereference in __ieee80211_sta_cap_rx_bw()
when accessing chandef->chan->band during CSA.

Resolve the VLAN sdata to its parent AP sdata using get_bss_sdata()
before accessing link data.

Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/20260305170812.2904208-1-nbd@nbd.name
[also change sta->sdata in ARRAY_SIZE even if it doesn't matter]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-06 11:08:43 +01:00
Nicolas Cavallari
ac6f24cc9c wifi: mac80211: use jiffies_delta_to_msecs() for sta_info inactive times
Inactive times of around 0xffffffff milliseconds have been observed on
an ath9k device on ARM.  This is likely due to a memory ordering race in
the jiffies_to_msecs(jiffies - last_active()) calculation causing an
overflow when the observed jiffies is below ieee80211_sta_last_active().

Use jiffies_delta_to_msecs() instead to avoid this problem.

Fixes: 7bbdd2d987 ("mac80211: implement station stats retrieval")
Signed-off-by: Nicolas Cavallari <nicolas.cavallari@green-communications.fr>
Link: https://patch.msgid.link/20260303161701.31808-1-nicolas.cavallari@green-communications.fr
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-06 11:06:27 +01:00
Johannes Berg
708bbb4553 wifi: mac80211: remove keys after disabling beaconing
We shouldn't remove keys before disable beaconing, at least when
beacon protection is used, since that would remove keys that are
still used for beacon transmission at the same time. Stop before
removing keys so there's no race.

Fixes: af2d14b01c ("mac80211: Beacon protection using the new BIGTK (STA)")
Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260303150339.574e7887b3ab.I50d708f5aa22584506a91d0da7f8a73ba39fceac@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-06 11:06:14 +01:00
Johannes Berg
bd9121a5e9 wifi: mac80211_hwsim: fully initialise PMSR capabilities
Since the recent additions to PMSR capabilities, it's no longer
sufficient to call parse_pmsr_capa() here since the capabilities
that were added aren't represented/filled by it. Always init the
data to zero to avoid using uninitialized memory.

Fixes: 86c6b6e4d1 ("wifi: nl80211/cfg80211: add new FTM capabilities")
Reported-by: syzbot+c686c6b197d10ff3a749@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/69a67aa3.a70a0220.b118c.000a.GAE@google.com/
Link: https://patch.msgid.link/20260303113739.176403-2-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-06 11:06:06 +01:00
Dave Airlie
96bfe9ff7e Merge tag 'drm-xe-fixes-2026-03-05' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Cross-subsystem Changes:
 - mm: Fix a hmm_range_fault() livelock / starvation problem (Thomas)

Core Changes:
 - Revert "drm/pagemap: Disable device-to-device migration" (Thomas)

Driver Changes:
 - Do not preempt fence signaling CS instructions (Brost)
 - Some leak and finalization fixes (Shuicheng, Tomasz, Varun, Zhanjun)
 - Workaround fix (Roper)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/aamGvvGRBRtX8-6u@intel.com
2026-03-06 19:45:19 +10:00
Dave Airlie
431989960f Merge tag 'drm-misc-fixes-2026-03-06' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
Another early drm-misc-fixes PR to revert the previous uapi fix sent in
drm-misc-fixes-2026-03-05, together with a UAF fix in TTM, an argument
order fix for panthor, a fix for the firmware getting stuck on
resource allocation error handling for amdxdna, and a few fixes for
ethosu (size calculation and reference underflows, and a validation
fix).

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patch.msgid.link/20260306-grumpy-pegasus-of-witchcraft-6bd2db@houat
2026-03-06 19:41:28 +10:00
Dave Airlie
3d3234d5da Merge tag 'drm-misc-fixes-2026-03-05' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
A return type fix for ttm, a display fix for solomon, several misc fixes
for amdxdna, a DSI clock rate fix for rz-du, a uapi fix for syncobj, a
possible build failure fix for dma-buf, a doc warning fix for sched, a
build failure fix for ttm tests, and a crash fix when suspended for
nouveau.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patch.msgid.link/20260305-ludicrous-quirky-raven-7cdafd@houat
2026-03-06 19:40:00 +10:00
Guenter Roeck
ee0e6e69a7 ata: libata-eh: Fix detection of deferred qc timeouts
If the ata_qc_for_each_raw() loop finishes without finding a matching SCSI
command for any QC, the variable qc will hold a pointer to the last element
examined, which has the tag i == ATA_MAX_QUEUE - 1. This qc can match the
port deferred QC (ap->deferred_qc).

If that happens, the condition qc == ap->deferred_qc evaluates to true
despite the loop not breaking with a match on the SCSI command for this QC.
In that case, the error handler mistakenly intercepts a command that has
not been issued yet and that has not timed out, and thus erroneously
returning a timeout error.

Fix the problem by checking for i < ATA_MAX_QUEUE in addition to
qc == ap->deferred_qc.

The problem was found by an experimental code review agent based on
gemini-3.1-pro while reviewing backports into v6.18.y.

Assisted-by: Gemini:gemini-3.1-pro
Fixes: eddb98ad93 ("ata: libata-eh: correctly handle deferred qc timeouts")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
[cassel: modified commit log as suggested by Damien]
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2026-03-06 09:58:47 +01:00
Carlos Maiolino
54fcd2f95f xfs: fix returned valued from xfs_defer_can_append
xfs_defer_can_append returns a bool, it shouldn't be returning
a NULL.

Found by code inspection.

Fixes: 4dffb2cbb4 ("xfs: allow pausing of pending deferred work items")
Cc: <stable@vger.kernel.org> # v6.8
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Acked-by: Souptick Joarder <souptick.joarder@hpe.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-06 09:30:07 +01:00
Dave Airlie
3fcb1cbd76 Merge tag 'drm-intel-fixes-2026-03-05' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-fixes
- Fix for #7284: Lenovo T14 G7 display not refreshing

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patch.msgid.link/aakz17Jx3Ye9Vqci@jlahtine-mobl
2026-03-06 17:10:02 +10:00
Anup Patel
c61ec3e8cc RISC-V: KVM: Check host Ssaia extension when creating AIA irqchip
The KVM user-space may create KVM AIA irqchip before checking
VCPU Ssaia extension availability so KVM AIA irqchip must fail
when host does not have Ssaia extension.

Fixes: 89d01306e3 ("RISC-V: KVM: Implement device interface for AIA irqchip")
Signed-off-by: Anup Patel <anup.patel@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260120080013.2153519-4-anup.patel@oss.qualcomm.com
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Anup Patel
24433b2b5c RISC-V: KVM: Fix error code returned for Ssaia ONE_REG
Return -ENOENT for Ssaia ONE_REG when Ssaia is not enabled
for a VCPU.

This will make Ssaia ONE_REG error codes consistent with
other ONE_REG interfaces of KVM RISC-V.

Fixes: 2a88f38cd5 ("RISC-V: KVM: return ENOENT in *_one_reg() when reg is unknown")
Signed-off-by: Anup Patel <anup.patel@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260120080013.2153519-3-anup.patel@oss.qualcomm.com
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Anup Patel
45700a743a RISC-V: KVM: Fix error code returned for Smstateen ONE_REG
Return -ENOENT for Smstateen ONE_REG when:
1) Smstateen is not enabled for a VCPU
2) ONE_REG id is out of range

This will make Smstateen ONE_REG error codes consistent
with other ONE_REG interfaces of KVM RISC-V.

Fixes: c04913f2b5 ("RISCV: KVM: Add sstateen0 to ONE_REG")
Signed-off-by: Anup Patel <anup.patel@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260120080013.2153519-2-anup.patel@oss.qualcomm.com
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Lukas Gerlach
2dda6a9e09 KVM: riscv: Fix Spectre-v1 in PMU counter access
Guest-controlled counter indices received via SBI ecalls are used to
index into the PMC array. Sanitize them with array_index_nospec()
to prevent speculative out-of-bounds access.

Similar to x86 commit 13c5183a4e ("KVM: x86: Protect MSR-based
index computations in pmu.h from Spectre-v1/L1TF attacks").

Fixes: 8f0153ecd3 ("RISC-V: KVM: Add skeleton support for perf")
Reviewed-by: Radim Krčmář <radim.krcmar@oss.qualcomm.com>
Signed-off-by: Lukas Gerlach <lukas.gerlach@cispa.de>
Link: https://lore.kernel.org/r/20260303-kvm-riscv-spectre-v1-v2-4-192caab8e0dc@cispa.de
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Lukas Gerlach
8f0c15c4b1 KVM: riscv: Fix Spectre-v1 in floating-point register access
User-controlled indices are used to index into floating-point registers.
Sanitize them with array_index_nospec() to prevent speculative
out-of-bounds access.

Reviewed-by: Radim Krčmář <radim.krcmar@oss.qualcomm.com>
Signed-off-by: Lukas Gerlach <lukas.gerlach@cispa.de>
Link: https://lore.kernel.org/r/20260303-kvm-riscv-spectre-v1-v2-3-192caab8e0dc@cispa.de
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Lukas Gerlach
ec87a82ca8 KVM: riscv: Fix Spectre-v1 in AIA CSR access
User-controlled indices are used to access AIA CSR registers.
Sanitize them with array_index_nospec() to prevent speculative
out-of-bounds access.

Similar to x86 commit 8c86405f60 ("KVM: x86: Protect
ioapic_read_indirect() from Spectre-v1/L1TF attacks") and arm64
commit 41b87599c7 ("KVM: arm/arm64: vgic: fix possible spectre-v1
in vgic_get_irq()").

Reviewed-by: Radim Krčmář <radim.krcmar@oss.qualcomm.com>
Signed-off-by: Lukas Gerlach <lukas.gerlach@cispa.de>
Link: https://lore.kernel.org/r/20260303-kvm-riscv-spectre-v1-v2-2-192caab8e0dc@cispa.de
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Lukas Gerlach
f9e26fc325 KVM: riscv: Fix Spectre-v1 in ONE_REG register access
User-controlled register indices from the ONE_REG ioctl are used to
index into arrays of register values. Sanitize them with
array_index_nospec() to prevent speculative out-of-bounds access.

Reviewed-by: Radim Krčmář <radim.krcmar@oss.qualcomm.com>
Signed-off-by: Lukas Gerlach <lukas.gerlach@cispa.de>
Link: https://lore.kernel.org/r/20260303-kvm-riscv-spectre-v1-v2-1-192caab8e0dc@cispa.de
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Wang Yechao
b342166cbc RISC-V: KVM: Skip THP support check during dirty logging
When dirty logging is enabled, guest stage mappings are forced to
PAGE_SIZE granularity. Changing the mapping page size at this point
is incorrect.

Fixes: ed7ae7a34b ("RISC-V: KVM: Transparent huge page support")
Signed-off-by: Wang Yechao <wang.yechao255@zte.com.cn>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260226191231140_X1Juus7s2kgVlc0ZyW_K@zte.com.cn
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Jiakai Xu
7120a9d9e0 RISC-V: KVM: Fix potential UAF in kvm_riscv_aia_imsic_has_attr()
The KVM_DEV_RISCV_AIA_GRP_APLIC branch of aia_has_attr() was identified
to have a race condition with concurrent KVM_SET_DEVICE_ATTR ioctls,
leading to a use-after-free bug.

Upon analyzing the code, it was discovered that the
KVM_DEV_RISCV_AIA_GRP_IMSIC branch of aia_has_attr() suffers from the same
lack of synchronization. It invokes kvm_riscv_aia_imsic_has_attr() without
holding dev->kvm->lock.

While aia_has_attr() is running, a concurrent aia_set_attr() could call
aia_init() under the dev->kvm->lock. If aia_init() fails, it may trigger
kvm_riscv_vcpu_aia_imsic_cleanup(), which frees imsic_state. Without proper
locking, kvm_riscv_aia_imsic_has_attr() could attempt to access imsic_state
while it is being deallocated.

Although this specific path has not yet been reported by a fuzzer, it
is logically identical to the APLIC issue. Fix this by acquiring the
dev->kvm->lock before calling kvm_riscv_aia_imsic_has_attr(), ensuring
consistency with the locking pattern used for other AIA attribute groups.

Fixes: 5463091a51 ("RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip")
Signed-off-by: Jiakai Xu <xujiakai2025@iscas.ac.cn>
Signed-off-by: Jiakai Xu <jiakaiPeanut@gmail.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260304080804.2281721-1-xujiakai2025@iscas.ac.cn
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Jiakai Xu
721ead7757 RISC-V: KVM: Fix use-after-free in kvm_riscv_aia_aplic_has_attr()
Fuzzer reports a KASAN use-after-free bug triggered by a race
between KVM_HAS_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls on
the AIA device. The root cause is that aia_has_attr() invokes
kvm_riscv_aia_aplic_has_attr() without holding dev->kvm->lock, while
a concurrent aia_set_attr() may call aia_init() under that lock. When
aia_init() fails after kvm_riscv_aia_aplic_init() has succeeded, it
calls kvm_riscv_aia_aplic_cleanup() in its fail_cleanup_imsics path,
which frees both aplic_state and aplic_state->irqs. The concurrent
has_attr path can then dereference the freed aplic->irqs in
aplic_read_pending():
	irqd = &aplic->irqs[irq];   /* UAF here */

KASAN report:
 BUG: KASAN: slab-use-after-free in aplic_read_pending
             arch/riscv/kvm/aia_aplic.c:119 [inline]
 BUG: KASAN: slab-use-after-free in aplic_read_pending_word
             arch/riscv/kvm/aia_aplic.c:351 [inline]
 BUG: KASAN: slab-use-after-free in aplic_mmio_read_offset
             arch/riscv/kvm/aia_aplic.c:406
 Read of size 8 at addr ff600000ba965d58 by task 9498
 Call Trace:
  aplic_read_pending arch/riscv/kvm/aia_aplic.c:119 [inline]
  aplic_read_pending_word arch/riscv/kvm/aia_aplic.c:351 [inline]
  aplic_mmio_read_offset arch/riscv/kvm/aia_aplic.c:406
  kvm_riscv_aia_aplic_has_attr arch/riscv/kvm/aia_aplic.c:566
  aia_has_attr arch/riscv/kvm/aia_device.c:469
 allocated by task 9473:
  kvm_riscv_aia_aplic_init arch/riscv/kvm/aia_aplic.c:583
  aia_init arch/riscv/kvm/aia_device.c:248 [inline]
  aia_set_attr arch/riscv/kvm/aia_device.c:334
 freed by task 9473:
  kvm_riscv_aia_aplic_cleanup arch/riscv/kvm/aia_aplic.c:644
  aia_init arch/riscv/kvm/aia_device.c:292 [inline]
  aia_set_attr arch/riscv/kvm/aia_device.c:334

Fix this race by acquiring dev->kvm->lock in aia_has_attr() before
calling kvm_riscv_aia_aplic_has_attr(), consistent with the locking
pattern used in aia_get_attr() and aia_set_attr().

Fixes: 289a007b98 ("RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip")
Signed-off-by: Jiakai Xu <jiakaiPeanut@gmail.com>
Signed-off-by: Jiakai Xu <xujiakai2025@iscas.ac.cn>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260302132703.1721415-1-xujiakai2025@iscas.ac.cn
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Radim Krčmář
5c1bb07871 RISC-V: KVM: fix off-by-one array access in SBI PMU
The indexed array only has RISCV_KVM_MAX_COUNTERS elements.
The out-of-bound access could have been performed by a guest,
but it could only access another guest accessible data.

Fixes: 8f0153ecd3 ("RISC-V: KVM: Add skeleton support for perf")
Signed-off-by: Radim Krčmář <radim.krcmar@oss.qualcomm.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260227134617.23378-1-radim.krcmar@oss.qualcomm.com
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Jiakai Xu
c28eb189e4 RISC-V: KVM: Fix null pointer dereference in kvm_riscv_vcpu_aia_rmw_topei()
kvm_riscv_vcpu_aia_rmw_topei() assumes that the per-vCPU IMSIC state has
been initialized once AIA is reported as available and initialized at
the VM level. This assumption does not always hold.

Under fuzzed ioctl sequences, a guest may access the IMSIC TOPEI CSR
before the vCPU IMSIC state is set up. In this case,
vcpu->arch.aia_context.imsic_state is still NULL, and the TOPEI RMW path
dereferences it unconditionally, leading to a host kernel crash.

The crash manifests as:
  Unable to handle kernel paging request at virtual address
  dfffffff0000000e
  ...
  kvm_riscv_vcpu_aia_imsic_rmw arch/riscv/kvm/aia_imsic.c:909
  kvm_riscv_vcpu_aia_rmw_topei arch/riscv/kvm/aia.c:231
  csr_insn arch/riscv/kvm/vcpu_insn.c:208
  system_opcode_insn arch/riscv/kvm/vcpu_insn.c:281
  kvm_riscv_vcpu_virtual_insn arch/riscv/kvm/vcpu_insn.c:355
  kvm_riscv_vcpu_exit arch/riscv/kvm/vcpu_exit.c:230
  kvm_arch_vcpu_ioctl_run arch/riscv/kvm/vcpu.c:1008
  ...

Fix this by explicitly checking whether the vCPU IMSIC state has been
initialized before handling TOPEI CSR accesses. If not, forward the CSR
emulation to user space.

Fixes: db8b7e97d6 ("RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC")
Signed-off-by: Jiakai Xu <xujiakai2025@iscas.ac.cn>
Signed-off-by: Jiakai Xu <jiakaiPeanut@gmail.com>
Reviewed-by: Nutty Liu <nutty.liu@hotmail.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260226085119.643295-1-xujiakai2025@iscas.ac.cn
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Jiakai Xu
dec9ed9944 RISC-V: KVM: Fix use-after-free in kvm_riscv_gstage_get_leaf()
While fuzzing KVM on RISC-V, a use-after-free was observed in
kvm_riscv_gstage_get_leaf(),  where ptep_get() dereferences a
freed gstage page table page during gfn unmap.

The crash manifests as:
  use-after-free in ptep_get include/linux/pgtable.h:340 [inline]
  use-after-free in kvm_riscv_gstage_get_leaf arch/riscv/kvm/gstage.c:89
  Call Trace:
    ptep_get include/linux/pgtable.h:340 [inline]
    kvm_riscv_gstage_get_leaf+0x2ea/0x358 arch/riscv/kvm/gstage.c:89
    kvm_riscv_gstage_unmap_range+0xf0/0x308 arch/riscv/kvm/gstage.c:265
    kvm_unmap_gfn_range+0x168/0x1fc arch/riscv/kvm/mmu.c:256
    kvm_mmu_unmap_gfn_range virt/kvm/kvm_main.c:724 [inline]
  page last free pid 808 tgid 808 stack trace:
    kvm_riscv_mmu_free_pgd+0x1b6/0x26a arch/riscv/kvm/mmu.c:457
    kvm_arch_flush_shadow_all+0x1a/0x24 arch/riscv/kvm/mmu.c:134
    kvm_flush_shadow_all virt/kvm/kvm_main.c:344 [inline]

The UAF is caused by gstage page table walks running concurrently with
gstage pgd teardown. In particular, kvm_unmap_gfn_range() can traverse
gstage page tables while kvm_arch_flush_shadow_all() frees the pgd,
leading to use-after-free of page table pages.

Fix the issue by serializing gstage unmap and pgd teardown with
kvm->mmu_lock. Holding mmu_lock ensures that gstage page tables
remain valid for the duration of unmap operations and prevents
concurrent frees.

This matches existing RISC-V KVM usage of mmu_lock to protect gstage
map/unmap operations, e.g. kvm_riscv_mmu_iounmap.

Fixes: dd82e35638 ("RISC-V: KVM: Factor-out g-stage page table management")
Signed-off-by: Jiakai Xu <xujiakai2025@iscas.ac.cn>
Signed-off-by: Jiakai Xu <jiakaiPeanut@gmail.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260202040059.1801167-1-xujiakai2025@iscas.ac.cn
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Lukas Gerlach
8565617a85 KVM: riscv: Fix Spectre-v1 in APLIC interrupt handling
Guests can control IRQ indices via MMIO. Sanitize them with
array_index_nospec() to prevent speculative out-of-bounds access
to the aplic->irqs[] array.

Similar to arm64 commit 41b87599c7 ("KVM: arm/arm64: vgic: fix possible
spectre-v1 in vgic_get_irq()") and x86 commit 8c86405f60 ("KVM: x86:
Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks").

Fixes: 74967aa208 ("RISC-V: KVM: Add in-kernel emulation of AIA APLIC")
Signed-off-by: Lukas Gerlach <lukas.gerlach@cispa.de>
Reviewed-by: Nutty Liu <nutty.liu@hotmail.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260116095731.24555-1-lukas.gerlach@cispa.de
Signed-off-by: Anup Patel <anup@brainfault.org>
2026-03-06 11:20:30 +05:30
Paulo Alcantara
048efe129a smb: client: fix oops due to uninitialised var in smb2_unlink()
If SMB2_open_init() or SMB2_close_init() fails (e.g. reconnect), the
iovs set @rqst will be left uninitialised, hence calling
SMB2_open_free(), SMB2_close_free() or smb2_set_related() on them will
oops.

Fix this by initialising @close_iov and @open_iov before setting them
in @rqst.

Reported-by: Thiago Becker <tbecker@redhat.com>
Fixes: 1cf9f2a6a5 ("smb: client: handle unlink(2) of files open by different clients")
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Cc: David Howells <dhowells@redhat.com>
Cc: linux-cifs@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-05 20:41:16 -06:00
Mieczyslaw Nalewaj
f76a93241d net: dsa: realtek: rtl8365mb: remove ifOutDiscards from rx_packets
rx_packets should report the number of frames successfully received:
unicast + multicast + broadcast. Subtracting ifOutDiscards (a TX
counter) is incorrect and can undercount RX packets. RX drops are
already reported via rx_dropped (e.g. etherStatsDropEvents), so
there is no need to adjust rx_packets.

This patch removes the subtraction of ifOutDiscards from rx_packets
in rtl8365mb_stats_update().

Link: https://lore.kernel.org/netdev/878777925.105015.1763423928520@mail.yahoo.com/
Fixes: 4af2950c50 ("net: dsa: realtek-smi: add rtl8365mb subdriver for RTL8365MB-VC")
Signed-off-by: Mieczyslaw Nalewaj <namiltd@yahoo.com>
Signed-off-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Acked-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/20260303-realtek_namiltd_fix2-v1-1-bfa433d3401e@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 18:27:04 -08:00
Benno Lossin
580cc37b1d rust: pin-init: internal: init: document load-bearing fact of field accessors
The functions `[Pin]Init::__[pinned_]init` and `ptr::write` called from
the `init!` macro require the passed pointer to be aligned. This fact is
ensured by the creation of field accessors to previously initialized
fields.

Since we missed this very important fact from the beginning [1],
document it in the code.

Link: https://rust-for-linux.zulipchat.com/#narrow/channel/561532-pin-init/topic/initialized.20field.20accessor.20detection/with/576210658 [1]
Fixes: 90e53c5e70 ("rust: add pin-init API core")
Cc: <stable@vger.kernel.org> # 6.6.y, 6.12.y: 42415d163e: rust: pin-init: add references to previously initialized fields
Cc: <stable@vger.kernel.org> # 6.6.y, 6.12.y, 6.18.y, 6.19.y
Signed-off-by: Benno Lossin <lossin@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Link: https://patch.msgid.link/20260302140424.4097655-2-lossin@kernel.org
[ Updated Cc: stable@ tags as discussed. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-06 02:05:46 +01:00
Benno Lossin
a075082a15 rust: pin-init: internal: init: remove #[disable_initialized_field_access]
Gary noticed [1] that the initializer macros as well as the `[Pin]Init`
traits cannot support unaligned fields, since they use operations that
require aligned pointers. This means that any code using structs with
unaligned fields in pin-init is unsound.

By default, the `init!` macro generates references to initialized fields,
which makes the compiler check that those fields are aligned.  However,
we added the `#[disable_initialized_field_access]` attribute to avoid
this behavior in commit ceca298c53 ("rust: pin-init: internal: init:
add escape hatch for referencing initialized fields"). Thus remove the
`#[disable_initialized_field_access]` attribute from `init!`, which is
the only safe way to create an initializer handling unaligned fields.

If support for in-place initializing structs with unaligned fields is
required in the future, we could figure out a solution. This is tracked
in [2].

Reported-by: Gary Guo <gary@garyguo.net>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/561532-pin-init/topic/initialized.20field.20accessor.20detection/with/576210658 [1]
Link: https://github.com/Rust-for-Linux/pin-init/issues/112 [2]
Fixes: ceca298c53 ("rust: pin-init: internal: init: add escape hatch for referencing initialized fields")
Signed-off-by: Benno Lossin <lossin@kernel.org>
Acked-by: Janne Grunau <j@jannau.net>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Link: https://patch.msgid.link/20260302140424.4097655-1-lossin@kernel.org
[ Adjusted tags and reworded as discussed. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-06 02:04:09 +01:00
Gary Guo
dda135077e rust: build: remap path to avoid absolute path
When building with an out directory (O=), absolute paths can end up in the
file name in `#[track_caller]` or the panic message. This is not desirable
as this leaks the exact path being used to build the kernel and means that
the same location can appear in two forms (relative or absolute).

This is reported by Asahi [1] and is being workaround in [2] previously to
force everything to be absolute path. Using absolute path for everything
solves the inconsistency, however it does not address the reproducibility
issue. So, fix this by remap all absolute paths to srctree to relative path
instead.

This is previously attempted in commit dbdffaf50f ("kbuild, rust: use
-fremap-path-prefix to make paths relative") but that was reverted as
remapping debug info causes some tool (e.g. objdump) to be unable to find
sources. Therefore, use `--remap-path-scope` to only remap macros but leave
debuginfo untouched. `--remap-path-scope` is only stable in Rust 1.95, so
use `rustc-option` to detect its presence. This feature has been available
as  `-Zremap-path-scope` for all versions that we support; however due to
bugs in the Rust compiler, it does not work reliably until 1.94. I opted to
not enable it for 1.94 as it's just a single version that we missed.

This change can be validated by building a kernel with O=, strip debug info
on vmlinux, and then check if the absolute path exists in `strings
vmlinux`, e.g. `strings vmlinux |grep \/home`.

Reported-by: Janne Grunau <j@jannau.net>
Reported-by: Asahi Lina <lina+kernel@asahilina.net>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/Per-call-site.20data.20and.20lock.20class.20keys/near/572466559 [1]
Link: 54ab888788 [2]
Signed-off-by: Gary Guo <gary@garyguo.net>
Acked-by: Nicolas Schier <nsc@kernel.org> # kbuild
Link: https://patch.msgid.link/20260226152112.3222886-1-gary@kernel.org
[ Reworded for few typos. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-06 02:00:00 +01:00
Gary Guo
e174dd14bf rust: kbuild: emit dep-info into $(depfile) directly
After commit 295d8398c6 ("kbuild: specify output names separately for
each emission type from rustc"), the preferred pattern is to ask rustc to
emit dependency information into $(depfile) directly, and after commit
2185242fad ("kbuild: remove sed commands after rustc rules"), the
post-processing to remove comments is no longer necessary as fixdep can
handle comments directly. Thus, emit dep-info into $(depfile) directly and
remove the mv and sed invocation.

This fixes the issue where a non-ignored .d file is emitted during
compilation and removed shortly afterwards.

[ Like Gary mentioned in Zulip, this likely happened due to rebasing
  the builds part of the old `syn` work I had. - Miguel ]

Reported-by: Onur Özkan <work@onurozkan.dev>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/syn.20artifact.20being.20tracked.20by.20git/with/575467879
Fixes: 7dbe46c0b1 ("rust: kbuild: add proc macro library support")
Signed-off-by: Gary Guo <gary@garyguo.net>
Tested-by: Onur Özkan <work@onurozkan.dev>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260224072957.214979-1-gary@garyguo.net
[ Reworded for a couple of typos. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-06 01:43:03 +01:00
Krzysztof Kozlowski
4355b13d46 dt-bindings: display/msm: qcom,sm8750-mdss: Fix model typo
Fix obvious model typo (SM8650->SM8750) in the description.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Fixes: 6b93840116 ("dt-bindings: display/msm: qcom,sm8750-mdss: Add SM8750")
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/707192/
Link: https://lore.kernel.org/r/20260225173419.125565-2-krzysztof.kozlowski@oss.qualcomm.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-03-06 01:53:03 +02:00
Pengyu Luo
fd941c787c drm/msm/dsi/phy: fix hardware revision
The hardware revision for TSMC 3nm-based Qualcomm SOCs should be 7.2,
this can be confirmed from REG_DSI_7nm_PHY_CMN_REVISION_ID0, the value
is 0x27, which means hardware revision is 7.2

No functional change.

Fixes: 1337d7ebfb ("drm/msm/dsi/phy: Add support for SM8750")
Suggested-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Signed-off-by: Pengyu Luo <mitltlatltl@gmail.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/707414/
Link: https://lore.kernel.org/r/20260226122958.22555-2-mitltlatltl@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-03-06 01:53:03 +02:00
Abel Vesa
7403e87c13 dt-bindings: display: msm: Fix reg ranges and clocks on Glymur
The Glymur platform has four DisplayPort controllers. The hardware
supports four streams (MST) per controller. However, on Glymur the first
three controllers only have two streams wired to the display subsystem,
while the fourth controller operates in single-stream mode.

Add a dedicated clause for the Glymur compatible to require the register
ranges for all four stream blocks, while allowing either one pixel clock
(for the single-stream controller) or two pixel clocks (for the remaining
controllers).

Update the Glymur MDSS schema example by adding the missing p2, p3,
mst2link and mst3link register blocks. Without these, the bindings
validation fails. Also replace the made-up register addresses with the
actual addresses from the first controller to match the SoC devicetree
description.

Cc: stable@vger.kernel.org # v6.19
Fixes: 8f63bf9082 ("dt-bindings: display: msm: Document the Glymur DiplayPort controller")
Fixes: 1aee577bbc ("dt-bindings: display: msm: Document the Glymur Mobile Display SubSystem")
Signed-off-by: Abel Vesa <abel.vesa@oss.qualcomm.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/708518/
Link: https://lore.kernel.org/r/20260303-glymur-fix-dp-bindings-reg-clocks-v4-1-1ebd9c7c2cee@oss.qualcomm.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-03-06 01:53:03 +02:00
Abhinav Kumar
4ce71cea57 drm/msm/dpu: Correct the SA8775P intr_underrun/intr_underrun index
The intr_underrun and intr_vsync indices have been swapped, just simply
corrects them.

Cc: stable@vger.kernel.org
Fixes: b139c80d18 ("drm/msm/dpu: Add SA8775P support")
Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Yongxing Mou <yongxing.mou@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/709209/
Link: https://lore.kernel.org/r/20260305-mdss_catalog-v5-2-06678ac39ac7@oss.qualcomm.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-03-06 01:53:03 +02:00
Akhil P Oommen
7e459c4126 drm/msm/a8xx: Fix ubwc config related to swizzling
To disable l2/l3 swizzling in A8x, set the respective bits in both
GRAS_NC_MODE_CNTL and RB_CCU_NC_MODE_CNTL registers. This is required
for Glymur where it is recommended to keep l2/l3 swizzling disabled.

Fixes: 288a932008 ("drm/msm/adreno: Introduce A8x GPU Support")
Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
Message-ID: <20260305-a8xx-ubwc-fix-v1-1-d99b6da4c5a9@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
2026-03-05 13:49:50 -08:00
Rob Herring (Arm)
021f1b77f7 accel: ethosu: Handle possible underflow in IFM size calculations
If the command stream has larger padding sizes than the IFM and OFM
diminsions, then the calculations will underflow to a negative value.
The result is a very large region bounds which is caught on submit, but
it's better to catch it earlier.

Current mesa ethosu driver has a signedness bug which resulted in
padding of 127 (the max) and triggers this issue.

Reviewed-and-Tested-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://patch.msgid.link/20260218-ethos-fixes-v1-3-be3fa3ea9a30@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
2026-03-05 15:21:17 -06:00
Rob Herring (Arm)
838ae99f9a accel: ethosu: Fix NPU_OP_ELEMENTWISE validation with scalar
The NPU_OP_ELEMENTWISE instruction uses a scalar value for IFM2 if the
IFM2_BROADCAST "scalar" mode is set. It is a bit (7) on the u65 and
part of a field (bits 3:0) on the u85. The driver was hardcoded to the
u85.

Fixes: 5a5e9c0228 ("accel: Add Arm Ethos-U NPU driver")
Reviewed-and-Tested-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://patch.msgid.link/20260218-ethos-fixes-v1-2-be3fa3ea9a30@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
2026-03-05 15:21:17 -06:00
Rob Herring (Arm)
150bceb3e0 accel: ethosu: Fix job submit error clean-up refcount underflows
If the job submit fails before adding the job to the scheduler queue
such as when the GEM buffer bounds checks fail, then doing a
ethosu_job_put() results in a pm_runtime_put_autosuspend() without the
corresponding pm_runtime_resume_and_get(). The dma_fence_put()'s are
also unnecessary, but seem to be harmless.

Split the ethosu_job_cleanup() function into 2 parts for the before
and after the job is queued.

Fixes: 5a5e9c0228 ("accel: Add Arm Ethos-U NPU driver")
Reviewed-and-Tested-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://patch.msgid.link/20260218-ethos-fixes-v1-1-be3fa3ea9a30@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
2026-03-05 15:21:17 -06:00
Paul E. McKenney
78c2ce0fd6 scftorture: Update due to x86 not supporting none/voluntary preemption
As of v7.0-rc1, architectures that support preemption, including x86 and
arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY.
Attempting to build kernels with these two Kconfig options results in
.config errors.  This commit therefore switches such scftorture scenarios
to CONFIG_PREEMPT_LAZY.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
Link: https://patch.msgid.link/20260303235903.1967409-4-paulmck@kernel.org
2026-03-05 13:11:10 -08:00
Paul E. McKenney
3c6ddb58f6 refscale: Update due to x86 not supporting none/voluntary preemption
As of v7.0-rc1, architectures that support preemption, including x86 and
arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY.
Attempting to build kernels with these two Kconfig options results in
.config errors.  This commit therefore switches such refscale scenarios
to CONFIG_PREEMPT_LAZY.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
Link: https://patch.msgid.link/20260303235903.1967409-3-paulmck@kernel.org
2026-03-05 13:11:07 -08:00
Paul E. McKenney
59af2d5652 rcuscale: Update due to x86 not supporting none/voluntary preemption
As of v7.0-rc1, architectures that support preemption, including x86 and
arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY.
Attempting to build kernels with these two Kconfig options results in
.config errors.  This commit therefore switches such rcuscale scenarios
to CONFIG_PREEMPT_LAZY.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
Link: https://patch.msgid.link/20260303235903.1967409-2-paulmck@kernel.org
2026-03-05 13:11:03 -08:00
Paul E. McKenney
f2fa6cc736 rcutorture: Update due to x86 not supporting none/voluntary preemption
As of v7.0-rc1, architectures that support preemption, including x86 and
arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY.
Attempting to build kernels with these two Kconfig options results in
.config errors.  This commit therefore switches such rcutorture scenarios
to CONFIG_PREEMPT_LAZY.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Boqun Feng <boqun@kernel.org>
Link: https://patch.msgid.link/bfe89f6c-3b63-40c6-aa6d-5f523e3e9a31@paulmck-laptop
2026-03-05 13:07:38 -08:00
Arnaldo Carvalho de Melo
0693907ffa tools headers UAPI: Update tools' copy of linux/coresight-pmu.h
To get the comment changes in this commit:

  171efc7009 ("x86/ibs: Fix typo in dc_l2tlb_miss comment")

This silences this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/asm/amd/ibs.h arch/x86/include/asm/amd/ibs.h

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-05 17:28:44 -03:00
Arnaldo Carvalho de Melo
c9d77f0a0c tools headers: Update the syscall tables and unistd.h, to support the new 'rseq_slice_yield' syscall
Picking up the changes from these csets:

  2153b2e891 ("sparc: Add architecture support for clone3")
  99d2592023 ("rseq: Implement sys_rseq_slice_yield()")
  4ac286c4a8 ("s390/syscalls: Switch to generic system call table generation")

This makes 'perf trace' support it, now its possible, for instance, to
do:

  # perf trace -e rseq_slice_yield --max-stack=16

Here is an example with the 'sendmmsg' syscall:

  root@x1:~# perf trace -e sendmmsg --max-stack 16 --max-events=1
       0.000 ( 0.062 ms): dbus-broker/1012 sendmmsg(fd: 150, mmsg: 0x7ffef57cca50, vlen: 1, flags: DONTWAIT|NOSIGNAL) = 1
                                         syscall_exit_to_user_mode_prepare ([kernel.kallsyms])
                                         syscall_exit_to_user_mode_prepare ([kernel.kallsyms])
                                         syscall_exit_to_user_mode ([kernel.kallsyms])
                                         do_syscall_64 ([kernel.kallsyms])
                                         entry_SYSCALL_64 ([kernel.kallsyms])
                                         [0x117ce7] (/usr/lib64/libc.so.6 (deleted))
  root@x1:~#

To do a system wide tracing of the new 'rseq_slice_yield' syscall with a
backtrace of at most 16 entries.

This addresses these perf tools build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
    diff -u tools/scripts/syscall.tbl scripts/syscall.tbl
    diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl
    diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
    diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl
    diff -u tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl
    diff -u tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl

Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ludwig Rydberg <ludwig.rydberg@gaisler.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-05 17:20:23 -03:00
Linus Torvalds
5ee8dbf546 Merge tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fsverity/linux
Pull fsverity fix from Eric Biggers:
 "Prevent CONFIG_FS_VERITY from being enabled when the page size is
  256K, since it doesn't work in that case"

* tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fsverity/linux:
  fsverity: add dependency on 64K or smaller pages
2026-03-05 11:52:03 -08:00
Peter Collingbourne
b3ce769203 perf disasm: Fix off-by-one bug in outside check
If a branch target points to one past the end of a function, the branch
should be treated as a branch to another function.

This can happen e.g. with a tail call to a function that is laid out
immediately after the caller.

Fixes: 751b1783da ("perf annotate: Mark jumps to outher functions with the call arrow")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Peter Collingbourne <pcc@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Bill Wendling <morbo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Justin Stitt <justinstitt@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <nick.desaulniers+lkml@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://linux-review.googlesource.com/id/Ide471112e82d68177e0faf08ca411d9fcf0a7bdf
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-05 16:51:09 -03:00
Arnaldo Carvalho de Melo
1e972ec76e tools arch x86: Sync msr-index.h to pick MSR_{OMR_[0-3],CORE_PERF_GLOBAL_STATUS_SET}
To pick up the changes in:

  4e955c08d6 ("perf/x86/intel: Support the 4 new OMR MSRs introduced in DMR and NVL")
  736a2dcfda ("x86/CPU/AMD: Simplify the spectral chicken fix")
  56bb273697 ("KVM: x86/pmu: Load/put mediated PMU context when entering/exiting guest")

Addressing this tools/perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h

That makes the beautification scripts to pick some new entries:

  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before.txt
  $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h
  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after.txt
  $ diff -u before.txt after.txt
  --- before.txt	2026-03-04 17:21:39.165956041 -0300
  +++ after.txt	2026-03-04 17:21:52.479191640 -0300
  @@ -130,6 +130,11 @@
   	[0x0000038e] = "CORE_PERF_GLOBAL_STATUS",
   	[0x0000038f] = "CORE_PERF_GLOBAL_CTRL",
   	[0x00000390] = "CORE_PERF_GLOBAL_OVF_CTRL",
  +	[0x00000391] = "CORE_PERF_GLOBAL_STATUS_SET",
  +	[0x000003e0] = "OMR_0",
  +	[0x000003e1] = "OMR_1",
  +	[0x000003e2] = "OMR_2",
  +	[0x000003e3] = "OMR_3",
   	[0x000003f1] = "IA32_PEBS_ENABLE",
   	[0x000003f2] = "PEBS_DATA_CFG",
   	[0x000003f4] = "IA32_PEBS_BASE",
  $

Now one can use those strings in 'perf trace' to do filtering, e.g.:

  # perf trace -e msr:*_msr/max-stack=32/ --filter="msr==CORE_PERF_GLOBAL_STATUS_SET"

Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Dapeng Mi <dapeng1.mi@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-05 16:51:09 -03:00
Linus Torvalds
6a42ff33f3 Merge tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull crypto library fixes from Eric Biggers:

 - Several test fixes:

    - Fix flakiness in the interrupt context tests in certain VMs

    - Make the lib/crypto/ KUnit tests depend on the corresponding
      library options rather than selecting them. This follows the
      standard KUnit convention, and it fixes an issue where enabling
      CONFIG_KUNIT_ALL_TESTS pulled in all the crypto library code

    - Add a kunitconfig file for lib/crypto/

    - Fix a couple stale references to "aes-generic" that made it in
      concurrently with the rename to "aes-lib"

 - Update the help text for several CRYPTO kconfig options to remove
   outdated information about users that now use the library instead

* tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  crypto: testmgr - Fix stale references to aes-generic
  crypto: Clean up help text for CRYPTO_CRC32
  crypto: Clean up help text for CRYPTO_CRC32C
  crypto: Clean up help text for CRYPTO_XXHASH
  crypto: Clean up help text for CRYPTO_SHA256
  crypto: Clean up help text for CRYPTO_BLAKE2B
  lib/crypto: tests: Add a .kunitconfig file
  lib/crypto: tests: Depend on library options rather than selecting them
  kunit: irq: Ensure timer doesn't fire too frequently
2026-03-05 11:49:05 -08:00
Linus Torvalds
398871616f Merge tag 'acpi-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI support fixes from Rafael Wysocki:

 - Revert a commit related to ACPI device power management that was
   not supposed to make any functional difference, but it did so and
   introduced a regression (Rafael Wysocki)

 - Update the _CPC object definition in ACPICA to match ACPI 6.6 and
   prevent the kernel from printing a false-positive warning regarding
   _CPC output package format on platforms shipping with firmware based
   on ACPI 6.6 (Saket Dumbre)

* tag 'acpi-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  Revert "ACPI: PM: Let acpi_dev_pm_attach() skip devices without ACPI PM"
  ACPICA: Update the _CPC definition to match ACPI 6.6
2026-03-05 11:37:27 -08:00
Linus Torvalds
abacaf5599 Merge tag 'net-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
 "Including fixes from CAN, netfilter and wireless.

  Current release - new code bugs:

   - sched: cake: fixup cake_mq rate adjustment for diffserv config

   - wifi: fix missing ieee80211_eml_params member initialization

  Previous releases - regressions:

   - tcp: give up on stronger sk_rcvbuf checks (for now)

  Previous releases - always broken:

   - net: fix rcu_tasks stall in threaded busypoll

   - sched:
      - fq: clear q->band_pkt_count[] in fq_reset()
      - only allow act_ct to bind to clsact/ingress qdiscs and shared
        blocks

   - bridge: check relevant per-VLAN options in VLAN range grouping

   - xsk: fix fragment node deletion to prevent buffer leak

  Misc:

   - spring cleanup of inactive maintainers"

* tag 'net-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (138 commits)
  xdp: produce a warning when calculated tailroom is negative
  net: enetc: use truesize as XDP RxQ info frag_size
  libeth, idpf: use truesize as XDP RxQ info frag_size
  i40e: use xdp.frame_sz as XDP RxQ info frag_size
  i40e: fix registering XDP RxQ info
  ice: change XDP RxQ frag_size from DMA write length to xdp.frame_sz
  ice: fix rxq info registering in mbuf packets
  xsk: introduce helper to determine rxq->frag_size
  xdp: use modulo operation to calculate XDP frag tailroom
  selftests/tc-testing: Add tests exercising act_ife metalist replace behaviour
  net/sched: act_ife: Fix metalist update behavior
  selftests: net: add test for IPv4 route with loopback IPv6 nexthop
  net: ipv6: fix panic when IPv4 route references loopback IPv6 nexthop
  net: vxlan: fix nd_tbl NULL dereference when IPv6 is disabled
  net: bridge: fix nd_tbl NULL dereference when IPv6 is disabled
  MAINTAINERS: remove Thomas Falcon from IBM ibmvnic
  MAINTAINERS: remove Claudiu Manoil and Alexandre Belloni from Ocelot switch
  MAINTAINERS: replace Taras Chornyi with Elad Nachman for Marvell Prestera
  MAINTAINERS: remove Jonathan Lemon from OpenCompute PTP
  MAINTAINERS: replace Clark Wang with Frank Li for Freescale FEC
  ...
2026-03-05 11:00:46 -08:00
Rafael J. Wysocki
084f843093 Merge branch 'acpica'
Merge a fix updating the _CPC object definition in ACPICA to avoid
printing a false-positive output package format warning on new
platforms (Saket Dumbre)

* acpica:
  ACPICA: Update the _CPC definition to match ACPI 6.6
2026-03-05 18:46:43 +01:00
Dave Hansen
9f2c7349b2 MAINTAINERS: Orphan Altera PCIe controller driver
Email to Joyce Ooi <joyce.ooi@intel.com> now bounces.  Remove the address
and mark the Altera PCIe controller driver as an orphan for now.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260305171852.3114177-1-dave.hansen@linux.intel.com
2026-03-05 11:34:21 -06:00
Breno Leitao
9e83d5104a workqueue: Add stall detector sample module
Add a sample module under samples/workqueue/stall_detector/ that
reproduces a workqueue stall caused by PF_WQ_WORKER misuse.  The
module queues two work items on the same per-CPU pool, then clears
PF_WQ_WORKER and sleeps in wait_event_idle(), hiding from the
concurrency manager and stalling the second work item indefinitely.

This is useful for testing the workqueue watchdog stall diagnostics.

Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-05 07:30:49 -10:00
Breno Leitao
8823eaef45 workqueue: Show all busy workers in stall diagnostics
show_cpu_pool_hog() only prints workers whose task is currently running
on the CPU (task_is_running()).  This misses workers that are busy
processing a work item but are sleeping or blocked — for example, a
worker that clears PF_WQ_WORKER and enters wait_event_idle().  Such a
worker still occupies a pool slot and prevents progress, yet produces
an empty backtrace section in the watchdog output.

This is happening on real arm64 systems, where
toggle_allocation_gate() IPIs every single CPU in the machine (which
lacks NMI), causing workqueue stalls that show empty backtraces because
toggle_allocation_gate() is sleeping in wait_event_idle().

Remove the task_is_running() filter so every in-flight worker in the
pool's busy_hash is dumped.  The busy_hash is protected by pool->lock,
which is already held.

Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-05 07:30:11 -10:00
Breno Leitao
e8e14ac7cf workqueue: Show in-flight work item duration in stall diagnostics
When diagnosing workqueue stalls, knowing how long each in-flight work
item has been executing is valuable. Add a current_start timestamp
(jiffies) to struct worker, set it when a work item begins execution in
process_one_work(), and print the elapsed wall-clock time in show_pwq().

Unlike current_at (which tracks CPU runtime and resets on wakeup for
CPU-intensive detection), current_start is never reset because the
diagnostic cares about total wall-clock time including sleeps.

Before: in-flight: 165:stall_work_fn [wq_stall]
After:  in-flight: 165:stall_work_fn [wq_stall] for 100s

Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-05 07:27:48 -10:00
Breno Leitao
6037160e52 workqueue: Rename pool->watchdog_ts to pool->last_progress_ts
The watchdog_ts name doesn't convey what the timestamp actually tracks.
This field tracks the last time a workqueue got progress.

Rename it to last_progress_ts to make it clear that it records when the
pool last made forward progress (started processing new work items).

No functional change.

Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-05 07:26:59 -10:00
Breno Leitao
f42f9091be workqueue: Use POOL_BH instead of WQ_BH when checking pool flags
pr_cont_worker_id() checks pool->flags against WQ_BH, which is a
workqueue-level flag (defined in workqueue.h). Pool flags use a
separate namespace with POOL_* constants (defined in workqueue.c).
The correct constant is POOL_BH. Both WQ_BH and POOL_BH are defined
as (1 << 0) so this has no behavioral impact, but it is semantically
wrong and inconsistent with every other pool-level BH check in the
file.

Fixes: 4cb1ef6460 ("workqueue: Implement BH workqueues to eventually replace tasklets")
Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-05 07:26:50 -10:00
Lizhi Hou
d5b8b0347f accel/amdxdna: Split mailbox channel create function
The management channel used for firmware control command submission is
currently created after the firmware is started. If channel creation
fails (for example, due to memory allocation failure or workqueue
creation interruption), the firmware remains in a pending state and is
unable to receive any control commands.

To avoid leaving the firmware in this inconsistent state, split
xdna_mailbox_create_channel() into two separate functions so that
resource allocation can be completed before interacting with the
hardware.
  xdna_mailbox_alloc_channel()
    Allocates memory and initializes the workqueue. This can be called
    earlier, before interacting with the hardware.
  xdna_mailbox_start_channel()
    Performs the hardware interaction required to start the channel.

Rename xdna_mailbox_destroy_channel() to xdna_mailbox_free_channel().
Ensure that xdna_mailbox_stop_channel() and xdna_mailbox_free_channel()
properly unwind the corresponding start and allocation steps, respectively.

Fixes: b87f920b93 ("accel/amdxdna: Support hardware mailbox")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260305062041.3954024-1-lizhi.hou@amd.com
2026-03-05 09:24:33 -08:00
Peng Fan
97e4567d39 remoteproc: imx_rproc: Fix unreachable platform prepare_ops
Smatch reports unreachable code in imx_rproc_prepare(), where an early
return inside the reserved-memory parsing loop prevents platform
prepare_ops from being executed.

When of_reserved_mem_region_to_resource() fails, imx_rproc_prepare()
returns immediately, so the platform-specific prepare callback is never
called. As a result, prepare_ops such as imx_rproc_sm_lmm_prepare() on
i.MX95 have no chance to run.

This is problematic when Linux controls the M7 Logical Machine and is
responsible for preparing resources such as TCM. Without running the
platform prepare callback, loading the M7 ELF into TCM may fail if the
bootloader did not power up and initialize TCM.

Fix this by breaking out of the reserved-memory loop instead of
returning, allowing the platform prepare_ops to be executed as intended.

Fixes: edd2a99560 ("remoteproc: imx_rproc: Introduce prepare ops for imx_rproc_dcfg")
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/linux-remoteproc/aYYXAa2Fj36XG4yQ@p14s/T/#t
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://lore.kernel.org/r/20260208-imx-rproc-fix-v1-1-ad74555eb9a4@nxp.com
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
2026-03-05 10:18:23 -07:00
Tzung-Bi Shih
35c3f72a2d remoteproc: mediatek: Unprepare SCP clock during system suspend
Prior to commit d935187cfb ("remoteproc: mediatek: Break lock
dependency to prepare_lock"), `scp->clk` was prepared and enabled only
when it needs to communicate with the SCP.  The commit d935187cfb
moved the prepare operation to remoteproc's prepare(), keeping the clock
prepared as long as the SCP is running.

The power consumption due to the prolonged clock preparation can be
negligible when the system is running, as SCP is designed to be a very
power efficient processor.

However, the clock remains prepared even when the system enters system
suspend.  This prevents the underlying clock controller (and potentially
the parent PLLs) from shutting down, which increases power consumption
and may block the system from entering deep sleep states.

Add suspend and resume callbacks.  Unprepare the clock in suspend() if
it was active and re-prepare it in resume() to ensure the clock is
properly disabled during system suspend, while maintaining the "always
prepared" semantics while the system is active.  The driver doesn't
implement .attach() callback, hence it only checks for RPROC_RUNNING.

Fixes: d935187cfb ("remoteproc: mediatek: Break lock dependency to prepare_lock")
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://lore.kernel.org/r/20260206033034.3031781-1-tzungbi@kernel.org
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
2026-03-05 10:14:36 -07:00
Akash Goel
76e8173ba9 drm/panthor: Correct the order of arguments passed to gem_sync
This commit corrects the order of arguments passed to panthor_gem_sync()
function, called when the SYNC_WAIT condition has to be evaluated for a
blocked GPU queue.

Fixes: cd2c9c3015 ("drm/panthor: Add flag to map GEM object Write-Back Cacheable")
Signed-off-by: Akash Goel <akash.goel@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patch.msgid.link/20260305110723.2871733-1-akash.goel@arm.com
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
2026-03-05 16:53:09 +00:00
Helge Deller
e31a374a99 fbdev: au1100fb: Fix build on MIPS64
Fix an error reported by the kernel test robot:
 au1100fb.c: error: implicit declaration of function 'KSEG1ADDR'; did you mean 'CKSEG1ADDR'?

arch/mips/include/asm/addrspace.h defines KSEG1ADDR only for 32 bit
configurations. So provide its compile-test stub also for 64bit mips builds.

Fixes: 6f366e8648 ("fbdev: au1100fb: Make driver compilable on non-mips platforms")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202603042127.PT6LuKqi-lkp@intel.com/
Signed-off-by: Helge Deller <deller@gmx.de>
Acked-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
2026-03-05 17:35:12 +01:00
Fuad Tabba
e07fc9e2da KVM: arm64: Fix page leak in user_mem_abort() on atomic fault
When a guest performs an atomic/exclusive operation on memory lacking
the required attributes, user_mem_abort() injects a data abort and
returns early. However, it fails to release the reference to the
host page acquired via __kvm_faultin_pfn().

A malicious guest could repeatedly trigger this fault, leaking host
page references and eventually causing host memory exhaustion (OOM).

Fix this by consolidating the early error returns to a new out_put_page
label that correctly calls kvm_release_page_unused().

Fixes: 2937aeec9d ("KVM: arm64: Handle DABT caused by LS64* instructions on unsupported memory")
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Yuan Yao <yaoyuan@linux.alibaba.com>
Link: https://patch.msgid.link/20260304162222.836152-2-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-05 16:23:30 +00:00
Takashi Iwai
8457669db9 Merge tag 'asoc-fix-v7.0-rc2' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v7.0

A moderately large pile of fixes, though none of them are  super major,
plus a few new quirks and device IDs.
2026-03-05 17:22:14 +01:00
Andrea Righi
70f54f61a3 sched_ext: Document task ownership state machine
The task ownership state machine in sched_ext is quite hard to follow
from the code alone. The interaction of ownership states, memory
ordering rules and cross-CPU "lock dancing" makes the overall model
subtle.

Extend the documentation next to scx_ops_state to provide a more
structured and self-contained description of the state transitions and
their synchronization rules.

The new reference should make the code easier to reason about and
maintain and can help future contributors understand the overall
task-ownership workflow.

Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-05 06:21:06 -10:00
zhidao su
0927780c90 sched_ext: Use READ_ONCE() for lock-free reads of module param variables
bypass_lb_cpu() reads scx_bypass_lb_intv_us and scx_slice_bypass_us
without holding any lock, in timer callback context where module
parameter writes via sysfs can happen concurrently:

    min_delta_us = scx_bypass_lb_intv_us / SCX_BYPASS_LB_MIN_DELTA_DIV;
                   ^^^^^^^^^^^^^^^^^^^^
                   plain read -- KCSAN data race

    if (delta < DIV_ROUND_UP(min_delta_us, scx_slice_bypass_us))
                                           ^^^^^^^^^^^^^^^^^
                                           plain read -- KCSAN data race

scx_bypass_lb_intv_us already uses READ_ONCE() in scx_bypass_lb_timerfn()
and scx_bypass() for its other lock-free read sites, leaving
bypass_lb_cpu() inconsistent. scx_slice_bypass_us has the same
lock-free access pattern in the same function.

Fix both plain reads by using READ_ONCE() to complete the concurrent
access annotation and make the code KCSAN-clean.

Signed-off-by: zhidao su <suzhidao@xiaomi.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-05 06:05:15 -10:00
Linus Torvalds
18ecff396c Merge tag 'trace-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing fixes from Steven Rostedt:

 - Fix thresh_return of function graph tracer

   The update to store data on the shadow stack removed the abuse of
   using the task recursion word as a way to keep track of what
   functions to ignore. The trace_graph_return() was updated to handle
   this, but when function_graph tracer is using a threshold (only trace
   functions that took longer than a specified time), it uses
   trace_graph_thresh_return() instead.

   This function was still incorrectly using the task struct recursion
   word causing the function graph tracer to permanently set all
   functions to "notrace"

 - Fix thresh_return nosleep accounting

   When the calltime was moved to the shadow stack storage instead of
   being on the fgraph descriptor, the calculations for the amount of
   sleep time was updated. The calculation was done in the
   trace_graph_thresh_return() function, which also called the
   trace_graph_return(), which did the calculation again, causing the
   time to be doubled.

   Remove the call to trace_graph_return() as what it needed to do
   wasn't that much, and just do the work in
   trace_graph_thresh_return().

 - Fix syscall trace event activation on boot up

   The syscall trace events are pseudo events attached to the
   raw_syscall tracepoints. When the first syscall event is enabled, it
   enables the raw_syscall tracepoint and doesn't need to do anything
   when a second syscall event is also enabled.

   When events are enabled via the kernel command line, syscall events
   are partially enabled as the enabling is called before rcu_init. This
   is due to allow early events to be enabled immediately. Because
   kernel command line events do not distinguish between different types
   of events, the syscall events are enabled here but are not fully
   functioning. After rcu_init, they are disabled and re-enabled so that
   they can be fully enabled.

   The problem happened is that this "disable-enable" is done one at a
   time. If more than one syscall event is specified on the command
   line, by disabling them one at a time, the counter never gets to
   zero, and the raw_syscall is not disabled and enabled, keeping the
   syscall events in their non-fully functional state.

   Instead, disable all events and re-enabled them all, as that will
   ensure the raw_syscall event is also disabled and re-enabled.

 - Disable preemption in ftrace pid filtering

   The ftrace pid filtering attaches to the fork and exit tracepoints to
   add or remove pids that should be traced. They access variables
   protected by RCU (preemption disabled). Now that tracepoint callbacks
   are called with preemption enabled, this protection needs to be added
   explicitly, and not depend on the functions being called with
   preemption disabled.

 - Disable preemption in event pid filtering

   The event pid filtering needs the same preemption disabling guards as
   ftrace pid filtering.

 - Fix accounting of the memory mapped ring buffer on fork

   Memory mapping the ftrace ring buffer sets the vm_flags to DONTCOPY.
   But this does not prevent the application from calling
   madvise(MADVISE_DOFORK). This causes the mapping to be copied on
   fork. After the first tasks exits, the mapping is considered unmapped
   by everyone. But when he second task exits, the counter goes below
   zero and triggers a WARN_ON.

   Since nothing prevents two separate tasks from mmapping the ftrace
   ring buffer (although two mappings may mess each other up), there's
   no reason to stop the memory from being copied on fork.

   Update the vm_operations to have an ".open" handler to update the
   accounting and let the ring buffer know someone else has it mapped.

 - Add all ftrace headers in MAINTAINERS file

   The MAINTAINERS file only specifies include/linux/ftrace.h But misses
   ftrace_irq.h and ftrace_regs.h. Make the file use wildcards to get
   all *ftrace* files.

* tag 'trace-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  ftrace: Add MAINTAINERS entries for all ftrace headers
  tracing: Fix WARN_ON in tracing_buffers_mmap_close
  tracing: Disable preemption in the tracepoint callbacks handling filtered pids
  ftrace: Disable preemption in the tracepoint callbacks handling filtered pids
  tracing: Fix syscall events activation by ensuring refcount hits zero
  fgraph: Fix thresh_return nosleeptime double-adjust
  fgraph: Fix thresh_return clear per-task notrace
2026-03-05 08:05:05 -08:00
Jakub Kicinski
cf440e5b40 Merge branch 'Address-XDP-frags-having-negative-tailroom'
Larysa Zaremba says:

====================
Address XDP frags having negative tailroom

Aside from the issue described below, tailroom calculation does not account
for pages being split between frags, e.g. in i40e, enetc and
AF_XDP ZC with smaller chunks. These series address the problem by
calculating modulo (skb_frag_off() % rxq->frag_size) in order to get
data offset within a smaller block of memory. Please note, xskxceiver
tail grow test passes without modulo e.g. in xdpdrv mode on i40e,
because there is not enough descriptors to get to flipped buffers.

Many ethernet drivers report xdp Rx queue frag size as being the same as
DMA write size. However, the only user of this field, namely
bpf_xdp_frags_increase_tail(), clearly expects a truesize.

Such difference leads to unspecific memory corruption issues under certain
circumstances, e.g. in ixgbevf maximum DMA write size is 3 KB, so when
running xskxceiver's XDP_ADJUST_TAIL_GROW_MULTI_BUFF, 6K packet fully uses
all DMA-writable space in 2 buffers. This would be fine, if only
rxq->frag_size was properly set to 4K, but value of 3K results in a
negative tailroom, because there is a non-zero page offset.

We are supposed to return -EINVAL and be done with it in such case,
but due to tailroom being stored as an unsigned int, it is reported to be
somewhere near UINT_MAX, resulting in a tail being grown, even if the
requested offset is too much(it is around 2K in the abovementioned test).
This later leads to all kinds of unspecific calltraces.

[ 7340.337579] xskxceiver[1440]: segfault at 1da718 ip 00007f4161aeac9d sp 00007f41615a6a00 error 6
[ 7340.338040] xskxceiver[1441]: segfault at 7f410000000b ip 00000000004042b5 sp 00007f415bffecf0 error 4
[ 7340.338179]  in libc.so.6[61c9d,7f4161aaf000+160000]
[ 7340.339230]  in xskxceiver[42b5,400000+69000]
[ 7340.340300]  likely on CPU 6 (core 0, socket 6)
[ 7340.340302] Code: ff ff 01 e9 f4 fe ff ff 0f 1f 44 00 00 4c 39 f0 74 73 31 c0 ba 01 00 00 00 f0 0f b1 17 0f 85 ba 00 00 00 49 8b 87 88 00 00 00 <4c> 89 70 08 eb cc 0f 1f 44 00 00 48 8d bd f0 fe ff ff 89 85 ec fe
[ 7340.340888]  likely on CPU 3 (core 0, socket 3)
[ 7340.345088] Code: 00 00 00 ba 00 00 00 00 be 00 00 00 00 89 c7 e8 31 ca ff ff 89 45 ec 8b 45 ec 85 c0 78 07 b8 00 00 00 00 eb 46 e8 0b c8 ff ff <8b> 00 83 f8 69 74 24 e8 ff c7 ff ff 8b 00 83 f8 0b 74 18 e8 f3 c7
[ 7340.404334] Oops: general protection fault, probably for non-canonical address 0x6d255010bdffc: 0000 [#1] SMP NOPTI
[ 7340.405972] CPU: 7 UID: 0 PID: 1439 Comm: xskxceiver Not tainted 6.19.0-rc1+ #21 PREEMPT(lazy)
[ 7340.408006] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-5.fc42 04/01/2014
[ 7340.409716] RIP: 0010:lookup_swap_cgroup_id+0x44/0x80
[ 7340.410455] Code: 83 f8 1c 73 39 48 ba ff ff ff ff ff ff ff 03 48 8b 04 c5 20 55 fa bd 48 21 d1 48 89 ca 83 e1 01 48 d1 ea c1 e1 04 48 8d 04 90 <8b> 00 48 83 c4 10 d3 e8 c3 cc cc cc cc 31 c0 e9 98 b7 dd 00 48 89
[ 7340.412787] RSP: 0018:ffffcc5c04f7f6d0 EFLAGS: 00010202
[ 7340.413494] RAX: 0006d255010bdffc RBX: ffff891f477895a8 RCX: 0000000000000010
[ 7340.414431] RDX: 0001c17e3fffffff RSI: 00fa070000000000 RDI: 000382fc7fffffff
[ 7340.415354] RBP: 00fa070000000000 R08: ffffcc5c04f7f8f8 R09: ffffcc5c04f7f7d0
[ 7340.416283] R10: ffff891f4c1a7000 R11: ffffcc5c04f7f9c8 R12: ffffcc5c04f7f7d0
[ 7340.417218] R13: 03ffffffffffffff R14: 00fa06fffffffe00 R15: ffff891f47789500
[ 7340.418229] FS:  0000000000000000(0000) GS:ffff891ffdfaa000(0000) knlGS:0000000000000000
[ 7340.419489] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 7340.420286] CR2: 00007f415bfffd58 CR3: 0000000103f03002 CR4: 0000000000772ef0
[ 7340.421237] PKRU: 55555554
[ 7340.421623] Call Trace:
[ 7340.421987]  <TASK>
[ 7340.422309]  ? softleaf_from_pte+0x77/0xa0
[ 7340.422855]  swap_pte_batch+0xa7/0x290
[ 7340.423363]  zap_nonpresent_ptes.constprop.0.isra.0+0xd1/0x270
[ 7340.424102]  zap_pte_range+0x281/0x580
[ 7340.424607]  zap_pmd_range.isra.0+0xc9/0x240
[ 7340.425177]  unmap_page_range+0x24d/0x420
[ 7340.425714]  unmap_vmas+0xa1/0x180
[ 7340.426185]  exit_mmap+0xe1/0x3b0
[ 7340.426644]  __mmput+0x41/0x150
[ 7340.427098]  exit_mm+0xb1/0x110
[ 7340.427539]  do_exit+0x1b2/0x460
[ 7340.427992]  do_group_exit+0x2d/0xc0
[ 7340.428477]  get_signal+0x79d/0x7e0
[ 7340.428957]  arch_do_signal_or_restart+0x34/0x100
[ 7340.429571]  exit_to_user_mode_loop+0x8e/0x4c0
[ 7340.430159]  do_syscall_64+0x188/0x6b0
[ 7340.430672]  ? __do_sys_clone3+0xd9/0x120
[ 7340.431212]  ? switch_fpu_return+0x4e/0xd0
[ 7340.431761]  ? arch_exit_to_user_mode_prepare.isra.0+0xa1/0xc0
[ 7340.432498]  ? do_syscall_64+0xbb/0x6b0
[ 7340.433015]  ? __handle_mm_fault+0x445/0x690
[ 7340.433582]  ? count_memcg_events+0xd6/0x210
[ 7340.434151]  ? handle_mm_fault+0x212/0x340
[ 7340.434697]  ? do_user_addr_fault+0x2b4/0x7b0
[ 7340.435271]  ? clear_bhb_loop+0x30/0x80
[ 7340.435788]  ? clear_bhb_loop+0x30/0x80
[ 7340.436299]  ? clear_bhb_loop+0x30/0x80
[ 7340.436812]  ? clear_bhb_loop+0x30/0x80
[ 7340.437323]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 7340.437973] RIP: 0033:0x7f4161b14169
[ 7340.438468] Code: Unable to access opcode bytes at 0x7f4161b1413f.
[ 7340.439242] RSP: 002b:00007ffc6ebfa770 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
[ 7340.440173] RAX: fffffffffffffe00 RBX: 00000000000005a1 RCX: 00007f4161b14169
[ 7340.441061] RDX: 00000000000005a1 RSI: 0000000000000109 RDI: 00007f415bfff990
[ 7340.441943] RBP: 00007ffc6ebfa7a0 R08: 0000000000000000 R09: 00000000ffffffff
[ 7340.442824] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[ 7340.443707] R13: 0000000000000000 R14: 00007f415bfff990 R15: 00007f415bfff6c0
[ 7340.444586]  </TASK>
[ 7340.444922] Modules linked in: rfkill intel_rapl_msr intel_rapl_common intel_uncore_frequency_common skx_edac_common nfit libnvdimm kvm_intel vfat fat kvm snd_pcm irqbypass rapl iTCO_wdt snd_timer intel_pmc_bxt iTCO_vendor_support snd ixgbevf virtio_net soundcore i2c_i801 pcspkr libeth_xdp net_failover i2c_smbus lpc_ich failover libeth virtio_balloon joydev 9p fuse loop zram lz4hc_compress lz4_compress 9pnet_virtio 9pnet netfs ghash_clmulni_intel serio_raw qemu_fw_cfg
[ 7340.449650] ---[ end trace 0000000000000000 ]---

The issue can be fixed in all in-tree drivers, but we cannot just trust OOT
drivers to not do this. Therefore, make tailroom a signed int and produce a
warning when it is negative to prevent such mistakes in the future.

The issue can also be easily reproduced with ice driver, by applying
the following diff to xskxceiver and enjoying a kernel panic in xdpdrv mode:

diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
index 5af28f359cfd..042d587fa7ef 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -2541,8 +2541,8 @@ int testapp_adjust_tail_grow_mb(struct test_spec *test)
 {
        test->mtu = MAX_ETH_JUMBO_SIZE;
        /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
-       return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
-                                  XSK_UMEM__LARGE_FRAME_SIZE * 2);
+       return testapp_adjust_tail(test, XSK_UMEM__MAX_FRAME_SIZE * 100,
+                                  6912);
 }

 int testapp_tx_queue_consumer(struct test_spec *test)

If we print out the values involved in the tailroom calculation:

tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);

4294967040 = 3456 - 3456 - 256

I personally reproduced and verified the issue in ice and i40e,
aside from WiP ixgbevf implementation.
====================

Link: https://patch.msgid.link/20260305111253.2317394-1-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:03:27 -08:00
Larysa Zaremba
8821e85775 xdp: produce a warning when calculated tailroom is negative
Many ethernet drivers report xdp Rx queue frag size as being the same as
DMA write size. However, the only user of this field, namely
bpf_xdp_frags_increase_tail(), clearly expects a truesize.

Such difference leads to unspecific memory corruption issues under certain
circumstances, e.g. in ixgbevf maximum DMA write size is 3 KB, so when
running xskxceiver's XDP_ADJUST_TAIL_GROW_MULTI_BUFF, 6K packet fully uses
all DMA-writable space in 2 buffers. This would be fine, if only
rxq->frag_size was properly set to 4K, but value of 3K results in a
negative tailroom, because there is a non-zero page offset.

We are supposed to return -EINVAL and be done with it in such case, but due
to tailroom being stored as an unsigned int, it is reported to be somewhere
near UINT_MAX, resulting in a tail being grown, even if the requested
offset is too much (it is around 2K in the abovementioned test). This later
leads to all kinds of unspecific calltraces.

[ 7340.337579] xskxceiver[1440]: segfault at 1da718 ip 00007f4161aeac9d sp 00007f41615a6a00 error 6
[ 7340.338040] xskxceiver[1441]: segfault at 7f410000000b ip 00000000004042b5 sp 00007f415bffecf0 error 4
[ 7340.338179]  in libc.so.6[61c9d,7f4161aaf000+160000]
[ 7340.339230]  in xskxceiver[42b5,400000+69000]
[ 7340.340300]  likely on CPU 6 (core 0, socket 6)
[ 7340.340302] Code: ff ff 01 e9 f4 fe ff ff 0f 1f 44 00 00 4c 39 f0 74 73 31 c0 ba 01 00 00 00 f0 0f b1 17 0f 85 ba 00 00 00 49 8b 87 88 00 00 00 <4c> 89 70 08 eb cc 0f 1f 44 00 00 48 8d bd f0 fe ff ff 89 85 ec fe
[ 7340.340888]  likely on CPU 3 (core 0, socket 3)
[ 7340.345088] Code: 00 00 00 ba 00 00 00 00 be 00 00 00 00 89 c7 e8 31 ca ff ff 89 45 ec 8b 45 ec 85 c0 78 07 b8 00 00 00 00 eb 46 e8 0b c8 ff ff <8b> 00 83 f8 69 74 24 e8 ff c7 ff ff 8b 00 83 f8 0b 74 18 e8 f3 c7
[ 7340.404334] Oops: general protection fault, probably for non-canonical address 0x6d255010bdffc: 0000 [#1] SMP NOPTI
[ 7340.405972] CPU: 7 UID: 0 PID: 1439 Comm: xskxceiver Not tainted 6.19.0-rc1+ #21 PREEMPT(lazy)
[ 7340.408006] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-5.fc42 04/01/2014
[ 7340.409716] RIP: 0010:lookup_swap_cgroup_id+0x44/0x80
[ 7340.410455] Code: 83 f8 1c 73 39 48 ba ff ff ff ff ff ff ff 03 48 8b 04 c5 20 55 fa bd 48 21 d1 48 89 ca 83 e1 01 48 d1 ea c1 e1 04 48 8d 04 90 <8b> 00 48 83 c4 10 d3 e8 c3 cc cc cc cc 31 c0 e9 98 b7 dd 00 48 89
[ 7340.412787] RSP: 0018:ffffcc5c04f7f6d0 EFLAGS: 00010202
[ 7340.413494] RAX: 0006d255010bdffc RBX: ffff891f477895a8 RCX: 0000000000000010
[ 7340.414431] RDX: 0001c17e3fffffff RSI: 00fa070000000000 RDI: 000382fc7fffffff
[ 7340.415354] RBP: 00fa070000000000 R08: ffffcc5c04f7f8f8 R09: ffffcc5c04f7f7d0
[ 7340.416283] R10: ffff891f4c1a7000 R11: ffffcc5c04f7f9c8 R12: ffffcc5c04f7f7d0
[ 7340.417218] R13: 03ffffffffffffff R14: 00fa06fffffffe00 R15: ffff891f47789500
[ 7340.418229] FS:  0000000000000000(0000) GS:ffff891ffdfaa000(0000) knlGS:0000000000000000
[ 7340.419489] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 7340.420286] CR2: 00007f415bfffd58 CR3: 0000000103f03002 CR4: 0000000000772ef0
[ 7340.421237] PKRU: 55555554
[ 7340.421623] Call Trace:
[ 7340.421987]  <TASK>
[ 7340.422309]  ? softleaf_from_pte+0x77/0xa0
[ 7340.422855]  swap_pte_batch+0xa7/0x290
[ 7340.423363]  zap_nonpresent_ptes.constprop.0.isra.0+0xd1/0x270
[ 7340.424102]  zap_pte_range+0x281/0x580
[ 7340.424607]  zap_pmd_range.isra.0+0xc9/0x240
[ 7340.425177]  unmap_page_range+0x24d/0x420
[ 7340.425714]  unmap_vmas+0xa1/0x180
[ 7340.426185]  exit_mmap+0xe1/0x3b0
[ 7340.426644]  __mmput+0x41/0x150
[ 7340.427098]  exit_mm+0xb1/0x110
[ 7340.427539]  do_exit+0x1b2/0x460
[ 7340.427992]  do_group_exit+0x2d/0xc0
[ 7340.428477]  get_signal+0x79d/0x7e0
[ 7340.428957]  arch_do_signal_or_restart+0x34/0x100
[ 7340.429571]  exit_to_user_mode_loop+0x8e/0x4c0
[ 7340.430159]  do_syscall_64+0x188/0x6b0
[ 7340.430672]  ? __do_sys_clone3+0xd9/0x120
[ 7340.431212]  ? switch_fpu_return+0x4e/0xd0
[ 7340.431761]  ? arch_exit_to_user_mode_prepare.isra.0+0xa1/0xc0
[ 7340.432498]  ? do_syscall_64+0xbb/0x6b0
[ 7340.433015]  ? __handle_mm_fault+0x445/0x690
[ 7340.433582]  ? count_memcg_events+0xd6/0x210
[ 7340.434151]  ? handle_mm_fault+0x212/0x340
[ 7340.434697]  ? do_user_addr_fault+0x2b4/0x7b0
[ 7340.435271]  ? clear_bhb_loop+0x30/0x80
[ 7340.435788]  ? clear_bhb_loop+0x30/0x80
[ 7340.436299]  ? clear_bhb_loop+0x30/0x80
[ 7340.436812]  ? clear_bhb_loop+0x30/0x80
[ 7340.437323]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 7340.437973] RIP: 0033:0x7f4161b14169
[ 7340.438468] Code: Unable to access opcode bytes at 0x7f4161b1413f.
[ 7340.439242] RSP: 002b:00007ffc6ebfa770 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
[ 7340.440173] RAX: fffffffffffffe00 RBX: 00000000000005a1 RCX: 00007f4161b14169
[ 7340.441061] RDX: 00000000000005a1 RSI: 0000000000000109 RDI: 00007f415bfff990
[ 7340.441943] RBP: 00007ffc6ebfa7a0 R08: 0000000000000000 R09: 00000000ffffffff
[ 7340.442824] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[ 7340.443707] R13: 0000000000000000 R14: 00007f415bfff990 R15: 00007f415bfff6c0
[ 7340.444586]  </TASK>
[ 7340.444922] Modules linked in: rfkill intel_rapl_msr intel_rapl_common intel_uncore_frequency_common skx_edac_common nfit libnvdimm kvm_intel vfat fat kvm snd_pcm irqbypass rapl iTCO_wdt snd_timer intel_pmc_bxt iTCO_vendor_support snd ixgbevf virtio_net soundcore i2c_i801 pcspkr libeth_xdp net_failover i2c_smbus lpc_ich failover libeth virtio_balloon joydev 9p fuse loop zram lz4hc_compress lz4_compress 9pnet_virtio 9pnet netfs ghash_clmulni_intel serio_raw qemu_fw_cfg
[ 7340.449650] ---[ end trace 0000000000000000 ]---

The issue can be fixed in all in-tree drivers, but we cannot just trust OOT
drivers to not do this. Therefore, make tailroom a signed int and produce a
warning when it is negative to prevent such mistakes in the future.

Fixes: bf25146a55 ("bpf: add frags support to the bpf_xdp_adjust_tail() API")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-10-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:05 -08:00
Larysa Zaremba
f8e18abf18 net: enetc: use truesize as XDP RxQ info frag_size
The only user of frag_size field in XDP RxQ info is
bpf_xdp_frags_increase_tail(). It clearly expects truesize instead of DMA
write size. Different assumptions in enetc driver configuration lead to
negative tailroom.

Set frag_size to the same value as frame_sz.

Fixes: 2768b2e2f7 ("net: enetc: register XDP RX queues with frag_size")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-9-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:05 -08:00
Larysa Zaremba
75d9228982 libeth, idpf: use truesize as XDP RxQ info frag_size
The only user of frag_size field in XDP RxQ info is
bpf_xdp_frags_increase_tail(). It clearly expects whole buffer size instead
of DMA write size. Different assumptions in idpf driver configuration lead
to negative tailroom.

To make it worse, buffer sizes are not actually uniform in idpf when
splitq is enabled, as there are several buffer queues, so rxq->rx_buf_size
is meaningless in this case.

Use truesize of the first bufq in AF_XDP ZC, as there is only one. Disable
growing tail for regular splitq.

Fixes: ac8a861f63 ("idpf: prepare structures to support XDP")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-8-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:05 -08:00
Larysa Zaremba
c69d22c6c4 i40e: use xdp.frame_sz as XDP RxQ info frag_size
The only user of frag_size field in XDP RxQ info is
bpf_xdp_frags_increase_tail(). It clearly expects whole buffer size instead
of DMA write size. Different assumptions in i40e driver configuration lead
to negative tailroom.

Set frag_size to the same value as frame_sz in shared pages mode, use new
helper to set frag_size when AF_XDP ZC is active.

Fixes: a045d2f2d0 ("i40e: set xdp_rxq_info::frag_size")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-7-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:04 -08:00
Larysa Zaremba
8f497dc8a6 i40e: fix registering XDP RxQ info
Current way of handling XDP RxQ info in i40e has a problem, where frag_size
is not updated when xsk_buff_pool is detached or when MTU is changed, this
leads to growing tail always failing for multi-buffer packets.

Couple XDP RxQ info registering with buffer allocations and unregistering
with cleaning the ring.

Fixes: a045d2f2d0 ("i40e: set xdp_rxq_info::frag_size")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-6-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:04 -08:00
Larysa Zaremba
e142dc4ef0 ice: change XDP RxQ frag_size from DMA write length to xdp.frame_sz
The only user of frag_size field in XDP RxQ info is
bpf_xdp_frags_increase_tail(). It clearly expects whole buff size instead
of DMA write size. Different assumptions in ice driver configuration lead
to negative tailroom.

This allows to trigger kernel panic, when using
XDP_ADJUST_TAIL_GROW_MULTI_BUFF xskxceiver test and changing packet size to
6912 and the requested offset to a huge value, e.g.
XSK_UMEM__MAX_FRAME_SIZE * 100.

Due to other quirks of the ZC configuration in ice, panic is not observed
in ZC mode, but tailroom growing still fails when it should not.

Use fill queue buffer truesize instead of DMA write size in XDP RxQ info.
Fix ZC mode too by using the new helper.

Fixes: 2fba7dc515 ("ice: Add support for XDP multi-buffer on Rx side")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-5-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:04 -08:00
Larysa Zaremba
02852b47c7 ice: fix rxq info registering in mbuf packets
XDP RxQ info contains frag_size, which depends on the MTU. This makes the
old way of registering RxQ info before calculating new buffer sizes
invalid. Currently, it leads to frag_size being outdated, making it
sometimes impossible to grow tailroom in a mbuf packet. E.g. fragments are
actually 3K+, but frag size is still as if MTU was 1500.

Always register new XDP RxQ info after reconfiguring memory pools.

Fixes: 2fba7dc515 ("ice: Add support for XDP multi-buffer on Rx side")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-4-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:03 -08:00
Larysa Zaremba
16394d8053 xsk: introduce helper to determine rxq->frag_size
rxq->frag_size is basically a step between consecutive strictly aligned
frames. In ZC mode, chunk size fits exactly, but if chunks are unaligned,
there is no safe way to determine accessible space to grow tailroom.

Report frag_size to be zero, if chunks are unaligned, chunk_size otherwise.

Fixes: 24ea50127e ("xsk: support mbuf on ZC RX")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-3-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:03 -08:00
Larysa Zaremba
88b6b7f7b2 xdp: use modulo operation to calculate XDP frag tailroom
The current formula for calculating XDP tailroom in mbuf packets works only
if each frag has its own page (if rxq->frag_size is PAGE_SIZE), this
defeats the purpose of the parameter overall and without any indication
leads to negative calculated tailroom on at least half of frags, if shared
pages are used.

There are not many drivers that set rxq->frag_size. Among them:
* i40e and enetc always split page uniformly between frags, use shared
  pages
* ice uses page_pool frags via libeth, those are power-of-2 and uniformly
  distributed across page
* idpf has variable frag_size with XDP on, so current API is not applicable
* mlx5, mtk and mvneta use PAGE_SIZE or 0 as frag_size for page_pool

As for AF_XDP ZC, only ice, i40e and idpf declare frag_size for it. Modulo
operation yields good results for aligned chunks, they are all power-of-2,
between 2K and PAGE_SIZE. Formula without modulo fails when chunk_size is
2K. Buffers in unaligned mode are not distributed uniformly, so modulo
operation would not work.

To accommodate unaligned buffers, we could define frag_size as
data + tailroom, and hence do not subtract offset when calculating
tailroom, but this would necessitate more changes in the drivers.

Define rxq->frag_size as an even portion of a page that fully belongs to a
single frag. When calculating tailroom, locate the data start within such
portion by performing a modulo operation on page offset.

Fixes: bf25146a55 ("bpf: add frags support to the bpf_xdp_adjust_tail() API")
Acked-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-2-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 08:02:03 -08:00
Victor Nogueira
5d1271ff4c selftests/tc-testing: Add tests exercising act_ife metalist replace behaviour
Add 2 test cases to exercise fix in act_ife's internal metalist
behaviour.

- Update decode ife action into encode with tcindex metadata
- Update decode ife action into encode with multiple metadata

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260304140603.76500-2-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:54:09 -08:00
Jamal Hadi Salim
e2cedd400c net/sched: act_ife: Fix metalist update behavior
Whenever an ife action replace changes the metalist, instead of
replacing the old data on the metalist, the current ife code is appending
the new metadata. Aside from being innapropriate behavior, this may lead
to an unbounded addition of metadata to the metalist which might cause an
out of bounds error when running the encode op:

[  138.423369][    C1] ==================================================================
[  138.424317][    C1] BUG: KASAN: slab-out-of-bounds in ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.424906][    C1] Write of size 4 at addr ffff8880077f4ffe by task ife_out_out_bou/255
[  138.425778][    C1] CPU: 1 UID: 0 PID: 255 Comm: ife_out_out_bou Not tainted 7.0.0-rc1-00169-gfbdfa8da05b6 #624 PREEMPT(full)
[  138.425795][    C1] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  138.425800][    C1] Call Trace:
[  138.425804][    C1]  <IRQ>
[  138.425808][    C1]  dump_stack_lvl (lib/dump_stack.c:122)
[  138.425828][    C1]  print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
[  138.425839][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425844][    C1]  ? __virt_addr_valid (./arch/x86/include/asm/preempt.h:95 (discriminator 1) ./include/linux/rcupdate.h:975 (discriminator 1) ./include/linux/mmzone.h:2207 (discriminator 1) arch/x86/mm/physaddr.c:54 (discriminator 1))
[  138.425853][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425859][    C1]  kasan_report (mm/kasan/report.c:221 mm/kasan/report.c:597)
[  138.425868][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425878][    C1]  kasan_check_range (mm/kasan/generic.c:186 (discriminator 1) mm/kasan/generic.c:200 (discriminator 1))
[  138.425884][    C1]  __asan_memset (mm/kasan/shadow.c:84 (discriminator 2))
[  138.425889][    C1]  ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425893][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:171)
[  138.425898][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425903][    C1]  ife_encode_meta_u16 (net/sched/act_ife.c:57)
[  138.425910][    C1]  ? __pfx_do_raw_spin_lock (kernel/locking/spinlock_debug.c:114)
[  138.425916][    C1]  ? __asan_memcpy (mm/kasan/shadow.c:105 (discriminator 3))
[  138.425921][    C1]  ? __pfx_ife_encode_meta_u16 (net/sched/act_ife.c:45)
[  138.425927][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425931][    C1]  tcf_ife_act (net/sched/act_ife.c:847 net/sched/act_ife.c:879)

To solve this issue, fix the replace behavior by adding the metalist to
the ife rcu data structure.

Fixes: aa9fd9a325 ("sched: act: ife: update parameters via rcu handling")
Reported-by: Ruitong Liu <cnitlrt@gmail.com>
Tested-by: Ruitong Liu <cnitlrt@gmail.com>
Co-developed-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260304140603.76500-1-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:54:08 -08:00
Jakub Kicinski
4517b74cc0 Merge branch 'net-ipv6-fix-panic-when-ipv4-route-references-loopback-ipv6-nexthop-and-add-selftest'
Jiayuan Chen says:

====================
net: ipv6: fix panic when IPv4 route references loopback IPv6 nexthop and add selftest

syzbot reported a kernel panic [1] when an IPv4 route references
a loopback IPv6 nexthop object:

BUG: unable to handle page fault for address: ffff8d069e7aa000
PF: supervisor read access in kernel mode
PF: error_code(0x0000) - not-present page
PGD 6aa01067 P4D 6aa01067 PUD 0
Oops: Oops: 0000 [#1] SMP PTI
CPU: 2 UID: 0 PID: 530 Comm: ping Not tainted 6.19.0+ #193 PREEMPT
RIP: 0010:ip_route_output_key_hash_rcu+0x578/0x9e0
RSP: 0018:ffffd2ffc1573918 EFLAGS: 00010286
RAX: ffff8d069e7aa000 RBX: ffffd2ffc1573988 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffffd2ffc1573978 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8d060d496000
R13: 0000000000000000 R14: ffff8d060399a600 R15: ffff8d06019a6ab8
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff8d069e7aa000 CR3: 0000000106eb0001 CR4: 0000000000770ef0
PKRU: 55555554
Call Trace:
 <TASK>
 ip_route_output_key_hash+0x86/0x1a0
 __ip4_datagram_connect+0x2b5/0x4e0
 udp_connect+0x2c/0x60
 inet_dgram_connect+0x88/0xd0
 __sys_connect_file+0x56/0x90
 __sys_connect+0xa8/0xe0
 __x64_sys_connect+0x18/0x30
 x64_sys_call+0xfb9/0x26e0
 do_syscall_64+0xd3/0x1510
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Reproduction:

    ip -6 nexthop add id 100 dev lo
    ip route add 172.20.20.0/24 nhid 100
    ping -c1 172.20.20.1     # kernel crash

Problem Description

When a standalone IPv6 nexthop object is created with a loopback device,
fib6_nh_init() misclassifies it as a reject route. Nexthop objects have
no destination prefix (fc_dst=::), so fib6_is_reject() always matches
any loopback nexthop. The reject path skips fib_nh_common_init(), leaving
nhc_pcpu_rth_output unallocated. When an IPv4 route later references
this nexthop and triggers a route lookup, __mkroute_output() calls
raw_cpu_ptr(nhc->nhc_pcpu_rth_output) on a NULL pointer, causing a page
fault.

The reject classification was designed for regular IPv6 routes to prevent
kernel routing loops, but nexthop objects should not be subject to this
check since they carry no destination information. Loop prevention is
handled separately when the route itself is created.
[1] https://syzkaller.appspot.com/bug?extid=334190e097a98a1b81bb
====================

Link: https://patch.msgid.link/20260304113817.294966-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:53:20 -08:00
Jiayuan Chen
46c1ef0cfc selftests: net: add test for IPv4 route with loopback IPv6 nexthop
Add a regression test for a kernel panic that occurs when an IPv4 route
references an IPv6 nexthop object created on the loopback device.

The test creates an IPv6 nexthop on lo, binds an IPv4 route to it, then
triggers a route lookup via ping to verify the kernel does not crash.

  ./fib_nexthops.sh
  Tests passed: 249
  Tests failed:   0

Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260304113817.294966-3-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:53:17 -08:00
Jiayuan Chen
21ec92774d net: ipv6: fix panic when IPv4 route references loopback IPv6 nexthop
When a standalone IPv6 nexthop object is created with a loopback device
(e.g., "ip -6 nexthop add id 100 dev lo"), fib6_nh_init() misclassifies
it as a reject route. This is because nexthop objects have no destination
prefix (fc_dst=::), causing fib6_is_reject() to match any loopback
nexthop. The reject path skips fib_nh_common_init(), leaving
nhc_pcpu_rth_output unallocated. If an IPv4 route later references this
nexthop, __mkroute_output() dereferences NULL nhc_pcpu_rth_output and
panics.

Simplify the check in fib6_nh_init() to only match explicit reject
routes (RTF_REJECT) instead of using fib6_is_reject(). The loopback
promotion heuristic in fib6_is_reject() is handled separately by
ip6_route_info_create_nh(). After this change, the three cases behave
as follows:

1. Explicit reject route ("ip -6 route add unreachable 2001:db8::/64"):
   RTF_REJECT is set, enters reject path, skips fib_nh_common_init().
   No behavior change.

2. Implicit loopback reject route ("ip -6 route add 2001:db8::/32 dev lo"):
   RTF_REJECT is not set, takes normal path, fib_nh_common_init() is
   called. ip6_route_info_create_nh() still promotes it to reject
   afterward. nhc_pcpu_rth_output is allocated but unused, which is
   harmless.

3. Standalone nexthop object ("ip -6 nexthop add id 100 dev lo"):
   RTF_REJECT is not set, takes normal path, fib_nh_common_init() is
   called. nhc_pcpu_rth_output is properly allocated, fixing the crash
   when IPv4 routes reference this nexthop.

Suggested-by: Ido Schimmel <idosch@nvidia.com>
Fixes: 493ced1ac4 ("ipv4: Allow routes to use nexthop objects")
Reported-by: syzbot+334190e097a98a1b81bb@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/698f8482.a70a0220.2c38d7.00ca.GAE@google.com/T/
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260304113817.294966-2-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:53:17 -08:00
Fernando Fernandez Mancera
168ff39e47 net: vxlan: fix nd_tbl NULL dereference when IPv6 is disabled
When booting with the 'ipv6.disable=1' parameter, the nd_tbl is never
initialized because inet6_init() exits before ndisc_init() is called
which initializes it. If an IPv6 packet is injected into the interface,
route_shortcircuit() is called and a NULL pointer dereference happens on
neigh_lookup().

 BUG: kernel NULL pointer dereference, address: 0000000000000380
 Oops: Oops: 0000 [#1] SMP NOPTI
 [...]
 RIP: 0010:neigh_lookup+0x20/0x270
 [...]
 Call Trace:
  <TASK>
  vxlan_xmit+0x638/0x1ef0 [vxlan]
  dev_hard_start_xmit+0x9e/0x2e0
  __dev_queue_xmit+0xbee/0x14e0
  packet_sendmsg+0x116f/0x1930
  __sys_sendto+0x1f5/0x200
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x12f/0x1590
  entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fix this by adding an early check on route_shortcircuit() when protocol
is ETH_P_IPV6. Note that ipv6_mod_enabled() cannot be used here because
VXLAN can be built-in even when IPv6 is built as a module.

Fixes: e15a00aafa ("vxlan: add ipv6 route short circuit support")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Link: https://patch.msgid.link/20260304120357.9778-2-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:52:56 -08:00
Fernando Fernandez Mancera
e5e8906305 net: bridge: fix nd_tbl NULL dereference when IPv6 is disabled
When booting with the 'ipv6.disable=1' parameter, the nd_tbl is never
initialized because inet6_init() exits before ndisc_init() is called
which initializes it. Then, if neigh_suppress is enabled and an ICMPv6
Neighbor Discovery packet reaches the bridge, br_do_suppress_nd() will
dereference ipv6_stub->nd_tbl which is NULL, passing it to
neigh_lookup(). This causes a kernel NULL pointer dereference.

 BUG: kernel NULL pointer dereference, address: 0000000000000268
 Oops: 0000 [#1] PREEMPT SMP NOPTI
 [...]
 RIP: 0010:neigh_lookup+0x16/0xe0
 [...]
 Call Trace:
  <IRQ>
  ? neigh_lookup+0x16/0xe0
  br_do_suppress_nd+0x160/0x290 [bridge]
  br_handle_frame_finish+0x500/0x620 [bridge]
  br_handle_frame+0x353/0x440 [bridge]
  __netif_receive_skb_core.constprop.0+0x298/0x1110
  __netif_receive_skb_one_core+0x3d/0xa0
  process_backlog+0xa0/0x140
  __napi_poll+0x2c/0x170
  net_rx_action+0x2c4/0x3a0
  handle_softirqs+0xd0/0x270
  do_softirq+0x3f/0x60

Fix this by replacing IS_ENABLED(IPV6) call with ipv6_mod_enabled() in
the callers. This is in essence disabling NS/NA suppression when IPv6 is
disabled.

Fixes: ed842faeb2 ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports")
Reported-by: Guruprasad C P <gurucp2005@gmail.com>
Closes: https://lore.kernel.org/netdev/CAHXs0ORzd62QOG-Fttqa2Cx_A_VFp=utE2H2VTX5nqfgs7LDxQ@mail.gmail.com/
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260304120357.9778-1-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:52:56 -08:00
Pedro Falcato
b92b0075ee ata: libata-core: Add BRIDGE_OK quirk for QEMU drives
Currently, whenever you boot with a QEMU drive over an AHCI interface,
you get:
[    1.632121] ata1.00: applying bridge limits

This happens due to the kernel not believing the given drive is SATA,
since word 93 of IDENTIFY (ATA_ID_HW_CONFIG) is non-zero. The result is
a pretty severe limit in max_hw_sectors_kb, which limits our IO sizes.

QEMU has set word 93 erroneously for SATA drives but does not, in any
way, emulate any of these real hardware details. There is no PATA
drive and no SATA cable.

As such, add a BRIDGE_OK quirk for QEMU HARDDISK. Special care is taken
to limit this quirk to "2.5+", to allow for fixed future versions.

This results in the max_hw_sectors being limited solely by the
controller interface's limits. Which, for AHCI controllers, takes it
from 128KB to 32767KB.

Cc: stable@vger.kernel.org
Signed-off-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2026-03-05 16:49:02 +01:00
Zenghui Yu (Huawei)
eb54fa1025 KVM: arm64: nv: Inject a SEA if failed to read the descriptor
Failure to read the descriptor (because it is outside of a memslot) should
result in a SEA being injected in the guest.

Suggested-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/86ms1m9lp3.wl-maz@kernel.org
Signed-off-by: Zenghui Yu (Huawei) <zenghui.yu@linux.dev>
Link: https://patch.msgid.link/20260225173515.20490-4-zenghui.yu@linux.dev
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-05 15:46:48 +00:00
Zenghui Yu (Huawei)
99a339377f KVM: arm64: nv: Report addrsz fault at level 0 with a bad VTTBR.BADDR
As per R_BFHQH,

" When an Address size fault is generated, the reported fault code
  indicates one of the following:

  If the fault was generated due to the TTBR_ELx used in the translation
  having nonzero address bits above the OA size, then a fault at level 0. "

Fix the reported Address size fault level as being 0 if the base address is
wrongly programmed by L1.

Fixes: 61e30b9eef ("KVM: arm64: nv: Implement nested Stage-2 page table walk logic")
Signed-off-by: Zenghui Yu (Huawei) <zenghui.yu@linux.dev>
Link: https://patch.msgid.link/20260225173515.20490-3-zenghui.yu@linux.dev
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-05 15:46:48 +00:00
Zenghui Yu (Huawei)
4c2264ecdf KVM: arm64: nv: Check S2 limits based on implemented PA size
check_base_s2_limits() checks the validity of SL0 and inputsize against
ia_size (inputsize again!) but the pseudocode from DDI0487 G.a
AArch64.TranslationTableWalk() says that we should check against the
implemented PA size.

We would otherwise fail to walk S2 with a valid configuration. E.g.,
granule size = 4KB, inputsize = 40 bits, initial lookup level = 0 (no
concatenation) on a system with 48 bits PA range supported is allowed by
architecture.

Fix it by obtaining PA size by kvm_get_pa_bits(). Note that
kvm_get_pa_bits() returns the fixed limit now and should eventually reflect
the per VM PARange (one day!). Given that the configured PARange should not
be greater that kvm_ipa_limit, it at least fixes the problem described
above.

While at it, inject a level 0 translation fault to guest if
check_base_s2_limits() fails, as per the pseudocode.

Fixes: 61e30b9eef ("KVM: arm64: nv: Implement nested Stage-2 page table walk logic")
Signed-off-by: Zenghui Yu (Huawei) <zenghui.yu@linux.dev>
Link: https://patch.msgid.link/20260225173515.20490-2-zenghui.yu@linux.dev
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-05 15:46:47 +00:00
Jakub Kicinski
02b2920e30 Merge branch 'maintainers-annual-cleanup-of-inactive-maintainers'
Jakub Kicinski says:

====================
MAINTAINERS: annual cleanup of inactive maintainers

Annual cleanup of inactive maintainers under networking.
The goal is to make sure MAINTAINERS reflect reality for
code which is relatively actively changed (at least 70 commits
in the last 2 years or at least 120 commits in the last 5 years).

Those who either:
 - were the initial author / "upstreamer" of the driver; or
 - authored at least 1/3rd of the exiting code base (per git blame); or
 - authored at least 25% of commits before becoming inactive
are moved to CREDITS.

The discovery of inactive maintainers was done using gitdm tools,
with a bunch of ad-hoc scripts on top to do the rest. I tried to
double check the results but this is mostly a scripted cleanup
so please report inaccuracies if any.
====================

Link: https://patch.msgid.link/20260303215339.2333548-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:47 -08:00
Jakub Kicinski
ed65790025 MAINTAINERS: remove Thomas Falcon from IBM ibmvnic
We have not seen emails or tags from Thomas's IBM address
(tlfalcon@linux.ibm.com) in over 5 years. Looks like Thomas
is active in perf tooling at Intel (thomas.falcon@intel.com).

Subsystem IBM Power SRIOV Virtual NIC Device Driver
  Changes 49 / 134 (36%)
  Last activity: 2025-08-26
  Haren Myneni <haren@linux.ibm.com>:
    Tags 3c14917953 2025-08-26 00:00:00 2
  Rick Lindsley <ricklind@linux.ibm.com>:
  Nick Child <nnac123@linux.ibm.com>:
    Author d93a6caab5 2025-03-25 00:00:00 14
    Tags d93a6caab5 2025-03-25 00:00:00 16
  Thomas Falcon <tlfalcon@linux.ibm.com>:
  Top reviewers:
    [22]: drt@linux.ibm.com
    [13]: horms@kernel.org
    [9]: ricklind@linux.vnet.ibm.com
    [3]: davemarq@linux.ibm.com
  INACTIVE MAINTAINER Thomas Falcon <tlfalcon@linux.ibm.com>

Move Thomas to CREDITS as the initial author of ibmvnic.

Acked-by: Thomas Falcon <thomas.falcon@intel.com>
Link: https://patch.msgid.link/20260303215339.2333548-12-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:45 -08:00
Jakub Kicinski
4d37e68c46 MAINTAINERS: remove Claudiu Manoil and Alexandre Belloni from Ocelot switch
We have not seen tags from Claudiu for the Ocelot switch driver
in over 5 years. He is active upstream in other NXP subsystems
(ENETC, gianfar), with 46 emails on lore since 2024.
We have not seen tags from Alexandre for the Ocelot switch driver
in over 5 years. He is very active upstream in other subsystems
(RTC, I3C, Atmel/Microchip SoC), with over 1,200 emails on lore
since 2024.
Vladimir Oltean is active.

Subsystem OCELOT ETHERNET SWITCH DRIVER
  Changes 180 / 494 (36%)
  Last activity: 2026-02-12
  Vladimir Oltean <vladimir.oltean@nxp.com>:
    Author c22ba07c82 2026-02-10 00:00:00 33
    Tags 026f6513c5 2026-02-12 00:00:00 39
  Claudiu Manoil <claudiu.manoil@nxp.com>:
  Alexandre Belloni <alexandre.belloni@bootlin.com>:
  Top reviewers:
    [49]: f.fainelli@gmail.com
    [19]: horms@kernel.org
    [10]: richardcochran@gmail.com
    [9]: jacob.e.keller@intel.com
    [8]: colin.foster@in-advantage.com
  INACTIVE MAINTAINER Claudiu Manoil <claudiu.manoil@nxp.com>

Acked-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://patch.msgid.link/20260303215339.2333548-11-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:45 -08:00
Jakub Kicinski
9ede3e910f MAINTAINERS: replace Taras Chornyi with Elad Nachman for Marvell Prestera
We have not seen emails or tags from Taras in over 5 years,
and there is no recent mailing list activity.
Elad Nachman has been providing reviews in the last couple
of years and is the top reviewer for this subsystem.

Subsystem MARVELL PRESTERA ETHERNET SWITCH DRIVER
  Changes 39 / 157 (24%)
  (No activity)
  Top reviewers:
    [8]: enachman@marvell.com
    [6]: horms@kernel.org
    [4]: idosch@nvidia.com
    [3]: andrew@lunn.ch
    [3]: jacob.e.keller@intel.com
    [3]: jiri@nvidia.com
  INACTIVE MAINTAINER Taras Chornyi <taras.chornyi@plvision.eu>

Link: https://patch.msgid.link/20260303215339.2333548-10-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:45 -08:00
Jakub Kicinski
60da2d2752 MAINTAINERS: remove Jonathan Lemon from OpenCompute PTP
We have not seen emails or tags from Jonathan in over 5 years,
and there is no recent mailing list activity.
Vadim Fedorenko is active.

Subsystem OPENCOMPUTE PTP CLOCK DRIVER
  Changes 49 / 130 (37%)
  Last activity: 2025-11-25
  Jonathan Lemon <jonathan.lemon@gmail.com>:
  Vadim Fedorenko <vadim.fedorenko@linux.dev>:
    Author d3ca2ef0c9 2025-09-19 00:00:00 5
    Tags 648282e2d1 2025-11-25 00:00:00 20
  Top reviewers:
    [7]: horms@kernel.org
    [4]: jiri@nvidia.com
    [3]: richardcochran@gmail.com
    [2]: aleksandr.loktionov@intel.com
  INACTIVE MAINTAINER Jonathan Lemon <jonathan.lemon@gmail.com>

Add Jonathan to CREDITS as the initial author of ptp_ocp.

Acked-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Link: https://patch.msgid.link/20260303215339.2333548-9-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:45 -08:00
Jakub Kicinski
34f49454b2 MAINTAINERS: replace Clark Wang with Frank Li for Freescale FEC
We have not seen tags from Clark for FEC in over 5 years.
He has some limited recent activity on the mailing list in other
NXP subsystems (stmmac, phy). Wei Fang and Shenwei Wang are active,
with decent review coverage (61%).

Frank Li has been reviewing code actively more recenty, let's
make it official.

Subsystem FREESCALE IMX / MXC FEC DRIVER
  Changes 57 / 92 (61%)
  Last activity: 2026-02-10
  Wei Fang <wei.fang@nxp.com>:
    Author 25eb3058eb 2026-02-10 00:00:00 33
    Tags 25eb3058eb 2026-02-10 00:00:00 61
  Shenwei Wang <shenwei.wang@nxp.com>:
    Author d466c16026 2025-09-14 00:00:00 6
    Tags d466c16026 2025-09-14 00:00:00 6
  Clark Wang <xiaoning.wang@nxp.com>:
  Top reviewers:
    [23]: Frank.Li@nxp.com
    [17]: andrew@lunn.ch
    [4]: csokas.bence@prolan.hu
    [3]: horms@kernel.org
    [2]: maxime.chevallier@bootlin.com
  INACTIVE MAINTAINER Clark Wang <xiaoning.wang@nxp.com>

Reviewed-by: Wei Fang <wei.fang@nxp.com>
Link: https://patch.msgid.link/20260303215339.2333548-8-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:44 -08:00
Jakub Kicinski
593cdf1452 MAINTAINERS: remove DENG Qingfang from MediaTek switch
We have not seen tags from DENG Qingfang for the MediaTek
switch driver in over 5 years. He is active upstream with
PPP/PPPoE patches in net-next. Chester and Daniel are active.

Subsystem MEDIATEK SWITCH DRIVER
  Changes 26 / 70 (37%)
  Last activity: 2025-12-01
  Chester A. Unal <chester.a.unal@arinc9.com>:
    Tags 585943b7ad 2025-12-01 00:00:00 7
  Daniel Golle <daniel@makrotopia.org>:
    Author 497041d763 2025-04-23 00:00:00 2
    Tags 3b87e60d21 2025-12-01 00:00:00 14
  DENG Qingfang <dqfext@gmail.com>:
  Sean Wang <sean.wang@mediatek.com>:
  Top reviewers:
    [4]: andrew@lunn.ch
    [4]: florian.fainelli@broadcom.com
    [4]: arinc.unal@arinc9.com
    [2]: olteanv@gmail.com
  INACTIVE MAINTAINER DENG Qingfang <dqfext@gmail.com>

Acked-by: Chester A. Unal <chester.a.unal@arinc9.com>
Link: https://patch.msgid.link/20260303215339.2333548-7-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:44 -08:00
Jakub Kicinski
77f72ef58f MAINTAINERS: remove Sean Wang from MediaTek Ethernet and switch
We have not seen tags from Sean in over 5 years,
with only one mailing list post since 2024.
Felix and Lorenzo are active for the Ethernet driver,
and Chester, Daniel and DENG Qingfang are active for
the switch driver.

Subsystem MEDIATEK ETHERNET DRIVER
  Changes 55 / 113 (48%)
  Last activity: 2025-10-12
  Felix Fietkau <nbd@nbd.name>:
    Author d473673711 2025-09-02 00:00:00 3
    Tags d473673711 2025-09-02 00:00:00 4
  Sean Wang <sean.wang@mediatek.com>:
  Lorenzo Bianconi <lorenzo@kernel.org>:
    Author 96326447d4 2025-08-13 00:00:00 35
    Tags 3abc0e55ea 2025-10-12 00:00:00 40
  Top reviewers:
    [26]: horms@kernel.org
    [5]: andrew@lunn.ch
    [4]: jacob.e.keller@intel.com
    [3]: shannon.nelson@amd.com
    [3]: michal.swiatkowski@linux.intel.com
  INACTIVE MAINTAINER Sean Wang <sean.wang@mediatek.com>

Subsystem MEDIATEK SWITCH DRIVER
  Changes 26 / 70 (37%)
  Last activity: 2025-12-01
  Chester A. Unal <chester.a.unal@arinc9.com>:
    Tags 585943b7ad 2025-12-01 00:00:00 7
  Daniel Golle <daniel@makrotopia.org>:
    Author 497041d763 2025-04-23 00:00:00 2
    Tags 3b87e60d21 2025-12-01 00:00:00 14
  DENG Qingfang <dqfext@gmail.com>:
  Sean Wang <sean.wang@mediatek.com>:
  Top reviewers:
    [4]: andrew@lunn.ch
    [4]: florian.fainelli@broadcom.com
    [4]: arinc.unal@arinc9.com
    [2]: olteanv@gmail.com
  INACTIVE MAINTAINER Sean Wang <sean.wang@mediatek.com>

Acked-by: Chester A. Unal <chester.a.unal@arinc9.com>
Link: https://patch.msgid.link/20260303215339.2333548-6-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:44 -08:00
Jakub Kicinski
e07f796a86 MAINTAINERS: remove Jerin Jacob from Marvell OcteonTX2
We have not seen tags from Jerin for OcteonTX2 in over 5 years.
Recent lore activity is in DPDK (non-kernel), not Linux.
Sunil, Linu, Geetha, hariprasad, and Subbaraya are active,
though the review coverage isn't great (38%).

Subsystem MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER
  Changes 53 / 138 (38%)
  Last activity: 2026-02-18
  Sunil Goutham <sgoutham@marvell.com>:
    Author fc1b2901e0 2024-03-08 00:00:00 1
    Tags 70f8986ece 2025-06-16 00:00:00 9
  Linu Cherian <lcherian@marvell.com>:
    Author a861e5809f 2025-10-30 00:00:00 7
    Tags a861e5809f 2025-10-30 00:00:00 7
  Geetha sowjanya <gakula@marvell.com>:
    Author 70e9a5760a 2026-01-29 00:00:00 16
    Tags 70e9a5760a 2026-01-29 00:00:00 20
  Jerin Jacob <jerinj@marvell.com>:
  hariprasad <hkelam@marvell.com>:
    Author 45be47bf5d 2026-02-18 00:00:00 22
    Tags 45be47bf5d 2026-02-18 00:00:00 25
  Subbaraya Sundeep <sbhatta@marvell.com>:
    Author 47a1208776 2025-10-30 00:00:00 20
    Tags 47a1208776 2025-10-30 00:00:00 30
  Top reviewers:
    [36]: horms@kernel.org
    [4]: jacob.e.keller@intel.com
    [4]: kalesh-anakkur.purayil@broadcom.com
    [3]: vadim.fedorenko@linux.dev
    [2]: shaojijie@huawei.com
    [2]: jiri@nvidia.com
  INACTIVE MAINTAINER Jerin Jacob <jerinj@marvell.com>

Link: https://patch.msgid.link/20260303215339.2333548-4-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:36 -08:00
Jakub Kicinski
80f8a19fe7 MAINTAINERS: remove Manish Chopra from QLogic QL4xxx (now orphan)
We have not seen tags from Manish for the QL4xxx driver in over 5 years,
and there is no mailing list activity since Oct 2023. There has been
no maintainer activity in this subsystem at all.

Since there is no other maintainer for this driver it becomes an Orphan.

Subsystem QLOGIC QL4xxx ETHERNET DRIVER
  Changes 40 / 74 (54%)
  (No activity)
  Top reviewers:
    [30]: horms@kernel.org
    [2]: jiri@nvidia.com
    [2]: shannon.nelson@amd.com
    [1]: saeedm@nvidia.com
    [1]: aleksandr.loktionov@intel.com
    [1]: kory.maincent@bootlin.com
  INACTIVE MAINTAINER Manish Chopra <manishc@marvell.com>

Link: https://patch.msgid.link/20260303215339.2333548-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:36 -08:00
Jakub Kicinski
f9b5bf12eb MAINTAINERS: remove Johan Hedberg from Bluetooth subsystem
We have not seen emails or tags from Johan in over 5 years,
and there is no recent mailing list activity.
Marcel Holtmann hasn't provided any tags in the Bluetooth
subsystem in over 5 years, but he is active on the Bluetooth
mailing list, providing informal review.
Luiz Augusto von Dentz is very active, handling essentially
all commits and reviews (12% coverage, but Luiz is the sole
active committer).

Subsystem BLUETOOTH SUBSYSTEM
  Changes 50 / 411 (12%)
  Last activity: 2026-02-23
  Marcel Holtmann <marcel@holtmann.org>:
  Johan Hedberg <johan.hedberg@gmail.com>:
  Luiz Augusto von Dentz <luiz.dentz@gmail.com>:
    Author 138d7eca44 2026-02-23 00:00:00 164
    Committer 138d7eca44 2026-02-23 00:00:00 361
    Tags 138d7eca44 2026-02-23 00:00:00 362
  Top reviewers:
    [15]: pmenzel@molgen.mpg.de
    [8]: keescook@chromium.org
    [5]: willemb@google.com
    [4]: horms@kernel.org
    [3]: kuniyu@amazon.com
    [3]: luiz.von.dentz@intel.com
  INACTIVE MAINTAINER Johan Hedberg <johan.hedberg@gmail.com>

Acked-by: Marcel Holtmann <marcel@holtmann.org>
Link: https://patch.msgid.link/20260303215339.2333548-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:35:35 -08:00
Sun Jian
c952291593 selftests: net: tun: don't abort XFAIL cases
The tun UDP tunnel GSO fixture contains XFAIL-marked variants intended to
exercise failure paths (e.g. EMSGSIZE / "Message too long").
Using ASSERT_EQ() in these tests aborts the subtest, which prevents the
harness from classifying them as XFAIL and can make the overall net: tun
test fail.

Switch the relevant ASSERT_EQ() checks to EXPECT_EQ() so the subtests
continue running and the failures are correctly reported and accounted
as XFAIL where applicable.

Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>
Link: https://patch.msgid.link/20260225111451.347923-2-sun.jian.kdev@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:34:55 -08:00
Sun Jian
6be2681514 selftests/harness: order TEST_F and XFAIL_ADD constructors
TEST_F() allocates and registers its struct __test_metadata via mmap()
inside its constructor, and only then assigns the
_##fixture_##test##_object pointer.

XFAIL_ADD() runs in a constructor too and reads
_##fixture_##test##_object to initialize xfail->test. If XFAIL_ADD runs
first, xfail->test can be NULL and the expected failure will be reported
as FAIL.

Use constructor priorities to ensure TEST_F registration runs before
XFAIL_ADD, without adding extra state or runtime lookups.

Fixes: 2709473c93 ("selftests: kselftest_harness: support using xfail")
Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>
Link: https://patch.msgid.link/20260225111451.347923-1-sun.jian.kdev@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:34:54 -08:00
Jakub Kicinski
37380976cf Merge tag 'nf-26-03-05' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Florian Westphal says:

====================
netfilter: updates for net

1) Inseo An reported a bug with the set element handling in nf_tables:
   When set cannot accept more elements, we unlink and immediately free
   an element that was inserted into a public data structure, freeing it
   without waiting for RCU grace period.  Fix this by doing the
   increment earlier and by deferring possible unlink-and-free to the
   existing abort path, which performs the needed synchronize_rcu before
   free.  From Pablo Neira Ayuso. This is an ancient bug, dating back to
   kernel 4.10.

2) syzbot reported WARN_ON() splat in nf_tables that occurs on memory
   allocation failure.  Fix this by a new iterator annotation:
   The affected walker does not need to clone the data structure and
   can just use the live version if no clone exists yet.
   Also from Pablo.  This bug existed since 6.10 days.

3) Ancient forever bug in nft_pipapo data structure:
   The garbage collection logic to remove expired elements is broken.
   We must unlink from data structure and can only hand the freeing
   to call_rcu after the clone/live pointers of the data structures
   have been swapped.  Else, readers can observe the free'd element.
   Reported by Yiming Qian.

* tag 'nf-26-03-05' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nft_set_pipapo: split gc into unlink and reclaim phase
  netfilter: nf_tables: clone set on flush only
  netfilter: nf_tables: unconditionally bump set->nelems before insertion
====================

Link: https://patch.msgid.link/20260305122635.23525-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-05 07:33:26 -08:00
Marc Zyngier
8531d5a83d KVM: arm64: pkvm: Fallback to level-3 mapping on host stage-2 fault
If, for any odd reason, we cannot converge to mapping size that is
completely contained in a memblock region, we fail to install a S2
mapping and go back to the faulting instruction. Rince, repeat.

This happens when faulting in regions that are smaller than a page
or that do not have PAGE_SIZE-aligned boundaries (as witnessed on
an O6 board that refuses to boot in protected mode).

In this situation, fallback to using a PAGE_SIZE mapping anyway --
it isn't like we can go any lower.

Fixes: e728e70580 ("KVM: arm64: Adjust range correctly during host stage-2 faults")
Link: https://lore.kernel.org/r/86wlzr77cn.wl-maz@kernel.org
Cc: stable@vger.kernel.org
Cc: Quentin Perret <qperret@google.com>
Reviewed-by: Quentin Perret <qperret@google.com>
Link: https://patch.msgid.link/20260305132751.2928138-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-03-05 15:18:47 +00:00
Marc Zyngier
ac6769c8f9 KVM: arm64: Eagerly init vgic dist/redist on vgic creation
If vgic_allocate_private_irqs_locked() fails for any odd reason,
we exit kvm_vgic_create() early, leaving dist->rd_regions uninitialised.

kvm_vgic_dist_destroy() then comes along and walks into the weeds
trying to free the RDs. Got to love this stuff.

Solve it by moving all the static initialisation early, and make
sure that if we fail halfway, we're in a reasonable shape to
perform the rest of the teardown. While at it, reset the vgic model
on failure, just in case...

Reported-by: syzbot+f6a46b038fc243ac0175@syzkaller.appspotmail.com
Tested-by: syzbot+f6a46b038fc243ac0175@syzkaller.appspotmail.com
Fixes: b3aa9283c0 ("KVM: arm64: vgic: Hoist SGI/PPI alloc from vgic_init() to kvm_create_vgic()")
Link: https://lore.kernel.org/r/69a2d58c.050a0220.3a55be.003b.GAE@google.com
Link: https://patch.msgid.link/20260228164559.936268-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
2026-03-05 15:18:38 +00:00
Jerome Marchand
f26b098d93 ftrace: Add MAINTAINERS entries for all ftrace headers
There is currently no entry for ftrace_irq.h and ftrace_regs.h. Add a
generic entry for all *ftrace* headers to include them and prevent
overlooking future ftrace headers.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://patch.msgid.link/20260305093117.853700-1-jmarchan@redhat.com
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-05 10:17:31 -05:00
Lorenzo Bianconi
0abc73c8a4 net: ethernet: mtk_eth_soc: Reset prog ptr to old_prog in case of error in mtk_xdp_setup()
Reset eBPF program pointer to old_prog and do not decrease its ref-count
if mtk_open routine in mtk_xdp_setup() fails.

Fixes: 7c26c20da5 ("net: ethernet: mtk_eth_soc: add basic XDP support")
Suggested-by: Paolo Valerio <pvalerio@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260303-mtk-xdp-prog-ptr-fix-v2-1-97b6dbbe240f@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-05 15:39:51 +01:00
Maarten Lankhorst
ee8ade4d96 Revert "drm/syncobj: Fix handle <-> fd ioctls with dirty stack"
This reverts commit 2e3649e237.

The problem occurs when userspace is compiled against new headers
with new members, but don't correctly initialise those new members.

This is not a kernel problem, and should be fixed in userspace by
correctly zero'ing all members.

Cc: Rob Clark <robdclark@chromium.org>
Cc: Julian Orth <ju.orth@gmail.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Michel Dänzer <michel.daenzer@mailbox.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Julian Orth <ju.orth@gmail.com>
Link: https://patch.msgid.link/20260305113734.1309238-1-dev@lankhorst.se
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
2026-03-05 15:16:48 +01:00
Johan Hovold
3d543d9515 ALSA: us122l: drop redundant interface references
Driver core holds a reference to the USB interface and its parent USB
device while the interface is bound to a driver and there is no need to
take additional references unless the structures are needed after
disconnect.

Similarly, USB core holds a reference to all interfaces in the active
configuration so there is no need for a driver to take a reference to a
sibling interface only to release it at disconnect either.

Drop the redundant references to reduce cargo culting, make it easier to
spot drivers where extra references are needed, and reduce the risk of
memory leaks when drivers fail to release them.

Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://patch.msgid.link/20260305111810.18688-1-johan@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-05 14:00:55 +01:00
Florian Westphal
9df95785d3 netfilter: nft_set_pipapo: split gc into unlink and reclaim phase
Yiming Qian reports Use-after-free in the pipapo set type:
  Under a large number of expired elements, commit-time GC can run for a very
  long time in a non-preemptible context, triggering soft lockup warnings and
  RCU stall reports (local denial of service).

We must split GC in an unlink and a reclaim phase.

We cannot queue elements for freeing until pointers have been swapped.
Expired elements are still exposed to both the packet path and userspace
dumpers via the live copy of the data structure.

call_rcu() does not protect us: dump operations or element lookups starting
after call_rcu has fired can still observe the free'd element, unless the
commit phase has made enough progress to swap the clone and live pointers
before any new reader has picked up the old version.

This a similar approach as done recently for the rbtree backend in commit
35f83a7552 ("netfilter: nft_set_rbtree: don't gc elements on insert").

Fixes: 3c4287f620 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-05 13:22:37 +01:00
Pablo Neira Ayuso
fb7fb40163 netfilter: nf_tables: clone set on flush only
Syzbot with fault injection triggered a failing memory allocation with
GFP_KERNEL which results in a WARN splat:

iter.err
WARNING: net/netfilter/nf_tables_api.c:845 at nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845, CPU#0: syz.0.17/5992
Modules linked in:
CPU: 0 UID: 0 PID: 5992 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2026
RIP: 0010:nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845
Code: 8b 05 86 5a 4e 09 48 3b 84 24 a0 00 00 00 75 62 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc cc e8 63 6d fa f7 90 <0f> 0b 90 43
+80 7c 35 00 00 0f 85 23 fe ff ff e9 26 fe ff ff 89 d9
RSP: 0018:ffffc900045af780 EFLAGS: 00010293
RAX: ffffffff89ca45bd RBX: 00000000fffffff4 RCX: ffff888028111e40
RDX: 0000000000000000 RSI: 00000000fffffff4 RDI: 0000000000000000
RBP: ffffc900045af870 R08: 0000000000400dc0 R09: 00000000ffffffff
R10: dffffc0000000000 R11: fffffbfff1d141db R12: ffffc900045af7e0
R13: 1ffff920008b5f24 R14: dffffc0000000000 R15: ffffc900045af920
FS:  000055557a6a5500(0000) GS:ffff888125496000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fb5ea271fc0 CR3: 000000003269e000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 __nft_release_table+0xceb/0x11f0 net/netfilter/nf_tables_api.c:12115
 nft_rcv_nl_event+0xc25/0xdb0 net/netfilter/nf_tables_api.c:12187
 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85
 blocking_notifier_call_chain+0x6a/0x90 kernel/notifier.c:380
 netlink_release+0x123b/0x1ad0 net/netlink/af_netlink.c:761
 __sock_release net/socket.c:662 [inline]
 sock_close+0xc3/0x240 net/socket.c:1455

Restrict set clone to the flush set command in the preparation phase.
Add NFT_ITER_UPDATE_CLONE and use it for this purpose, update the rbtree
and pipapo backends to only clone the set when this iteration type is
used.

As for the existing NFT_ITER_UPDATE type, update the pipapo backend to
use the existing set clone if available, otherwise use the existing set
representation. After this update, there is no need to clone a set that
is being deleted, this includes bound anonymous set.

An alternative approach to NFT_ITER_UPDATE_CLONE is to add a .clone
interface and call it from the flush set path.

Reported-by: syzbot+4924a0edc148e8b4b342@syzkaller.appspotmail.com
Fixes: 3f1d886cc7 ("netfilter: nft_set_pipapo: move cloning of match info to insert/removal path")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-05 13:22:37 +01:00
Pablo Neira Ayuso
def602e498 netfilter: nf_tables: unconditionally bump set->nelems before insertion
In case that the set is full, a new element gets published then removed
without waiting for the RCU grace period, while RCU reader can be
walking over it already.

To address this issue, add the element transaction even if set is full,
but toggle the set_full flag to report -ENFILE so the abort path safely
unwinds the set to its previous state.

As for element updates, decrement set->nelems to restore it.

A simpler fix is to call synchronize_rcu() in the error path.
However, with a large batch adding elements to already maxed-out set,
this could cause noticeable slowdown of such batches.

Fixes: 35d0ac9070 ("netfilter: nf_tables: fix set->nelems counting with no NLM_F_EXCL")
Reported-by: Inseo An <y0un9sa@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
2026-03-05 13:22:37 +01:00
Sebastian Andrzej Siewior
b824c3e16c net: Provide a PREEMPT_RT specific check for netdev_queue::_xmit_lock
After acquiring netdev_queue::_xmit_lock the number of the CPU owning
the lock is recorded in netdev_queue::xmit_lock_owner. This works as
long as the BH context is not preemptible.

On PREEMPT_RT the softirq context is preemptible and without the
softirq-lock it is possible to have multiple user in __dev_queue_xmit()
submitting a skb on the same CPU. This is fine in general but this means
also that the current CPU is recorded as netdev_queue::xmit_lock_owner.
This in turn leads to the recursion alert and the skb is dropped.

Instead checking the for CPU number, that owns the lock, PREEMPT_RT can
check if the lockowner matches the current task.

Add netif_tx_owned() which returns true if the current context owns the
lock by comparing the provided CPU number with the recorded number. This
resembles the current check by negating the condition (the current check
returns true if the lock is not owned).
On PREEMPT_RT use rt_mutex_owner() to return the lock owner and compare
the current task against it.
Use the new helper in __dev_queue_xmit() and netif_local_xmit_active()
which provides a similar check.
Update comments regarding pairing READ_ONCE().

Reported-by: Bert Karwatzki <spasswolf@web.de>
Closes: https://lore.kernel.org/all/20260216134333.412332-1-spasswolf@web.de
Fixes: 3253cb49cb ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reported-by: Bert Karwatzki <spasswolf@web.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20260302162631.uGUyIqDT@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-05 12:14:21 +01:00
Ming Lei
ce8ee8583e block: use trylock to avoid lockdep circular dependency in sysfs
Use trylock instead of blocking lock acquisition for update_nr_hwq_lock
in queue_requests_store() and elv_iosched_store() to avoid circular lock
dependency with kernfs active reference during concurrent disk deletion:

  update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
  kn->active -> update_nr_hwq_lock (via sysfs write path)

Return -EBUSY when the lock is not immediately available.

Reported-and-tested-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/linux-block/CAHj4cs-em-4acsHabMdT=jJhXkCzjnprD-aQH1OgrZo4nTnmMw@mail.gmail.com/
Fixes: 626ff4f8eb ("blk-mq: convert to serialize updating nr_requests with update_nr_hwq_lock")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-05 04:01:42 -07:00
Kamal Heib
c242e92c9d RDMA/bng_re: Fix silent failure in HWRM version query
If the firmware version query fails, the driver currently ignores the
error and continues initializing. This leaves the device in a bad state.

Fix this by making bng_re_query_hwrm_version() return the error code and
update the driver to check for this error and stop the setup process
safely if it happens.

Fixes: 745065770c ("RDMA/bng_re: Register and get the resources from bnge driver")
Signed-off-by: Kamal Heib <kheib@redhat.com>
Link: https://patch.msgid.link/20260303043645.425724-1-kheib@redhat.com
Reviewed-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-05 04:34:31 -05:00
hongao
281cb17787 xfs: Remove redundant NULL check after __GFP_NOFAIL
kzalloc() is called with __GFP_NOFAIL, so a NULL return is not expected.
Drop the redundant !map check in xfs_dabuf_map().
Also switch the nirecs-sized allocation to kcalloc().

Signed-off-by: hongao <hongao@uniontech.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-05 10:02:45 +01:00
Thadeu Lima de Souza Cascardo
91d7e9df42 drm/ttm: Fix bo resource use-after-free
When allocating a lot of buffers and putting the TTM under memory pressure,
during swapout, it might crash the system with the stack trace below.

It turns out that ttm_bo_swapout_cb might replace bo->resource when it
moves it to system cached.

When commit c06da4b357 ("drm/ttm: Tidy usage of local variables a little
bit") used a local variable for bo->resource, it used the freed resource
later in the function, leading to a UAF.

Move back to using bo->resource in all cases in that function instead of a
local variable.

[  604.814275] BUG: kernel NULL pointer dereference, address: 0000000000000000
[  604.814284] #PF: supervisor read access in kernel mode
[  604.814288] #PF: error_code(0x0000) - not-present page
[  604.814291] PGD 0 P4D 0
[  604.814296] Oops: Oops: 0000 [#1] SMP NOPTI
[  604.814303] CPU: 2 UID: 0 PID: 4408 Comm: vulkan Tainted: G        W           7.0.0-rc2-00001-gc50a051e6aca #21 PREEMPT(full)  aef6eb0c02036a7c8a5e62e0c84a30c2be90688d
[  604.814309] Tainted: [W]=WARN
[  604.814311] Hardware name: Valve Jupiter/Jupiter, BIOS F7A0133 08/05/2024
[  604.814314] RIP: 0010:ttm_resource_move_to_lru_tail+0x100/0x160 [ttm]
[  604.814329] Code: 5b 5d e9 83 b4 1b cb 48 63 d2 48 c1 e0 04 48 8b 4e 40 48 8d 7e 40 48 8b ac d3 d8 00 00 00 48 89 c3 48 8d 54 05 68 48 8b 46 48 <48> 3b 38 0f 85 b3 3b 00 00 48 3b 79 08 0f 85 a9 3b 00 00 48 89 41
[  604.814332] RSP: 0018:ffffcfe54e3d7578 EFLAGS: 00010256
[  604.814336] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8cf09eced300
[  604.814339] RDX: 0000000000000068 RSI: ffff8cf1d4c1fc00 RDI: ffff8cf1d4c1fc40
[  604.814341] RBP: 0000000000000000 R08: ffff8cf09eced300 R09: 0000000000000000
[  604.814344] R10: 0000000000000000 R11: 0000000000000016 R12: ffff8cf1d4c1fc00
[  604.814346] R13: 0000000000000400 R14: ffff8cf096289c00 R15: ffff8cf084c8f688
[  604.814349] FS:  00007f00531b7780(0000) GS:ffff8cf4217a0000(0000) knlGS:0000000000000000
[  604.814352] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  604.814355] CR2: 0000000000000000 CR3: 000000018e3df000 CR4: 0000000000350ef0
[  604.814358] Call Trace:
[  604.814362]  <TASK>
[  604.814368]  ttm_bo_swapout_cb+0x24c/0x280 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814380]  ttm_lru_walk_for_evict+0xac/0x1d0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814394]  ttm_bo_swapout+0x5b/0x80 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814405]  ttm_global_swapout+0x63/0x100 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814415]  ttm_tt_populate+0x82/0x130 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814424]  ttm_bo_populate+0x37/0xa0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814433]  ttm_bo_handle_move_mem+0x157/0x170 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814443]  ttm_bo_validate+0xd9/0x180 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814453]  ttm_bo_init_reserved+0xa0/0x1b0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814461]  ? srso_return_thunk+0x5/0x5f
[  604.814469]  amdgpu_bo_create+0x1f5/0x500 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.814855]  ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.815182]  amdgpu_bo_create_user+0x3d/0x70 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.815504]  amdgpu_gem_create_ioctl+0x16c/0x3b0 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.815830]  ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.816155]  ? __pfx_amdgpu_gem_create_ioctl+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.816478]  drm_ioctl_kernel+0xae/0x100
[  604.816486]  drm_ioctl+0x283/0x510
[  604.816491]  ? __pfx_amdgpu_gem_create_ioctl+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.816819]  amdgpu_drm_ioctl+0x4a/0x80 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.817135]  __x64_sys_ioctl+0x96/0xe0
[  604.817142]  do_syscall_64+0x11b/0x7e0
[  604.817148]  ? srso_return_thunk+0x5/0x5f
[  604.817152]  ? srso_return_thunk+0x5/0x5f
[  604.817156]  ? walk_system_ram_range+0xb0/0x110
[  604.817161]  ? srso_return_thunk+0x5/0x5f
[  604.817165]  ? __pte_offset_map+0x1b/0xb0
[  604.817170]  ? srso_return_thunk+0x5/0x5f
[  604.817174]  ? pte_offset_map_lock+0x87/0xf0
[  604.817179]  ? srso_return_thunk+0x5/0x5f
[  604.817183]  ? insert_pfn+0x9f/0x1f0
[  604.817188]  ? srso_return_thunk+0x5/0x5f
[  604.817192]  ? vmf_insert_pfn_prot+0x97/0x190
[  604.817197]  ? srso_return_thunk+0x5/0x5f
[  604.817201]  ? ttm_bo_vm_fault_reserved+0x1a6/0x3f0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.817213]  ? srso_return_thunk+0x5/0x5f
[  604.817217]  ? amdgpu_gem_fault+0xe2/0x100 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.817542]  ? srso_return_thunk+0x5/0x5f
[  604.817546]  ? __do_fault+0x33/0x180
[  604.817550]  ? srso_return_thunk+0x5/0x5f
[  604.817554]  ? do_fault+0x178/0x610
[  604.817559]  ? srso_return_thunk+0x5/0x5f
[  604.817562]  ? __handle_mm_fault+0x9be/0x1120
[  604.817567]  ? srso_return_thunk+0x5/0x5f
[  604.817574]  ? srso_return_thunk+0x5/0x5f
[  604.817578]  ? count_memcg_events+0xc4/0x160
[  604.817583]  ? srso_return_thunk+0x5/0x5f
[  604.817587]  ? handle_mm_fault+0x1d7/0x2e0
[  604.817593]  ? srso_return_thunk+0x5/0x5f
[  604.817596]  ? do_user_addr_fault+0x173/0x660
[  604.817602]  ? srso_return_thunk+0x5/0x5f
[  604.817607]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  604.817612] RIP: 0033:0x7f00532cef4d
[  604.817617] Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00
[  604.817620] RSP: 002b:00007ffd69ab0650 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  604.817624] RAX: ffffffffffffffda RBX: 00007ffd69ab07d0 RCX: 00007f00532cef4d
[  604.817627] RDX: 00007ffd69ab0700 RSI: 00000000c0206440 RDI: 0000000000000005
[  604.817629] RBP: 00007ffd69ab06a0 R08: 00007f00533a0ac0 R09: 0000000000000000
[  604.817632] R10: 00007ffd69ab07c0 R11: 0000000000000246 R12: 00007ffd69ab0700
[  604.817634] R13: 00000000c0206440 R14: 0000000000000005 R15: 0000000000000243
[  604.817642]  </TASK>

Cc: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Fixes: c06da4b357 ("drm/ttm: Tidy usage of local variables a little bit")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Link: https://lore.kernel.org/r/20260304-ttm_bo_res_uaf-v1-1-43f20125b67f@igalia.com
2026-03-05 08:12:36 +00:00
Jakub Kicinski
ae779bcb18 Merge branch 'net-stmmac-fix-vlan-handling-when-interface-is-down'
Ovidiu Panait says:

====================
net: stmmac: Fix VLAN handling when interface is down

VLAN register accesses on the MAC side require the PHY RX clock to be
active. When the network interface is down, the PHY is suspended and
the RX clock is unavailable, causing VLAN operations to fail with
timeouts.

The VLAN core automatically removes VID 0 after the interface goes down
and re-adds it when it comes back up, so these timeouts happen during
normal interface down/up:

    # ip link set end1 down
    renesas-gbeth 15c40000.ethernet end1: Timeout accessing MAC_VLAN_Tag_Filter
    renesas-gbeth 15c40000.ethernet end1: failed to kill vid 0081/0

Adding VLANs while the interface is down also fails:

    # ip link add link end1 name end1.10 type vlan id 10
    renesas-gbeth 15c40000.ethernet end1: Timeout accessing MAC_VLAN_Tag_Filter
    RTNETLINK answers: Device or resource busy

Patch 4 fixes this by adding checks in the VLAN paths for netif_running(),
and skipping register accesses if the interface is down. Only the software
state is updated in this case. When the interface is brought up, the VLAN
state is restored to hardware.

Patches 1-3 fix some issues in the existing VLAN implementation.
====================

Link: https://patch.msgid.link/20260303145828.7845-1-ovidiu.panait.rb@renesas.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:48:51 -08:00
Ovidiu Panait
2cd70e3968 net: stmmac: Defer VLAN HW configuration when interface is down
VLAN register accesses on the MAC side require the PHY RX clock to be
active. When the network interface is down, the PHY is suspended and
the RX clock is unavailable, causing VLAN operations to fail with
timeouts.

The VLAN core automatically removes VID 0 after the interface goes down
and re-adds it when it comes back up, so these timeouts happen during
normal interface down/up:

    # ip link set end1 down
    renesas-gbeth 15c40000.ethernet end1: Timeout accessing MAC_VLAN_Tag_Filter
    renesas-gbeth 15c40000.ethernet end1: failed to kill vid 0081/0

Adding VLANs while the interface is down also fails:

    # ip link add link end1 name end1.10 type vlan id 10
    renesas-gbeth 15c40000.ethernet end1: Timeout accessing MAC_VLAN_Tag_Filter
    RTNETLINK answers: Device or resource busy

To fix this, check if the interface is up before accessing VLAN registers.
The software state is always kept up to date regardless of interface state.

When the interface is brought up, stmmac_vlan_restore() is called
to write the VLAN state to hardware.

Fixes: ed64639bc1 ("net: stmmac: Add support for VLAN Rx filtering")
Signed-off-by: Ovidiu Panait <ovidiu.panait.rb@renesas.com>
Link: https://patch.msgid.link/20260303145828.7845-5-ovidiu.panait.rb@renesas.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:48:49 -08:00
Ovidiu Panait
bd7ad51253 net: stmmac: Fix VLAN HW state restore
When the network interface is opened or resumed, a DMA reset is performed,
which resets all hardware state, including VLAN state. Currently, only
the resume path is restoring the VLAN state via
stmmac_restore_hw_vlan_rx_fltr(), but that is incomplete: the VLAN hash
table and the VLAN_TAG control bits are not restored.

Therefore, add stmmac_vlan_restore(), which restores the full VLAN
state by updating both the HW filter entries and the hash table, and
call it from both the open and resume paths.

The VLAN restore is moved outside of phylink_rx_clk_stop_block/unblock
in the resume path because receive clock stop is already disabled when
stmmac supports VLAN.

Also, remove the hash readback code in vlan_restore_hw_rx_fltr() that
attempts to restore VTHM by reading VLAN_HASH_TABLE, as it always reads
zero after DMA reset, making it dead code.

Fixes: 3cd1cfcba2 ("net: stmmac: Implement VLAN Hash Filtering in XGMAC")
Fixes: ed64639bc1 ("net: stmmac: Add support for VLAN Rx filtering")
Signed-off-by: Ovidiu Panait <ovidiu.panait.rb@renesas.com>
Link: https://patch.msgid.link/20260303145828.7845-4-ovidiu.panait.rb@renesas.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:48:49 -08:00
Ovidiu Panait
e38200e361 net: stmmac: Improve double VLAN handling
The double VLAN bits (EDVLP, ESVL, DOVLTC) are handled inconsistently
between the two vlan_update_hash() implementations:

- dwxgmac2_update_vlan_hash() explicitly clears the double VLAN bits when
is_double is false, meaning that adding a 802.1Q VLAN will disable
double VLAN mode:

  $ ip link add link eth0 name eth0.200 type vlan id 200 protocol 802.1ad
  $ ip link add link eth0 name eth0.100 type vlan id 100
  # Double VLAN bits no longer set

- vlan_update_hash() sets these bits and only clears them when the last
VLAN has been removed, so double VLAN mode remains enabled even after all
802.1AD VLANs are removed.

Address both issues by tracking the number of active 802.1AD VLANs in
priv->num_double_vlans. Pass this count to stmmac_vlan_update() so both
implementations correctly set the double VLAN bits when any 802.1AD
VLAN is active, and clear them only when none remain.

Also update vlan_update_hash() to explicitly clear the double VLAN bits
when is_double is false, matching the dwxgmac2 behavior.

Signed-off-by: Ovidiu Panait <ovidiu.panait.rb@renesas.com>
Link: https://patch.msgid.link/20260303145828.7845-3-ovidiu.panait.rb@renesas.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:48:49 -08:00
Ovidiu Panait
35dfedce44 net: stmmac: Fix error handling in VLAN add and delete paths
stmmac_vlan_rx_add_vid() updates active_vlans and the VLAN hash
register before writing the HW filter entry. If the filter write
fails, it leaves a stale VID in active_vlans and the hash register.

stmmac_vlan_rx_kill_vid() has the reverse problem: it clears
active_vlans before removing the HW filter. On failure, the VID is
gone from active_vlans but still present in the HW filter table.

To fix this, reorder the operations to update the hash table first,
then attempt the HW filter operation. If the HW filter fails, roll
back both the active_vlans bitmap and the hash table by calling
stmmac_vlan_update() again.

Fixes: ed64639bc1 ("net: stmmac: Add support for VLAN Rx filtering")
Signed-off-by: Ovidiu Panait <ovidiu.panait.rb@renesas.com>
Link: https://patch.msgid.link/20260303145828.7845-2-ovidiu.panait.rb@renesas.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:48:48 -08:00
Jakub Kicinski
550921c67b Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2026-03-03 (ice, libie, iavf, igb, igc)

Larysa removes VF restriction for LLDP filters on ice to allow for LLDP
traffic to reach the correct destination.

Jakub adds retry mechanism for AdminQ Read/Write SFF EEPROM call to
follow hardware specification on ice.

Zilin Guan adds cleanup path to free XDP rings on failure in
ice_set_ringparam().

Michal bypasses firmware logging unroll in libie when it isn't supported.

Kohei Enju fixes iavf to take into account hardware MTU support when
setting max MTU values.

Vivek Behera fixes issues on igb and igc using incorrect IRQs when Tx/Rx
queues do not share the same IRQ.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  igc: Fix trigger of incorrect irq in igc_xsk_wakeup function
  igb: Fix trigger of incorrect irq in igb_xsk_wakeup
  iavf: fix netdev->max_mtu to respect actual hardware limit
  libie: don't unroll if fwlog isn't supported
  ice: Fix memory leak in ice_set_ringparam()
  ice: fix retry for AQ command 0x06EE
  ice: reintroduce retry mechanism for indirect AQ
  ice: fix adding AQ LLDP filter for VF
====================

Link: https://patch.msgid.link/20260303231155.2895065-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:35:35 -08:00
Jakub Kicinski
ff2c591625 Merge branch 'mptcp-misc-fixes-for-v7-0-rc2'
Matthieu Baerts says:

====================
mptcp: misc fixes for v7.0-rc2

Here are various unrelated fixes:

- Patch 1: avoid bufferbloat in simult_flows selftest which can cause
  instabilities. A fix for v5.10.

- Patches 2-3: reduce RM_ADDR lost by not sending it over the same
  subflow as the one being removed, if possible. A fix for v5.13.

- Patches 4-5: avoid a WARN when using signal + subflow endpoints with a
  subflow limit of 0, and removing such endpoints during an active
  connection. A fix for v5.17.
====================

Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-0-4b5462b6f016@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:21:16 -08:00
Matthieu Baerts (NGI0)
1777f349ff selftests: mptcp: join: check removing signal+subflow endp
This validates the previous commit: endpoints with both the signal and
subflow flags should always be marked as used even if it was not
possible to create new subflows due to the MPTCP PM limits.

For this test, an extra endpoint is created with both the signal and the
subflow flags, and limits are set not to create extra subflows. In this
case, an ADD_ADDR is sent, but no subflows are created. Still, the local
endpoint is marked as used, and no warning is fired when removing the
endpoint, after having sent a RM_ADDR.

The 'Fixes' tag here below is the same as the one from the previous
commit: this patch here is not fixing anything wrong in the selftests,
but it validates the previous fix for an issue introduced by this commit
ID.

Fixes: 85df533a78 ("mptcp: pm: do not ignore 'subflow' if 'signal' flag is also set")
Cc: stable@vger.kernel.org
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-5-4b5462b6f016@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:21:13 -08:00
Matthieu Baerts (NGI0)
579a752464 mptcp: pm: in-kernel: always mark signal+subflow endp as used
Syzkaller managed to find a combination of actions that was generating
this warning:

  msk->pm.local_addr_used == 0
  WARNING: net/mptcp/pm_kernel.c:1071 at __mark_subflow_endp_available net/mptcp/pm_kernel.c:1071 [inline], CPU#1: syz.2.17/961
  WARNING: net/mptcp/pm_kernel.c:1071 at mptcp_nl_remove_subflow_and_signal_addr net/mptcp/pm_kernel.c:1103 [inline], CPU#1: syz.2.17/961
  WARNING: net/mptcp/pm_kernel.c:1071 at mptcp_pm_nl_del_addr_doit+0x81d/0x8f0 net/mptcp/pm_kernel.c:1210, CPU#1: syz.2.17/961
  Modules linked in:
  CPU: 1 UID: 0 PID: 961 Comm: syz.2.17 Not tainted 6.19.0-08368-gfafda3b4b06b #22 PREEMPT(full)
  Hardware name: QEMU Ubuntu 25.10 PC v2 (i440FX + PIIX, + 10.1 machine, 1996), BIOS 1.17.0-debian-1.17.0-1build1 04/01/2014
  RIP: 0010:__mark_subflow_endp_available net/mptcp/pm_kernel.c:1071 [inline]
  RIP: 0010:mptcp_nl_remove_subflow_and_signal_addr net/mptcp/pm_kernel.c:1103 [inline]
  RIP: 0010:mptcp_pm_nl_del_addr_doit+0x81d/0x8f0 net/mptcp/pm_kernel.c:1210
  Code: 89 c5 e8 46 30 6f fe e9 21 fd ff ff 49 83 ed 80 e8 38 30 6f fe 4c 89 ef be 03 00 00 00 e8 db 49 df fe eb ac e8 24 30 6f fe 90 <0f> 0b 90 e9 1d ff ff ff e8 16 30 6f fe eb 05 e8 0f 30 6f fe e8 9a
  RSP: 0018:ffffc90001663880 EFLAGS: 00010293
  RAX: ffffffff82de1a6c RBX: 0000000000000000 RCX: ffff88800722b500
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
  RBP: ffff8880158b22d0 R08: 0000000000010425 R09: ffffffffffffffff
  R10: ffffffff82de18ba R11: 0000000000000000 R12: ffff88800641a640
  R13: ffff8880158b1880 R14: ffff88801ec3c900 R15: ffff88800641a650
  FS:  00005555722c3500(0000) GS:ffff8880f909d000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f66346e0f60 CR3: 000000001607c000 CR4: 0000000000350ef0
  Call Trace:
   <TASK>
   genl_family_rcv_msg_doit+0x117/0x180 net/netlink/genetlink.c:1115
   genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline]
   genl_rcv_msg+0x3a8/0x3f0 net/netlink/genetlink.c:1210
   netlink_rcv_skb+0x16d/0x240 net/netlink/af_netlink.c:2550
   genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219
   netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline]
   netlink_unicast+0x3e9/0x4c0 net/netlink/af_netlink.c:1344
   netlink_sendmsg+0x4aa/0x5b0 net/netlink/af_netlink.c:1894
   sock_sendmsg_nosec net/socket.c:727 [inline]
   __sock_sendmsg+0xc9/0xf0 net/socket.c:742
   ____sys_sendmsg+0x272/0x3b0 net/socket.c:2592
   ___sys_sendmsg+0x2de/0x320 net/socket.c:2646
   __sys_sendmsg net/socket.c:2678 [inline]
   __do_sys_sendmsg net/socket.c:2683 [inline]
   __se_sys_sendmsg net/socket.c:2681 [inline]
   __x64_sys_sendmsg+0x110/0x1a0 net/socket.c:2681
   do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
   do_syscall_64+0x143/0x440 arch/x86/entry/syscall_64.c:94
   entry_SYSCALL_64_after_hwframe+0x77/0x7f
  RIP: 0033:0x7f66346f826d
  Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
  RSP: 002b:00007ffc83d8bdc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
  RAX: ffffffffffffffda RBX: 00007f6634985fa0 RCX: 00007f66346f826d
  RDX: 00000000040000b0 RSI: 0000200000000740 RDI: 0000000000000007
  RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000246 R12: 00007f6634985fa8
  R13: 00007f6634985fac R14: 0000000000000000 R15: 0000000000001770
   </TASK>

The actions that caused that seem to be:

 - Set the MPTCP subflows limit to 0
 - Create an MPTCP endpoint with both the 'signal' and 'subflow' flags
 - Create a new MPTCP connection from a different address: an ADD_ADDR
   linked to the MPTCP endpoint will be sent ('signal' flag), but no
   subflows is initiated ('subflow' flag)
 - Remove the MPTCP endpoint

In this case, msk->pm.local_addr_used has been kept to 0 -- because no
subflows have been created -- but the corresponding bit in
msk->pm.id_avail_bitmap has been cleared when the ADD_ADDR has been
sent. This later causes a splat when removing the MPTCP endpoint because
msk->pm.local_addr_used has been kept to 0.

Now, if an endpoint has both the signal and subflow flags, but it is not
possible to create subflows because of the limits or the c-flag case,
then the local endpoint counter is still incremented: the endpoint is
used at the end. This avoids issues later when removing the endpoint and
calling __mark_subflow_endp_available(), which expects
msk->pm.local_addr_used to have been previously incremented if the
endpoint was marked as used according to msk->pm.id_avail_bitmap.

Note that signal_and_subflow variable is reset to false when the limits
and the c-flag case allows subflows creation. Also, local_addr_used is
only incremented for non ID0 subflows.

Fixes: 85df533a78 ("mptcp: pm: do not ignore 'subflow' if 'signal' flag is also set")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/613
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-4-4b5462b6f016@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:21:13 -08:00
Matthieu Baerts (NGI0)
560edd99b5 selftests: mptcp: join: check RM_ADDR not sent over same subflow
This validates the previous commit: RM_ADDR were sent over the first
found active subflow which could be the same as the one being removed.
It is more likely to loose this notification.

For this check, RM_ADDR are explicitly dropped when trying to send them
over the initial subflow, when removing the endpoint attached to it. If
it is dropped, the test will complain because some RM_ADDR have not been
received.

Note that only the RM_ADDR are dropped, to allow the linked subflow to
be quickly and cleanly closed. To only drop those RM_ADDR, a cBPF byte
code is used. If the IPTables commands fail, that's OK, the tests will
continue to pass, but not validate this part. This can be ignored:
another subtest fully depends on such command, and will be marked as
skipped.

The 'Fixes' tag here below is the same as the one from the previous
commit: this patch here is not fixing anything wrong in the selftests,
but it validates the previous fix for an issue introduced by this commit
ID.

Fixes: 8dd5efb1f9 ("mptcp: send ack for rm_addr")
Cc: stable@vger.kernel.org
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-3-4b5462b6f016@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:21:12 -08:00
Matthieu Baerts (NGI0)
fb8d0bccb2 mptcp: pm: avoid sending RM_ADDR over same subflow
RM_ADDR are sent over an active subflow, the first one in the subflows
list. There is then a high chance the initial subflow is picked. With
the in-kernel PM, when an endpoint is removed, a RM_ADDR is sent, then
linked subflows are closed. This is done for each active MPTCP
connection.

MPTCP endpoints are likely removed because the attached network is no
longer available or usable. In this case, it is better to avoid sending
this RM_ADDR over the subflow that is going to be removed, but prefer
sending it over another active and non stale subflow, if any.

This modification avoids situations where the other end is not notified
when a subflow is no longer usable: typically when the endpoint linked
to the initial subflow is removed, especially on the server side.

Fixes: 8dd5efb1f9 ("mptcp: send ack for rm_addr")
Cc: stable@vger.kernel.org
Reported-by: Frank Lorenz <lorenz-frank@web.de>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/612
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-2-4b5462b6f016@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:21:12 -08:00
Paolo Abeni
8c09412e58 selftests: mptcp: more stable simult_flows tests
By default, the netem qdisc can keep up to 1000 packets under its belly
to deal with the configured rate and delay. The simult flows test-case
simulates very low speed links, to avoid problems due to slow CPUs and
the TCP stack tend to transmit at a slightly higher rate than the
(virtual) link constraints.

All the above causes a relatively large amount of packets being enqueued
in the netem qdiscs - the longer the transfer, the longer the queue -
producing increasingly high TCP RTT samples and consequently increasingly
larger receive buffer size due to DRS.

When the receive buffer size becomes considerably larger than the needed
size, the tests results can flake, i.e. because minimal inaccuracy in the
pacing rate can lead to a single subflow usage towards the end of the
connection for a considerable amount of data.

Address the issue explicitly setting netem limits suitable for the
configured link speeds and unflake all the affected tests.

Fixes: 1a418cb8e8 ("mptcp: simult flow self-tests")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-1-4b5462b6f016@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:21:12 -08:00
Jakub Kicinski
f43ed0c545 Merge branch 'nfc-fix-leaks-and-races-surfaced-by-nipa'
Jakub Kicinski says:

====================
nfc: fix leaks and races surfaced by NIPA

I recently added the nci test to NIPA. Somewhat surprisingly it runs
without much settup but hits kmemleaks fairly often. Fix a handful of
issues to make the test pass in a stable way.
====================

Link: https://patch.msgid.link/20260303162346.2071888-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:18:58 -08:00
Jakub Kicinski
d793458c45 nfc: rawsock: cancel tx_work before socket teardown
In rawsock_release(), cancel any pending tx_work and purge the write
queue before orphaning the socket.  rawsock_tx_work runs on the system
workqueue and calls nfc_data_exchange which dereferences the NCI
device.  Without synchronization, tx_work can race with socket and
device teardown when a process is killed (e.g. by SIGKILL), leading
to use-after-free or leaked references.

Set SEND_SHUTDOWN first so that if tx_work is already running it will
see the flag and skip transmitting, then use cancel_work_sync to wait
for any in-progress execution to finish, and finally purge any
remaining queued skbs.

Fixes: 23b7869c0f ("NFC: add the NFC socket raw protocol")
Reviewed-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260303162346.2071888-6-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:18:57 -08:00
Jakub Kicinski
0efdc02f4f nfc: nci: clear NCI_DATA_EXCHANGE before calling completion callback
Move clear_bit(NCI_DATA_EXCHANGE) before invoking the data exchange
callback in nci_data_exchange_complete().

The callback (e.g. rawsock_data_exchange_complete) may immediately
schedule another data exchange via schedule_work(tx_work).  On a
multi-CPU system, tx_work can run and reach nci_transceive() before
the current nci_data_exchange_complete() clears the flag, causing
test_and_set_bit(NCI_DATA_EXCHANGE) to return -EBUSY and the new
transfer to fail.

This causes intermittent flakes in nci/nci_dev in NIPA:

  # #  RUN           NCI.NCI1_0.t4t_tag_read ...
  # # t4t_tag_read: Test terminated by timeout
  # #          FAIL  NCI.NCI1_0.t4t_tag_read
  # not ok 3 NCI.NCI1_0.t4t_tag_read

Fixes: 38f04c6b1b ("NFC: protect nci_data_exchange transactions")
Reviewed-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260303162346.2071888-5-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:18:57 -08:00
Jakub Kicinski
6608358194 nfc: nci: complete pending data exchange on device close
In nci_close_device(), complete any pending data exchange before
closing. The data exchange callback (e.g.
rawsock_data_exchange_complete) holds a socket reference.

NIPA occasionally hits this leak:

unreferenced object 0xff1100000f435000 (size 2048):
  comm "nci_dev", pid 3954, jiffies 4295441245
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    27 00 01 40 00 00 00 00 00 00 00 00 00 00 00 00  '..@............
  backtrace (crc ec2b3c5):
    __kmalloc_noprof+0x4db/0x730
    sk_prot_alloc.isra.0+0xe4/0x1d0
    sk_alloc+0x36/0x760
    rawsock_create+0xd1/0x540
    nfc_sock_create+0x11f/0x280
    __sock_create+0x22d/0x630
    __sys_socket+0x115/0x1d0
    __x64_sys_socket+0x72/0xd0
    do_syscall_64+0x117/0xfc0
    entry_SYSCALL_64_after_hwframe+0x4b/0x53

Fixes: 38f04c6b1b ("NFC: protect nci_data_exchange transactions")
Reviewed-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260303162346.2071888-4-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:18:57 -08:00
Jakub Kicinski
d42449d2c1 nfc: digital: free skb on digital_in_send error paths
digital_in_send() takes ownership of the skb passed by the caller
(nfc_data_exchange), make sure it's freed on all error paths.

Found looking around the real driver for similar bugs to the one
just fixed in nci.

Fixes: 2c66daecc4 ("NFC Digital: Add NFC-A technology support")
Reviewed-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260303162346.2071888-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:16:10 -08:00
Jakub Kicinski
7bd4b0c477 nfc: nci: free skb on nci_transceive early error paths
nci_transceive() takes ownership of the skb passed by the caller,
but the -EPROTO, -EINVAL, and -EBUSY error paths return without
freeing it.

Due to issues clearing NCI_DATA_EXCHANGE fixed by subsequent changes
the nci/nci_dev selftest hits the error path occasionally in NIPA,
and kmemleak detects leaks:

unreferenced object 0xff11000015ce6a40 (size 640):
  comm "nci_dev", pid 3954, jiffies 4295441246
  hex dump (first 32 bytes):
    6b 6b 6b 6b 00 a4 00 0c 02 e1 03 6b 6b 6b 6b 6b  kkkk.......kkkkk
    6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
  backtrace (crc 7c40cc2a):
    kmem_cache_alloc_node_noprof+0x492/0x630
    __alloc_skb+0x11e/0x5f0
    alloc_skb_with_frags+0xc6/0x8f0
    sock_alloc_send_pskb+0x326/0x3f0
    nfc_alloc_send_skb+0x94/0x1d0
    rawsock_sendmsg+0x162/0x4c0
    do_syscall_64+0x117/0xfc0

Fixes: 6a2968aaf5 ("NFC: basic NCI protocol implementation")
Reviewed-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260303162346.2071888-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 18:14:29 -08:00
Bobby Eshleman
40bf00ec2e net: devmem: use READ_ONCE/WRITE_ONCE on binding->dev
binding->dev is protected on the write-side in
mp_dmabuf_devmem_uninstall() against concurrent writes, but due to the
concurrent bare reads in net_devmem_get_binding() and
validate_xmit_unreadable_skb() it should be wrapped in a
READ_ONCE/WRITE_ONCE pair to make sure no compiler optimizations play
with the underlying register in unforeseen ways.

Doesn't present a critical bug because the known compiler optimizations
don't result in bad behavior. There is no tearing on u64, and load
omissions/invented loads would only break if additional binding->dev
references were inlined together (they aren't right now).

This just more strictly follows the linux memory model (i.e.,
"Lock-Protected Writes With Lockless Reads" in
tools/memory-model/Documentation/access-marking.txt).

Fixes: bd61848900 ("net: devmem: Implement TX path")
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Link: https://patch.msgid.link/20260302-devmem-membar-fix-v2-1-5b33c9cbc28b@meta.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 17:59:27 -08:00
Eric Dumazet
a4c2b8be2e net_sched: sch_fq: clear q->band_pkt_count[] in fq_reset()
When/if a NIC resets, queues are deactivated by dev_deactivate_many(),
then reactivated when the reset operation completes.

fq_reset() removes all the skbs from various queues.

If we do not clear q->band_pkt_count[], these counters keep growing
and can eventually reach sch->limit, preventing new packets to be queued.

Many thanks to Praveen for discovering the root cause.

Fixes: 29f834aa32 ("net_sched: sch_fq: add 3 bands and WRR scheduling")
Diagnosed-by: Praveen Kaligineedi <pkaligineedi@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260304015640.961780-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 17:54:22 -08:00
Ian Ray
f7d92f11bd net: nfc: nci: Fix zero-length proprietary notifications
NCI NFC controllers may have proprietary OIDs with zero-length payload.
One example is: drivers/nfc/nxp-nci/core.c, NXP_NCI_RF_TXLDO_ERROR_NTF.

Allow a zero length payload in proprietary notifications *only*.

Before:

-- >8 --
kernel: nci: nci_recv_frame: len 3
-- >8 --

After:

-- >8 --
kernel: nci: nci_recv_frame: len 3
kernel: nci: nci_ntf_packet: NCI RX: MT=ntf, PBF=0, GID=0x1, OID=0x23, plen=0
kernel: nci: nci_ntf_packet: unknown ntf opcode 0x123
kernel: nfc nfc0: NFC: RF transmitter couldn't start. Bad power and/or configuration?
-- >8 --

After fixing the hardware:

-- >8 --
kernel: nci: nci_recv_frame: len 27
kernel: nci: nci_ntf_packet: NCI RX: MT=ntf, PBF=0, GID=0x1, OID=0x5, plen=24
kernel: nci: nci_rf_intf_activated_ntf_packet: rf_discovery_id 1
-- >8 --

Fixes: d24b03535e ("nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet")
Signed-off-by: Ian Ray <ian.ray@gehealthcare.com>
Link: https://patch.msgid.link/20260302163238.140576-1-ian.ray@gehealthcare.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 17:48:12 -08:00
Eric Dumazet
165573e41f tcp: secure_seq: add back ports to TS offset
This reverts 28ee1b746f ("secure_seq: downgrade to per-host timestamp offsets")

tcp_tw_recycle went away in 2017.

Zhouyan Deng reported off-path TCP source port leakage via
SYN cookie side-channel that can be fixed in multiple ways.

One of them is to bring back TCP ports in TS offset randomization.

As a bonus, we perform a single siphash() computation
to provide both an ISN and a TS offset.

Fixes: 28ee1b746f ("secure_seq: downgrade to per-host timestamp offsets")
Reported-by: Zhouyan Deng <dengzhouyan_nwpu@163.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Acked-by: Florian Westphal <fw@strlen.de>
Link: https://patch.msgid.link/20260302205527.1982836-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 17:44:35 -08:00
Koichiro Den
7f083faf59 net: sched: avoid qdisc_reset_all_tx_gt() vs dequeue race for lockless qdiscs
When shrinking the number of real tx queues,
netif_set_real_num_tx_queues() calls qdisc_reset_all_tx_gt() to flush
qdiscs for queues which will no longer be used.

qdisc_reset_all_tx_gt() currently serializes qdisc_reset() with
qdisc_lock(). However, for lockless qdiscs, the dequeue path is
serialized by qdisc_run_begin/end() using qdisc->seqlock instead, so
qdisc_reset() can run concurrently with __qdisc_run() and free skbs
while they are still being dequeued, leading to UAF.

This can easily be reproduced on e.g. virtio-net by imposing heavy
traffic while frequently changing the number of queue pairs:

  iperf3 -ub0 -c $peer -t 0 &
  while :; do
    ethtool -L eth0 combined 1
    ethtool -L eth0 combined 2
  done

With KASAN enabled, this leads to reports like:

  BUG: KASAN: slab-use-after-free in __qdisc_run+0x133f/0x1760
  ...
  Call Trace:
   <TASK>
   ...
   __qdisc_run+0x133f/0x1760
   __dev_queue_xmit+0x248f/0x3550
   ip_finish_output2+0xa42/0x2110
   ip_output+0x1a7/0x410
   ip_send_skb+0x2e6/0x480
   udp_send_skb+0xb0a/0x1590
   udp_sendmsg+0x13c9/0x1fc0
   ...
   </TASK>

  Allocated by task 1270 on cpu 5 at 44.558414s:
   ...
   alloc_skb_with_frags+0x84/0x7c0
   sock_alloc_send_pskb+0x69a/0x830
   __ip_append_data+0x1b86/0x48c0
   ip_make_skb+0x1e8/0x2b0
   udp_sendmsg+0x13a6/0x1fc0
   ...

  Freed by task 1306 on cpu 3 at 44.558445s:
   ...
   kmem_cache_free+0x117/0x5e0
   pfifo_fast_reset+0x14d/0x580
   qdisc_reset+0x9e/0x5f0
   netif_set_real_num_tx_queues+0x303/0x840
   virtnet_set_channels+0x1bf/0x260 [virtio_net]
   ethnl_set_channels+0x684/0xae0
   ethnl_default_set_doit+0x31a/0x890
   ...

Serialize qdisc_reset_all_tx_gt() against the lockless dequeue path by
taking qdisc->seqlock for TCQ_F_NOLOCK qdiscs, matching the
serialization model already used by dev_reset_queue().

Additionally clear QDISC_STATE_NON_EMPTY after reset so the qdisc state
reflects an empty queue, avoiding needless re-scheduling.

Fixes: 6b3ba9146f ("net: sched: allow qdiscs to handle locking")
Signed-off-by: Koichiro Den <den@valinux.co.jp>
Link: https://patch.msgid.link/20260228145307.3955532-1-den@valinux.co.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 17:43:45 -08:00
Dave Airlie
681d787cb6 Merge tag 'amd-drm-fixes-7.0-2026-03-04' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.0-2026-03-04:

amdgpu:
- LUT fixes
- VCN5 fix
- Dispclk fix
- SMU 13.x fix
- Fix race in VM acquire
- PSP 15.x fix
- UserQ fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260304204837.1937266-1-alexander.deucher@amd.com
2026-03-05 11:11:55 +10:00
Sanman Pradhan
25dd70a03b hwmon: (pmbus/q54sj108a2) fix stack overflow in debugfs read
The q54sj108a2_debugfs_read function suffers from a stack buffer overflow
due to incorrect arguments passed to bin2hex(). The function currently
passes 'data' as the destination and 'data_char' as the source.

Because bin2hex() converts each input byte into two hex characters, a
32-byte block read results in 64 bytes of output. Since 'data' is only
34 bytes (I2C_SMBUS_BLOCK_MAX + 2), this writes 30 bytes past the end
of the buffer onto the stack.

Additionally, the arguments were swapped: it was reading from the
zero-initialized 'data_char' and writing to 'data', resulting in
all-zero output regardless of the actual I2C read.

Fix this by:
1. Expanding 'data_char' to 66 bytes to safely hold the hex output.
2. Correcting the bin2hex() argument order and using the actual read count.
3. Using a pointer to select the correct output buffer for the final
   simple_read_from_buffer call.

Fixes: d014538aa3 ("hwmon: (pmbus) Driver for Delta power supplies Q54SJ108A2")
Cc: stable@vger.kernel.org
Signed-off-by: Sanman Pradhan <psanman@juniper.net>
Link: https://lore.kernel.org/r/20260304235116.1045-1-sanman.p211993@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-04 16:48:06 -08:00
Jakub Kicinski
c649e99764 Merge tag 'linux-can-fixes-for-7.0-20260302' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can
Marc Kleine-Budde says:

====================
pull-request: can 2026-03-02

The first 2 patches are by Oliver Hartkopp. The first fixes the
locking for CAN Broadcast Manager op runtime updates, the second fixes
the packet statisctics for the CAN dummy driver.

Alban Bedel's patch fixes a potential problem in the error path of the
mcp251x's ndo_open callback.

A patch by Ziyi Guo add USB endpoint type validation to the esd_usb
driver.

The next 6 patches are by Greg Kroah-Hartman and fix URB data parsing
for the ems_usb and ucan driver, fix URB anchoring in the etas_es58x,
and in the f81604 driver fix URB data parsing, add URB error handling
and fix URB anchoring.

A patch by me targets the gs_usb driver and fixes interoperability
with the CANable-2.5 firmware by always configuring the bit rate
before starting the device.

The last patch is by Frank Li and fixes a CHECK_DTBS warning for the
nxp,sja1000 dt-binding.

* tag 'linux-can-fixes-for-7.0-20260302' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can:
  dt-bindings: net: can: nxp,sja1000: add reference to mc-peripheral-props.yaml
  can: gs_usb: gs_can_open(): always configure bitrates before starting device
  can: usb: f81604: correctly anchor the urb in the read bulk callback
  can: usb: f81604: handle bulk write errors properly
  can: usb: f81604: handle short interrupt urb messages properly
  can: usb: etas_es58x: correctly anchor the urb in the read bulk callback
  can: ucan: Fix infinite loop from zero-length messages
  can: ems_usb: ems_usb_read_bulk_callback(): check the proper length of a message
  can: esd_usb: add endpoint type validation
  can: mcp251x: fix deadlock in error path of mcp251x_open
  can: dummy_can: dummy_can_init(): fix packet statistics
  can: bcm: fix locking for bcm_op runtime updates
====================

Link: https://patch.msgid.link/20260302152755.1700177-1-mkl@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 16:47:46 -08:00
Jason Xing
18b43bec54 mailmap: reflect my gmail as default
Use my gmail instead so that I can easily handle those emails
that CC me.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
Link: https://patch.msgid.link/20260303033720.84108-1-kerneljasonxing@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 16:42:53 -08:00
Linus Torvalds
c107785c7e Merge tag 'modules-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/modules/linux
Pull module fixes from Sami Tolvanen:

 - Fix a potential kernel panic in the module loader by adding a bounds
   check for the ELF section index. This prevents crashes if attempting
   to load a module that uses SHN_XINDEX or is corrupted.

 - Fix the Kconfig menu layout for module versioning, signing, and
   compression options so they correctly appear as submenus in
   menuconfig.

 - Remove a redundant lockdep_free_key_range() call in the load_module()
   error path. This is already handled by module_deallocate() calling
   free_mod_mem() since the module_memory rework.

* tag 'modules-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/modules/linux:
  module: Fix kernel panic when a symbol st_shndx is out of bounds
  module: Fix the modversions and signing submenus
  module: Remove duplicate freeing of lockdep classes
2026-03-04 15:42:24 -08:00
Jakub Kicinski
2697c45a48 Merge tag 'wireless-2026-03-04' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:

====================
Some more fixes:
 - mt76 gets three almost identical new length checks
 - cw1200 & ti: locking fixes
 - mac80211 has a fix for the recent EML frame handling
 - rsi driver no longer oddly responds to config, which
   had triggered a warning in mac80211
 - ath12k has two fixes for station statistics handling

* tag 'wireless-2026-03-04' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
  wifi: mt76: Fix possible oob access in mt76_connac2_mac_write_txwi_80211()
  wifi: mt76: mt7925: Fix possible oob access in mt7925_mac_write_txwi_80211()
  wifi: mt76: mt7996: Fix possible oob access in mt7996_mac_write_txwi_80211()
  wifi: wlcore: Fix a locking bug
  wifi: cw1200: Fix locking in error paths
  wifi: mac80211: fix missing ieee80211_eml_params member initialization
  wifi: rsi: Don't default to -EOPNOTSUPP in rsi_mac80211_config
  wifi: ath12k: fix station lookup failure when disconnecting from AP
  wifi: ath12k: use correct pdev id when requesting firmware stats
====================

Link: https://patch.msgid.link/20260304112500.169639-3-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 15:29:56 -08:00
Linus Torvalds
0b3bb20580 Merge tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:

 - kthread: consolidate kthread exit paths to prevent use-after-free

 - iomap:
    - don't mark folio uptodate if read IO has bytes pending
    - don't report direct-io retries to fserror
    - reject delalloc mappings during writeback

 - ns: tighten visibility checks

 - netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict
   sequence

* tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  iomap: reject delalloc mappings during writeback
  iomap: don't mark folio uptodate if read IO has bytes pending
  selftests: fix mntns iteration selftests
  nstree: tighten permission checks for listing
  nsfs: tighten permission checks for handle opening
  nsfs: tighten permission checks for ns iteration ioctls
  netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence
  kthread: consolidate kthread exit paths to prevent use-after-free
  iomap: don't report direct-io retries to fserror
2026-03-04 15:03:16 -08:00
Joe Damato
126fe7ef12 mailmap: Add entry for Joe Damato
My Fastly email address is no longer used. Add a mailmap entry to
reflect that.

Signed-off-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260303024202.2526604-1-joe@dama.to
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-04 14:09:45 -08:00
Cheng-Yang Chou
6944e6d8a6 sched_ext/selftests: Fix format specifier and buffer length in file_write_long()
Use %ld (not %lu) for signed long, and pass the actual string length
returned by sprintf() to write_text() instead of sizeof(buf).

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-04 12:07:43 -10:00
Olivier Sobrie
170a4b21f4 hwmon: (max6639) fix inverted polarity
According to MAX6639 documentation:

  D1: PWM Output Polarity. PWM output is low at
  100% duty cycle when this bit is set to zero. PWM
  output is high at 100% duty cycle when this bit is set
  to 1.

Up to commit 0f33272b60 ("hwmon: (max6639) : Update hwmon init using
info structure"), the polarity was set to high (0x2) when no platform
data was set. After the patch, the polarity register wasn't set anymore
if no platform data was specified. Nowadays, since commit 7506ebcd66
("hwmon: (max6639) : Configure based on DT property"), it is always set
to low which doesn't match with the comment above and change the
behavior compared to versions prior 0f33272b60.

Fixes: 0f33272b60 ("hwmon: (max6639) : Update hwmon init using info structure")
Signed-off-by: Olivier Sobrie <olivier@sobrie.be>
Link: https://lore.kernel.org/r/20260304212039.570274-1-olivier@sobrie.be
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-04 13:57:15 -08:00
Dave Airlie
8f3c6f08ab nouveau/dpcd: return EBUSY for aux xfer if the device is asleep
If we have runtime suspended, and userspace wants to use /dev/drm_dp_*
then just tell it the device is busy instead of crashing in the GSP
code.

WARNING: CPU: 2 PID: 565741 at drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c:164 r535_gsp_msgq_wait+0x9a/0xb0 [nouveau]
CPU: 2 UID: 0 PID: 565741 Comm: fwupd Not tainted 6.18.10-200.fc43.x86_64 #1 PREEMPT(lazy)
Hardware name: LENOVO 20QTS0PQ00/20QTS0PQ00, BIOS N2OET65W (1.52 ) 08/05/2024
RIP: 0010:r535_gsp_msgq_wait+0x9a/0xb0 [nouveau]

This is a simple fix to get backported. We should probably engineer a
proper power domain solution to wake up devices and keep them awake
while fw updates are happening.

Cc: stable@vger.kernel.org
Fixes: 8894f4919b ("drm/nouveau: register a drm_dp_aux channel for each dp connector")
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patch.msgid.link/20260224031750.791621-1-airlied@gmail.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-04 22:08:01 +01:00
Arnd Bergmann
22e6afddb5 Merge tag 'reset-fixes-for-v7.0' of https://git.pengutronix.de/git/pza/linux into arm/fixes
Reset controller fixes for v7.0

* Fix NULL pointer dereference in reset-rzg2l-usbphy-ctrl driver for
  renesas,rzg2l-usbphy-ctrl devices without pwrrdy control.

* tag 'reset-fixes-for-v7.0' of https://git.pengutronix.de/git/pza/linux:
  reset: rzg2l-usbphy-ctrl: Check pwrrdy is valid before using it

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2026-03-04 21:42:22 +01:00
Arnd Bergmann
4bc732b00a Merge tag 'soc_fsl-7.0-2' of https://git.kernel.org/pub/scm/linux/kernel/git/chleroy/linux into arm/fixes
FSL SOC Fixes for 7.0

- Fix a race condition in Freescale Queue and Buffer Manager.
- Fix a trivial error verification in CPM1

* tag 'soc_fsl-7.0-2' of https://git.kernel.org/pub/scm/linux/kernel/git/chleroy/linux:
  soc: fsl: cpm1: qmc: Fix error check for devm_ioremap_resource() in qmc_qe_init_resources()
  soc: fsl: qbman: fix race condition in qman_destroy_fq

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2026-03-04 21:39:09 +01:00
Arnd Bergmann
b69d48137e Merge tag 'riscv-soc-fixes-for-v7.0-rc1' of https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux into arm/fixes
RISC-V soc fixes for v7.0-rc1

drivers:
Fix leaks in probe/init function teardown code in three drivers.

microchip:
Fix a warning introduced by a recent binding change, that made resets
required on Polarfire SoC's CAN IP.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>

* tag 'riscv-soc-fixes-for-v7.0-rc1' of https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux:
  cache: ax45mp: Fix device node reference leak in ax45mp_cache_init()
  cache: starfive: fix device node leak in starlink_cache_init()
  riscv: dts: microchip: add can resets to mpfs
  soc: microchip: mpfs: Fix memory leak in mpfs_sys_controller_probe()

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2026-03-04 21:35:06 +01:00
Lizhi Hou
f82859c84a accel/amdxdna: Fix major version check on NPU1 platform
Add the missing major number in npu1_fw_feature_table.

Without the major version specified, the firmware feature check fails,
preventing new firmware commands from being enabled on the NPU1
platform.

With the correct major version populated, the driver properly detects
firmware support and enables the new command.

Fixes: f1eac46fe5 ("accel/amdxdna: Update firmware version check for latest firmware")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260304195012.3616908-1-lizhi.hou@amd.com
2026-03-04 12:05:02 -08:00
Bjorn Andersson
da994db94e remoteproc: sysmon: Correct subsys_name_len type in QMI request
The QMI message encoder has up until recently read a single byte (as
elem_size == 1), but with the introduction of big endian support it's
become apparent that this field is expected to be a full u32 -
regardless of the size of the length in the encoded message (which is
what elem_size specifies).

The result is that the encoder now reads past the length byte and
rejects the unreasonably large length formed when including the
following 3 bytes from the subsys_name array.

Fix this by changing to the expected type.

Fixes: 1fb82ee806 ("remoteproc: qcom: Introduce sysmon")
Signed-off-by: Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
Reviewed-by: Chris Lew <christopher.lew@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260220-qmi-encode-invalid-length-v2-1-5674be35ab29@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
2026-03-04 13:12:06 -06:00
Miroslav Lichvar
e48a869957 timekeeping: Fix timex status validation for auxiliary clocks
The timekeeping_validate_timex() function validates the timex status
of an auxiliary system clock even when the status is not to be changed,
which causes unexpected errors for applications that make read-only
clock_adjtime() calls, or set some other timex fields, but without
clearing the status field.

Do the AUX-specific status validation only when the modes field contains
ADJ_STATUS, i.e. the application is actually trying to change the
status. This makes the AUX-specific clock_adjtime() behavior consistent
with CLOCK_REALTIME.

Fixes: 4eca49d0b6 ("timekeeping: Prepare do_adtimex() for auxiliary clocks")
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260225085231.276751-1-mlichvar@redhat.com
2026-03-04 20:05:37 +01:00
Sunil Khatri
65b5c326ce drm/amdgpu/userq: refcount userqueues to avoid any race conditions
To avoid race condition and avoid UAF cases, implement kref
based queues and protect the below operations using xa lock
a. Getting a queue from xarray
b. Increment/Decrement it's refcount

Every time some one want to access a queue, always get via
amdgpu_userq_get to make sure we have locks in place and get
the object if active.

A userqueue is destroyed on the last refcount is dropped which
typically would be via IOCTL or during fini.

v2: Add the missing drop in one the condition in the signal ioclt [Alex]

v3: remove the queue from the xarray first in the free queue ioctl path
    [Christian]

- Pass queue to the amdgpu_userq_put directly.
- make amdgpu_userq_put xa_lock free since we are doing put for each get
  only and final put is done via destroy and we remove the queue from xa
  with lock.
- use userq_put in fini too so cleanup is done fully.

v4: Use xa_erase directly rather than doing load and erase in free
    ioctl. Also remove some of the error logs which could be exploited
    by the user to flood the logs [Christian]

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 4952189b284d4d847f92636bb42dd747747129c0)
Cc: <stable@vger.kernel.org> # 048c1c4e51: drm/amdgpu/userq: Consolidate wait ioctl exit path
Cc: <stable@vger.kernel.org>
2026-03-04 13:15:00 -05:00
Tvrtko Ursulin
048c1c4e51 drm/amdgpu/userq: Consolidate wait ioctl exit path
If we gate the fence destruction with a check telling us whether there are
valid pointers in there we can eliminate the need for dual, basically
identical, exit paths.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit bea29bb0dd29012949cd44fdb122465a9fd5cf91)
2026-03-04 13:15:00 -05:00
sguttula
a145bbff6f drm/amdgpu/psp: Use Indirect access address for GFX to PSP mailbox
The reason the RAP is not granting access to 0x58200 is that
a dedicated RSMU slot would have to be spent for this address range,
and MPASP is close to running out of RSMU slots.

This will help to fix PSP TOC load failure during secureboot.
GFX Driver Need to use indirect access for SMN address regs.

Signed-off-by: sguttula <suresh.guttula@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 9b822e26eea3899003aa8a89d5e2c4408e066e20)
2026-03-04 13:15:00 -05:00
Alysa Liu
2c1030f2e8 drm/amdgpu: Fix use-after-free race in VM acquire
Replace non-atomic vm->process_info assignment with cmpxchg()
to prevent race when parent/child processes sharing a drm_file
both try to acquire the same VM after fork().

Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alysa Liu <Alysa.Liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit c7c573275ec20db05be769288a3e3bb2250ec618)
Cc: stable@vger.kernel.org
2026-03-04 13:15:00 -05:00
Yang Wang
68785c5e79 drm/amd/pm: remove invalid gpu_metrics.energy_accumulator on smu v13.0.x
v1:
The metrics->EnergyAccumulator field has been deprecated on newer pmfw.

v2:
add smu 13.0.0/13.0.7/13.0.10 support.

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 8de9edb35976fa56565dc8fbb5d1310e8e10187c)
Cc: stable@vger.kernel.org
2026-03-04 13:14:59 -05:00
Lorenzo Stoakes (Oracle)
b12bbe35c7 MAINTAINERS, mailmap: update email address for Lorenzo Stoakes
I want to experiment with a new email setup, and using the @kernel.org
address is the easiest way to have flexibility on this.

Link: https://lkml.kernel.org/r/20260303195025.1170895-1-ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 10:06:40 -08:00
Randy Dunlap
599b4e290c mm/mmu_notifier: clean up mmu_notifier.h kernel-doc
Eliminate kernel-doc warnings in mmu_notifier.h:
- add a missing struct short description
- use the correct format for function parameters
- add missing function return comment sections

Warning: include/linux/mmu_notifier.h:236 missing initial short
 description on line: * struct mmu_interval_notifier_ops
Warning: include/linux/mmu_notifier.h:325 function parameter 'interval_sub'
 not described in 'mmu_interval_set_seq'
Warning: include/linux/mmu_notifier.h:325 function parameter 'cur_seq'
 not described in 'mmu_interval_set_seq'
Warning: include/linux/mmu_notifier.h:346 function parameter 'interval_sub'
 not described in 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:346 function parameter 'seq' not
 described in 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:346 No description found for return
 value of 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:370 function parameter 'interval_sub'
 not described in 'mmu_interval_check_retry'
Warning: include/linux/mmu_notifier.h:370 function parameter 'seq' not
 described in 'mmu_interval_check_retry'
Warning: include/linux/mmu_notifier.h:370 No description found for return
 value of 'mmu_interval_check_retry'

Link: https://lkml.kernel.org/r/20260302005222.3470783-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:24 -08:00
Randy Dunlap
7392f8e4ea uaccess: correct kernel-doc parameter format
Use the correct kernel-doc function parameter format to avoid kernel-doc
warnings:

Warning: include/linux/uaccess.h:814 function parameter 'uptr' not
 described in 'scoped_user_rw_access_size'
Warning: include/linux/uaccess.h:826 function parameter 'uptr' not
 described in 'scoped_user_rw_access'

Link: https://lkml.kernel.org/r/20260302005229.3471955-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:24 -08:00
Zi Yan
577a1f495f mm/huge_memory: fix a folio_split() race condition with folio_try_get()
During a pagecache folio split, the values in the related xarray should
not be changed from the original folio at xarray split time until all
after-split folios are well formed and stored in the xarray.  Current use
of xas_try_split() in __split_unmapped_folio() lets some after-split
folios show up at wrong indices in the xarray.  When these misplaced
after-split folios are unfrozen, before correct folios are stored via
__xa_store(), and grabbed by folio_try_get(), they are returned to
userspace at wrong file indices, causing data corruption.  More detailed
explanation is at the bottom.

The reproducer is at: https://github.com/dfinity/thp-madv-remove-test
It
1. creates a memfd,
2. forks,
3. in the child process, maps the file with large folios (via shmem code
   path) and reads the mapped file continuously with 16 threads,
4. in the parent process, uses madvise(MADV_REMOVE) to punch poles in the
   large folio.

Data corruption can be observed without the fix.  Basically, data from a
wrong page->index is returned.

Fix it by using the original folio in xas_try_split() calls, so that
folio_try_get() can get the right after-split folios after the original
folio is unfrozen.

Uniform split, split_huge_page*(), is not affected, since it uses
xas_split_alloc() and xas_split() only once and stores the original folio
in the xarray.  Change xas_split() used in uniform split branch to use the
original folio to avoid confusion.

Fixes below points to the commit introduces the code, but folio_split() is
used in a later commit 7460b470a1 ("mm/truncate: use folio_split() in
truncate operation").

More details:

For example, a folio f is split non-uniformly into f, f2, f3, f4 like
below:
+----------------+---------+----+----+
|       f        |    f2   | f3 | f4 |
+----------------+---------+----+----+
but the xarray would look like below after __split_unmapped_folio() is
done:
+----------------+---------+----+----+
|       f        |    f2   | f3 | f3 |
+----------------+---------+----+----+

After __split_unmapped_folio(), the code changes the xarray and unfreezes
after-split folios:

1. unfreezes f2, __xa_store(f2)
2. unfreezes f3, __xa_store(f3)
3. unfreezes f4, __xa_store(f4), which overwrites the second f3 to f4.
4. unfreezes f.

Meanwhile, a parallel filemap_get_entry() can read the second f3 from the
xarray and use folio_try_get() on it at step 2 when f3 is unfrozen. Then,
f3 is wrongly returned to user.

After the fix, the xarray looks like below after __split_unmapped_folio():
+----------------+---------+----+----+
|       f        |    f    | f  | f  |
+----------------+---------+----+----+
so that the race window no longer exists.

[ziy@nvidia.com: move comment, per David]
  Link: https://lkml.kernel.org/r/5C9FA053-A4C6-4615-BE05-74E47A6462B3@nvidia.com
Link: https://lkml.kernel.org/r/20260302203159.3208341-1-ziy@nvidia.com
Fixes: 00527733d0 ("mm/huge_memory: add two new (not yet used) functions for folio_split()")
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reported-by: Bas van Dijk <bas@dfinity.org>
Closes: https://lore.kernel.org/all/CAKNNEtw5_kZomhkugedKMPOG-sxs5Q5OLumWJdiWXv+C9Yct0w@mail.gmail.com/
Tested-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:24 -08:00
Vlastimil Babka (SUSE)
431b04f008 MAINTAINERS: add co-maintainer and reviewer for SLAB ALLOCATOR
Promote Harry Yoo from reviewer to maintainer.  Harry's been involved in
slab development for multiple years now and doing a great job.

Add Hao Li as a new reviewer.  Hao has been doing very useful reviews for
a while now, so make it official and ensure the Cc's.

Link: https://lkml.kernel.org/r/20260302101345.36713-2-vbabka@kernel.org
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Acked-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: Hao Li <hao.li@linux.dev>
Acked-by: SeongJae Park <sj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:24 -08:00
Jason Xing
06de173b13 MAINTAINERS: add RELAY entry
RELAYFS was originally developed by Tom Zanussi and Karim Yaghmour in
2005[1].  Jens Axboe converted it from filesystem into a generic API in
2006[2] and made it widely known through the notable I/O tracing tool
blktrace.  In the decade, there remain a few users scatterred across
different subsystems, like recently added wifi commit[3] that is an
example to show how to communicate between users and kernel.  Last year
I've already done some maintenance and added/corrected some diagnostic
counters.

At Tencent, we internally maintain RELAY as one of most crucial components
of network observibility platform which was shared a bit at LPC 2025[4][5]
and hopefully will be published in the paper this year.  RELAY has proven
highly efficient due to its inherent design essence.  This design becomes
the indispensable way to build a 7x24 platform monitoring various hot
paths even without any selectively sampling (yes, sampling is commonly
used to avoid the overall performance degradation).  One of the
recommended usages is to use its zerocopy function relay_reserve() to
transfer data in a raw format that can be recognized and parsed by the
corresponding application to userspace without introducing heavy locks and
complicated logic that appears in other types of approaches, like printk. 
More details can be discovered by reading through the Documentation :)

Credits are given to the all the contributors and reviewers for
RELAY/RELAYFS in the past and future! Many thanks!

[1]: commit e82894f84d ("[PATCH] relayfs")
[2]: commit b86ff981a8 ("[PATCH] relay: migrate from relayfs to a generic relay API")
[3]: commit c1bf6959dd ("wifi: ath11k: Register relayfs entries for CFR dump")
[4]: https://lpc.events/event/19/contributions/2055/
[5]: https://lpc.events/event/19/contributions/2010/

Link: https://lkml.kernel.org/r/20260301020902.56476-1-kerneljasonxing@gmail.com
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: Andriy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Tom Zanussi <zanussi@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:23 -08:00
Hao Li
dccd5ee262 memcg: fix slab accounting in refill_obj_stock() trylock path
In the trylock path of refill_obj_stock(), mod_objcg_mlstate() should use
the real alloc/free bytes (i.e., nr_acct) for accounting, rather than
nr_bytes.

The user-visible impact is that the NR_SLAB_RECLAIMABLE_B and
NR_SLAB_UNRECLAIMABLE_B stats can end up being incorrect.

For example, if a user allocates a 6144-byte object, then before this
fix efill_obj_stock() calls mod_objcg_mlstate(..., nr_bytes=2048), even
though it should account for 6144 bytes (i.e., nr_acct).

When the user later frees the same object with kfree(),
refill_obj_stock() calls mod_objcg_mlstate(..., nr_bytes=6144).  This
ends up adding 6144 to the stats, but it should be applying -6144
(i.e., nr_acct) since the object is being freed.

Link: https://lkml.kernel.org/r/20260226115145.62903-1-hao.li@linux.dev
Fixes: 200577f69f ("memcg: objcg stock trylock without irq disabling")
Signed-off-by: Hao Li <hao.li@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:23 -08:00
Ritesh Harjani (IBM)
a1e59fc6ee mm/hugetlb.c: use __pa() instead of virt_to_phys() in early bootmem alloc code
Architecture like powerpc, checks for pfn_valid() in their virt_to_phys()
implementation (when CONFIG_DEBUG_VIRTUAL is enabled) [1].  Commit
d49004c5f0 "arch, mm: consolidate initialization of nodes, zones and
memory map" changed the order of initialization between
hugetlb_bootmem_alloc() and free_area_init().  This means, pfn_valid() can
now return false in alloc_bootmem() path, since sparse_init() is not yet
done.

Since, alloc_bootmem() uses memblock_alloc(.., MEMBLOCK_ALLOC_ACCESSIBLE),
this means these allocations are always going to happen below high_memory,
where __pa() should return valid physical addresses.  Hence this patch
converts the two callers of virt_to_phys() in alloc_bootmem() path to
__pa() to avoid this bootup warning:

 ------------[ cut here ]------------
 WARNING: arch/powerpc/include/asm/io.h:879 at virt_to_phys+0x44/0x1b8, CPU#0: swapper/0
 Modules linked in:
 <...>
 NIP [c000000000601584] virt_to_phys+0x44/0x1b8
 LR [c000000004075de4] alloc_bootmem+0x144/0x1a8
 Call Trace:
 [c000000004d1fb50] [c000000004075dd4] alloc_bootmem+0x134/0x1a8
 [c000000004d1fba0] [c000000004075fac] __alloc_bootmem_huge_page+0x164/0x230
 [c000000004d1fbe0] [c000000004030bc4] alloc_bootmem_huge_page+0x44/0x138
 [c000000004d1fc10] [c000000004076e48] hugetlb_hstate_alloc_pages+0x350/0x5ac
 [c000000004d1fd30] [c0000000040782f0] hugetlb_bootmem_alloc+0x15c/0x19c
 [c000000004d1fd70] [c00000000406d7b4] mm_core_init_early+0x7c/0xdf4
 [c000000004d1ff30] [c000000004011d84] start_kernel+0xac/0xc58
 [c000000004d1ffe0] [c00000000000e99c] start_here_common+0x1c/0x20

[1]: https://lore.kernel.org/linuxppc-dev/87tsv5h544.ritesh.list@gmail.com/

Link: https://lkml.kernel.org/r/b4a7d2c6c4c1dd81dddc904fc21f01303290a4b8.1772107852.git.riteshh@linux.ibm.com
Fixes: d49004c5f0 ("arch, mm: consolidate initialization of nodes, zones and memory map")
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:23 -08:00
Sergey Senozhatsky
ba4c3698e6 zram: rename writeback_compressed device attr
Rename writeback_compressed attr to compressed_writeback to avoid possible
confusion and have more natural naming.  writeback_compressed may look
like an alternative version of writeback while in fact
writeback_compressed only sets a writeback property.  Make this
distinction more clear with a new compressed_writeback name.

This updates a feature which is new in 7.0-rcX.

Link: https://lkml.kernel.org/r/20260226025429.1042083-1-senozhatsky@chromium.org
Fixes: 4c1d61389e ("zram: introduce writeback_compressed device attribute")
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Suggested-by: Minchan Kim <minchan@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Richard Chang <richardycc@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Christoph Böhmwalder" <christoph.boehmwalder@linbit.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lars Ellenberg <lars.ellenberg@linbit.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:23 -08:00
Mike Rapoport (Microsoft)
5548dd7fa8 tools/testing: fix testing/vma and testing/radix-tree build
Build of VMA and radix-tree tests is unhappy after the conversion of
kzalloc() to kzalloc_obj() in lib/idr.c:

  cc -I../shared -I. -I../../include -I../../arch/x86/include -I../../../lib -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined  -DNUM_VMA_FLAG_BITS=128 -DNUM_MM_FLAG_BITS=128   -c -o idr.o idr.c
  idr.c: In function `ida_alloc_range':
  idr.c:420:34: error: implicit declaration of function `kzalloc_obj'; did you mean `kzalloc_node'? [-Wimplicit-function-declaration]
    420 |                         bitmap = kzalloc_obj(*bitmap, GFP_NOWAIT);
        |                                  ^~~~~~~~~~~
        |                                  kzalloc_node
  idr.c:420:32: error: assignment to `struct ida_bitmap *' from `int' makes pointer from integer without a cast [-Wint-conversion]
    420 |                         bitmap = kzalloc_obj(*bitmap, GFP_NOWAIT);
        |                                ^
  idr.c:447:40: error: assignment to `struct ida_bitmap *' from `int' makes pointer from integer without a cast [-Wint-conversion]
    447 |                                 bitmap = kzalloc_obj(*bitmap, GFP_NOWAIT);
        |                                        ^
  idr.c:468:15: error: assignment to `struct ida_bitmap *' from `int' makes pointer from integer without a cast [-Wint-conversion]
    468 |         alloc = kzalloc_obj(*bitmap, gfp);
        |               ^
  make: *** [<builtin>: idr.o] Error 1

Import necessary macros from include/linux to tools/include/linux to fix
the compilation.

Link: https://lkml.kernel.org/r/20260225233111.2760752-1-rppt@kernel.org
Fixes: 69050f8d6d ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types")
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Tested-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:22 -08:00
Axel Rasmussen
2d28ed588f Revert "ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor()"
This change swapped out mod_node_page_state for lruvec_stat_add_folio. 
But, these two APIs are not interchangeable: the lruvec version also
increments memcg stats, in addition to "global" pgdat stats.

So after this change, the "pagetables" memcg stat in memory.stat always
yields "0", which is a userspace visible regression.

I tried to look for a refactor where we add a variant of
lruvec_stat_mod_folio which takes a pgdat and a memcg instead of a folio,
to try to adhere to the spirit of the original patch.  But at the end of
the day this just means we have to call folio_memcg(ptdesc_folio(ptdesc))
anyway, which doesn't really accomplish much.

This regression is visible in master as well as 6.18 stable, so CC stable
too.

Link: https://lkml.kernel.org/r/20260225002434.2953895-1-axelrasmussen@google.com
Fixes: f0c92726e8 ("ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor()")
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:22 -08:00
Zi Yan
f4355d6bb3 mm/cma: move put_page_testzero() out of VM_WARN_ON in cma_release()
When CONFIG_DEBUG_VM is not set, VM_WARN_ON is a NOP.  Putting any
statement with side effect inside it is incorrect.  Collect all
!put_page_testzero() results and check the sum using WARN instead after
the loop.  It restores the same check in free_contig_range() before commit
e0c1326779 ("mm: page_alloc: add alloc_contig_frozen_{range,pages}()"),
the commit prior to the Fixes one.

Link: https://lkml.kernel.org/r/20260225031231.2352011-1-ziy@nvidia.com
Fixes: 9bda131c60 ("mm: cma: add cma_alloc_frozen{_compound}()")
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reported-by: Ron Economos <re@w6rz.net>
Closes: https://lore.kernel.org/all/1b17c38f-30d3-4bb4-a7e1-e74b19ada885@w6rz.net/
Suggested-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Debugged-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Tested-by: Ron Economos <re@w6rz.net>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:22 -08:00
Raul Pazemecxas De Andrade
d210fdcac9 mm/damon/core: clear walk_control on inactive context in damos_walk()
damos_walk() sets ctx->walk_control to the caller-provided control
structure before checking whether the context is running.  If the context
is inactive (damon_is_running() returns false), the function returns
-EINVAL without clearing ctx->walk_control.  This leaves a dangling
pointer to a stack-allocated structure that will be freed when the caller
returns.

This is structurally identical to the bug fixed in commit f9132fbc2e
("mm/damon/core: remove call_control in inactive contexts") for
damon_call(), which had the same pattern of linking a control object and
returning an error without unlinking it.

The dangling walk_control pointer can cause:
1. Use-after-free if the context is later started and kdamond
   dereferences ctx->walk_control (e.g., in damos_walk_cancel()
   which writes to control->canceled and calls complete())
2. Permanent -EBUSY from subsequent damos_walk() calls, since the
   stale pointer is non-NULL

Nonetheless, the real user impact is quite restrictive.  The
use-after-free is impossible because there is no damos_walk() callers who
starts the context later.  The permanent -EBUSY can actually confuse
users, as DAMON is not running.  But the symptom is kept only while the
context is turned off.  Turning it on again will make DAMON internally
uses a newly generated damon_ctx object that doesn't have the invalid
damos_walk_control pointer, so everything will work fine again.

Fix this by clearing ctx->walk_control under walk_control_lock before
returning -EINVAL, mirroring the fix pattern from f9132fbc2e.

Link: https://lkml.kernel.org/r/20260224011102.56033-1-sj@kernel.org
Fixes: bf0eaba0ff ("mm/damon/core: implement damos_walk()")
Reported-by: Raul Pazemecxas De Andrade <raul_pazemecxas@hotmail.com>
Closes: https://lore.kernel.org/CPUPR80MB8171025468965E583EF2490F956CA@CPUPR80MB8171.lamprd80.prod.outlook.com
Signed-off-by: Raul Pazemecxas De Andrade <raul_pazemecxas@hotmail.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[6.14+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:21 -08:00
Pratyush Yadav (Google)
7e04bf1f33 mm: memfd_luo: always dirty all folios
A dirty folio is one which has been written to.  A clean folio is its
opposite.  Since a clean folio has no user data, it can be freed under
memory pressure.

memfd preservation with LUO saves the flag at preserve().  This is
problematic.  The folio might get dirtied later.  Saving it at freeze()
also doesn't work, since the dirty bit from PTE is normally synced at
unmap and there might still be mappings of the file at freeze().

To see why this is a problem, say a folio is clean at preserve, but gets
dirtied later.  The serialized state of the folio will mark it as clean. 
After retrieve, the next kernel will see the folio as clean and might try
to reclaim it under memory pressure.  This will result in losing user
data.

Mark all folios of the file as dirty, and always set the
MEMFD_LUO_FOLIO_DIRTY flag.  This comes with the side effect of making all
clean folios un-reclaimable.  This is a cost that has to be paid for
participants of live update.  It is not expected to be a common use case
to preserve a lot of clean folios anyway.

Since the value of pfolio->flags is a constant now, drop the flags
variable and set it directly.

Link: https://lkml.kernel.org/r/20260223173931.2221759-3-pratyush@kernel.org
Fixes: b3749f174d ("mm: memfd_luo: allow preserving memfd")
Signed-off-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:21 -08:00
Pratyush Yadav (Google)
50d7b4332f mm: memfd_luo: always make all folios uptodate
Patch series "mm: memfd_luo: fixes for folio flag preservation".

This series contains a couple fixes for flag preservation for memfd live
update.

The first patch fixes memfd preservation when fallocate() was used to
pre-allocate some pages.  For these memfds, all the writes to fallocated
pages touched after preserve were lost.

The second patch fixes dirty flag tracking.  If the dirty flag is not
tracked correctly, the next kernel might incorrectly reclaim some folios
under memory pressure, losing user data.  This is a theoretical bug that I
observed when reading the code, and haven't been able to reproduce it.


This patch (of 2):

When a folio is added to a shmem file via fallocate, it is not zeroed on
allocation.  This is done as a performance optimization since it is
possible the folio will never end up being used at all.  When the folio is
used, shmem checks for the uptodate flag, and if absent, zeroes the folio
(and sets the flag) before returning to user.

With LUO, the flags of each folio are saved at preserve time.  It is
possible to have a memfd with some folios fallocated but not uptodate. 
For those, the uptodate flag doesn't get saved.  The folios might later
end up being used and become uptodate.  They would get passed to the next
kernel via KHO correctly since they did get preserved.  But they won't
have the MEMFD_LUO_FOLIO_UPTODATE flag.

This means that when the memfd is retrieved, the folios will be added to
the shmem file without the uptodate flag.  They will be zeroed before
first use, losing the data in those folios.

Since we take a big performance hit in allocating, zeroing, and pinning
all folios at prepare time anyway, take some more and zero all
non-uptodate ones too.

Later when there is a stronger need to make prepare faster, this can be
optimized.

To avoid racing with another uptodate operation, take the folio lock.

Link: https://lkml.kernel.org/r/20260223173931.2221759-2-pratyush@kernel.org
Fixes: b3749f174d ("mm: memfd_luo: allow preserving memfd")
Signed-off-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-04 09:44:21 -08:00
Günther Noack
f8e2019c3b samples/landlock: Bump ABI version to 8
The sample tool should print a warning if it is not running on a
kernel that provides the newest Landlock ABI version.

Link: https://lore.kernel.org/all/20260218.ufao5Vaefa2u@digikod.net/
Suggested-by: Mickaël Salaün <mic@digikod.net>
Signed-off-by: Günther Noack <gnoack3000@gmail.com>
Link: https://lore.kernel.org/r/20260220160627.53913-1-gnoack3000@gmail.com
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-04 18:28:11 +01:00
Mickaël Salaün
bb8369ead4 landlock: Improve TSYNC types
Constify pointers when it makes sense.

Consistently use size_t for loops, especially to match works->size type.

Add new lines to improve readability.

Cc: Jann Horn <jannh@google.com>
Reviewed-by: Günther Noack <gnoack@google.com>
Link: https://lore.kernel.org/r/20260217122341.2359582-2-mic@digikod.net
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-04 18:28:11 +01:00
Mickaël Salaün
929553bbb4 landlock: Fully release unused TSYNC work entries
If task_work_add() failed, ctx->task is put but the tsync_works struct
is not reset to its previous state.  The first consequence is that the
kernel allocates memory for dying threads, which could lead to
user-accounted memory exhaustion (not very useful nor specific to this
case).  The second consequence is that task_work_cancel(), called by
cancel_tsync_works(), can dereference a NULL task pointer.

Fix this issues by keeping a consistent works->size wrt the added task
work.  This is done in a new tsync_works_trim() helper which also cleans
up the shared_ctx and work fields.

As a safeguard, add a pointer check to cancel_tsync_works() and update
tsync_works_release() accordingly.

Cc: Jann Horn <jannh@google.com>
Reviewed-by: Günther Noack <gnoack@google.com>
Link: https://lore.kernel.org/r/20260217122341.2359582-1-mic@digikod.net
[mic: Replace memset() with compound literal]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-04 18:28:10 +01:00
Mickaël Salaün
405ca72dc5 landlock: Fix formatting
Auto-format with clang-format -i security/landlock/*.[ch]

Cc: Günther Noack <gnoack@google.com>
Cc: Kees Cook <kees@kernel.org>
Fixes: 69050f8d6d ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types")
Reviewed-by: Günther Noack <gnoack3000@gmail.com>
Link: https://lore.kernel.org/r/20260303173632.88040-1-mic@digikod.net
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2026-03-04 18:28:08 +01:00
Davidlohr Bueso
77b310bb7b cxl/region: Fix leakage in __construct_region()
Failing the first sysfs_update_group() needs to explicitly
kfree the resource as it is too early for cxl_region_iomem_release()
to do so.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Fixes: d6602e2581 (cxl/region: Add support to indicate region has extended linear cache)
Link: https://patch.msgid.link/20260202191330.245608-1-dave@stgolabs.net
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-03-04 10:26:39 -07:00
zhidao su
7a8464555d sched_ext: Use WRITE_ONCE() for the write side of dsq->seq update
bpf_iter_scx_dsq_new() reads dsq->seq via READ_ONCE() without holding
any lock, making dsq->seq a lock-free concurrently accessed variable.
However, dispatch_enqueue(), the sole writer of dsq->seq, uses a plain
increment without the matching WRITE_ONCE() on the write side:

    dsq->seq++;
    ^^^^^^^^^^^
    plain write -- KCSAN data race

The KCSAN documentation requires that if one accessor uses READ_ONCE()
or WRITE_ONCE() on a variable to annotate lock-free access, all other
accesses must also use the appropriate accessor. A plain write leaves
the pair incomplete and will trigger KCSAN warnings.

Fix by using WRITE_ONCE() for the write side of the update:

    WRITE_ONCE(dsq->seq, dsq->seq + 1);

This is consistent with bpf_iter_scx_dsq_new() and makes the
concurrent access annotation complete and KCSAN-clean.

Signed-off-by: zhidao su <suzhidao@xiaomi.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-04 07:01:18 -10:00
Matthew Schwartz
2b76e0cc78 mmc: sdhci-pci-gli: fix GL9750 DMA write corruption
The GL9750 SD host controller has intermittent data corruption during
DMA write operations. The GM_BURST register's R_OSRC_Lmt field
(bits 17:16), which limits outstanding DMA read requests from system
memory, is not being cleared during initialization. The Windows driver
sets R_OSRC_Lmt to zero, limiting requests to the smallest unit.

Clear R_OSRC_Lmt to match the Windows driver behavior. This eliminates
write corruption verified with f3write/f3read tests while maintaining
DMA performance.

Cc: stable@vger.kernel.org
Fixes: e51df6ce66 ("mmc: host: sdhci-pci: Add Genesys Logic GL975x support")
Closes: https://lore.kernel.org/linux-mmc/33d12807-5c72-41ce-8679-57aa11831fad@linux.dev/
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Matthew Schwartz <matthew.schwartz@linux.dev>
Reviewed-by: Ben Chuang <ben.chuang@genesyslogic.com.tw>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-03-04 17:48:59 +01:00
Linus Torvalds
ecc64d2dc9 Merge tag 'sysctl-7.00-fixes-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl
Pull sysctl fix from Joel Granados:

 - Fix error when reporting jiffies converted values back to user space

   Return the converted value instead of "Invalid argument" error

* tag 'sysctl-7.00-fixes-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl:
  time/jiffies: Fix sysctl file error on configurations where USER_HZ < HZ
2026-03-04 08:21:11 -08:00
Jisheng Zhang
0100e495cd arm64: make runtime const not usable by modules
Similar as commit 284922f4c5 ("x86: uaccess: don't use runtime-const
rewriting in modules") does, make arm64's runtime const not usable by
modules too, to "make sure this doesn't get forgotten the next time
somebody wants to do runtime constant optimizations". The reason is
well explained in the above commit: "The runtime-const infrastructure
was never designed to handle the modular case, because the constant
fixup is only done at boot time for core kernel code."

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-04 16:13:58 +00:00
Linus Torvalds
4053c47680 Merge tag 'media/v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
Pull media fix from Mauro Carvalho Chehab:
 "Fix for MPEG-TS decoder in dvb-net"

* tag 'media/v7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media:
  media: dvb-net: fix OOB access in ULE extension header tables
2026-03-04 08:12:06 -08:00
Shyam Prasad N
340cea84f6 cifs: open files should not hold ref on superblock
Today whenever we deal with a file, in addition to holding
a reference on the dentry, we also get a reference on the
superblock. This happens in two cases:
1. when a new cinode is allocated
2. when an oplock break is being processed

The reasoning for holding the superblock ref was to make sure
that when umount happens, if there are users of inodes and
dentries, it does not try to clean them up and wait for the
last ref to superblock to be dropped by last of such users.

But the side effect of doing that is that umount silently drops
a ref on the superblock and we could have deferred closes and
lease breaks still holding these refs.

Ideally, we should ensure that all of these users of inodes and
dentries are cleaned up at the time of umount, which is what this
code is doing.

This code change allows these code paths to use a ref on the
dentry (and hence the inode). That way, umount is
ensured to clean up SMB client resources when it's the last
ref on the superblock (For ex: when same objects are shared).

The code change also moves the call to close all the files in
deferred close list to the umount code path. It also waits for
oplock_break workers to be flushed before calling
kill_anon_super (which eventually frees up those objects).

Fixes: 24261fc23d ("cifs: delay super block destruction until all cifsFileInfo objects are gone")
Fixes: 705c79101c ("smb: client: fix use-after-free in cifs_oplock_break")
Cc: <stable@vger.kernel.org>
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-04 10:11:39 -06:00
H. Peter Anvin
b5ef09a77d x86/entry/vdso32: Work around libgcc unwinder bug
The unwinder code in libgcc has a long standing bug which causes it to
fail to pick up the signal frame CFI flag. This is a generic bug
across all platforms.

It affects the __kernel_sigreturn and __kernel_rt_sigreturn vdso entry
points on i386. The x86-64 kernel doesn't provide a sigreturn stub,
and so there is no kernel-provided code that is affected on x86-64.

libgcc does have a legacy fallback path which happens to work as long
as the bytes immediately before each of the sigreturn functions fall
outside any function. This patch adds a nop before the ALIGN to each
of the sigreturn stubs to ensure that this is, indeed, the case.

The rest of the patch is just a comment which documents the invariants
that need to be maintained for this legacy path to work correctly.

This is a manifest bug: in the current vdso, __kernel_vsyscall is a
multiple of 16 bytes long and thus __kernel_sigreturn does not have
any padding in front of it.

Closes: https://lore.kernel.org/lkml/f3412cc3e8f66d1853cc9d572c0f2fab076872b1.camel@xry111.site
Fixes: 884961618e ("x86/entry/vdso32: Remove open-coded DWARF in sigreturn.S")
Reported-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124050
Link: https://patch.msgid.link/20260227010308.310342-1-hpa@zytor.com
2026-03-04 17:06:08 +01:00
Juri Lelli
d658686a13 sched/deadline: Fix missing ENQUEUE_REPLENISH during PI de-boosting
Running stress-ng --schedpolicy 0 on an RT kernel on a big machine
might lead to the following WARNINGs (edited).

 sched: DL de-boosted task PID 22725: REPLENISH flag missing

 WARNING: CPU: 93 PID: 0 at kernel/sched/deadline.c:239 dequeue_task_dl+0x15c/0x1f8
 ... (running_bw underflow)
 Call trace:
  dequeue_task_dl+0x15c/0x1f8 (P)
  dequeue_task+0x80/0x168
  deactivate_task+0x24/0x50
  push_dl_task+0x264/0x2e0
  dl_task_timer+0x1b0/0x228
  __hrtimer_run_queues+0x188/0x378
  hrtimer_interrupt+0xfc/0x260
  ...

The problem is that when a SCHED_DEADLINE task (lock holder) is
changed to a lower priority class via sched_setscheduler(), it may
fail to properly inherit the parameters of potential DEADLINE donors
if it didn't already inherit them in the past (shorter deadline than
donor's at that time). This might lead to bandwidth accounting
corruption, as enqueue_task_dl() won't recognize the lock holder as
boosted.

The scenario occurs when:
1. A DEADLINE task (donor) blocks on a PI mutex held by another
   DEADLINE task (holder), but the holder doesn't inherit parameters
   (e.g., it already has a shorter deadline)
2. sched_setscheduler() changes the holder from DEADLINE to a lower
   class while still holding the mutex
3. The holder should now inherit DEADLINE parameters from the donor
   and be enqueued with ENQUEUE_REPLENISH, but this doesn't happen

Fix the issue by introducing __setscheduler_dl_pi(), which detects when
a DEADLINE (proper or boosted) task gets setscheduled to a lower
priority class. In case, the function makes the task inherit DEADLINE
parameters of the donoer (pi_se) and sets ENQUEUE_REPLENISH flag to
ensure proper bandwidth accounting during the next enqueue operation.

Fixes: 2279f540ea ("sched/deadline: Fix priority inheritance with multiple scheduling classes")
Reported-by: Bruno Goncalves <bgoncalv@redhat.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260302-upstream-fix-deadline-piboost-b4-v3-1-6ba32184a9e0@redhat.com
2026-03-04 17:06:08 +01:00
Linus Torvalds
40d3f62247 Merge tag 'pinctrl-v7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
Pull pin control fixes from Linus Walleij:
 "All of these are driver fixes except a memory leak in the
  pinconf_generic_parse_dt_config() helper which is the most
  important fix.

   - Rename and fix up the Intel Equilibrium immutable interrupt chip

   - Handle the Qualcomm QCS615 dual edge GPIO IRQ by adding the right
     flag

   - Fix a memory leak in the widely used pinconf_generic_parse_dt_config()
     and a more local leak in aml_dt_node_to_map_pinmux()

   - Fix double put in the Cirrus cs42l43_pin_probe()

   - Staticize amdisp_pinctrl_ops, Qualcomm SDM660 groups and functions

   - Unexport CIX sky1_pinctrl_pm_ops

   - Fix configuration of deferred pin in the Rockchip driver

   - Implement .get_direction() in the Sunxi driver squelching a dmesg
     warning message

   - Fix a readout of the last bank of registers in the Cypress CY8C95x0
     driver"

* tag 'pinctrl-v7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl:
  pinctrl: cy8c95x0: Don't miss reading the last bank registers
  pinctrl: sunxi: Implement gpiochip::get_direction()
  pinctrl: rockchip: Fix configuring a deferred pin
  pinctrl: cirrus: cs42l43: Fix double-put in cs42l43_pin_probe()
  pinctrl: meson: amlogic-a4: Fix device node reference leak in aml_dt_node_to_map_pinmux()
  pinctrl: qcom: sdm660-lpass-lpi: Make groups and functions variables static
  pinctrl: cix: sky1: Unexport sky1_pinctrl_pm_ops
  pinctrl: amdisp: Make amdisp_pinctrl_ops variable static
  pinctrl: pinconf-generic: Fix memory leak in pinconf_generic_parse_dt_config()
  pinctrl: qcom: qcs615: Add missing dual edge GPIO IRQ errata flag
  pinctrl: equilibrium: fix warning trace on load
  pinctrl: equilibrium: rename irq_chip function callbacks
2026-03-04 08:03:43 -08:00
Arnaldo Carvalho de Melo
1b3f004bac tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources
To pick the changes in:

  6517dfbcc9 ("KVM: x86: Add x2APIC "features" to control EOI broadcast suppression")
  20c3c4108d ("KVM: SEV: Add KVM_SEV_SNP_ENABLE_REQ_CERTS command")

This silences these perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h

Please see tools/include/uapi/README for further details.

Cc: Sean Christopherson <seanjc@google.com>
Cc: Khushit Shah <khushit.shah@nutanix.com>
Cc: Michael Roth <michael.roth@amd.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 12:54:08 -03:00
Arnaldo Carvalho de Melo
4ebe2b8cda tools headers x86 cpufeatures: Sync with the kernel sources
To pick the changes from:

  f24ef0093d ("KVM: x86: Advertise MOVRS CPUID to userspace")
  f49ecf5e11 ("x86/cpufeature: Replace X86_FEATURE_SYSENTER32 with X86_FEATURE_SYSFAST32")
  db5e824964 ("KVM: SVM: Virtualize and advertise support for ERAPS")

This causes these perf files to be rebuilt and brings some X86_FEATURE
that may be used by:

      CC       /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o
      CC       /tmp/build/perf/bench/mem-memset-x86-64-asm.o

And addresses this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h

Please see tools/include/uapi/README for further details.

Cc: Amit Shah <amit.shah@amd.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Zhao Liu <zhao1.liu@intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 12:50:31 -03:00
Tony Luck
59674fc9d0 x86/resctrl: Fix SNC detection
Now that the x86 topology code has a sensible nodes-per-package
measure, that does not depend on the online status of CPUs, use this
to divinate the SNC mode.

Note that when Cluster on Die (CoD) is configured on older systems this
will also show multiple NUMA nodes per package. Intel Resource Director
Technology is incomaptible with CoD. Print a warning and do not use the
fixup MSR_RMID_SNC_CONFIG.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Link: https://patch.msgid.link/aaCxbbgjL6OZ6VMd@agluck-desk3
Link: https://patch.msgid.link/20260303110100.367976706@infradead.org
2026-03-04 16:35:09 +01:00
Peter Zijlstra
528d89a470 x86/topo: Fix SNC topology mess
Per 4d6dd05d07 ("sched/topology: Fix sched domain build error for GNR, CWF in
SNC-3 mode"), the original crazy SNC-3 SLIT table was:

node distances:
node     0    1    2    3    4    5
    0:   10   15   17   21   28   26
    1:   15   10   15   23   26   23
    2:   17   15   10   26   23   21
    3:   21   28   26   10   15   17
    4:   23   26   23   15   10   15
    5:   26   23   21   17   15   10

And per:

  https://lore.kernel.org/lkml/20250825075642.GQ3245006@noisy.programming.kicks-ass.net/

The suggestion was to average the off-trace clusters to restore sanity.

However, 4d6dd05d07 implements this under various assumptions:

 - anything GNR/CWF with numa_in_package;
 - there will never be more than 2 packages;
 - the off-trace cluster will have distance >20

And then HPE shows up with a machine that matches the
Vendor-Family-Model checks but looks like this:

Here's an 8 socket (2 chassis) HPE system with SNC enabled:

node   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
  0:  10  12  16  16  16  16  18  18  40  40  40  40  40  40  40  40
  1:  12  10  16  16  16  16  18  18  40  40  40  40  40  40  40  40
  2:  16  16  10  12  18  18  16  16  40  40  40  40  40  40  40  40
  3:  16  16  12  10  18  18  16  16  40  40  40  40  40  40  40  40
  4:  16  16  18  18  10  12  16  16  40  40  40  40  40  40  40  40
  5:  16  16  18  18  12  10  16  16  40  40  40  40  40  40  40  40
  6:  18  18  16  16  16  16  10  12  40  40  40  40  40  40  40  40
  7:  18  18  16  16  16  16  12  10  40  40  40  40  40  40  40  40
  8:  40  40  40  40  40  40  40  40  10  12  16  16  16  16  18  18
  9:  40  40  40  40  40  40  40  40  12  10  16  16  16  16  18  18
 10:  40  40  40  40  40  40  40  40  16  16  10  12  18  18  16  16
 11:  40  40  40  40  40  40  40  40  16  16  12  10  18  18  16  16
 12:  40  40  40  40  40  40  40  40  16  16  18  18  10  12  16  16
 13:  40  40  40  40  40  40  40  40  16  16  18  18  12  10  16  16
 14:  40  40  40  40  40  40  40  40  18  18  16  16  16  16  10  12
 15:  40  40  40  40  40  40  40  40  18  18  16  16  16  16  12  10

 10 = Same chassis and socket
 12 = Same chassis and socket (SNC)
 16 = Same chassis and adjacent socket
 18 = Same chassis and non-adjacent socket
 40 = Different chassis

Turns out, the 'max 2 packages' thing is only relevant to the SNC-3 parts, the
smaller parts do 8 sockets (like usual). The above SLIT table is sane, but
violates the previous assumptions and trips a WARN.

Now that the topology code has a sensible measure of nodes-per-package, we can
use that to divinate the SNC mode at hand, and only fix up SNC-3 topologies.

There is a 'healthy' amount of paranoia code validating the assumptions on the
SLIT table, a simple pr_err(FW_BUG) print on failure and a fallback to using
the regular table. Lets see how long this lasts :-)

Fixes: 4d6dd05d07 ("sched/topology: Fix sched domain build error for GNR, CWF in SNC-3 mode")
Reported-by: Kyle Meyer <kyle.meyer@hpe.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: Kyle Meyer <kyle.meyer@hpe.com>
Link: https://patch.msgid.link/20260303110100.238361290@infradead.org
2026-03-04 16:35:09 +01:00
Peter Zijlstra
717b64d58c x86/topo: Replace x86_has_numa_in_package
.. with the brand spanking new topology_num_nodes_per_package().

Having the topology setup determine this value during MADT/SRAT parsing before
SMP bringup avoids having to detect this situation when building the SMP
topology masks.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: Kyle Meyer <kyle.meyer@hpe.com>
Link: https://patch.msgid.link/20260303110100.123701837@infradead.org
2026-03-04 16:35:08 +01:00
Peter Zijlstra
ae6730ff42 x86/topo: Add topology_num_nodes_per_package()
Use the MADT and SRAT table data to compute __num_nodes_per_package.

Specifically, SRAT has already been parsed in x86_numa_init(), which is called
before acpi_boot_init() which parses MADT. So both are available in
topology_init_possible_cpus().

This number is useful to divinate the various Intel CoD/SNC and AMD NPS modes,
since the platforms are failing to provide this otherwise.

Doing it this way is independent of the number of online CPUs and
other such shenanigans.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: Kyle Meyer <kyle.meyer@hpe.com>
Link: https://patch.msgid.link/20260303110100.004091624@infradead.org
2026-03-04 16:35:08 +01:00
Peter Zijlstra
48084cc153 x86/numa: Store extra copy of numa_nodes_parsed
The topology setup code needs to know the total number of physical
nodes enumerated in SRAT; however NUMA_EMU can cause the existing
numa_nodes_parsed bitmap to be fictitious. Therefore, keep a copy of
the bitmap specifically to retain the physical node count.

Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: Kyle Meyer <kyle.meyer@hpe.com>
Link: https://patch.msgid.link/20260303110059.889884023@infradead.org
2026-03-04 16:35:08 +01:00
Arnaldo Carvalho de Melo
9cd284105b tools headers UAPI: Sync linux/kvm.h with the kernel sources
To pick the changes in:

  f7ab71f178 ("KVM: s390: Add explicit padding to struct kvm_s390_keyop")
  0ee4ddc164 ("KVM: s390: Storage key manipulation IOCTL")
  fa9893fadb ("KVM: Introduce KVM_EXIT_SNP_REQ_CERTS for SNP certificate-fetching")
  f174a9ffcd ("KVM: arm64: Add exit to userspace on {LD,ST}64B* outside of memslots")

That just rebuilds perf, as these patches add just one new KVM ioctl,
but for S390, that is not being considered by tools/perf/trace/beauty/kvm_ioctl.sh
so far.

This addresses this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h

Please see tools/include/uapi/README for further details.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Michael Roth <michael.roth@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 12:32:19 -03:00
Jens Axboe
d90c470b0e Merge tag 'nvme-7.0-2026-03-04' of git://git.infradead.org/nvme into block-7.0
Pull NVMe fixes from Keith:

"- Improve quirk visibility and configurability (Maurizio)
 - Fix runtime user modification to queue setup (Keith)
 - Fix multipath leak on try_module_get failure (Keith)
 - Ignore ambiguous spec definitions for better atomics support (John)
 - Fix admin queue leak on controller reset (Ming)
 - Fix large allocation in persistent reservation read keys (Sungwoo Kim)
 - Fix fcloop callback handling (Justin)
 - Securely free DHCHAP secrets (Daniel)
 - Various cleanups and typo fixes (John, Wilfred)"

* tag 'nvme-7.0-2026-03-04' of git://git.infradead.org/nvme:
  nvme: fix memory allocation in nvme_pr_read_keys()
  nvme-multipath: fix leak on try_module_get failure
  nvmet-fcloop: Check remoteport port_state before calling done callback
  nvme-pci: do not try to add queue maps at runtime
  nvme-pci: cap queue creation to used queues
  nvme-pci: ensure we're polling a polled queue
  nvme: fix memory leak in quirks_param_set()
  nvme: correct comment about nvme_ns_remove()
  nvme: stop setting namespace gendisk device driver data
  nvme: add support for dynamic quirk configuration via module parameter
  nvme: fix admin queue leak on controller reset
  nvme-fabrics: use kfree_sensitive() for DHCHAP secrets
  nvme: stop using AWUPF
  nvme: expose active quirks in sysfs
  nvme/host: fixup some typos
2026-03-04 08:15:17 -07:00
Arnaldo Carvalho de Melo
916a9f385d tools headers: Update the linux/gfp_types.h copy with the kernel sources
To pick up the changes in:

  f3ec502b67 ("mm/slab: mark alloc tags empty for sheaves allocated with __GFP_NO_OBJ_EXT")
  241b3a0963 ("mm: clarify GFP_ATOMIC/GFP_NOWAIT doc-comment")

That just adds some comments, so no changes in perf tooling, just silences
this build warning:

Warning: Kernel ABI header differences:
  diff -u tools/include/linux/gfp_types.h include/linux/gfp_types.h

Please see tools/include/uapi/README.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:55:14 -03:00
Sungwoo Kim
c332015376 nvme: fix memory allocation in nvme_pr_read_keys()
nvme_pr_read_keys() takes num_keys from userspace and uses it to
calculate the allocation size for rse via struct_size(). The upper
limit is PR_KEYS_MAX (64K).

A malicious or buggy userspace can pass a large num_keys value that
results in a 4MB allocation attempt at most, causing a warning in
the page allocator when the order exceeds MAX_PAGE_ORDER.

To fix this, use kvzalloc() instead of kzalloc().

This bug has the same reasoning and fix with the patch below:
https://lore.kernel.org/linux-block/20251212013510.3576091-1-kartikey406@gmail.com/

Warning log:
WARNING: mm/page_alloc.c:5216 at __alloc_frozen_pages_noprof+0x5aa/0x2300 mm/page_alloc.c:5216, CPU#1: syz-executor117/272
Modules linked in:
CPU: 1 UID: 0 PID: 272 Comm: syz-executor117 Not tainted 6.19.0 #1 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
RIP: 0010:__alloc_frozen_pages_noprof+0x5aa/0x2300 mm/page_alloc.c:5216
Code: ff 83 bd a8 fe ff ff 0a 0f 86 69 fb ff ff 0f b6 1d f9 f9 c4 04 80 fb 01 0f 87 3b 76 30 ff 83 e3 01 75 09 c6 05 e4 f9 c4 04 01 <0f> 0b 48 c7 85 70 fe ff ff 00 00 00 00 e9 8f fd ff ff 31 c0 e9 0d
RSP: 0018:ffffc90000fcf450 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 1ffff920001f9ea0
RDX: 0000000000000000 RSI: 000000000000000b RDI: 0000000000040dc0
RBP: ffffc90000fcf648 R08: ffff88800b6c3380 R09: 0000000000000001
R10: ffffc90000fcf840 R11: ffff88807ffad280 R12: 0000000000000000
R13: 0000000000040dc0 R14: 0000000000000001 R15: ffffc90000fcf620
FS:  0000555565db33c0(0000) GS:ffff8880be26c000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000002000000c CR3: 0000000003b72000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 alloc_pages_mpol+0x236/0x4d0 mm/mempolicy.c:2486
 alloc_frozen_pages_noprof+0x149/0x180 mm/mempolicy.c:2557
 ___kmalloc_large_node+0x10c/0x140 mm/slub.c:5598
 __kmalloc_large_node_noprof+0x25/0xc0 mm/slub.c:5629
 __do_kmalloc_node mm/slub.c:5645 [inline]
 __kmalloc_noprof+0x483/0x6f0 mm/slub.c:5669
 kmalloc_noprof include/linux/slab.h:961 [inline]
 kzalloc_noprof include/linux/slab.h:1094 [inline]
 nvme_pr_read_keys+0x8f/0x4c0 drivers/nvme/host/pr.c:245
 blkdev_pr_read_keys block/ioctl.c:456 [inline]
 blkdev_common_ioctl+0x1b71/0x29b0 block/ioctl.c:730
 blkdev_ioctl+0x299/0x700 block/ioctl.c:786
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:597 [inline]
 __se_sys_ioctl fs/ioctl.c:583 [inline]
 __x64_sys_ioctl+0x1bf/0x220 fs/ioctl.c:583
 x64_sys_call+0x1280/0x21b0 mnt/fuzznvme_1/fuzznvme/linux-build/v6.19/./arch/x86/include/generated/asm/syscalls_64.h:17
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0x71/0x330 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7fb893d3108d
Code: 28 c3 e8 46 1e 00 00 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffff61f2f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00007ffff61f3138 RCX: 00007fb893d3108d
RDX: 0000000020000040 RSI: 00000000c01070ce RDI: 0000000000000003
RBP: 0000000000000001 R08: 0000000000000000 R09: 00007ffff61f3138
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
R13: 00007ffff61f3128 R14: 00007fb893dae530 R15: 0000000000000001
 </TASK>

Fixes: 5fd96a4e15 (nvme: Add pr_ops read_keys support)
Acked-by: Chao Shi <cshi008@fiu.edu>
Acked-by: Weidong Zhu <weizhu@fiu.edu>
Acked-by: Dave Tian <daveti@purdue.edu>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Sungwoo Kim <iam@sung-woo.kim>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-03-04 06:53:41 -08:00
Arnaldo Carvalho de Melo
ecd5a2fd4c perf beauty: Update the linux/perf_event.h copy with the kernel sources
Update it as one comment got realigned, probably in a merge, so no
changes in perf tooling, just silences this build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:51:36 -03:00
Arnaldo Carvalho de Melo
3abbb7cae8 perf beauty: Update the arch/x86/include/asm/irq_vectors.h copy with the kernel sources
To pick up the change in:

  a1fab3e69d ("x86/irq: Fix comment on IRQ vector layout")

That just adds one comment, so no changes in perf tooling, just silences
this build warning:

  diff -u tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h arch/x86/include/asm/irq_vectors.h

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:49:24 -03:00
Arnaldo Carvalho de Melo
e367679f16 perf beauty: Sync UAPI linux/fs.h with kernel sources
To pick up changes from:

  0e6b7eae1f ("fs: add FS_XFLAG_VERITY for fs-verity files")

These are used to beautify fs syscall arguments, albeit the changes in
this update are not affecting those beautifiers.

This addresses these tools/perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h

Please see tools/include/uapi/README.

Cc: Andrey Albershteyn <aalbersh@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:46:01 -03:00
Arnaldo Carvalho de Melo
6036165ab1 perf beauty: Sync linux/mount.h copy with the kernel sources
To pick the changes from:

  9b8a0ba682 ("mount: add OPEN_TREE_NAMESPACE")
  0e5032237e ("statmount: accept fd as a parameter")

That doesn't change anything in tools this time as nothing that is
harvested by the beauty scripts got changed:

  $ ls -1 tools/perf/trace/beauty/*mount*sh
  tools/perf/trace/beauty/fsmount.sh
  tools/perf/trace/beauty/mount_flags.sh
  tools/perf/trace/beauty/move_mount_flags.sh
  $

This addresses this perf build warning.

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h

Please see tools/include/uapi/README for further details.

Cc: Christian Brauner <brauner@kernel.org>
Cc: Bhavik Sachdev <b.sachdev1904@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:41:12 -03:00
Dmitrii Dolgov
30f998c992 tools build: Fix rust cross compilation
Currently no target is specified to compile rust code when needed, which
breaks cross compilation. E.g. for arm64:

      LD      /tmp/build/tests/workloads/perf-test-in.o
    aarch64-linux-gnu-ld: /tmp/build/tests/workloads/code_with_type.a(code_with_type.code_with_type.d12f4324cb53c560-cgu.0.rcgu.o): Relocations in generic ELF (EM: 62)
    aarch64-linux-gnu-ld: /tmp/build/tests/workloads/code_with_type.a(code_with_type.code_with_type.d12f4324cb53c560-cgu.0.rcgu.o): Relocations in generic ELF (EM: 62)
    [...repeated...]
    aarch64-linux-gnu-ld: /tmp/build/tests/workloads/code_with_type.a(code_with_type.code_with_type.d12f4324cb53c560-cgu.0.rcgu.o): Relocations in generic ELF (EM: 62)
    aarch64-linux-gnu-ld: /tmp/build/tests/workloads/code_with_type.a(code_with_type.code_with_type.d12f4324cb53c560-cgu.0.rcgu.o): Relocations in generic ELF (EM: 62)
    aarch64-linux-gnu-ld: /tmp/build/tests/workloads/code_with_type.a: error adding symbols: file in wrong format
    make[5]: *** [/perf/tools/build/Makefile.build:162: /tmp/build/tests/workloads/perf-test-in.o] Error 1
    make[4]: *** [/perf/tools/build/Makefile.build:156: workloads] Error 2
    make[3]: *** [/perf/tools/build/Makefile.build:156: tests] Error 2
    make[2]: *** [Makefile.perf:785: /tmp/build/perf-test-in.o] Error 2
    make[2]: *** Waiting for unfinished jobs....
    make[1]: *** [Makefile.perf:289: sub-make] Error 2
    make: *** [Makefile:76: all] Error 2

Detect required target and pass it via rust_flags to the compiler.

Note that CROSS_COMPILE might be different from what rust compiler
expects, since it may omit the target vendor value, e.g.
"aarch64-linux-gnu" instead of "aarch64-unknown-linux-gnu".

Thus explicitly map supported CROSS_COMPILE values to corresponding Rust
versions, as suggested by Miguel Ojeda.

Tested using arm64 cross-compilation example from [1].

Fixes: 2e05bb52a1 ("perf test workload: Add code_with_type test workload")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com>
Cc: Levi Zim <i@kxxt.dev>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Schier <nsc@kernel.org>
Link: https://perfwiki.github.io/main/arm64-cross-compilation-dockerfile/ [1]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:37:30 -03:00
Catalin Marinas
c25c4aa3f7 arm64: mm: Add PTE_DIRTY back to PAGE_KERNEL* to fix kexec/hibernation
Commit 143937ca51 ("arm64, mm: avoid always making PTE dirty in
pte_mkwrite()") changed pte_mkwrite_novma() to only clear PTE_RDONLY
when PTE_DIRTY is set. This was to allow writable-clean PTEs for swap
pages that haven't actually been written.

However, this broke kexec and hibernation for some platforms. Both go
through trans_pgd_create_copy() -> _copy_pte(), which calls
pte_mkwrite_novma() to make the temporary linear-map copy fully
writable. With the updated pte_mkwrite_novma(), read-only kernel pages
(without PTE_DIRTY) remain read-only in the temporary mapping.
While such behaviour is fine for user pages where hardware DBM or
trapping will make them writeable, subsequent in-kernel writes by the
kexec relocation code will fault.

Add PTE_DIRTY back to all _PAGE_KERNEL* protection definitions. This was
the case prior to 5.4, commit aa57157be6 ("arm64: Ensure
VM_WRITE|VM_SHARED ptes are clean by default"). With the kernel
linear-map PTEs always having PTE_DIRTY set, pte_mkwrite_novma()
correctly clears PTE_RDONLY.

Fixes: 143937ca51 ("arm64, mm: avoid always making PTE dirty in pte_mkwrite()")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: stable@vger.kernel.org
Reported-by: Jianpeng Chang <jianpeng.chang.cn@windriver.com>
Link: https://lore.kernel.org/r/20251204062722.3367201-1-jianpeng.chang.cn@windriver.com
Cc: Will Deacon <will@kernel.org>
Cc: Huang, Ying <ying.huang@linux.alibaba.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Huang Ying <ying.huang@linux.alibaba.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-04 14:33:08 +00:00
Juergen Gross
e2dcf90655 xen/xenbus: better handle backend crash
When the backend domain crashes, coordinated device cleanup is not
possible (as it involves waiting for the backend state change). In that
case, toolstack forcefully removes frontend xenstore entries.
xenbus_dev_changed() handles this case, and triggers device cleanup.
It's possible that toolstack manages to connect new device in that
place, before xenbus_dev_changed() notices the old one is missing. If
that happens, new one won't be probed and will forever remain in
XenbusStateInitialising.

Fix this by checking the frontend's state in Xenstore. In case it has
been reset to XenbusStateInitialising by Xen tools, consider this
being the result of an unplug+plug operation.

It's important that cleanup on such unplug doesn't modify Xenstore
entries (especially the "state" key) as it belong to the new device
to be probed - changing it would derail establishing connection to the
new backend (most likely, closing the device before it was even
connected). Handle this case by setting new xenbus_device->vanished
flag to true, and check it before changing state entry.

And even if xenbus_dev_changed() correctly detects the device was
forcefully removed, the cleanup handling is still racy. Since this whole
handling doesn't happened in a single Xenstore transaction, it's possible
that toolstack might put a new device there already. Avoid re-creating
the state key (which in the case of loosing the race would actually
close newly attached device).

The problem does not apply to frontend domain crash, as this case
involves coordinated cleanup.

Problem originally reported at
https://lore.kernel.org/xen-devel/aOZvivyZ9YhVWDLN@mail-itl/T/#t,
including reproduction steps.

Based-on-patch-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260218095205.453657-3-jgross@suse.com>
2026-03-04 15:31:40 +01:00
Juergen Gross
82169dace4 xenbus: add xenbus_device parameter to xenbus_read_driver_state()
In order to prepare checking the xenbus device status in
xenbus_read_driver_state(), add the pointer to struct xenbus_device
as a parameter.

Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: "Martin K. Petersen" <martin.petersen@oracle.com> # SCSI
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# drivers/pci/xen-pcifront.c
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260218095205.453657-2-jgross@suse.com>
2026-03-04 15:31:40 +01:00
Catalin Marinas
212dd84776 arm64: Silence sparse warnings caused by the type casting in (cmp)xchg
The arm64 xchg/cmpxchg() wrappers cast the arguments to (unsigned long)
prior to invoking the static inline functions implementing the
operation. Some restrictive type annotations (e.g. __bitwise) lead to
sparse warnings like below:

sparse warnings: (new ones prefixed by >>)
   fs/crypto/bio.c:67:17: sparse: sparse: cast from restricted blk_status_t
>> fs/crypto/bio.c:67:17: sparse: sparse: cast to restricted blk_status_t

Force the casting in the arm64 xchg/cmpxchg() wrappers to silence
sparse.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602230947.uNRsPyBn-lkp@intel.com/
Link: https://lore.kernel.org/r/202602230947.uNRsPyBn-lkp@intel.com/
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Will Deacon <will@kernel.org>
2026-03-04 14:31:27 +00:00
Markus Mayer
b6712d91f8 perf build: Prevent "argument list too long" error
Due to a recent change, building perf may result in a build error when
it is trying to "prune orphans".

The file list passed to "rm" may exceed what the shell can handle.

The build will then abort with an error like this:

    TEST    [...]/arm64/build/linux-custom/tools/perf/pmu-events/metric_test.log
  make[5]: /bin/sh: Argument list too long
  make[5]: *** [pmu-events/Build:217: prune_orphans] Error 127
  make[5]: *** Waiting for unfinished jobs....
  make[4]: *** [Makefile.perf:773: [...]/tools/perf/pmu-events/pmu-events-in.o] Error 2
  make[4]: *** Waiting for unfinished jobs....
  make[3]: *** [Makefile.perf:289: sub-make] Error 2

Processing the arguments via "xargs", instead of passing the list of
files directly to "rm" via the shell, prevents this issue.

Fixes: 36a1b0061a ("perf build: Reduce pmu-events related copying and mkdirs")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Markus Mayer <mmayer@broadcom.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:22:31 -03:00
Changqing Li
f079ff3732 tools build: Make in-target rule robust against too long argument error
The command length of in-target scales with the depth of the directory
times the number of objects in the Makefile.

When there are many objects, and O=[absolute_path] is set, and the
absolute_path is relatively long.

It is possible that this line "$(call if_changed,$(host)ld_multi)" will
report error: "make[4]: /bin/sh: Argument list too long"

For example, build perf tools with O=/long/output/path

Like built-in.a and *.mod rules in scripts/Makefile.build, add
$(objpredix)/ by the shell command instead of by Make's builtin
function.

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Changqing Li <changqing.li@windriver.com>
Cc: Charlie Jenkins <charlie@rivosinc.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:21:48 -03:00
Arnaldo Carvalho de Melo
cfdf6456c0 tools headers: Sync uapi/linux/prctl.h with the kernel source
To pick up the changes in these csets:

  5ca243f6e3 ("prctl: add arch-agnostic prctl()s for indirect branch tracking")
  28621ec2d4 ("rseq: Add prctl() to enable time slice extensions")

That don't introduced these new prctls:

  $ tools/perf/trace/beauty/prctl_option.sh > before.txt
  $ cp include/uapi/linux/prctl.h tools/perf/trace/beauty/include/uapi/linux/prctl.h
  $ tools/perf/trace/beauty/prctl_option.sh > after.txt
  $ diff -u before.txt after.txt
  --- before.txt	2026-02-27 09:07:16.435611457 -0300
  +++ after.txt	2026-02-27 09:07:28.189816531 -0300
  @@ -73,6 +73,10 @@
   	[76] = "LOCK_SHADOW_STACK_STATUS",
   	[77] = "TIMER_CREATE_RESTORE_IDS",
   	[78] = "FUTEX_HASH",
  +	[79] = "RSEQ_SLICE_EXTENSION",
  +	[80] = "GET_INDIR_BR_LP_STATUS",
  +	[81] = "SET_INDIR_BR_LP_STATUS",
  +	[82] = "LOCK_INDIR_BR_LP_STATUS",
   };
   static const char *prctl_set_mm_options[] = {
   	[1] = "START_CODE",
  $

That now will be used to decode the syscall option and also to compose
filters, for instance:

  [root@five ~]# perf trace -e syscalls:sys_enter_prctl --filter option==SET_NAME
       0.000 Isolated Servi/3474327 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23f13b7aee)
       0.032 DOM Worker/3474327 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23deb25670)
       7.920 :3474328/3474328 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24fbb10)
       7.935 StreamT~s #374/3474328 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24fb970)
       8.400 Isolated Servi/3474329 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24bab10)
       8.418 StreamT~s #374/3474329 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24ba970)
  ^C[root@five ~]#

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h

Please see tools/include/uapi/README for further details.

Cc: Deepak Gupta <debug@rivosinc.com>
Cc: Paul Walmsley <pjw@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2026-03-04 11:14:10 -03:00
Yang Xiuwei
8da8df4312 block: use __bio_add_page in bio_copy_kern
Since the bio is allocated with the exact number of pages needed via
blk_rq_map_bio_alloc(), and the loop iterates exactly that many times,
bio_add_page() cannot fail due to insufficient space.  Switch to
__bio_add_page() and remove the dead error handling code.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Yang Xiuwei <yangxiuwei@kylinos.cn>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-04 06:59:26 -07:00
Varun Gupta
0cfe9c4838 drm/xe: Fix memory leak in xe_vm_madvise_ioctl
When check_bo_args_are_sane() validation fails, jump to the new
free_vmas cleanup label to properly free the allocated resources.
This ensures proper cleanup in this error path.

Fixes: 293032eec4 ("drm/xe/bo: Update atomic_access attribute on madvise")
Cc: stable@vger.kernel.org # v6.18+
Reviewed-by: Shuicheng Lin <shuicheng.lin@intel.com>
Signed-off-by: Varun Gupta <varun.gupta@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260223175145.1532801-1-varun.gupta@intel.com
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
(cherry picked from commit 29bd06faf727a4b76663e4be0f7d770e2d2a7965)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-04 08:54:19 -05:00
Shuicheng Lin
3091723785 drm/xe/reg_sr: Fix leak on xa_store failure
Free the newly allocated entry when xa_store() fails to avoid a memory
leak on the error path.

v2: use goto fail_free. (Bala)

Fixes: e5283bd4df ("drm/xe/reg_sr: Remove register pool")
Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patch.msgid.link/20260204172810.1486719-2-shuicheng.lin@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 6bc6fec71ac45f52db609af4e62bdb96b9f5fadb)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-04 08:54:19 -05:00
Matt Roper
89865e6dc8 drm/xe/xe2_hpg: Correct implementation of Wa_16025250150
Wa_16025250150 asks us to set five register fields of the register to
0x1 each.  However we were just OR'ing this into the existing register
value (which has a default of 0x4 for each nibble-sized field) resulting
in final field values of 0x5 instead of the desired 0x1.  Correct the
RTP programming (use FIELD_SET instead of SET) to ensure each field is
assigned to exactly the value we want.

Cc: Aradhya Bhatia <aradhya.bhatia@intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: stable@vger.kernel.org # v6.16+
Fixes: 7654d51f1f ("drm/xe/xe2hpg: Add Wa_16025250150")
Reviewed-by: Ngai-Mint Kwan <ngai-mint.kwan@linux.intel.com>
Link: https://patch.msgid.link/20260227164341.3600098-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit d139209ef88e48af1f6731cd45440421c757b6b5)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-04 08:54:18 -05:00
Zhanjun Dong
b3368ecca9 drm/xe/gsc: Fix GSC proxy cleanup on early initialization failure
xe_gsc_proxy_remove undoes what is done in both xe_gsc_proxy_init and
xe_gsc_proxy_start; however, if we fail between those 2 calls, it is
possible that the HW forcewake access hasn't been initialized yet and so
we hit errors when the cleanup code tries to write GSC register. To
avoid that, split the cleanup in 2 functions so that the HW cleanup is
only called if the HW setup was completed successfully.

Since the HW cleanup (interrupt disabling) is now removed from
xe_gsc_proxy_remove, the cleanup on error paths in xe_gsc_proxy_start
must be updated to disable interrupts before returning.

Fixes: ff6cd29b69 ("drm/xe: Cleanup unwind of gt initialization")
Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patch.msgid.link/20260220225308.101469-1-zhanjun.dong@intel.com
(cherry picked from commit 2b37c401b265c07b46408b5cb36a4b757c9b5060)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-04 08:54:18 -05:00
Thomas Hellström
a99d34e5ec Revert "drm/pagemap: Disable device-to-device migration"
With commit
a69d1ab971a6 ("mm: Fix a hmm_range_fault() livelock / starvation problem")
device-to-device migration is not functional again and the
disabling can be reverted.

Add the above commit as a Fixes: tag in order for the revert to not
take place unless that commit is present.

This reverts commit 10dd1eaa80.

Cc: Matthew Brost <matthew.brost@intel.com>
Fixes: b570f37a2c ("mm: Fix a hmm_range_fault() livelock / starvation problem")
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260211104159.114947-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit 1a3c0049b3f56278c9caf2784c53f6ab435fd12c)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo updated Fixes tag]
2026-03-04 08:53:37 -05:00
Darrick J. Wong
d320f160aa iomap: reject delalloc mappings during writeback
Filesystems should never provide a delayed allocation mapping to
writeback; they're supposed to allocate the space before replying.
This can lead to weird IO errors and crashes in the block layer if the
filesystem is being malicious, or if it hadn't set iomap->dev because
it's a delalloc mapping.

Fix this by failing writeback on delalloc mappings.  Currently no
filesystems actually misbehave in this manner, but we ought to be
stricter about things like that.

Cc: stable@vger.kernel.org # v5.5
Fixes: 598ecfbaa7 ("iomap: lift the xfs writeback code to iomap")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://patch.msgid.link/20260302173002.GL13829@frogsfrogsfrogs
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-04 14:31:56 +01:00
Pavel Begunkov
531bb98a03 io_uring/zcrx: use READ_ONCE with user shared RQEs
Refill queue entries are shared with the user space, use READ_ONCE when
reading them.

Fixes: 34a3e60821 ("io_uring/zcrx: implement zerocopy receive pp memory provider");
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-04 06:30:39 -07:00
Jouni Högander
a99cac460d drm/i915/psr: Fix for Panel Replay X granularity DPCD register handling
DP specification is saying value 0xff 0xff in PANEL REPLAY SELECTIVE UPDATE
X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means full-line
granularity. Take this into account when handling Panel Replay X
granularity informed by the panel.

Fixes: 1cc8546474 ("drm/i915/psr: Use SU granularity information available in intel_connector")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7284
Tested-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Link: https://patch.msgid.link/20260225074221.1744330-2-jouni.hogander@intel.com
(cherry picked from commit f5c8f824a495e849492f09a43bd965a8f4d86cb2)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-04 15:26:08 +02:00
Jouni Högander
ace7dcc818 drm/dp: Add definition for Panel Replay full-line granularity
DP specification is saying value 0xff 0xff in PANEL REPLAY SELECTIVE UPDATE
X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means full-line
granularity. Add definition for this.

Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/20260225074221.1744330-1-jouni.hogander@intel.com
(cherry picked from commit b93311673263bb98a200ab1cb6304f969bdada5c)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-03-04 15:26:08 +02:00
Christian Brauner
d3ccc4d86d Merge patch "iomap: don't mark folio uptodate if read IO has bytes pending"
Joanne Koong <joannelkoong@gmail.com> says:

This is a fix for this scenario:

->read_folio() gets called on a folio size that is 16k while the file is 4k:
  a) ifs->read_bytes_pending gets initialized to 16k
  b) ->read_folio_range() is called for the 4k read
  c) the 4k read succeeds, ifs->read_bytes_pending is now 12k and the
0 to 4k range is marked uptodate
  d) the post-eof blocks are zeroed and marked uptodate in the call to
iomap_set_range_uptodate()
  e) iomap_set_range_uptodate() sees all the ranges are marked
uptodate and it marks the folio uptodate
  f) iomap_read_end() gets called to subtract the 12k from
ifs->read_bytes_pending. it too sees all the ranges are marked
uptodate and marks the folio uptodate using XOR
  g) the XOR call clears the uptodate flag on the folio

The same situation can occur if the last range read for the folio is done as
an inline read and all the previous ranges have already completed by the time
the inline read completes.

For more context, the full discussion can be found in [1]. There was a
discussion about alternative approaches in that thread, but they had more
complications.

There is another discussion in v1 [2] about consolidating the read paths.
Until that is resolved, this patch fixes the issue.

[1] https://lore.kernel.org/linux-fsdevel/CAJnrk1Z9za5w4FoJqTGx50zR2haHHaoot1KJViQyEHJQq4=34w@mail.gmail.com/#t
[2] https://lore.kernel.org/linux-fsdevel/20260219003911.344478-1-joannelkoong@gmail.com/T/#u

* patches from https://patch.msgid.link/20260303233420.874231-1-joannelkoong@gmail.com:
  iomap: don't mark folio uptodate if read IO has bytes pending

Link: https://patch.msgid.link/20260303233420.874231-1-joannelkoong@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-04 14:19:05 +01:00
Joanne Koong
debc1a492b iomap: don't mark folio uptodate if read IO has bytes pending
If a folio has ifs metadata attached to it and the folio is partially
read in through an async IO helper with the rest of it then being read
in through post-EOF zeroing or as inline data, and the helper
successfully finishes the read first, then post-EOF zeroing / reading
inline will mark the folio as uptodate in iomap_set_range_uptodate().

This is a problem because when the read completion path later calls
iomap_read_end(), it will call folio_end_read(), which sets the uptodate
bit using XOR semantics. Calling folio_end_read() on a folio that was
already marked uptodate clears the uptodate bit.

Fix this by not marking the folio as uptodate if the read IO has bytes
pending. The folio uptodate state will be set in the read completion
path through iomap_end_read() -> folio_end_read().

Reported-by: Wei Gao <wegao@suse.com>
Suggested-by: Sasha Levin <sashal@kernel.org>
Tested-by: Wei Gao <wegao@suse.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: stable@vger.kernel.org # v6.19
Link: https://lore.kernel.org/linux-fsdevel/aYbmy8JdgXwsGaPP@autotest-wegao.qe.prg2.suse.org/
Fixes: b2f35ac414 ("iomap: add caller-provided callbacks for read and readahead")
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://patch.msgid.link/20260303233420.874231-2-joannelkoong@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-04 14:18:54 +01:00
Gerd Rausch
6932256d3a time/jiffies: Fix sysctl file error on configurations where USER_HZ < HZ
Commit 2dc164a48e ("sysctl: Create converter functions with two new
macros") incorrectly returns error to user space when jiffies sysctl
converter is used. The old overflow check got replaced with an
unconditional one:
     +    if (USER_HZ < HZ)
     +        return -EINVAL;
which will always be true on configurations with "USER_HZ < HZ".

Remove the check; it is no longer needed as clock_t_to_jiffies() returns
ULONG_MAX for the overflow case and proc_int_u2k_conv_uop() checks for
"> INT_MAX" after conversion

Fixes: 2dc164a48e ("sysctl: Create converter functions with two new macros")
Reported-by: Colm Harrington <colm.harrington@oracle.com>
Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Signed-off-by: Joel Granados <joel.granados@kernel.org>
2026-03-04 13:48:31 +01:00
Charles Haithcock
cfc69c2e6c i2c: i801: Revert "i2c: i801: replace acpi_lock with I2C bus lock"
This reverts commit f707d6b9e7.

Under rare circumstances, multiple udev threads can collect i801 device
info on boot and walk i801_acpi_io_handler somewhat concurrently. The
first will note the area is reserved by acpi to prevent further touches.
This ultimately causes the area to be deregistered. The second will
enter i801_acpi_io_handler after the area is unregistered but before a
check can be made that the area is unregistered. i2c_lock_bus relies on
the now unregistered area containing lock_ops to lock the bus. The end
result is a kernel panic on boot with the following backtrace;

[   14.971872] ioatdma 0000:09:00.2: enabling device (0100 -> 0102)
[   14.971873] BUG: kernel NULL pointer dereference, address: 0000000000000000
[   14.971880] #PF: supervisor read access in kernel mode
[   14.971884] #PF: error_code(0x0000) - not-present page
[   14.971887] PGD 0 P4D 0
[   14.971894] Oops: 0000 [#1] PREEMPT SMP PTI
[   14.971900] CPU: 5 PID: 956 Comm: systemd-udevd Not tainted 5.14.0-611.5.1.el9_7.x86_64 #1
[   14.971905] Hardware name: XXXXXXXXXXXXXXXXXXXXXXX BIOS 1.20.10.SV91 01/30/2023
[   14.971908] RIP: 0010:i801_acpi_io_handler+0x2d/0xb0 [i2c_i801]
[   14.971929] Code: 00 00 49 8b 40 20 41 57 41 56 4d 8b b8 30 04 00 00 49 89 ce 41 55 41 89 d5 41 54 49 89 f4 be 02 00 00 00 55 4c 89 c5 53 89 fb <48> 8b 00 4c 89 c7 e8 18 61 54 e9 80 bd 80 04 00 00 00 75 09 4c 3b
[   14.971933] RSP: 0018:ffffbaa841483838 EFLAGS: 00010282
[   14.971938] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff9685e01ba568
[   14.971941] RDX: 0000000000000008 RSI: 0000000000000002 RDI: 0000000000000000
[   14.971944] RBP: ffff9685ca22f028 R08: ffff9685ca22f028 R09: ffff9685ca22f028
[   14.971948] R10: 000000000000000b R11: 0000000000000580 R12: 0000000000000580
[   14.971951] R13: 0000000000000008 R14: ffff9685e01ba568 R15: ffff9685c222f000
[   14.971954] FS:  00007f8287c0ab40(0000) GS:ffff96a47f940000(0000) knlGS:0000000000000000
[   14.971959] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   14.971963] CR2: 0000000000000000 CR3: 0000000168090001 CR4: 00000000003706f0
[   14.971966] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   14.971968] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   14.971972] Call Trace:
[   14.971977]  <TASK>
[   14.971981]  ? show_trace_log_lvl+0x1c4/0x2df
[   14.971994]  ? show_trace_log_lvl+0x1c4/0x2df
[   14.972003]  ? acpi_ev_address_space_dispatch+0x16e/0x3c0
[   14.972014]  ? __die_body.cold+0x8/0xd
[   14.972021]  ? page_fault_oops+0x132/0x170
[   14.972028]  ? exc_page_fault+0x61/0x150
[   14.972036]  ? asm_exc_page_fault+0x22/0x30
[   14.972045]  ? i801_acpi_io_handler+0x2d/0xb0 [i2c_i801]
[   14.972061]  acpi_ev_address_space_dispatch+0x16e/0x3c0
[   14.972069]  ? __pfx_i801_acpi_io_handler+0x10/0x10 [i2c_i801]
[   14.972085]  acpi_ex_access_region+0x5b/0xd0
[   14.972093]  acpi_ex_field_datum_io+0x73/0x2e0
[   14.972100]  acpi_ex_read_data_from_field+0x8e/0x230
[   14.972106]  acpi_ex_resolve_node_to_value+0x23d/0x310
[   14.972114]  acpi_ds_evaluate_name_path+0xad/0x110
[   14.972121]  acpi_ds_exec_end_op+0x321/0x510
[   14.972127]  acpi_ps_parse_loop+0xf7/0x680
[   14.972136]  acpi_ps_parse_aml+0x17a/0x3d0
[   14.972143]  acpi_ps_execute_method+0x137/0x270
[   14.972150]  acpi_ns_evaluate+0x1f4/0x2e0
[   14.972158]  acpi_evaluate_object+0x134/0x2f0
[   14.972164]  acpi_evaluate_integer+0x50/0xe0
[   14.972173]  ? vsnprintf+0x24b/0x570
[   14.972181]  acpi_ac_get_state.part.0+0x23/0x70
[   14.972189]  get_ac_property+0x4e/0x60
[   14.972195]  power_supply_show_property+0x90/0x1f0
[   14.972205]  add_prop_uevent+0x29/0x90
[   14.972213]  power_supply_uevent+0x109/0x1d0
[   14.972222]  dev_uevent+0x10e/0x2f0
[   14.972228]  uevent_show+0x8e/0x100
[   14.972236]  dev_attr_show+0x19/0x40
[   14.972246]  sysfs_kf_seq_show+0x9b/0x100
[   14.972253]  seq_read_iter+0x120/0x4b0
[   14.972262]  ? selinux_file_permission+0x106/0x150
[   14.972273]  vfs_read+0x24f/0x3a0
[   14.972284]  ksys_read+0x5f/0xe0
[   14.972291]  do_syscall_64+0x5f/0xe0
...

The kernel panic is mitigated by setting limiting the count of udev
children to 1. Revert to using the acpi_lock to continue protecting
marking the area as owned by firmware without relying on a lock in
a potentially unmapped region of memory.

Fixes: f707d6b9e7 ("i2c: i801: replace acpi_lock with I2C bus lock")
Signed-off-by: Charles Haithcock <chaithco@redhat.com>
[wsa: added Fixes-tag and updated comment stating the importance of the lock]
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
2026-03-04 12:44:14 +01:00
Zhang Heng
325291b20f ASoC: amd: yc: Add DMI quirk for ASUS EXPERTBOOK PM1503CDA
Add a DMI quirk for the ASUS EXPERTBOOK PM1503CDA fixing the
issue where the internal microphone was not detected.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221070
Cc: stable@vger.kernel.org
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260304063255.139331-1-zhangheng@kylinos.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-04 11:40:17 +00:00
Biju Das
fbb143e4a6 ASoC: dt-bindings: renesas,rz-ssi: Document RZ/G3L SoC
Document RZ/G3L SSIF-2 bindings. The RZ/G3L SSIF-2 IP is identical to one
found on the RZ/G2L SoC.

Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://patch.msgid.link/20260304072000.6787-1-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-04 11:40:16 +00:00
Jan Stancek
3d1973a0c7 x86/boot: Handle relative CONFIG_EFI_SBAT_FILE file paths
CONFIG_EFI_SBAT_FILE can be a relative path. When compiling using a different
output directory (O=) the build currently fails because it can't find the
filename set in CONFIG_EFI_SBAT_FILE:

  arch/x86/boot/compressed/sbat.S: Assembler messages:
  arch/x86/boot/compressed/sbat.S:6: Error: file not found: kernel.sbat

Add $(srctree) as include dir for sbat.o.

  [ bp: Massage commit message. ]

Fixes: 61b57d3539 ("x86/efi: Implement support for embedding SBAT data for x86")
Signed-off-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: <stable@kernel.org>
Link: https://patch.msgid.link/f4eda155b0cef91d4d316b4e92f5771cb0aa7187.1772047658.git.jstancek@redhat.com
2026-03-04 11:51:08 +01:00
Maarten Lankhorst
a58d487fb1 drm/ttm/tests: Fix build failure on PREEMPT_RT
Fix a compile error in the kunit tests when CONFIG_PREEMPT_RT is
enabled, and the normal mutex is converted into a rtmutex.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602261547.3bM6yVAS-lkp@intel.com/
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patch.msgid.link/20260304085616.1216961-1-dev@lankhorst.se
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
2026-03-04 11:31:54 +01:00
Shawn Lin
0fb59eaca1 pmdomain: rockchip: Fix PD_VCODEC for RK3588
>From the RK3588 TRM Table 7-1 RK3588 Voltage Domain and Power Domain Summary,
PD_RKVDEC0/1 and PD_VENC0/1 rely on VD_VCODEC which require extra voltages to
be applied, otherwise it breaks RK3588-evb1-v10 board after vdec support landed[1].
The panic looks like below:

  rockchip-pm-domain fd8d8000.power-management:power-controller: failed to set domain 'rkvdec0' on, val=0
  rockchip-pm-domain fd8d8000.power-management:power-controller: failed to set domain 'rkvdec1' on, val=0
  ...
  Hardware name: Rockchip RK3588S EVB1 V10 Board (DT)
  Workqueue: pm genpd_power_off_work_fn
  Call trace:
  show_stack+0x18/0x24 (C)
  dump_stack_lvl+0x40/0x84
  dump_stack+0x18/0x24
  vpanic+0x1ec/0x4fc
  vpanic+0x0/0x4fc
  check_panic_on_warn+0x0/0x94
  arm64_serror_panic+0x6c/0x78
  do_serror+0xc4/0xcc
  el1h_64_error_handler+0x3c/0x5c
  el1h_64_error+0x6c/0x70
  regmap_mmio_read32le+0x18/0x24 (P)
  regmap_bus_reg_read+0xfc/0x130
  regmap_read+0x188/0x1ac
  regmap_read+0x54/0x78
  rockchip_pd_power+0xcc/0x5f0
  rockchip_pd_power_off+0x1c/0x4c
  genpd_power_off+0x84/0x120
  genpd_power_off+0x1b4/0x260
  genpd_power_off_work_fn+0x38/0x58
  process_scheduled_works+0x194/0x2c4
  worker_thread+0x2ac/0x3d8
  kthread+0x104/0x124
  ret_from_fork+0x10/0x20
  SMP: stopping secondary CPUs
  Kernel Offset: disabled
  CPU features: 0x3000000,000e0005,40230521,0400720b
  Memory Limit: none
  ---[ end Kernel panic - not syncing: Asynchronous SError Interrupt ]---

Chaoyi pointed out the PD_VCODEC is the parent of PD_RKVDEC0/1 and PD_VENC0/1, so checking
the PD_VCODEC is enough.

[1] https://lore.kernel.org/linux-rockchip/20251020212009.8852-2-detlev.casanova@collabora.com/

Fixes: db6df2e3fc ("pmdomain: rockchip: add regulator support")
Cc: stable@vger.kernel.org
Suggested-by: Chaoyi Chen <chaoyi.chen@rock-chips.com>
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Reviewed-by: Chaoyi Chen <chaoyi.chen@rock-chips.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-03-04 11:22:36 +01:00
Harry Yoo
6432f15c81 mm/slab: change stride type from unsigned short to unsigned int
Commit 7a8e71bc61 ("mm/slab: use stride to access slabobj_ext")
defined the type of slab->stride as unsigned short, because the author
initially planned to store stride within the lower 16 bits of the
page_type field, but later stored it in unused bits in the counters
field instead.

However, the idea of having only 2-byte stride turned out to be a
serious mistake. On systems with 64k pages, order-1 pages are 128k,
which is larger than USHRT_MAX. It triggers a debug warning because
s->size is 128k while stride, truncated to 2 bytes, becomes zero:

  ------------[ cut here ]------------
  Warning! stride (0) != s->size (131072)
  WARNING: mm/slub.c:2231 at alloc_slab_obj_exts_early.constprop.0+0x524/0x534, CPU#6: systemd-sysctl/307
  Modules linked in:
  CPU: 6 UID: 0 PID: 307 Comm: systemd-sysctl Not tainted 7.0.0-rc1+ #6 PREEMPTLAZY
  Hardware name: IBM,9009-22A POWER9 (architected) 0x4e0202 0xf000005 of:IBM,FW950.E0 (VL950_179) hv:phyp pSeries
  NIP:  c0000000008a9ac0 LR: c0000000008a9abc CTR: 0000000000000000
  REGS: c0000000141f7390 TRAP: 0700   Not tainted  (7.0.0-rc1+)
  MSR:  8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 28004400  XER: 00000005
  CFAR: c000000000279318 IRQMASK: 0
  GPR00: c0000000008a9abc c0000000141f7630 c00000000252a300 c00000001427b200
  GPR04: 0000000000000004 0000000000000000 c000000000278fd0 0000000000000000
  GPR08: fffffffffffe0000 0000000000000000 0000000000000000 0000000022004400
  GPR12: c000000000f644b0 c000000017ff8f00 0000000000000000 0000000000000000
  GPR16: 0000000000000000 c0000000141f7aa0 0000000000000000 c0000000141f7a88
  GPR20: 0000000000000000 0000000000400cc0 ffffffffffffffff c00000001427b180
  GPR24: 0000000000000004 00000000000c0cc0 c000000004e89a20 c00000005de90011
  GPR28: 0000000000010010 c00000005df00000 c000000006017f80 c00c000000177a00
  NIP [c0000000008a9ac0] alloc_slab_obj_exts_early.constprop.0+0x524/0x534
  LR [c0000000008a9abc] alloc_slab_obj_exts_early.constprop.0+0x520/0x534
  Call Trace:
  [c0000000141f7630] [c0000000008a9abc] alloc_slab_obj_exts_early.constprop.0+0x520/0x534 (unreliable)
  [c0000000141f76c0] [c0000000008aafbc] allocate_slab+0x154/0x94c
  [c0000000141f7760] [c0000000008b41c0] refill_objects+0x124/0x16c
  [c0000000141f77c0] [c0000000008b4be0] __pcs_replace_empty_main+0x2b0/0x444
  [c0000000141f7810] [c0000000008b9600] __kvmalloc_node_noprof+0x840/0x914
  [c0000000141f7900] [c000000000a3dd40] seq_read_iter+0x60c/0xb00
  [c0000000141f7a10] [c000000000b36b24] proc_reg_read_iter+0x154/0x1fc
  [c0000000141f7a50] [c0000000009cee7c] vfs_read+0x39c/0x4e4
  [c0000000141f7b30] [c0000000009d0214] ksys_read+0x9c/0x180
  [c0000000141f7b90] [c00000000003a8d0] system_call_exception+0x1e0/0x4b0
  [c0000000141f7e50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec

This leads to slab_obj_ext() returning the first slabobj_ext or all
objects and confuses the reference counting of object cgroups [1] and
memory (un)charging for memory cgroups [2].

Fortunately, the counters field has 32 unused bits instead of 16
on 64-bit CPUs, which is wide enough to hold any value of s->size.
Change the type to unsigned int.

Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Closes: https://lore.kernel.org/lkml/ca241daa-e7e7-4604-a48d-de91ec9184a5@linux.ibm.com [1]
Closes: https://lore.kernel.org/all/ddff7c7d-c0c3-4780-808f-9a83268bbf0c@linux.ibm.com [2]
Fixes: 7a8e71bc61 ("mm/slab: use stride to access slabobj_ext")
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Link: https://patch.msgid.link/20260303135722.2680521-1-harry.yoo@oracle.com
Reviewed-by: Hao Li <hao.li@linux.dev>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-03-04 11:05:57 +01:00
Vlastimil Babka (SUSE)
fb1091febd mm/slab: allow sheaf refill if blocking is not allowed
Ming Lei reported [1] a regression in the ublk null target benchmark due
to sheaves. The profile shows that the alloc_from_pcs() fastpath fails
and allocations fall back to ___slab_alloc(). It also shows the
allocations happen through mempool_alloc().

The strategy of mempool_alloc() is to call the underlying allocator
(here slab) without __GFP_DIRECT_RECLAIM first. This does not play well
with __pcs_replace_empty_main() checking for gfpflags_allow_blocking()
to decide if it should refill an empty sheaf or fallback to the
slowpath, so we end up falling back.

We could change the mempool strategy but there might be other paths
doing the same ting. So instead allow sheaf refill when blocking is not
allowed, changing the condition to gfpflags_allow_spinning(). The
original condition was unnecessarily restrictive.

Note this doesn't fully resolve the regression [1] as another component
of that are memoryless nodes, which is to be addressed separately.

Reported-by: Ming Lei <ming.lei@redhat.com>
Fixes: e47c897a29 ("slab: add sheaves to most caches")
Link: https://lore.kernel.org/all/aZ0SbIqaIkwoW2mB@fedora/ [1]
Link: https://patch.msgid.link/20260302095536.34062-2-vbabka@kernel.org
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-03-04 11:03:54 +01:00
Niklas Cassel
aac9b27f7c ata: libata: cancel pending work after clearing deferred_qc
Syzbot reported a WARN_ON() in ata_scsi_deferred_qc_work(), caused by
ap->ops->qc_defer() returning non-zero before issuing the deferred qc.

ata_scsi_schedule_deferred_qc() is called during each command completion.
This function will check if there is a deferred QC, and if
ap->ops->qc_defer() returns zero, meaning that it is possible to queue the
deferred qc at this time (without being deferred), then it will queue the
work which will issue the deferred qc.

Once the work get to run, which can potentially be a very long time after
the work was scheduled, there is a WARN_ON() if ap->ops->qc_defer() returns
non-zero.

While we hold the ap->lock both when assigning and clearing deferred_qc,
and the work itself holds the ap->lock, the code currently does not cancel
the work after clearing the deferred qc.

This means that the following scenario can happen:
1) One or several NCQ commands are queued.
2) A non-NCQ command is queued, gets stored in ap->deferred_qc.
3) Last NCQ command gets completed, work is queued to issue the deferred
   qc.
4) Timeout or error happens, ap->deferred_qc is cleared. The queued work is
   currently NOT canceled.
5) Port is reset.
6) One or several NCQ commands are queued.
7) A non-NCQ command is queued, gets stored in ap->deferred_qc.
8) Work is finally run. Yet at this time, there is still NCQ commands in
   flight.

The work in 8) really belongs to the non-NCQ command in 2), not to the
non-NCQ command in 7). The reason why the work is executed when it is not
supposed to, is because it was never canceled when ap->deferred_qc was
cleared in 4). Thus, ensure that we always cancel the work after clearing
ap->deferred_qc.

Another potential fix would have been to let ata_scsi_deferred_qc_work() do
nothing if ap->ops->qc_defer() returns non-zero. However, canceling the
work when clearing ap->deferred_qc seems slightly more logical, as we hold
the ap->lock when clearing ap->deferred_qc, so we know that the work cannot
be holding the lock. (The function could be waiting for the lock, but that
is okay since it will do nothing if ap->deferred_qc is not set.)

Reported-by: syzbot+bcaf842a1e8ead8dfb89@syzkaller.appspotmail.com
Fixes: 0ea84089db ("ata: libata-scsi: avoid Non-NCQ command starvation")
Fixes: eddb98ad93 ("ata: libata-eh: correctly handle deferred qc timeouts")
Reviewed-by: Igor Pylypiv <ipylypiv@google.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2026-03-04 10:42:12 +01:00
Yujie Liu
61ded1083b drm/sched: Fix kernel-doc warning for drm_sched_job_done()
There is a kernel-doc warning for the scheduler:

Warning: drivers/gpu/drm/scheduler/sched_main.c:367 function parameter 'result' not described in 'drm_sched_job_done'

Fix the warning by describing the undocumented error code.

Fixes: 539f9ee4b5 ("drm/scheduler: properly forward fence errors")
Signed-off-by: Yujie Liu <yujie.liu@intel.com>
[phasta: Flesh out commit message]
Signed-off-by: Philipp Stanner <phasta@kernel.org>
Link: https://patch.msgid.link/20260227082452.1802922-1-yujie.liu@intel.com
2026-03-04 10:29:27 +01:00
Darrick J. Wong
0ca1a8331c xfs: fix race between healthmon unmount and read_iter
xfs/1879 on one of my test VMs got stuck due to the xfs_io healthmon
subcommand sleeping in wait_event_interruptible at:

 xfs_healthmon_read_iter+0x558/0x5f8 [xfs]
 vfs_read+0x248/0x320
 ksys_read+0x78/0x120

Looking at xfs_healthmon_read_iter, in !O_NONBLOCK mode it will sleep
until the mount cookie == DETACHED_MOUNT_COOKIE, there are events
waiting to be formatted, or there are formatted events in the read
buffer that could be copied to userspace.

Poking into the running kernel, I see that there are zero events in the
list, the read buffer is empty, and the mount cookie is indeed in
DETACHED state.  IOWs, xfs_healthmon_has_eventdata should have returned
true, but instead we're asleep waiting for a wakeup.

I think what happened here is that xfs_healthmon_read_iter and
xfs_healthmon_unmount were racing with each other, and _read_iter lost
the race.  _unmount queued an unmount event, which woke up _read_iter.
It found, formatted, and copied the event out to userspace.  That
cleared out the pending event list and emptied the read buffer.  xfs_io
then called read() again, so _has_eventdata decided that we should sleep
on the empty event queue.

Next, _unmount called xfs_healthmon_detach, which set the mount cookie
to DETACHED.  Unfortunately, it didn't call wake_up_all on the hm, so
the wait_event_interruptible in the _read_iter thread remains asleep.
That's why the test stalled.

Fix this by moving the wake_up_all call to xfs_healthmon_detach.

Fixes: b3a289a2a9 ("xfs: create event queuing, formatting, and discovery infrastructure")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-04 10:11:47 +01:00
Damien Le Moal
6270b8ac2f xfs: remove scratch field from struct xfs_gc_bio
The scratch field in struct xfs_gc_bio is unused. Remove it.

Fixes: 102f444b57 ("xfs: rework zone GC buffer management")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-03-04 09:34:12 +01:00
Maximilian Pezzullo
b3b1d3ae1d ata: libata-core: Disable LPM on ST1000DM010-2EP102
According to a user report, the ST1000DM010-2EP102 has problems with LPM,
causing random system freezes. The drive belongs to the same BarraCuda
family as the ST2000DM008-2FR102 which has the same issue.

Cc: stable@vger.kernel.org
Fixes: 7627a0edef ("ata: ahci: Drop low power policy board type")
Reported-by: Filippo Baiamonte <filippo.ba03@bugzilla.kernel.org>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221163
Signed-off-by: Maximilian Pezzullo <maximilianpezzullo@gmail.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2026-03-04 09:16:56 +01:00
Felix Gu
5c3daa5301 power: sequencing: pcie-m2: Fix device node reference leak in probe
In pwrseq_pcie_m2_probe(), ctx->of_node acquires an explicit reference
to the device node using of_node_get(), but there is no corresponding
of_node_put() in the driver's error handling paths or removal.

Since the ctx is tied to the lifecycle of the platform device, there
is no need to hold an additional reference to the device's own of_node.

Fixes: 52e7b5bd62 ("power: sequencing: Add the Power Sequencing driver for the PCIe M.2 connectors")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260302-m2-v1-1-a6533e18aa69@gmail.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
2026-03-04 09:16:41 +01:00
Abhijit Gangurde
a08aaf3968 RDMA/ionic: Preserve and set Ethernet source MAC after ib_ud_header_init()
ionic_build_hdr() populated the Ethernet source MAC (hdr->eth.smac_h) by
passing the header’s storage directly to rdma_read_gid_l2_fields().
However, ib_ud_header_init() is called after that and re-initializes the
UD header, which wipes the previously written smac_h. As a result, packets
are emitted with an zero source MAC address on the wire.

Correct the source MAC by reading the GID-derived smac into a temporary
buffer and copy it after ib_ud_header_init() completes.

Fixes: e8521822c7 ("RDMA/ionic: Register device ops for control path")
Cc: stable@vger.kernel.org # 6.18
Signed-off-by: Abhijit Gangurde <abhijit.gangurde@amd.com>
Link: https://patch.msgid.link/20260227061809.2979990-1-abhijit.gangurde@amd.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-04 02:44:01 -05:00
Jacob Moroni
29a3edd700 RDMA/irdma: Fix double free related to rereg_user_mr
If IB_MR_REREG_TRANS is set during rereg_user_mr, the
umem will be released and a new one will be allocated
in irdma_rereg_mr_trans. If any step of irdma_rereg_mr_trans
fails after the new umem is allocated, it releases the umem,
but does not set iwmr->region to NULL. The problem is that
this failure is propagated to the user, who will then call
ibv_dereg_mr (as they should). Then, the dereg_mr path will
see a non-NULL umem and attempt to call ib_umem_release again.

Fix this by setting iwmr->region to NULL after ib_umem_release.

Fixed: 5ac388db27 ("RDMA/irdma: Add support to re-register a memory region")
Signed-off-by: Jacob Moroni <jmoroni@google.com>
Link: https://patch.msgid.link/20260227152743.1183388-1-jmoroni@google.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-03-04 02:44:01 -05:00
Naveen Anandhan
fbdfa8da05 selftests: tc-testing: fix list_categories() crash on list type
list_categories() builds a set directly from the 'category'
field of each test case. Since 'category' is a list,
set(map(...)) attempts to insert lists into a set, which
raises:

  TypeError: unhashable type: 'list'

Flatten category lists and collect unique category names
using set.update() instead.

Signed-off-by: Naveen Anandhan <mr.navi8680@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2026-03-04 05:42:57 +00:00
Sourabh Jain
04e707cb77 powerpc/crash: adjust the elfcorehdr size
With crash hotplug support enabled, additional memory is allocated to
the elfcorehdr kexec segment to accommodate resources added during
memory hotplug events. However, the kdump FDT is not updated with the
same size, which can result in elfcorehdr corruption in the kdump
kernel.

Update elf_headers_sz (the kimage member representing the size of the
elfcorehdr kexec segment) to reflect the total memory allocated for the
elfcorehdr segment instead of the elfcorehdr buffer size at the time of
kdump load. This allows of_kexec_alloc_and_setup_fdt() to reserve the
full elfcorehdr memory in the kdump FDT and prevents elfcorehdr
corruption.

Fixes: 849599b702 ("powerpc/crash: add crash memory hotplug support")
Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260227171801.2238847-1-sourabhjain@linux.ibm.com
2026-03-04 11:12:48 +05:30
Sourabh Jain
20197b967a powerpc/kexec/core: use big-endian types for crash variables
Use explicit word-sized big-endian types for kexec and crash related
variables. This makes the endianness unambiguous and avoids type
mismatches that trigger sparse warnings.

The change addresses sparse warnings like below (seen on both 32-bit
and 64-bit builds):

CHECK   ../arch/powerpc/kexec/core.c
sparse:    expected unsigned int static [addressable] [toplevel] [usertype] crashk_base
sparse:    got restricted __be32 [usertype]
sparse: warning: incorrect type in assignment (different base types)
sparse:    expected unsigned int static [addressable] [toplevel] [usertype] crashk_size
sparse:    got restricted __be32 [usertype]
sparse: warning: incorrect type in assignment (different base types)
sparse:    expected unsigned long long static [addressable] [toplevel] mem_limit
sparse:    got restricted __be32 [usertype]
sparse: warning: incorrect type in assignment (different base types)
sparse:    expected unsigned int static [addressable] [toplevel] [usertype] kernel_end
sparse:    got restricted __be32 [usertype]

No functional change intended.

Fixes: ea961a828f ("powerpc: Fix endian issues in kexec and crash dump code")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512221405.VHPKPjnp-lkp@intel.com/
Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20251224151257.28672-1-sourabhjain@linux.ibm.com
2026-03-04 11:08:57 +05:30
Rob Herring (Arm)
6fc5d63c6f powerpc/prom_init: Fixup missing #size-cells on PowerMac media-bay nodes
Similar to other PowerMac mac-io devices, the media-bay node is missing the
"#size-cells" property.

Depends-on: commit 045b14ca5c ("of: WARN on deprecated #address-cells/#size-cells handling")
Reported-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20251029174047.1620073-1-robh@kernel.org
2026-03-04 11:07:36 +05:30
Rob Herring (Arm)
0706178339 powerpc: dts: fsl: Drop unused .dtsi files
These files are not included by anything and therefore don't get built or
tested.

There's also no upstream driver for the interlaken-lac stuff.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Reviewed-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260128140222.1627203-1-robh@kernel.org
2026-03-04 11:06:46 +05:30
Christophe Leroy (CS GROUP)
0ee95a1d45 powerpc/uaccess: Fix inline assembly for clang build on PPC32
Test robot reports the following error with clang-16.0.6:

   In file included from kernel/rseq.c:75:
   include/linux/rseq_entry.h:141:3: error: invalid operand for instruction
                   unsafe_get_user(offset, &ucs->post_commit_offset, efault);
                   ^
   include/linux/uaccess.h:608:2: note: expanded from macro 'unsafe_get_user'
           arch_unsafe_get_user(x, ptr, local_label);      \
           ^
   arch/powerpc/include/asm/uaccess.h:518:2: note: expanded from macro 'arch_unsafe_get_user'
           __get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \
           ^
   arch/powerpc/include/asm/uaccess.h:284:2: note: expanded from macro '__get_user_size_goto'
           __get_user_size_allowed(x, ptr, size, __gus_retval);    \
           ^
   arch/powerpc/include/asm/uaccess.h:275:10: note: expanded from macro '__get_user_size_allowed'
           case 8: __get_user_asm2(x, (u64 __user *)ptr, retval);  break;  \
                   ^
   arch/powerpc/include/asm/uaccess.h:258:4: note: expanded from macro '__get_user_asm2'
                   "       li %1+1,0\n"                    \
                    ^
   <inline asm>:7:5: note: instantiated into assembly here
           li 31+1,0
              ^
   1 error generated.

On PPC32, for 64 bits vars a pair of registers is used. Usually the
lower register in the pair is the high part and the higher register is
the low part. GCC uses r3/r4 ... r11/r12 ... r14/r15 ... r30/r31

In older kernel code inline assembly was using %1 and %1+1 to represent
64 bits values. However here it looks like clang uses r31 as high part,
allthough r32 doesn't exist hence the error.

Allthoug %1+1 should work, most places now use %L1 instead of %1+1, so
let's do the same here.

With that change, the build doesn't fail anymore and a disassembly shows
clang uses r17/r18 and r31/r14 pair when GCC would have used r16/r17 and
r30/r31:

	Disassembly of section .fixup:

	00000000 <.fixup>:
	   0:	38 a0 ff f2 	li      r5,-14
	   4:	3a 20 00 00 	li      r17,0
	   8:	3a 40 00 00 	li      r18,0
	   c:	48 00 00 00 	b       c <.fixup+0xc>
				c: R_PPC_REL24	.text+0xbc
	  10:	38 a0 ff f2 	li      r5,-14
	  14:	3b e0 00 00 	li      r31,0
	  18:	39 c0 00 00 	li      r14,0
	  1c:	48 00 00 00 	b       1c <.fixup+0x1c>
				1c: R_PPC_REL24	.text+0x144

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602021825.otcItxGi-lkp@intel.com/
Fixes: c20beffeec ("powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()")
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Acked-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/8ca3a657a650e497a96bfe7acde2f637dadab344.1770103646.git.chleroy@kernel.org
2026-03-04 11:06:02 +05:30
Christophe Leroy
b9e7e3ea60 powerpc/e500: Always use 64 bits PTE
Today there are two PTE formats for e500:
- The 64 bits format, used
 - On 64 bits kernel
 - On 32 bits kernel with 64 bits physical addresses
 - On 32 bits kernel with support of huge pages
- The 32 bits format, used in other cases

Maintaining two PTE formats means unnecessary maintenance burden
because every change needs to be implemented and tested for both
formats.

Remove the 32 bits PTE format. The memory usage increase due to
larger PTEs is minimal (approx. 0,1% of memory).

This also means that from now on huge pages are supported also
with 32 bits physical addresses.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/04a658209ea78dcc0f3dbde6b2c29cf1939adfe9.1767721208.git.chleroy@kernel.org
2026-03-04 11:05:06 +05:30
Qing Wang
e39bb9e02b tracing: Fix WARN_ON in tracing_buffers_mmap_close
When a process forks, the child process copies the parent's VMAs but the
user_mapped reference count is not incremented. As a result, when both the
parent and child processes exit, tracing_buffers_mmap_close() is called
twice. On the second call, user_mapped is already 0, causing the function to
return -ENODEV and triggering a WARN_ON.

Normally, this isn't an issue as the memory is mapped with VM_DONTCOPY set.
But this is only a hint, and the application can call
madvise(MADVISE_DOFORK) which resets the VM_DONTCOPY flag. When the
application does that, it can trigger this issue on fork.

Fix it by incrementing the user_mapped reference count without re-mapping
the pages in the VMA's open callback.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Link: https://patch.msgid.link/20260227025842.1085206-1-wangqing7171@gmail.com
Fixes: cf9f0f7c4c ("tracing: Allow user-space mapping of the ring-buffer")
Reported-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=3b5dd2030fe08afdf65d
Tested-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com
Signed-off-by: Qing Wang <wangqing7171@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-03 22:25:32 -05:00
Masami Hiramatsu (Google)
a5dd6f5866 tracing: Disable preemption in the tracepoint callbacks handling filtered pids
Filtering PIDs for events triggered the following during selftests:

[37] event tracing - restricts events based on pid notrace filtering
[  155.874095]
[  155.874869] =============================
[  155.876037] WARNING: suspicious RCU usage
[  155.877287] 7.0.0-rc1-00004-g8cd473a19bc7 #7 Not tainted
[  155.879263] -----------------------------
[  155.882839] kernel/trace/trace_events.c:1057 suspicious rcu_dereference_check() usage!
[  155.889281]
[  155.889281] other info that might help us debug this:
[  155.889281]
[  155.894519]
[  155.894519] rcu_scheduler_active = 2, debug_locks = 1
[  155.898068] no locks held by ftracetest/4364.
[  155.900524]
[  155.900524] stack backtrace:
[  155.902645] CPU: 1 UID: 0 PID: 4364 Comm: ftracetest Not tainted 7.0.0-rc1-00004-g8cd473a19bc7 #7 PREEMPT(lazy)
[  155.902648] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014
[  155.902651] Call Trace:
[  155.902655]  <TASK>
[  155.902659]  dump_stack_lvl+0x67/0x90
[  155.902665]  lockdep_rcu_suspicious+0x154/0x1a0
[  155.902672]  event_filter_pid_sched_process_fork+0x9a/0xd0
[  155.902678]  kernel_clone+0x367/0x3a0
[  155.902689]  __x64_sys_clone+0x116/0x140
[  155.902696]  do_syscall_64+0x158/0x460
[  155.902700]  ? entry_SYSCALL_64_after_hwframe+0x77/0x7f
[  155.902702]  ? trace_irq_disable+0x1d/0xc0
[  155.902709]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
[  155.902711] RIP: 0033:0x4697c3
[  155.902716] Code: 1f 84 00 00 00 00 00 64 48 8b 04 25 10 00 00 00 45 31 c0 31 d2 31 f6 bf 11 00 20 01 4c 8d 90 d0 02 00 00 b8 38 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 89 c2 85 c0 75 2c 64 48 8b 04 25 10 00 00
[  155.902718] RSP: 002b:00007ffc41150428 EFLAGS: 00000246 ORIG_RAX: 0000000000000038
[  155.902721] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004697c3
[  155.902722] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011
[  155.902724] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000003fccf990
[  155.902725] R10: 000000003fccd690 R11: 0000000000000246 R12: 0000000000000001
[  155.902726] R13: 000000003fce8103 R14: 0000000000000001 R15: 0000000000000000
[  155.902733]  </TASK>
[  155.902747]

The tracepoint callbacks recently were changed to allow preemption. The
event PID filtering callbacks that were attached to the fork and exit
tracepoints expected preemption disabled in order to access the RCU
protected PID lists.

Add a guard(preempt)() to protect the references to the PID list.

Cc: stable@vger.kernel.org
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20260303215738.6ab275af@fedora
Fixes: a46023d561 ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast")
Link: https://patch.msgid.link/20260303131706.96057f61a48a34c43ce1e396@kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-03 22:25:32 -05:00
Steven Rostedt
cc337974cd ftrace: Disable preemption in the tracepoint callbacks handling filtered pids
When function trace PID filtering is enabled, the function tracer will
attach a callback to the fork tracepoint as well as the exit tracepoint
that will add the forked child PID to the PID filtering list as well as
remove the PID that is exiting.

Commit a46023d561 ("tracing: Guard __DECLARE_TRACE() use of
__DO_TRACE_CALL() with SRCU-fast") removed the disabling of preemption
when calling tracepoint callbacks.

The callbacks used for the PID filtering accounting depended on preemption
being disabled, and now the trigger a "suspicious RCU usage" warning message.

Make them explicitly disable preemption.

Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20260302213546.156e3e4f@gandalf.local.home
Fixes: a46023d561 ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2026-03-03 22:25:31 -05:00
Huiwen He
0a663b764d tracing: Fix syscall events activation by ensuring refcount hits zero
When multiple syscall events are specified in the kernel command line
(e.g., trace_event=syscalls:sys_enter_openat,syscalls:sys_enter_close),
they are often not captured after boot, even though they appear enabled
in the tracing/set_event file.

The issue stems from how syscall events are initialized. Syscall
tracepoints require the global reference count (sys_tracepoint_refcount)
to transition from 0 to 1 to trigger the registration of the syscall
work (TIF_SYSCALL_TRACEPOINT) for tasks, including the init process (pid 1).

The current implementation of early_enable_events() with disable_first=true
used an interleaved sequence of "Disable A -> Enable A -> Disable B -> Enable B".
If multiple syscalls are enabled, the refcount never drops to zero,
preventing the 0->1 transition that triggers actual registration.

Fix this by splitting early_enable_events() into two distinct phases:
1. Disable all events specified in the buffer.
2. Enable all events specified in the buffer.

This ensures the refcount hits zero before re-enabling, allowing syscall
events to be properly activated during early boot.

The code is also refactored to use a helper function to avoid logic
duplication between the disable and enable phases.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20260224023544.1250787-1-hehuiwen@kylinos.cn
Fixes: ce1039bd3a ("tracing: Fix enabling of syscall events on the command line")
Signed-off-by: Huiwen He <hehuiwen@kylinos.cn>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-03 22:15:02 -05:00
Shengming Hu
b96d0c59cd fgraph: Fix thresh_return nosleeptime double-adjust
trace_graph_thresh_return() called handle_nosleeptime() and then delegated
to trace_graph_return(), which calls handle_nosleeptime() again. When
sleep-time accounting is disabled this double-adjusts calltime and can
produce bogus durations (including underflow).

Fix this by computing rettime once, applying handle_nosleeptime() only
once, using the adjusted calltime for threshold comparison, and writing
the return event directly via __trace_graph_return() when the threshold is
met.

Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260221113314048jE4VRwIyZEALiYByGK0My@zte.com.cn
Fixes: 3c9880f3ab ("ftrace: Use a running sleeptime instead of saving on shadow stack")
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Shengming Hu <hu.shengming@zte.com.cn>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-03 22:11:20 -05:00
Shengming Hu
6ca8379b5d fgraph: Fix thresh_return clear per-task notrace
When tracing_thresh is enabled, function graph tracing uses
trace_graph_thresh_return() as the return handler. Unlike
trace_graph_return(), it did not clear the per-task TRACE_GRAPH_NOTRACE
flag set by the entry handler for set_graph_notrace addresses. This could
leave the task permanently in "notrace" state and effectively disable
function graph tracing for that task.

Mirror trace_graph_return()'s per-task notrace handling by clearing
TRACE_GRAPH_NOTRACE and returning early when set.

Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260221113007819YgrZsMGABff4Rc-O_fZxL@zte.com.cn
Fixes: b84214890a ("function_graph: Move graph notrace bit to shadow stack global var")
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Shengming Hu <hu.shengming@zte.com.cn>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2026-03-03 22:10:37 -05:00
Eric Biggers
26bc83b88b smb: client: Compare MACs in constant time
To prevent timing attacks, MAC comparisons need to be constant-time.
Replace the memcmp() with the correct function, crypto_memneq().

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-03 20:56:36 -06:00
J. Neuschäfer
9e7dc228bb io_uring/mock: Fix typo in help text
Fix the spelling of "subsystem".

Signed-off-by: J. Neuschäfer <j.ne@posteo.net>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-03 19:42:53 -07:00
Eric Biggers
46d0d6f50d net/tcp-md5: Fix MAC comparison to be constant-time
To prevent timing attacks, MACs need to be compared in constant
time.  Use the appropriate helper function for this.

Fixes: cfb6eeb4c8 ("[TCP]: MD5 Signature Option (RFC2385) support.")
Fixes: 658ddaaf66 ("tcp: md5: RST: getting md5 key from listener")
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Link: https://patch.msgid.link/20260302203409.13388-1-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-03 18:39:43 -08:00
Stephen Hemminger
7f5d8e63f3 MAINTAINERS: update the skge/sky2 maintainers
Mark the skge and sky2 drivers as orphan.
I no longer have any Marvell/SysKonnect boards to test with and
mail to Mirko Lindner bounced because Marvell sold off that divsion.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Link: https://patch.msgid.link/20260302195120.187183-1-stephen@networkplumber.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-03 18:39:27 -08:00
Raju Rangoju
e2f27363aa amd-xgbe: fix sleep while atomic on suspend/resume
The xgbe_powerdown() and xgbe_powerup() functions use spinlocks
(spin_lock_irqsave) while calling functions that may sleep:
- napi_disable() can sleep waiting for NAPI polling to complete
- flush_workqueue() can sleep waiting for pending work items

This causes a "BUG: scheduling while atomic" error during suspend/resume
cycles on systems using the AMD XGBE Ethernet controller.

The spinlock protection in these functions is unnecessary as these
functions are called from suspend/resume paths which are already serialized
by the PM core

Fix this by removing the spinlock. Since only code that takes this lock
is xgbe_powerdown() and xgbe_powerup(), remove it completely.

Fixes: c5aa9e3b81 ("amd-xgbe: Initial AMD 10GbE platform driver")
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20260302042124.1386445-1-Raju.Rangoju@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-03 17:24:38 -08:00
Breno Leitao
5af6e8b549 netconsole: fix sysdata_release_enabled_show checking wrong flag
sysdata_release_enabled_show() checks SYSDATA_TASKNAME instead of
SYSDATA_RELEASE, causing the configfs release_enabled attribute to
reflect the taskname feature state rather than the release feature
state. This is a copy-paste error from the adjacent
sysdata_taskname_enabled_show() function.

The corresponding _store function already uses the correct
SYSDATA_RELEASE flag.

Fixes: 343f902270 ("netconsole: implement configfs for release_enabled")
Signed-off-by: Breno Leitao <leitao@debian.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260302-sysdata_release_fix-v1-1-e5090f677c7c@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-03 17:23:50 -08:00
Yung Chih Su
4ee7fa6cf7 net: ipv4: fix ARM64 alignment fault in multipath hash seed
`struct sysctl_fib_multipath_hash_seed` contains two u32 fields
(user_seed and mp_seed), making it an 8-byte structure with a 4-byte
alignment requirement.

In `fib_multipath_hash_from_keys()`, the code evaluates the entire
struct atomically via `READ_ONCE()`:

    mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;

While this silently works on GCC by falling back to unaligned regular
loads which the ARM64 kernel tolerates, it causes a fatal kernel panic
when compiled with Clang and LTO enabled.

Commit e35123d83e ("arm64: lto: Strengthen READ_ONCE() to acquire
when CONFIG_LTO=y") strengthens `READ_ONCE()` to use Load-Acquire
instructions (`ldar` / `ldapr`) to prevent compiler reordering bugs
under Clang LTO. Since the macro evaluates the full 8-byte struct,
Clang emits a 64-bit `ldar` instruction. ARM64 architecture strictly
requires `ldar` to be naturally aligned, thus executing it on a 4-byte
aligned address triggers a strict Alignment Fault (FSC = 0x21).

Fix the read side by moving the `READ_ONCE()` directly to the `u32`
member, which emits a safe 32-bit `ldar Wn`.

Furthermore, Eric Dumazet pointed out that `WRITE_ONCE()` on the entire
struct in `proc_fib_multipath_hash_set_seed()` is also flawed. Analysis
shows that Clang splits this 8-byte write into two separate 32-bit
`str` instructions. While this avoids an alignment fault, it destroys
atomicity and exposes a tear-write vulnerability. Fix this by
explicitly splitting the write into two 32-bit `WRITE_ONCE()`
operations.

Finally, add the missing `READ_ONCE()` when reading `user_seed` in
`proc_fib_multipath_hash_seed()` to ensure proper pairing and
concurrency safety.

Fixes: 4ee2a8cace ("net: ipv4: Add a sysctl to set multipath hash seed")
Signed-off-by: Yung Chih Su <yuuchihsu@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260302060247.7066-1-yuuchihsu@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-03 17:20:37 -08:00
Eric Biggers
67edfec516 net/tcp-ao: Fix MAC comparison to be constant-time
To prevent timing attacks, MACs need to be compared in constant
time.  Use the appropriate helper function for this.

Fixes: 0a3a809089 ("net/tcp: Verify inbound TCP-AO signed segments")
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com>
Link: https://patch.msgid.link/20260302203600.13561-1-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-03 17:16:54 -08:00
Jakub Kicinski
2ffb4f5c2c ipv6: fix NULL pointer deref in ip6_rt_get_dev_rcu()
l3mdev_master_dev_rcu() can return NULL when the slave device is being
un-slaved from a VRF. All other callers deal with this, but we lost
the fallback to loopback in ip6_rt_pcpu_alloc() -> ip6_rt_get_dev_rcu()
with commit 4832c30d54 ("net: ipv6: put host and anycast routes on
device with address").

  KASAN: null-ptr-deref in range [0x0000000000000108-0x000000000000010f]
  RIP: 0010:ip6_rt_pcpu_alloc (net/ipv6/route.c:1418)
  Call Trace:
   ip6_pol_route (net/ipv6/route.c:2318)
   fib6_rule_lookup (net/ipv6/fib6_rules.c:115)
   ip6_route_output_flags (net/ipv6/route.c:2607)
   vrf_process_v6_outbound (drivers/net/vrf.c:437)

I was tempted to rework the un-slaving code to clear the flag first
and insert synchronize_rcu() before we remove the upper. But looks like
the explicit fallback to loopback_dev is an established pattern.
And I guess avoiding the synchronize_rcu() is nice, too.

Fixes: 4832c30d54 ("net: ipv6: put host and anycast routes on device with address")
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260301194548.927324-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-03 17:14:48 -08:00
Alexandre Courbot
3ac88a9948 rust: str: make NullTerminatedFormatter public
If `CONFIG_BLOCK` is disabled, the following warnings are displayed
during build:

  warning: struct `NullTerminatedFormatter` is never constructed
    --> ../rust/kernel/str.rs:667:19
      |
  667 | pub(crate) struct NullTerminatedFormatter<'a> {
      |                   ^^^^^^^^^^^^^^^^^^^^^^^
      |
      = note: `#[warn(dead_code)]` (part of `#[warn(unused)]`) on by default

  warning: associated function `new` is never used
    --> ../rust/kernel/str.rs:673:19
      |
  671 | impl<'a> NullTerminatedFormatter<'a> {
      | ------------------------------------ associated function in this implementation
  672 |     /// Create a new [`Self`] instance.
  673 |     pub(crate) fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> {

Fix them by making `NullTerminatedFormatter` public, as it could be
useful for drivers anyway.

Fixes: cdde7a1951 ("rust: str: introduce `NullTerminatedFormatter`")
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260224-nullterminatedformatter-v1-1-5bef7b9b3d4c@nvidia.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2026-03-04 02:03:31 +01:00
ZhangGuoDong
8098179dc9 smb/client: remove unused SMB311_posix_query_info()
It is currently unused, as now we are doing compounding instead
(see smb2_query_path_info()).

Signed-off-by: ZhangGuoDong <zhangguodong@kylinos.cn>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-03 18:03:56 -06:00
ZhangGuoDong
9621b996e4 smb/client: fix buffer size for smb311_posix_qinfo in SMB311_posix_query_info()
SMB311_posix_query_info() is currently unused, but it may still be used in
some stable versions, so these changes are submitted as a separate patch.

Use `sizeof(struct smb311_posix_qinfo)` instead of sizeof its pointer,
so the allocated buffer matches the actual struct size.

Fixes: b1bc1874b8 ("smb311: Add support for SMB311 query info (non-compounded)")
Reported-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: ZhangGuoDong <zhangguodong@kylinos.cn>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-03 18:03:56 -06:00
ZhangGuoDong
12c43a062a smb/client: fix buffer size for smb311_posix_qinfo in smb2_compound_op()
Use `sizeof(struct smb311_posix_qinfo)` instead of sizeof its pointer,
so the allocated buffer matches the actual struct size.

Fixes: 6a5f6592a0 ("SMB311: Add support for query info using posix extensions (level 100)")
Reported-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: ZhangGuoDong <zhangguodong@kylinos.cn>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-03 18:03:56 -06:00
Lang Xu
56145d2373 bpf: Fix a UAF issue in bpf_trampoline_link_cgroup_shim
The root cause of this bug is that when 'bpf_link_put' reduces the
refcount of 'shim_link->link.link' to zero, the resource is considered
released but may still be referenced via 'tr->progs_hlist' in
'cgroup_shim_find'. The actual cleanup of 'tr->progs_hlist' in
'bpf_shim_tramp_link_release' is deferred. During this window, another
process can cause a use-after-free via 'bpf_trampoline_link_cgroup_shim'.

Based on Martin KaFai Lau's suggestions, I have created a simple patch.

To fix this:
   Add an atomic non-zero check in 'bpf_trampoline_link_cgroup_shim'.
   Only increment the refcount if it is not already zero.

Testing:
   I verified the fix by adding a delay in
   'bpf_shim_tramp_link_release' to make the bug easier to trigger:

static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
	/* ... */
	if (!shim_link->trampoline)
		return;

+	msleep(100);
	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link,
		shim_link->trampoline, NULL));
	bpf_trampoline_put(shim_link->trampoline);
}

Before the patch, running a PoC easily reproduced the crash(almost 100%)
with a call trace similar to KaiyanM's report.
After the patch, the bug no longer occurs even after millions of
iterations.

Fixes: 69fd337a97 ("bpf: per-cgroup lsm flavor")
Reported-by: Kaiyan Mei <M202472210@hust.edu.cn>
Closes: https://lore.kernel.org/bpf/3c4ebb0b.46ff8.19abab8abe2.Coremail.kaiyanm@hust.edu.cn/
Signed-off-by: Lang Xu <xulang@uniontech.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/279EEE1BA1DDB49D+20260303095217.34436-1-xulang@uniontech.com
2026-03-03 15:13:51 -08:00
Linus Torvalds
0031c06807 Merge tag 'cgroup-for-7.0-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:

 - Fix circular locking dependency in cpuset partition code by
   deferring housekeeping_update() calls to a workqueue instead
   of calling them directly under cpus_read_lock

 - Fix null-ptr-deref in rebuild_sched_domains_cpuslocked() when
   generate_sched_domains() returns NULL due to kmalloc failure

 - Fix incorrect cpuset behavior for effective_xcpus in
   partition_xcpus_del() and cpuset_update_tasks_cpumask()
   in update_cpumasks_hier()

 - Fix race between task migration and cgroup iteration

* tag 'cgroup-for-7.0-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: fix null-ptr-deref in rebuild_sched_domains_cpuslocked
  cgroup/cpuset: Call housekeeping_update() without holding cpus_read_lock
  cgroup/cpuset: Defer housekeeping_update() calls from CPU hotplug to workqueue
  cgroup/cpuset: Move housekeeping_update()/rebuild_sched_domains() together
  kselftest/cgroup: Simplify test_cpuset_prs.sh by removing "S+" command
  cgroup/cpuset: Set isolated_cpus_updating only if isolated_cpus is changed
  cgroup/cpuset: Clarify exclusion rules for cpuset internal variables
  cgroup/cpuset: Fix incorrect use of cpuset_update_tasks_cpumask() in update_cpumasks_hier()
  cgroup/cpuset: Fix incorrect change to effective_xcpus in partition_xcpus_del()
  cgroup: fix race between task migration and iteration
2026-03-03 14:25:18 -08:00
Linus Torvalds
6a8dab043c Merge tag 'sched_ext-for-7.0-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:

 - Fix starvation of scx_enable() under fair-class saturation by
   offloading the enable path to an RT kthread

 - Fix out-of-bounds access in idle mask initialization on systems with
   non-contiguous NUMA node IDs

 - Fix a preemption window during scheduler exit and a refcount
   underflow in cgroup init error path

 - Fix SCX_EFLAG_INITIALIZED being a no-op flag

 - Add READ_ONCE() annotations for KCSAN-clean lockless accesses and
   replace naked scx_root dereferences with container_of() in kobject
   callbacks

 - Tooling and selftest fixes: compilation issues with clang 17,
   strtoul() misuse, unused options cleanup, and Kconfig sync

* tag 'sched_ext-for-7.0-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix starvation of scx_enable() under fair-class saturation
  sched_ext: Remove redundant css_put() in scx_cgroup_init()
  selftests/sched_ext: Fix peek_dsq.bpf.c compile error for clang 17
  selftests/sched_ext: Add -fms-extensions to bpf build flags
  tools/sched_ext: Add -fms-extensions to bpf build flags
  sched_ext: Use READ_ONCE() for plain reads of scx_watchdog_timeout
  sched_ext: Replace naked scx_root dereferences in kobject callbacks
  sched_ext: Use READ_ONCE() for the read side of dsq->nr update
  tools/sched_ext: fix strtoul() misuse in scx_hotplug_seq()
  sched_ext: Fix SCX_EFLAG_INITIALIZED being a no-op flag
  sched_ext: Fix out-of-bounds access in scx_idle_init_masks()
  sched_ext: Disable preemption between scx_claim_exit() and kicking helper work
  tools/sched_ext: Add Kconfig to sync with upstream
  tools/sched_ext: Sync README.md Kconfig with upstream scx
  selftests/sched_ext: Remove duplicated unistd.h include in rt_stall.c
  tools/sched_ext: scx_sdt: Remove unused '-f' option
  tools/sched_ext: scx_central: Remove unused '-p' option
  selftests/sched_ext: Fix unused-result warning for read()
  selftests/sched_ext: Abort test loop on signal
2026-03-03 14:14:20 -08:00
Tejun Heo
b06ccbabe2 sched_ext: Fix starvation of scx_enable() under fair-class saturation
During scx_enable(), the READY -> ENABLED task switching loop changes the
calling thread's sched_class from fair to ext. Since fair has higher
priority than ext, saturating fair-class workloads can indefinitely starve
the enable thread, hanging the system. This was introduced when the enable
path switched from preempt_disable() to scx_bypass() which doesn't protect
against fair-class starvation. Note that the original preempt_disable()
protection wasn't complete either - in partial switch modes, the calling
thread could still be starved after preempt_enable() as it may have been
switched to ext class.

Fix it by offloading the enable body to a dedicated system-wide RT
(SCHED_FIFO) kthread which cannot be starved by either fair or ext class
tasks. scx_enable() lazily creates the kthread on first use and passes the
ops pointer through a struct scx_enable_cmd containing the kthread_work,
then synchronously waits for completion.

The workfn runs on a different kthread from sch->helper (which runs
disable_work), so it can safely flush disable_work on the error path
without deadlock.

Fixes: 8c2090c504 ("sched_ext: Initialize in bypass mode")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-03 11:10:40 -10:00
Vivek Behera
554a1c34c1 igc: Fix trigger of incorrect irq in igc_xsk_wakeup function
This patch addresses the issue where the igc_xsk_wakeup function
was triggering an incorrect IRQ for tx-0 when the i226 is configured
with only 2 combined queues or in an environment with 2 active CPU cores.
This prevented XDP Zero-copy send functionality in such split IRQ
configurations.

The fix implements the correct logic for extracting q_vectors saved
during rx and tx ring allocation and utilizes flags provided by the
ndo_xsk_wakeup API to trigger the appropriate IRQ.

Fixes: fc9df2a0b5 ("igc: Enable RX via AF_XDP zero-copy")
Fixes: 15fd021bc4 ("igc: Add Tx hardware timestamp request for AF_XDP zero-copy packet")
Signed-off-by: Vivek Behera <vivek.behera@siemens.com>
Reviewed-by: Jacob Keller <jacob.keller@intel.com>
Reviewed-by: Aleksandr loktinov <aleksandr.loktionov@intel.com>
Reviewed-by: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
Reviewed-by: Song Yoong Siang <yoong.siang.song@intel.com>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:05 -08:00
Vivek Behera
d4c13ab362 igb: Fix trigger of incorrect irq in igb_xsk_wakeup
The current implementation in the igb_xsk_wakeup expects
the Rx and Tx queues to share the same irq. This would lead
to triggering of incorrect irq in split irq configuration.
This patch addresses this issue which could impact environments
with 2 active cpu cores
or when the number of queues is reduced to 2 or less

cat /proc/interrupts | grep eno2
 167:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 0-edge      eno2
 168:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 1-edge      eno2-rx-0
 169:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 2-edge      eno2-rx-1
 170:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 3-edge      eno2-tx-0
 171:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 4-edge      eno2-tx-1

Furthermore it uses the flags input argument to trigger either rx, tx or
both rx and tx irqs as specified in the ndo_xsk_wakeup api documentation

Fixes: 80f6ccf9f1 ("igb: Introduce XSK data structures and helpers")
Signed-off-by: Vivek Behera <vivek.behera@siemens.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Saritha Sanigani <sarithax.sanigani@intel.com> (A Contingent Worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:05 -08:00
Kohei Enju
b848521701 iavf: fix netdev->max_mtu to respect actual hardware limit
iavf sets LIBIE_MAX_MTU as netdev->max_mtu, ignoring vf_res->max_mtu
from PF [1]. This allows setting an MTU beyond the actual hardware
limit, causing TX queue timeouts [2].

Set correct netdev->max_mtu using vf_res->max_mtu from the PF.

Note that currently PF drivers such as ice/i40e set the frame size in
vf_res->max_mtu, not MTU. Convert vf_res->max_mtu to MTU before setting
netdev->max_mtu.

[1]
 # ip -j -d link show $DEV | jq '.[0].max_mtu'
 16356

[2]
 iavf 0000:00:05.0 enp0s5: NETDEV WATCHDOG: CPU: 1: transmit queue 0 timed out 5692 ms
 iavf 0000:00:05.0 enp0s5: NIC Link is Up Speed is 10 Gbps Full Duplex
 iavf 0000:00:05.0 enp0s5: NETDEV WATCHDOG: CPU: 6: transmit queue 3 timed out 5312 ms
 iavf 0000:00:05.0 enp0s5: NIC Link is Up Speed is 10 Gbps Full Duplex
 ...

Fixes: 5fa4caff59 ("iavf: switch to Page Pool")
Signed-off-by: Kohei Enju <kohei@enjuk.jp>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:04 -08:00
Michal Swiatkowski
636cc3bd12 libie: don't unroll if fwlog isn't supported
The libie_fwlog_deinit() function can be called during driver unload
even when firmware logging was never properly initialized. This led to call
trace:

[  148.576156] Oops: Oops: 0000 [#1] SMP NOPTI
[  148.576167] CPU: 80 UID: 0 PID: 12843 Comm: rmmod Kdump: loaded Not tainted 6.17.0-rc7next-queue-3oct-01915-g06d79d51cf51 #1 PREEMPT(full)
[  148.576177] Hardware name: HPE ProLiant DL385 Gen10 Plus/ProLiant DL385 Gen10 Plus, BIOS A42 07/18/2020
[  148.576182] RIP: 0010:__dev_printk+0x16/0x70
[  148.576196] Code: 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 41 55 41 54 49 89 d4 55 48 89 fd 53 48 85 f6 74 3c <4c> 8b 6e 50 48 89 f3 4d 85 ed 75 03 4c 8b 2e 48 89 df e8 f3 27 98
[  148.576204] RSP: 0018:ffffd2fd7ea17a48 EFLAGS: 00010202
[  148.576211] RAX: ffffd2fd7ea17aa0 RBX: ffff8eb288ae2000 RCX: 0000000000000000
[  148.576217] RDX: ffffd2fd7ea17a70 RSI: 00000000000000c8 RDI: ffffffffb68d3d88
[  148.576222] RBP: ffffffffb68d3d88 R08: 0000000000000000 R09: 0000000000000000
[  148.576227] R10: 00000000000000c8 R11: ffff8eb2b1a49400 R12: ffffd2fd7ea17a70
[  148.576231] R13: ffff8eb3141fb000 R14: ffffffffc1215b48 R15: ffffffffc1215bd8
[  148.576236] FS:  00007f5666ba6740(0000) GS:ffff8eb2472b9000(0000) knlGS:0000000000000000
[  148.576242] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  148.576247] CR2: 0000000000000118 CR3: 000000011ad17000 CR4: 0000000000350ef0
[  148.576252] Call Trace:
[  148.576258]  <TASK>
[  148.576269]  _dev_warn+0x7c/0x96
[  148.576290]  libie_fwlog_deinit+0x112/0x117 [libie_fwlog]
[  148.576303]  ixgbe_remove+0x63/0x290 [ixgbe]
[  148.576342]  pci_device_remove+0x42/0xb0
[  148.576354]  device_release_driver_internal+0x19c/0x200
[  148.576365]  driver_detach+0x48/0x90
[  148.576372]  bus_remove_driver+0x6d/0xf0
[  148.576383]  pci_unregister_driver+0x2e/0xb0
[  148.576393]  ixgbe_exit_module+0x1c/0xd50 [ixgbe]
[  148.576430]  __do_sys_delete_module.isra.0+0x1bc/0x2e0
[  148.576446]  do_syscall_64+0x7f/0x980

It can be reproduced by trying to unload ixgbe driver in recovery mode.

Fix that by checking if fwlog is supported before doing unroll.

Fixes: 641585bc97 ("ixgbe: fwlog support for e610")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:04 -08:00
Zilin Guan
fe868b499d ice: Fix memory leak in ice_set_ringparam()
In ice_set_ringparam, tx_rings and xdp_rings are allocated before
rx_rings. If the allocation of rx_rings fails, the code jumps to
the done label leaking both tx_rings and xdp_rings. Furthermore, if
the setup of an individual Rx ring fails during the loop, the code jumps
to the free_tx label which releases tx_rings but leaks xdp_rings.

Fix this by introducing a free_xdp label and updating the error paths to
ensure both xdp_rings and tx_rings are properly freed if rx_rings
allocation or setup fails.

Compile tested only. Issue found using a prototype static analysis tool
and code review.

Fixes: fcea6f3da5 ("ice: Add stats and ethtool support")
Fixes: efc2214b60 ("ice: Add support for XDP")
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:04 -08:00
Jakub Staniszewski
fb4903b335 ice: fix retry for AQ command 0x06EE
Executing ethtool -m can fail reporting a netlink I/O error while firmware
link management holds the i2c bus used to communicate with the module.

According to Intel(R) Ethernet Controller E810 Datasheet Rev 2.8 [1]
Section 3.3.10.4 Read/Write SFF EEPROM (0x06EE)
request should to be retried upon receiving EBUSY from firmware.

Commit e9c9692c8a ("ice: Reimplement module reads used by ethtool")
implemented it only for part of ice_get_module_eeprom(), leaving all other
calls to ice_aq_sff_eeprom() vulnerable to returning early on getting
EBUSY without retrying.

Remove the retry loop from ice_get_module_eeprom() and add Admin Queue
(AQ) command with opcode 0x06EE to the list of commands that should be
retried on receiving EBUSY from firmware.

Cc: stable@vger.kernel.org
Fixes: e9c9692c8a ("ice: Reimplement module reads used by ethtool")
Signed-off-by: Jakub Staniszewski <jakub.staniszewski@linux.intel.com>
Co-developed-by: Dawid Osuchowski <dawid.osuchowski@linux.intel.com>
Signed-off-by: Dawid Osuchowski <dawid.osuchowski@linux.intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://www.intel.com/content/www/us/en/content-details/613875/intel-ethernet-controller-e810-datasheet.html [1]
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:04 -08:00
Jakub Staniszewski
326256c0a7 ice: reintroduce retry mechanism for indirect AQ
Add retry mechanism for indirect Admin Queue (AQ) commands. To do so we
need to keep the command buffer.

This technically reverts commit 43a630e37e
("ice: remove unused buffer copy code in ice_sq_send_cmd_retry()"),
but combines it with a fix in the logic by using a kmemdup() call,
making it more robust and less likely to break in the future due to
programmer error.

Cc: Michal Schmidt <mschmidt@redhat.com>
Cc: stable@vger.kernel.org
Fixes: 3056df93f7 ("ice: Re-send some AQ commands, as result of EBUSY AQ error")
Signed-off-by: Jakub Staniszewski <jakub.staniszewski@linux.intel.com>
Co-developed-by: Dawid Osuchowski <dawid.osuchowski@linux.intel.com>
Signed-off-by: Dawid Osuchowski <dawid.osuchowski@linux.intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:04 -08:00
Larysa Zaremba
eef33aa449 ice: fix adding AQ LLDP filter for VF
The referenced commit came from a misunderstanding of the FW LLDP filter
AQ (Admin Queue) command due to the error in the internal documentation.
Contrary to the assumptions in the original commit, VFs can be added and
deleted from this filter without any problems. Introduced dev_info message
proved to be useful, so reverting the whole commit does not make sense.

Without this fix, trusted VFs do not receive LLDP traffic, if there is an
AQ LLDP filter on PF. When trusted VF attempts to add an LLDP multicast
MAC address, the following message can be seen in dmesg on host:

ice 0000:33:00.0: Failed to add Rx LLDP rule on VSI 20 error: -95

Revert checking VSI type when adding LLDP filter through AQ.

Fixes: 4d5a1c4e6d ("ice: do not add LLDP-specific filter if not necessary")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-03-03 13:06:04 -08:00
Eric Biggers
3875ceb592 crypto: testmgr - Fix stale references to aes-generic
Due to commit a248447427 ("crypto: aes - Replace aes-generic with
wrapper around lib"), the "aes-generic" driver name has been replaced
with "aes-lib".  Update a couple testmgr entries that were added
concurrently with this change.

Fixes: a22d48cbe5 ("crypto: testmgr - Add test vectors for authenc(hmac(sha224),cbc(aes))")
Fixes: 030218dede ("crypto: testmgr - Add test vectors for authenc(hmac(sha384),cbc(aes))")
Acked-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Link: https://lore.kernel.org/r/20260302234856.30569-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-03 11:57:15 -08:00
Thomas Fourier
e4eb6e4dd6 drm/msm: Fix dma_free_attrs() buffer size
The gpummu->table buffer is alloc'd with size TABLE_SIZE + 32 in
a2xx_gpummu_new() but freed with size TABLE_SIZE in
a2xx_gpummu_destroy().

Change the free size to match the allocation.

Fixes: c2052a4e5c ("drm/msm: implement a2xx mmu")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Fourier <fourier.thomas@gmail.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/707340/
Message-ID: <20260226095714.12126-2-fourier.thomas@gmail.com>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
2026-03-03 10:39:06 -08:00
Akhil P Oommen
20f644f42e drm/msm/a6xx: Fix the bogus protect error on X2-85
Update the X2-85 gpu's register protect count configuration with the
correct count_max value to avoid blocking the entire MMIO region from the
UMD.

Protect configurations are a bit complicated on A8xx. There are 2 set of
protect registers with different counts: Global and Pipe-specific. The
last-span-unbound feature is available only on the Pipe-specific protect
registers. Due to this, we cannot use the BUILD_BUG sanity check for A8x
protect configurations, so remove the A840 entry from there.

Fixes: 01ff3bf272 ("drm/msm/a8xx: Add support for Adreno X2-85 GPU")
Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/706944/
Message-ID: <20260225-glymur-protect-fix-v1-1-0deddedf9277@oss.qualcomm.com>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
2026-03-03 10:36:35 -08:00
Zhang Rui
3817b1d344 cpupower: Add intel_pstate turbo boost support for Intel platforms
On modern Intel platforms, the intel_pstate driver is commonly used and
it provides turbo boost control via
/sys/devices/system/cpu/intel_pstate/no_turbo.

However, cpupower doesn't handle this. it
1. shows turbo boost as "active" blindly for Intel platforms
2. controls turbo boost functionality via the generic
   /sys/devices/system/cpu/cpufreq/boost sysfs interface only.

Enhance the cpupower tool to ensure the "--boost" command works
seamlessly on Intel platforms with intel_pstate driver running.

Without this patch,
   $ echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo
   1
   $ sudo cpupower frequency-info --boost
   analyzing CPU 21:
     boost state support:
       Supported: yes
       Active: yes
   $ sudo cpupower set --boost 0
   Error setting turbo-boost
   $ sudo cpupower set --boost 1
   Error setting turbo-boost

With this patch,
   $ cat /sys/devices/system/cpu/intel_pstate/no_turbo
   0
   $ sudo cpupower set --boost 0
   $ sudo cpupower frequency-info --boost
   analyzing CPU 21:
     boost state support:
       Supported: yes
       Active: no
   $ cat /sys/devices/system/cpu/intel_pstate/no_turbo
   1
   $ sudo cpupower set --boost 1
   $ sudo cpupower frequency-info --boost
   analyzing CPU 28:
     boost state support:
       Supported: yes
       Active: yes
   $ cat /sys/devices/system/cpu/intel_pstate/no_turbo
   0

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-03-03 11:18:53 -07:00
Jan Kiszka
50ad1a31be cpupower: Add support for setting EPP via systemd service
Extend the systemd service so that it can be used for tuning the Energy
Performance Preference (EPP) as well. Available options can be read from
/sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences.
The desired one can then be set in cpupower-service.conf.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-03-03 11:07:06 -07:00
Alison Schofield
19d2f0b97a cxl/port: Fix use after free of parent_port in cxl_detach_ep()
cxl_detach_ep() is called during bottom-up removal when all CXL memory
devices beneath a switch port have been removed. For each port in the
hierarchy it locks both the port and its parent, removes the endpoint,
and if the port is now empty, marks it dead and unregisters the port
by calling delete_switch_port(). There are two places during this work
where the parent_port may be used after freeing:

First, a concurrent detach may have already processed a port by the
time a second worker finds it via bus_find_device(). Without pinning
parent_port, it may already be freed when we discover port->dead and
attempt to unlock the parent_port. In a production kernel that's a
silent memory corruption, with lock debug, it looks like this:

[]DEBUG_LOCKS_WARN_ON(__owner_task(owner) != get_current())
[]WARNING: kernel/locking/mutex.c:949 at __mutex_unlock_slowpath+0x1ee/0x310
[]Call Trace:
[]mutex_unlock+0xd/0x20
[]cxl_detach_ep+0x180/0x400 [cxl_core]
[]devm_action_release+0x10/0x20
[]devres_release_all+0xa8/0xe0
[]device_unbind_cleanup+0xd/0xa0
[]really_probe+0x1a6/0x3e0

Second, delete_switch_port() releases three devm actions registered
against parent_port. The last of those is unregister_port() and it
calls device_unregister() on the child port, which can cascade. If
parent_port is now also empty the device core may unregister and free
it too. So by the time delete_switch_port() returns, parent_port may
be free, and the subsequent device_unlock(&parent_port->dev) operates
on freed memory. The kernel log looks same as above, with a different
offset in cxl_detach_ep().

Both of these issues stem from the absence of a lifetime guarantee
between a child port and its parent port.

Establish a lifetime rule for ports: child ports hold a reference to
their parent device until release. Take the reference when the port
is allocated and drop it when released. This ensures the parent is
valid for the full lifetime of the child and eliminates the use after
free window in cxl_detach_ep().

This is easily reproduced with a reload of cxl_acpi in QEMU with CXL
devices present.

Fixes: 2345df5424 ("cxl/memdev: Fix endpoint port removal")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260226184439.1732841-1-alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-03-03 10:20:19 -07:00
Linus Torvalds
c44db6c820 Merge tag 'for-7.0-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
 "One-liner or short fixes for minor/moderate problems reported recently:

   - fixes or level adjustments of error messages

   - fix leaked transaction handles after aborted transactions, when
     using the remap tree feature

   - fix a few leaked chunk maps after errors

   - fix leaked page array in io_uring encoded read if an error occurs
     and the 'finished' is not called

   - fix double release of reserved extents when doing a range COW

   - don't commit super block when the filesystem is in shutdown state

   - fix squota accounting condition when checking members vs parent
     usage

   - other error handling fixes"

* tag 'for-7.0-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: check block group lookup in remove_range_from_remap_tree()
  btrfs: fix transaction handle leaks in btrfs_last_identity_remap_gone()
  btrfs: fix chunk map leak in btrfs_map_block() after btrfs_translate_remap()
  btrfs: fix chunk map leak in btrfs_map_block() after btrfs_chunk_map_num_copies()
  btrfs: fix compat mask in error messages in btrfs_check_features()
  btrfs: print correct subvol num if active swapfile prevents deletion
  btrfs: fix warning in scrub_verify_one_metadata()
  btrfs: fix objectid value in error message in check_extent_data_ref()
  btrfs: fix incorrect key offset in error message in check_dev_extent_item()
  btrfs: fix error message order of parameters in btrfs_delete_delayed_dir_index()
  btrfs: don't commit the super block when unmounting a shutdown filesystem
  btrfs: free pages on error in btrfs_uring_read_extent()
  btrfs: fix referenced/exclusive check in squota_check_parent_usage()
  btrfs: remove pointless WARN_ON() in cache_save_setup()
  btrfs: convert log messages to error level in btrfs_replay_log()
  btrfs: remove btrfs_handle_fs_error() after failure to recover log trees
  btrfs: remove redundant warning message in btrfs_check_uuid_tree()
  btrfs: change warning messages to error level in open_ctree()
  btrfs: fix a double release on reserved extents in cow_one_range()
  btrfs: handle discard errors in in btrfs_finish_extent_commit()
2026-03-03 09:08:00 -08:00
Nilay Shroff
147dae1298 sparc/PCI: Initialize msi_addr_mask for OF-created PCI devices
Recent changes replaced the use of no_64bit_msi with msi_addr_mask, which
is now expected to be initialized to DMA_BIT_MASK(64) during PCI device
setup. On SPARC systems, this initialization was inadvertently missed for
devices instantiated from device tree nodes, leaving msi_addr_mask unset
for OF-created pci_dev instances. As a result, MSI address validation fails
during probe, causing affected devices to fail initialization.

Initialize pdev->msi_addr_mask to DMA_BIT_MASK(64) in of_create_pci_dev()
so that MSI address validation succeeds and PCI device probing works as
expected.

Fixes: 386ced19e9 ("PCI/MSI: Convert the boolean no_64bit_msi flag to a DMA address mask")
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Han Gao <gaohan@iscas.ac.cn> # SPARC Enterprise T5220
Tested-by: Nathaniel Roach <nroach44@nroach44.id.au> # SPARC T5-2
Reviewed-by: Vivian Wang <wangruikang@iscas.ac.cn>
Link: https://patch.msgid.link/20260220070239.1693303-3-nilay@linux.ibm.com
2026-03-03 10:29:48 -06:00
Nilay Shroff
2185904ff8 powerpc/pci: Initialize msi_addr_mask for OF-created PCI devices
Recent changes replaced the use of no_64bit_msi with msi_addr_mask.  As a
result, msi_addr_mask is now expected to be initialized to DMA_BIT_MASK(64)
when a pci_dev is set up. However, this initialization was missed on
powerpc due to differences in the device initialization path compared to
other (x86) architecture. Due to this, now PCI device probe method fails on
powerpc system.

On powerpc systems, struct pci_dev instances are created from device tree
nodes via of_create_pci_dev(). Because msi_addr_mask was not initialized
there, it remained zero. Later, during MSI setup, msi_verify_entries()
validates the programmed MSI address against pdev->msi_addr_mask. Since the
mask was not set correctly, the validation fails, causing PCI driver probe
failures for devices on powerpc systems.

Initialize pdev->msi_addr_mask to DMA_BIT_MASK(64) in of_create_pci_dev()
so that MSI address validation succeeds and device probe works as expected.

Fixes: 386ced19e9 ("PCI/MSI: Convert the boolean no_64bit_msi flag to a DMA address mask")
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Tested-by: Nam Cao <namcao@linutronix.de>
Reviewed-by: Nam Cao <namcao@linutronix.de>
Reviewed-by: Vivian Wang <wangruikang@iscas.ac.cn>
Acked-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260220070239.1693303-2-nilay@linux.ibm.com
2026-03-03 10:29:15 -06:00
Cheng-Yang Chou
1336b579f6 sched_ext: Remove redundant css_put() in scx_cgroup_init()
The iterator css_for_each_descendant_pre() walks the cgroup hierarchy
under cgroup_lock(). It does not increment the reference counts on
yielded css structs.

According to the cgroup documentation, css_put() should only be used
to release a reference obtained via css_get() or css_tryget_online().
Since the iterator does not use either of these to acquire a reference,
calling css_put() in the error path of scx_cgroup_init() causes a
refcount underflow.

Remove the unbalanced css_put() to prevent a potential Use-After-Free
(UAF) vulnerability.

Fixes: 8195136669 ("sched_ext: Add cgroup support")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-03 06:22:37 -10:00
Filipe Manana
0749cab617 btrfs: remove duplicated definition of btrfs_printk_in_rcu()
It's defined twice in a row for the !CONFIG_PRINTK case, so remove one
of the duplicates.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:20:51 +01:00
Filipe Manana
8dd0e6807b btrfs: remove unnecessary transaction abort in the received subvol ioctl
If we fail to remove an item from the uuid tree, we don't need to abort
the transaction since we have not done any change before. So remove that
transaction abort.

Reviewed-by: Anand Jain <asj@kernel.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:20:39 +01:00
Filipe Manana
0f475ee0eb btrfs: abort transaction on failure to update root in the received subvol ioctl
If we failed to update the root we don't abort the transaction, which is
wrong since we already used the transaction to remove an item from the
uuid tree.

Fixes: dd5f9615fc ("Btrfs: maintain subvolume items in the UUID tree")
CC: stable@vger.kernel.org # 3.12+
Reviewed-by: Anand Jain <asj@kernel.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:03:59 +01:00
Filipe Manana
87f2c46003 btrfs: fix transaction abort on set received ioctl due to item overflow
If the set received ioctl fails due to an item overflow when attempting to
add the BTRFS_UUID_KEY_RECEIVED_SUBVOL we have to abort the transaction
since we did some metadata updates before.

This means that if a user calls this ioctl with the same received UUID
field for a lot of subvolumes, we will hit the overflow, trigger the
transaction abort and turn the filesystem into RO mode. A malicious user
could exploit this, and this ioctl does not even requires that a user
has admin privileges (CAP_SYS_ADMIN), only that he/she owns the subvolume.

Fix this by doing an early check for item overflow before starting a
transaction. This is also race safe because we are holding the subvol_sem
semaphore in exclusive (write) mode.

A test case for fstests will follow soon.

Fixes: dd5f9615fc ("Btrfs: maintain subvolume items in the UUID tree")
CC: stable@vger.kernel.org # 3.12+
Reviewed-by: Anand Jain <asj@kernel.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:03:59 +01:00
Filipe Manana
e1b18b9590 btrfs: fix transaction abort when snapshotting received subvolumes
Currently a user can trigger a transaction abort by snapshotting a
previously received snapshot a bunch of times until we reach a
BTRFS_UUID_KEY_RECEIVED_SUBVOL item overflow (the maximum item size we
can store in a leaf). This is very likely not common in practice, but
if it happens, it turns the filesystem into RO mode. The snapshot, send
and set_received_subvol and subvol_setflags (used by receive) don't
require CAP_SYS_ADMIN, just inode_owner_or_capable(). A malicious user
could use this to turn a filesystem into RO mode and disrupt a system.

Reproducer script:

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  # Use smallest node size to make the test faster.
  mkfs.btrfs -f --nodesize 4K $DEV
  mount $DEV $MNT

  # Create a subvolume and set it to RO so that it can be used for send.
  btrfs subvolume create $MNT/sv
  touch $MNT/sv/foo
  btrfs property set $MNT/sv ro true

  # Send and receive the subvolume into snaps/sv.
  mkdir $MNT/snaps
  btrfs send $MNT/sv | btrfs receive $MNT/snaps

  # Now snapshot the received subvolume, which has a received_uuid, a
  # lot of times to trigger the leaf overflow.
  total=500
  for ((i = 1; i <= $total; i++)); do
      echo -ne "\rCreating snapshot $i/$total"
      btrfs subvolume snapshot -r $MNT/snaps/sv $MNT/snaps/sv_$i > /dev/null
  done
  echo

  umount $MNT

When running the test:

  $ ./test.sh
  (...)
  Create subvolume '/mnt/sdi/sv'
  At subvol /mnt/sdi/sv
  At subvol sv
  Creating snapshot 496/500ERROR: Could not create subvolume: Value too large for defined data type
  Creating snapshot 497/500ERROR: Could not create subvolume: Read-only file system
  Creating snapshot 498/500ERROR: Could not create subvolume: Read-only file system
  Creating snapshot 499/500ERROR: Could not create subvolume: Read-only file system
  Creating snapshot 500/500ERROR: Could not create subvolume: Read-only file system

And in dmesg/syslog:

  $ dmesg
  (...)
  [251067.627338] BTRFS warning (device sdi): insert uuid item failed -75 (0x4628b21c4ac8d898, 0x2598bee2b1515c91) type 252!
  [251067.629212] ------------[ cut here ]------------
  [251067.630033] BTRFS: Transaction aborted (error -75)
  [251067.630871] WARNING: fs/btrfs/transaction.c:1907 at create_pending_snapshot.cold+0x52/0x465 [btrfs], CPU#10: btrfs/615235
  [251067.632851] Modules linked in: btrfs dm_zero (...)
  [251067.644071] CPU: 10 UID: 0 PID: 615235 Comm: btrfs Tainted: G        W           6.19.0-rc8-btrfs-next-225+ #1 PREEMPT(full)
  [251067.646165] Tainted: [W]=WARN
  [251067.646733] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
  [251067.648735] RIP: 0010:create_pending_snapshot.cold+0x55/0x465 [btrfs]
  [251067.649984] Code: f0 48 0f (...)
  [251067.653313] RSP: 0018:ffffce644908fae8 EFLAGS: 00010292
  [251067.653987] RAX: 00000000ffffff01 RBX: ffff8e5639e63a80 RCX: 00000000ffffffd3
  [251067.655042] RDX: ffff8e53faa76b00 RSI: 00000000ffffffb5 RDI: ffffffffc0919750
  [251067.656077] RBP: ffffce644908fbd8 R08: 0000000000000000 R09: ffffce644908f820
  [251067.657068] R10: ffff8e5adc1fffa8 R11: 0000000000000003 R12: ffff8e53c0431bd0
  [251067.658050] R13: ffff8e5414593600 R14: ffff8e55efafd000 R15: 00000000ffffffb5
  [251067.659019] FS:  00007f2a4944b3c0(0000) GS:ffff8e5b27dae000(0000) knlGS:0000000000000000
  [251067.660115] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [251067.660943] CR2: 00007ffc5aa57898 CR3: 00000005813a2003 CR4: 0000000000370ef0
  [251067.661972] Call Trace:
  [251067.662292]  <TASK>
  [251067.662653]  create_pending_snapshots+0x97/0xc0 [btrfs]
  [251067.663413]  btrfs_commit_transaction+0x26e/0xc00 [btrfs]
  [251067.664257]  ? btrfs_qgroup_convert_reserved_meta+0x35/0x390 [btrfs]
  [251067.665238]  ? _raw_spin_unlock+0x15/0x30
  [251067.665837]  ? record_root_in_trans+0xa2/0xd0 [btrfs]
  [251067.666531]  btrfs_mksubvol+0x330/0x580 [btrfs]
  [251067.667145]  btrfs_mksnapshot+0x74/0xa0 [btrfs]
  [251067.667827]  __btrfs_ioctl_snap_create+0x194/0x1d0 [btrfs]
  [251067.668595]  btrfs_ioctl_snap_create_v2+0x107/0x130 [btrfs]
  [251067.669479]  btrfs_ioctl+0x1580/0x2690 [btrfs]
  [251067.670093]  ? count_memcg_events+0x6d/0x180
  [251067.670849]  ? handle_mm_fault+0x1a0/0x2a0
  [251067.671652]  __x64_sys_ioctl+0x92/0xe0
  [251067.672406]  do_syscall_64+0x50/0xf20
  [251067.673129]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
  [251067.674096] RIP: 0033:0x7f2a495648db
  [251067.674812] Code: 00 48 89 (...)
  [251067.678227] RSP: 002b:00007ffc5aa57840 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [251067.679691] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f2a495648db
  [251067.681145] RDX: 00007ffc5aa588b0 RSI: 0000000050009417 RDI: 0000000000000004
  [251067.682511] RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000
  [251067.683842] R10: 000000000000000a R11: 0000000000000246 R12: 00007ffc5aa59910
  [251067.685176] R13: 00007ffc5aa588b0 R14: 0000000000000004 R15: 0000000000000006
  [251067.686524]  </TASK>
  [251067.686972] ---[ end trace 0000000000000000 ]---
  [251067.687890] BTRFS: error (device sdi state A) in create_pending_snapshot:1907: errno=-75 unknown
  [251067.689049] BTRFS info (device sdi state EA): forced readonly
  [251067.689054] BTRFS warning (device sdi state EA): Skipping commit of aborted transaction.
  [251067.690119] BTRFS: error (device sdi state EA) in cleanup_transaction:2043: errno=-75 unknown
  [251067.702028] BTRFS info (device sdi state EA): last unmount of filesystem 46dc3975-30a2-4a69-a18f-418b859cccda

Fix this by ignoring -EOVERFLOW errors from btrfs_uuid_tree_add() in the
snapshot creation code when attempting to add the
BTRFS_UUID_KEY_RECEIVED_SUBVOL item. This is OK because it's not critical
and we are still able to delete the snapshot, as snapshot/subvolume
deletion ignores if a BTRFS_UUID_KEY_RECEIVED_SUBVOL is missing (see
inode.c:btrfs_delete_subvolume()). As for send/receive, we can still do
send/receive operations since it always peeks the first root ID in the
existing BTRFS_UUID_KEY_RECEIVED_SUBVOL (it could peek any since all
snapshots have the same content), and even if the key is missing, it
falls back to searching by BTRFS_UUID_KEY_SUBVOL key.

A test case for fstests will be sent soon.

Fixes: dd5f9615fc ("Btrfs: maintain subvolume items in the UUID tree")
CC: stable@vger.kernel.org # 3.12+
Reviewed-by: Boris Burkov <boris@bur.io>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:03:59 +01:00
Filipe Manana
2d1ababded btrfs: fix transaction abort on file creation due to name hash collision
If we attempt to create several files with names that result in the same
hash, we have to pack them in same dir item and that has a limit inherent
to the leaf size. However if we reach that limit, we trigger a transaction
abort and turns the filesystem into RO mode. This allows for a malicious
user to disrupt a system, without the need to have administration
privileges/capabilities.

Reproducer:

  $ cat exploit-hash-collisions.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  # Use smallest node size to make the test faster and require fewer file
  # names that result in hash collision.
  mkfs.btrfs -f --nodesize 4K $DEV
  mount $DEV $MNT

  # List of names that result in the same crc32c hash for btrfs.
  declare -a names=(
   'foobar'
   '%a8tYkxfGMLWRGr55QSeQc4PBNH9PCLIvR6jZnkDtUUru1t@RouaUe_L:@xGkbO3nCwvLNYeK9vhE628gss:T$yZjZ5l-Nbd6CbC$M=hqE-ujhJICXyIxBvYrIU9-TDC'
   'AQci3EUB%shMsg-N%frgU:02ByLs=IPJU0OpgiWit5nexSyxZDncY6WB:=zKZuk5Zy0DD$Ua78%MelgBuMqaHGyKsJUFf9s=UW80PcJmKctb46KveLSiUtNmqrMiL9-Y0I_l5Fnam04CGIg=8@U:Z'
   'CvVqJpJzueKcuA$wqwePfyu7VxuWNN3ho$p0zi2H8QFYK$7YlEqOhhb%:hHgjhIjW5vnqWHKNP4'
   'ET:vk@rFU4tsvMB0$C_p=xQHaYZjvoF%-BTc%wkFW8yaDAPcCYoR%x$FH5O:'
   'HwTon%v7SGSP4FE08jBwwiu5aot2CFKXHTeEAa@38fUcNGOWvE@Mz6WBeDH_VooaZ6AgsXPkVGwy9l@@ZbNXabUU9csiWrrOp0MWUdfi$EZ3w9GkIqtz7I_eOsByOkBOO'
   'Ij%2VlFGXSuPvxJGf5UWy6O@1svxGha%b@=%wjkq:CIgE6u7eJOjmQY5qTtxE2Rjbis9@us'
   'KBkjG5%9R8K9sOG8UTnAYjxLNAvBmvV5vz3IiZaPmKuLYO03-6asI9lJ_j4@6Xo$KZicaLWJ3Pv8XEwVeUPMwbHYWwbx0pYvNlGMO9F:ZhHAwyctnGy%_eujl%WPd4U2BI7qooOSr85J-C2V$LfY'
   'NcRfDfuUQ2=zP8K3CCF5dFcpfiOm6mwenShsAb_F%n6GAGC7fT2JFFn:c35X-3aYwoq7jNX5$ZJ6hI3wnZs$7KgGi7wjulffhHNUxAT0fRRLF39vJ@NvaEMxsMO'
   'Oj42AQAEzRoTxa5OuSKIr=A_lwGMy132v4g3Pdq1GvUG9874YseIFQ6QU'
   'Ono7avN5GjC:_6dBJ_'
   'WHmN2gnmaN-9dVDy4aWo:yNGFzz8qsJyJhWEWcud7$QzN2D9R0efIWWEdu5kwWr73NZm4=@CoCDxrrZnRITr-kGtU_cfW2:%2_am'
   'WiFnuTEhAG9FEC6zopQmj-A-$LDQ0T3WULz%ox3UZAPybSV6v1Z$b4L_XBi4M4BMBtJZpz93r9xafpB77r:lbwvitWRyo$odnAUYlYMmU4RvgnNd--e=I5hiEjGLETTtaScWlQp8mYsBovZwM2k'
   'XKyH=OsOAF3p%uziGF_ZVr$ivrvhVgD@1u%5RtrV-gl_vqAwHkK@x7YwlxX3qT6WKKQ%PR56NrUBU2dOAOAdzr2=5nJuKPM-T-$ZpQfCL7phxQbUcb:BZOTPaFExc-qK-gDRCDW2'
   'd3uUR6OFEwZr%ns1XH_@tbxA@cCPmbBRLdyh7p6V45H$P2$F%w0RqrD3M0g8aGvWpoTFMiBdOTJXjD:JF7=h9a_43xBywYAP%r$SPZi%zDg%ql-KvkdUCtF9OLaQlxmd'
   'ePTpbnit%hyNm@WELlpKzNZYOzOTf8EQ$sEfkMy1VOfIUu3coyvIr13-Y7Sv5v-Ivax2Go_GQRFMU1b3362nktT9WOJf3SpT%z8sZmM3gvYQBDgmKI%%RM-G7hyrhgYflOw%z::ZRcv5O:lDCFm'
   'evqk743Y@dvZAiG5J05L_ROFV@$2%rVWJ2%3nxV72-W7$e$-SK3tuSHA2mBt$qloC5jwNx33GmQUjD%akhBPu=VJ5g$xhlZiaFtTrjeeM5x7dt4cHpX0cZkmfImndYzGmvwQG:$euFYmXn$_2rA9mKZ'
   'gkgUtnihWXsZQTEkrMAWIxir09k3t7jk_IK25t1:cy1XWN0GGqC%FrySdcmU7M8MuPO_ppkLw3=Dfr0UuBAL4%GFk2$Ma10V1jDRGJje%Xx9EV2ERaWKtjpwiZwh0gCSJsj5UL7CR8RtW5opCVFKGGy8Cky'
   'hNgsG_8lNRik3PvphqPm0yEH3P%%fYG:kQLY=6O-61Wa6nrV_WVGR6TLB09vHOv%g4VQRP8Gzx7VXUY1qvZyS'
   'isA7JVzN12xCxVPJZ_qoLm-pTBuhjjHMvV7o=F:EaClfYNyFGlsfw-Kf%uxdqW-kwk1sPl2vhbjyHU1A6$hz'
   'kiJ_fgcdZFDiOptjgH5PN9-PSyLO4fbk_:u5_2tz35lV_iXiJ6cx7pwjTtKy-XGaQ5IefmpJ4N_ZqGsqCsKuqOOBgf9LkUdffHet@Wu'
   'lvwtxyhE9:%Q3UxeHiViUyNzJsy:fm38pg_b6s25JvdhOAT=1s0$pG25x=LZ2rlHTszj=gN6M4zHZYr_qrB49i=pA--@WqWLIuX7o1S_SfS@2FSiUZN'
   'rC24cw3UBDZ=5qJBUMs9e$=S4Y94ni%Z8639vnrGp=0Hv4z3dNFL0fBLmQ40=EYIY:Z=SLc@QLMSt2zsss2ZXrP7j4='
   'uwGl2s-fFrf@GqS=DQqq2I0LJSsOmM%xzTjS:lzXguE3wChdMoHYtLRKPvfaPOZF2fER@j53evbKa7R%A7r4%YEkD=kicJe@SFiGtXHbKe4gCgPAYbnVn'
   'UG37U6KKua2bgc:IHzRs7BnB6FD:2Mt5Cc5NdlsW%$1tyvnfz7S27FvNkroXwAW:mBZLA1@qa9WnDbHCDmQmfPMC9z-Eq6QT0jhhPpqyymaD:R02ghwYo%yx7SAaaq-:x33LYpei$5g8DMl3C'
   'y2vjek0FE1PDJC0qpfnN:x8k2wCFZ9xiUF2ege=JnP98R%wxjKkdfEiLWvQzmnW'
   '8-HCSgH5B%K7P8_jaVtQhBXpBk:pE-$P7ts58U0J@iR9YZntMPl7j$s62yAJO@_9eanFPS54b=UTw$94C-t=HLxT8n6o9P=QnIxq-f1=Ne2dvhe6WbjEQtc'
   'YPPh:IFt2mtR6XWSmjHptXL_hbSYu8bMw-JP8@PNyaFkdNFsk$M=xfL6LDKCDM-mSyGA_2MBwZ8Dr4=R1D%7-mCaaKGxb990jzaagRktDTyp'
   '9hD2ApKa_t_7x-a@GCG28kY:7$M@5udI1myQ$x5udtggvagmCQcq9QXWRC5hoB0o-_zHQUqZI5rMcz_kbMgvN5jr63LeYA4Cj-c6F5Ugmx6DgVf@2Jqm%MafecpgooqreJ53P-QTS'
  )

  # Now create files with all those names in the same parent directory.
  # It should not fail since a 4K leaf has enough space for them.
  for name in "${names[@]}"; do
       touch $MNT/$name
  done

  # Now add one more file name that causes a crc32c hash collision.
  # This should fail, but it should not turn the filesystem into RO mode
  # (which could be exploited by malicious users) due to a transaction
  # abort.
  touch $MNT/'W6tIm-VK2@BGC@IBfcgg6j_p:pxp_QUqtWpGD5Ok_GmijKOJJt'

  # Check that we are able to create another file, with a name that does not cause
  # a crc32c hash collision.
  echo -n "hello world" > $MNT/baz

  # Unmount and mount again, verify file baz exists and with the right content.
  umount $MNT
  mount $DEV $MNT
  echo "File baz content: $(cat $MNT/baz)"

  umount $MNT

When running the reproducer:

  $ ./exploit-hash-collisions.sh
  (...)
  touch: cannot touch '/mnt/sdi/W6tIm-VK2@BGC@IBfcgg6j_p:pxp_QUqtWpGD5Ok_GmijKOJJt': Value too large for defined data type
  ./exploit-hash-collisions.sh: line 57: /mnt/sdi/baz: Read-only file system
  cat: /mnt/sdi/baz: No such file or directory
  File baz content:

And the transaction abort stack trace in dmesg/syslog:

  $ dmesg
  (...)
  [758240.509761] ------------[ cut here ]------------
  [758240.510668] BTRFS: Transaction aborted (error -75)
  [758240.511577] WARNING: fs/btrfs/inode.c:6854 at btrfs_create_new_inode+0x805/0xb50 [btrfs], CPU#6: touch/888644
  [758240.513513] Modules linked in: btrfs dm_zero (...)
  [758240.523221] CPU: 6 UID: 0 PID: 888644 Comm: touch Tainted: G        W           6.19.0-rc8-btrfs-next-225+ #1 PREEMPT(full)
  [758240.524621] Tainted: [W]=WARN
  [758240.525037] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
  [758240.526331] RIP: 0010:btrfs_create_new_inode+0x80b/0xb50 [btrfs]
  [758240.527093] Code: 0f 82 cf (...)
  [758240.529211] RSP: 0018:ffffce64418fbb48 EFLAGS: 00010292
  [758240.529935] RAX: 00000000ffffffd3 RBX: 0000000000000000 RCX: 00000000ffffffb5
  [758240.531040] RDX: 0000000d04f33e06 RSI: 00000000ffffffb5 RDI: ffffffffc0919dd0
  [758240.531920] RBP: ffffce64418fbc10 R08: 0000000000000000 R09: 00000000ffffffb5
  [758240.532928] R10: 0000000000000000 R11: ffff8e52c0000000 R12: ffff8e53eee7d0f0
  [758240.533818] R13: ffff8e57f70932a0 R14: ffff8e5417629568 R15: 0000000000000000
  [758240.534664] FS:  00007f1959a2a740(0000) GS:ffff8e5b27cae000(0000) knlGS:0000000000000000
  [758240.535821] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [758240.536644] CR2: 00007f1959b10ce0 CR3: 000000012a2cc005 CR4: 0000000000370ef0
  [758240.537517] Call Trace:
  [758240.537828]  <TASK>
  [758240.538099]  btrfs_create_common+0xbf/0x140 [btrfs]
  [758240.538760]  path_openat+0x111a/0x15b0
  [758240.539252]  do_filp_open+0xc2/0x170
  [758240.539699]  ? preempt_count_add+0x47/0xa0
  [758240.540200]  ? __virt_addr_valid+0xe4/0x1a0
  [758240.540800]  ? __check_object_size+0x1b3/0x230
  [758240.541661]  ? alloc_fd+0x118/0x180
  [758240.542315]  do_sys_openat2+0x70/0xd0
  [758240.543012]  __x64_sys_openat+0x50/0xa0
  [758240.543723]  do_syscall_64+0x50/0xf20
  [758240.544462]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
  [758240.545397] RIP: 0033:0x7f1959abc687
  [758240.546019] Code: 48 89 fa (...)
  [758240.548522] RSP: 002b:00007ffe16ff8690 EFLAGS: 00000202 ORIG_RAX: 0000000000000101
  [758240.566278] RAX: ffffffffffffffda RBX: 00007f1959a2a740 RCX: 00007f1959abc687
  [758240.567068] RDX: 0000000000000941 RSI: 00007ffe16ffa333 RDI: ffffffffffffff9c
  [758240.567860] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
  [758240.568707] R10: 00000000000001b6 R11: 0000000000000202 R12: 0000561eec7c4b90
  [758240.569712] R13: 0000561eec7c311f R14: 00007ffe16ffa333 R15: 0000000000000000
  [758240.570758]  </TASK>
  [758240.571040] ---[ end trace 0000000000000000 ]---
  [758240.571681] BTRFS: error (device sdi state A) in btrfs_create_new_inode:6854: errno=-75 unknown
  [758240.572899] BTRFS info (device sdi state EA): forced readonly

Fix this by checking for hash collision, and if the adding a new name is
possible, early in btrfs_create_new_inode() before we do any tree updates,
so that we don't need to abort the transaction if we cannot add the new
name due to the leaf size limit.

A test case for fstests will be sent soon.

Fixes: caae78e032 ("btrfs: move common inode creation code into btrfs_create_new_inode()")
CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris@bur.io>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:03:59 +01:00
Mark Harmstone
ae1238b77f btrfs: read key again after incrementing slot in move_existing_remaps()
Fix move_existing_remaps() so that if we increment the slot because the
key we encounter isn't a REMAP_BACKREF, we don't reuse the objectid and
offset of the old item.

Link: https://lore.kernel.org/linux-btrfs/20260125123908.2096548-1-clm@meta.com/
Reported-by: Chris Mason <clm@fb.com>
Fixes: bbea42dfb9 ("btrfs: move existing remaps before relocating block group")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:03:59 +01:00
Bart Van Assche
b2840e3312 btrfs: add missing RCU unlock in error path in try_release_subpage_extent_buffer()
Call rcu_read_lock() before exiting the loop in
try_release_subpage_extent_buffer() because there is a rcu_read_unlock()
call past the loop.

This has been detected by the Clang thread-safety analyzer.

Fixes: ad580dfa38 ("btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()")
CC: stable@vger.kernel.org # 6.18+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 17:03:51 +01:00
Heiko Carstens
674c5ff0f4 s390/stackleak: Fix __stackleak_poison() inline assembly constraint
The __stackleak_poison() inline assembly comes with a "count" operand where
the "d" constraint is used. "count" is used with the exrl instruction and
"d" means that the compiler may allocate any register from 0 to 15.

If the compiler would allocate register 0 then the exrl instruction would
not or the value of "count" into the executed instruction - resulting in a
stackframe which is only partially poisoned.

Use the correct "a" constraint, which excludes register 0 from register
allocation.

Fixes: 2a405f6bb3 ("s390/stackleak: provide fast __stackleak_poison() implementation")
Cc: stable@vger.kernel.org
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Link: https://lore.kernel.org/r/20260302133500.1560531-4-hca@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-03 16:42:14 +01:00
Heiko Carstens
87ff6da300 s390/xor: Improve inline assembly constraints
The inline assembly constraint for the "bytes" operand is "d" for all xor()
inline assemblies. "d" means that any register from 0 to 15 can be used. If
the compiler would use register 0 then the exrl instruction would not or
the value of "bytes" into the executed instruction - resulting in an
incorrect result.

However all the xor() inline assemblies make hard-coded use of register 0,
and it is correctly listed in the clobber list, so that this cannot happen.

Given that this is quite subtle use the better "a" constraint, which
excludes register 0 from register allocation in any case.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Link: https://lore.kernel.org/r/20260302133500.1560531-3-hca@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-03 16:42:14 +01:00
Heiko Carstens
f775276edc s390/xor: Fix xor_xc_2() inline assembly constraints
The inline assembly constraints for xor_xc_2() are incorrect. "bytes",
"p1", and "p2" are input operands, while all three of them are modified
within the inline assembly. Given that the function consists only of this
inline assembly it seems unlikely that this may cause any problems, however
fix this in any case.

Fixes: 2cfc5f9ce7 ("s390/xor: optimized xor routing using the XC instruction")
Cc: stable@vger.kernel.org
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Link: https://lore.kernel.org/r/20260302133500.1560531-2-hca@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-03 16:42:14 +01:00
Vasily Gorbik
5f25805303 s390/xor: Fix xor_xc_5() inline assembly
xor_xc_5() contains a larl 1,2f that is not used by the asm and is not
declared as a clobber. This can corrupt a compiler-allocated value in %r1
and lead to miscompilation. Remove the instruction.

Fixes: 745600ed69 ("s390/lib: Use exrl instead of ex in xor functions")
Cc: stable@vger.kernel.org
Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-03-03 16:42:14 +01:00
Boris Burkov
5131fa077f btrfs: set BTRFS_ROOT_ORPHAN_CLEANUP during subvol create
We have recently observed a number of subvolumes with broken dentries.
ls-ing the parent dir looks like:

drwxrwxrwt 1 root root 16 Jan 23 16:49 .
drwxr-xr-x 1 root root 24 Jan 23 16:48 ..
d????????? ? ?    ?     ?            ? broken_subvol

and similarly stat-ing the file fails.

In this state, deleting the subvol fails with ENOENT, but attempting to
create a new file or subvol over it errors out with EEXIST and even
aborts the fs. Which leaves us a bit stuck.

dmesg contains a single notable error message reading:
"could not do orphan cleanup -2"

2 is ENOENT and the error comes from the failure handling path of
btrfs_orphan_cleanup(), with the stack leading back up to
btrfs_lookup().

btrfs_lookup
btrfs_lookup_dentry
btrfs_orphan_cleanup // prints that message and returns -ENOENT

After some detailed inspection of the internal state, it became clear
that:
- there are no orphan items for the subvol
- the subvol is otherwise healthy looking, it is not half-deleted or
  anything, there is no drop progress, etc.
- the subvol was created a while ago and does the meaningful first
  btrfs_orphan_cleanup() call that sets BTRFS_ROOT_ORPHAN_CLEANUP much
  later.
- after btrfs_orphan_cleanup() fails, btrfs_lookup_dentry() returns -ENOENT,
  which results in a negative dentry for the subvolume via
  d_splice_alias(NULL, dentry), leading to the observed behavior. The
  bug can be mitigated by dropping the dentry cache, at which point we
  can successfully delete the subvolume if we want.

i.e.,
btrfs_lookup()
  btrfs_lookup_dentry()
    if (!sb_rdonly(inode->vfs_inode)->vfs_inode)
    btrfs_orphan_cleanup(sub_root)
      test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP)
      btrfs_search_slot() // finds orphan item for inode N
      ...
      prints "could not do orphan cleanup -2"
  if (inode == ERR_PTR(-ENOENT))
    inode = NULL;
  return d_splice_alias(NULL, dentry) // NEGATIVE DENTRY for valid subvolume

btrfs_orphan_cleanup() does test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP)
on the root when it runs, so it cannot run more than once on a given
root, so something else must run concurrently. However, the obvious
routes to deleting an orphan when nlinks goes to 0 should not be able to
run without first doing a lookup into the subvolume, which should run
btrfs_orphan_cleanup() and set the bit.

The final important observation is that create_subvol() calls
d_instantiate_new() but does not set BTRFS_ROOT_ORPHAN_CLEANUP, so if
the dentry cache gets dropped, the next lookup into the subvolume will
make a real call into btrfs_orphan_cleanup() for the first time. This
opens up the possibility of concurrently deleting the inode/orphan items
but most typical evict() paths will be holding a reference on the parent
dentry (child dentry holds parent->d_lockref.count via dget in
d_alloc(), released in __dentry_kill()) and prevent the parent from
being removed from the dentry cache.

The one exception is delayed iputs. Ordered extent creation calls
igrab() on the inode. If the file is unlinked and closed while those
refs are held, iput() in __dentry_kill() decrements i_count but does
not trigger eviction (i_count > 0). The child dentry is freed and the
subvol dentry's d_lockref.count drops to 0, making it evictable while
the inode is still alive.

Since there are two races (the race between writeback and unlink and
the race between lookup and delayed iputs), and there are too many moving
parts, the following three diagrams show the complete picture.
(Only the second and third are races)

Phase 1:
Create Subvol in dentry cache without BTRFS_ROOT_ORPHAN_CLEANUP set

btrfs_mksubvol()
  lookup_one_len()
    __lookup_slow()
      d_alloc_parallel()
        __d_alloc() // d_lockref.count = 1
  create_subvol(dentry)
    // doesn't touch the bit..
    d_instantiate_new(dentry, inode) // dentry in cache with d_lockref.count == 1

Phase 2:
Create a delayed iput for a file in the subvol but leave the subvol in
state where its dentry can be evicted (d_lockref.count == 0)

T1 (task)                    T2 (writeback)                   T3 (OE workqueue)

write() // dirty pages
                              btrfs_writepages()
                                btrfs_run_delalloc_range()
                                  cow_file_range()
                                    btrfs_alloc_ordered_extent()
                                      igrab() // i_count: 1 -> 2
btrfs_unlink_inode()
  btrfs_orphan_add()
close()
  __fput()
    dput()
      finish_dput()
        __dentry_kill()
          dentry_unlink_inode()
            iput() // 2 -> 1
          --parent->d_lockref.count // 1 -> 0; evictable
                                                                finish_ordered_fn()
                                                                  btrfs_finish_ordered_io()
                                                                    btrfs_put_ordered_extent()
                                                                      btrfs_add_delayed_iput()

Phase 3:
Once the delayed iput is pending and the subvol dentry is evictable,
the shrinker can free it, causing the next lookup to go through
btrfs_lookup() and call btrfs_orphan_cleanup() for the first time.
If the cleaner kthread processes the delayed iput concurrently, the
two race:

  T1 (shrinker)              T2 (cleaner kthread)                          T3 (lookup)

  super_cache_scan()
    prune_dcache_sb()
      __dentry_kill()
      // subvol dentry freed
                              btrfs_run_delayed_iputs()
                                iput()  // i_count -> 0
                                  evict()  // sets I_FREEING
                                    btrfs_evict_inode()
                                      // truncation loop
                                                                            btrfs_lookup()
                                                                              btrfs_lookup_dentry()
                                                                                btrfs_orphan_cleanup()
                                                                                  // first call (bit never set)
                                                                                  btrfs_iget()
                                                                                    // blocks on I_FREEING

                                      btrfs_orphan_del()
                                      // inode freed
                                                                                    // returns -ENOENT
                                                                                  btrfs_del_orphan_item()
                                                                                    // -ENOENT
                                                                                // "could not do orphan cleanup -2"
                                                                            d_splice_alias(NULL, dentry)
                                                                            // negative dentry for valid subvol

The most straightforward fix is to ensure the invariant that a dentry
for a subvolume can exist if and only if that subvolume has
BTRFS_ROOT_ORPHAN_CLEANUP set on its root (and is known to have no
orphans or ran btrfs_orphan_cleanup()).

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 16:25:59 +01:00
Johannes Thumshirn
17da926ca8 btrfs: zoned: move btrfs_zoned_reserve_data_reloc_bg() after kthread start
btrfs_zoned_reserve_data_reloc_bg() is called on each mount of a file
system and allocates a new block-group, to assign it to be the dedicated
relocation target, if no pre-existing usable block-group for this task is
found.

If for some reason the transaction is aborted, btrfs_end_transaction()
will wake up the transaction kthread. But the transaction kthread is not
yet initialized at the time btrfs_zoned_reserve_data_reloc_bg() is
called, leading to the following NULL-pointer dereference:

  RSP: 0018:ffffc9000c617c98 EFLAGS: 00010046
  RAX: 0000000000000000 RBX: 000000000000073c RCX: 0000000000000002
  RDX: 0000000000000001 RSI: 0000000000000003 RDI: 0000000000000001
  RBP: 0000000000000207 R08: ffffffff8223c71d R09: 0000000000000635
  R10: ffff888108588000 R11: 0000000000000003 R12: 0000000000000003
  R13: 000000000000073c R14: 0000000000000000 R15: ffff888114dd6000
  FS:  00007f2993745840(0000) GS:ffff8882b508d000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 000000000000073c CR3: 0000000121a82006 CR4: 0000000000770eb0
  PKRU: 55555554
  Call Trace:
   <TASK>
   try_to_wake_up (./include/linux/spinlock.h:557 kernel/sched/core.c:4106)
   __btrfs_end_transaction (fs/btrfs/transaction.c:1115 (discriminator 2))
   btrfs_zoned_reserve_data_reloc_bg (fs/btrfs/zoned.c:2840)
   open_ctree (fs/btrfs/disk-io.c:3588)
   btrfs_get_tree.cold (fs/btrfs/super.c:982 fs/btrfs/super.c:1944 fs/btrfs/super.c:2087 fs/btrfs/super.c:2121)
   vfs_get_tree (fs/super.c:1752)
   __do_sys_fsconfig (fs/fsopen.c:231 fs/fsopen.c:295 fs/fsopen.c:473)
   do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1))
   entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:131)
  RIP: 0033:0x7f299392740e

Move the call to btrfs_zoned_reserve_data_reloc_bg() after the
transaction_kthread has been initialized to fix this problem.

Fixes: 694ce5e143 ("btrfs: zoned: reserve data_reloc block group on mount")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 16:25:32 +01:00
Sun YangKai
b8883b61f2 btrfs: hold space_info->lock when clearing periodic reclaim ready
btrfs_set_periodic_reclaim_ready() requires space_info->lock to be held,
as enforced by lockdep_assert_held(). However, btrfs_reclaim_sweep() was
calling it after do_reclaim_sweep() returns, at which point
space_info->lock is no longer held.

Fix this by explicitly acquiring space_info->lock before clearing the
periodic reclaim ready flag in btrfs_reclaim_sweep().

Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-btrfs/20260208182556.891815-1-clm@meta.com/
Fixes: 19eff93dc7 ("btrfs: fix periodic reclaim condition")
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Sun YangKai <sunk67188@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 15:54:00 +01:00
Mark Harmstone
b85cfdf46b btrfs: print-tree: add remap tree definitions
Add the definitions for the remap tree to print-tree.c, so that we get
more useful information if a tree is dumped to dmesg.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-03-03 15:53:51 +01:00
Rafael J. Wysocki
00fd9aad55 Revert "ACPI: PM: Let acpi_dev_pm_attach() skip devices without ACPI PM"
Revert commit 88fad6ce09 ("ACPI: PM: Let acpi_dev_pm_attach() skip
devices without ACPI PM") that introduced a SoundWire suspend regression
[1].

It is actually not true that the commit above doesn't make a functional
difference because acpi_subsys_suspend(), for example, may resume
devices in runtime-suspend which affects the subsequent handling of
those devices during the suspend transition.  For this reason, the
devices that were handled by the ACPI PM domain before that commit may
be handled differently now which may lead to suspend-resume issues.

Fixes: 88fad6ce09 ("ACPI: PM: Let acpi_dev_pm_attach() skip devices without ACPI PM")
Reported-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Closes: https://github.com/thesofproject/linux/pull/5677#issuecomment-3984375077 [1]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/2829615.mvXUDI8C0e@rafael.j.wysocki
2026-03-03 15:32:53 +01:00
Charles Keepax
2799018103 ASoC: SDCA: Add allocation failure check for Entity name
Currently find_sdca_entity_iot() can allocate a string for the
Entity name but it doesn't check if that allocation succeeded.
Add the missing NULL check after the allocation.

Fixes: 48fa77af2f ("ASoC: SDCA: Add terminal type into input/output widget name")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260303141707.3841635-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-03 14:23:32 +00:00
Hou Wenlong
7271cb98e4 x86/PVH: Use boot params to pass RSDP address in start_info page
After commit e6e094e053 ("x86/acpi, x86/boot: Take RSDP address from
boot params if available"), the RSDP address can be passed in boot
params. Therefore, store the RSDP address in start_info page into boot
params in the PVH entry instead of registering a different callback.
This removes an absolute reference during the PVH entry and is more
standardized.

Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <76675c4d49d3a8f72252076812ef8f22276230c2.1772282441.git.houwenlong.hwl@antgroup.com>
2026-03-03 15:06:19 +01:00
kexinsun
b8c460a045 x86/xen: update outdated comment
The function xen_flush_tlb_others() was renamed xen_flush_tlb_multi()
by commit 4ce94eabac ("x86/mm/tlb: Flush remote and local TLBs
concurrently").  Update the comment accordingly.

Signed-off-by: kexinsun <kexinsun@smail.nju.edu.cn>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260224022424.1718-1-kexinsun@smail.nju.edu.cn>
2026-03-03 14:54:59 +01:00
David Thomson
8b57227d59 xen/acpi-processor: fix _CST detection using undersized evaluation buffer
read_acpi_id() attempts to evaluate _CST using a stack buffer of
sizeof(union acpi_object) (48 bytes), but _CST returns a nested Package
of sub-Packages (one per C-state, each containing a register descriptor,
type, latency, and power) requiring hundreds of bytes. The evaluation
always fails with AE_BUFFER_OVERFLOW.

On modern systems using FFH/MWAIT entry (where pblk is zero), this
causes the function to return before setting the acpi_id_cst_present
bit. In check_acpi_ids(), flags.power is then zero for all Phase 2 CPUs
(physical CPUs beyond dom0's vCPU count), so push_cxx_to_hypervisor() is
never called for them.

On a system with dom0_max_vcpus=2 and 8 physical CPUs, only PCPUs 0-1
receive C-state data. PCPUs 2-7 are stuck in C0/C1 idle, unable to
enter C2/C3. This costs measurable wall power (4W observed on an Intel
Core Ultra 7 265K with Xen 4.20).

The function never uses the _CST return value -- it only needs to know
whether _CST exists. Replace the broken acpi_evaluate_object() call with
acpi_has_method(), which correctly detects _CST presence using
acpi_get_handle() without any buffer allocation. This brings C-state
detection to parity with the P-state path, which already works correctly
for Phase 2 CPUs.

Fixes: 59a5680291 ("xen/acpi-processor: C and P-state driver that uploads said data to hypervisor.")
Signed-off-by: David Thomson <dt@linux-mail.net>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260224093707.19679-1-dt@linux-mail.net>
2026-03-03 14:53:46 +01:00
Hou Wenlong
63dc2c34a9 x86/xen: Build identity mapping page tables dynamically for XENPV
After commit 47ffe0578a ("x86/pvh: Add 64bit relocation page tables"),
the PVH entry uses a new set of page tables instead of the
preconstructed page tables in head64.S. Since those preconstructed page
tables are only used in XENPV now and XENPV does not actually need the
preconstructed identity page tables directly, they can be filled in
xen_setup_kernel_pagetable(). Therefore, build the identity mapping page
table dynamically to remove the preconstructed page tables and make the
code cleaner.

Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Acked-by: "Borislav Petkov (AMD)" <bp@alien8.de>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <453981eae7e8158307f971d1632d5023adbe03c3.1769074722.git.houwenlong.hwl@antgroup.com>
2026-03-03 14:21:44 +01:00
Thorsten Blum
d1a196e0a6 platform/x86: dell-wmi-sysman: Don't hex dump plaintext password data
set_new_password() hex dumps the entire buffer, which contains plaintext
password data, including current and new passwords. Remove the hex dump
to avoid leaking credentials.

Fixes: e8a60aa740 ("platform/x86: Introduce support for Systems Management Driver over WMI for Dell Systems")
Cc: stable@vger.kernel.org
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Link: https://patch.msgid.link/20260303113050.58127-2-thorsten.blum@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-03-03 14:45:17 +02:00
YiFei Zhu
1a86a1f7d8 net: Fix rcu_tasks stall in threaded busypoll
I was debugging a NIC driver when I noticed that when I enable
threaded busypoll, bpftrace hangs when starting up. dmesg showed:

  rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 10658 jiffies old.
  rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 40793 jiffies old.
  rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 131273 jiffies old.
  rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 402058 jiffies old.
  INFO: rcu_tasks detected stalls on tasks:
  00000000769f52cd: .N nvcsw: 2/2 holdout: 1 idle_cpu: -1/64
  task:napi/eth2-8265  state:R  running task     stack:0     pid:48300 tgid:48300 ppid:2      task_flags:0x208040 flags:0x00004000
  Call Trace:
   <TASK>
   ? napi_threaded_poll_loop+0x27c/0x2c0
   ? __pfx_napi_threaded_poll+0x10/0x10
   ? napi_threaded_poll+0x26/0x80
   ? kthread+0xfa/0x240
   ? __pfx_kthread+0x10/0x10
   ? ret_from_fork+0x31/0x50
   ? __pfx_kthread+0x10/0x10
   ? ret_from_fork_asm+0x1a/0x30
   </TASK>

The cause is that in threaded busypoll, the main loop is in
napi_threaded_poll rather than napi_threaded_poll_loop, where the
latter rarely iterates more than once within its loop. For
rcu_softirq_qs_periodic inside napi_threaded_poll_loop to report its
qs state, the last_qs must be 100ms behind, and this can't happen
because napi_threaded_poll_loop rarely iterates in threaded busypoll,
and each time napi_threaded_poll_loop is called last_qs is reset to
latest jiffies.

This patch changes so that in threaded busypoll, last_qs is saved
in the outer napi_threaded_poll, and whether busy_poll_last_qs
is NULL indicates whether napi_threaded_poll_loop is called for
busypoll. This way last_qs would not reset to latest jiffies on
each invocation of napi_threaded_poll_loop.

Fixes: c18d4b190a ("net: Extend NAPI threaded polling to allow kthread based busy polling")
Cc: stable@vger.kernel.org
Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Link: https://patch.msgid.link/20260227221937.1060857-1-zhuyifei@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 13:44:28 +01:00
Danilo Krummrich
9de68394a6 Revert "driver core: enforce device_lock for driver_match_device()"
This reverts commit dc23806a7c ("driver core: enforce device_lock for
driver_match_device()") and commit 289b14592c ("driver core: fix
inverted "locked" suffix of driver_match_device()").

While technically correct, there is a major downside to this approach:

When a device is already present in the system and a driver is
registered on the same bus, we iterate over all devices registered on
this bus to see if one of them matches. If we come across an already
bound one where the corresponding driver crashed while holding the
device lock (e.g. in probe()) we can't make any progress anymore.

However, drivers are typically the least tested code in the kernel and
hence it is a case that is likely to happen regularly. Besides hurting
developer ergonomics, it potentially decreases chances of shutting
things down cleanly and obtaining logs in production environments as
well [1].

This came up in the context of a firewire bug, which only in combination
with the reverted commit, caused the machine to hang [2]. Additionally,
it was observed in [3].

Thus, revert commit dc23806a7c ("driver core: enforce device_lock for
driver_match_device()") and add a brief note clarifying that an
implementer of struct bus_type must not expect match() to be called with
the device lock held.

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Link: https://lore.kernel.org/all/67f655bb-4d81-4609-b008-68d200255dd2@davidgow.net/ [2]
Link: https://lore.kernel.org/lkml/CALbr=LZ4v7N=tO1vgOsyj9AS+XuNbn6kG-QcF+PacdMjSo0iyw@mail.gmail.com/ [3]
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Closes: https://lore.kernel.org/driver-core/CAHk-=wgJ_L1C=HjcYJotg_zrZEmiLFJaoic+PWthjuQrutrfJw@mail.gmail.com/
Reviewed-by: Gui-Dong Han <hanguidong02@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260302002545.19389-1-dakr@kernel.org
[ Add additional Link: reference. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-03-03 13:12:42 +01:00
Allison Henderson
6a877ececd net/rds: Fix circular locking dependency in rds_tcp_tune
syzbot reported a circular locking dependency in rds_tcp_tune() where
sk_net_refcnt_upgrade() is called while holding the socket lock:

======================================================
WARNING: possible circular locking dependency detected
======================================================
kworker/u10:8/15040 is trying to acquire lock:
ffffffff8e9aaf80 (fs_reclaim){+.+.}-{0:0},
at: __kmalloc_cache_noprof+0x4b/0x6f0

but task is already holding lock:
ffff88805a3c1ce0 (k-sk_lock-AF_INET6){+.+.}-{0:0},
at: rds_tcp_tune+0xd7/0x930

The issue occurs because sk_net_refcnt_upgrade() performs memory
allocation (via get_net_track() -> ref_tracker_alloc()) while the
socket lock is held, creating a circular dependency with fs_reclaim.

Fix this by moving sk_net_refcnt_upgrade() outside the socket lock
critical section. This is safe because the fields modified by the
sk_net_refcnt_upgrade() call (sk_net_refcnt, ns_tracker) are not
accessed by any concurrent code path at this point.

v2:
  - Corrected fixes tag
  - check patch line wrap nits
  - ai commentary nits

Reported-by: syzbot+2e2cf5331207053b8106@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2e2cf5331207053b8106
Fixes: 3a58f13a88 ("net: rds: acquire refcount on TCP sockets")
Signed-off-by: Allison Henderson <achender@kernel.org>
Link: https://patch.msgid.link/20260227202336.167757-1-achender@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 12:57:06 +01:00
Eric Dumazet
710f5c7658 indirect_call_wrapper: do not reevaluate function pointer
We have an increasing number of READ_ONCE(xxx->function)
combined with INDIRECT_CALL_[1234]() helpers.

Unfortunately this forces INDIRECT_CALL_[1234]() to read
xxx->function many times, which is not what we wanted.

Fix these macros so that xxx->function value is not reloaded.

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/65 up/down: 122/-1084 (-962)
Function                                     old     new   delta
ip_push_pending_frames                        59     181    +122
ip6_finish_output                            687     681      -6
__udp_enqueue_schedule_skb                  1078    1072      -6
ioam6_output                                2319    2312      -7
xfrm4_rcv_encap_finish2                       64      56      -8
xfrm4_output                                 297     289      -8
vrf_ip_local_out                             278     270      -8
vrf_ip6_local_out                            278     270      -8
seg6_input_finish                             64      56      -8
rpl_output                                   700     692      -8
ipmr_forward_finish                          124     116      -8
ip_forward_finish                            143     135      -8
ip6mr_forward2_finish                        100      92      -8
ip6_forward_finish                            73      65      -8
input_action_end_bpf                        1091    1083      -8
dst_input                                     52      44      -8
__xfrm6_output                               801     793      -8
__xfrm4_output                                83      75      -8
bpf_input                                    500     491      -9
__tcp_check_space                            530     521      -9
input_action_end_dt6                         291     280     -11
vti6_tnl_xmit                               1634    1622     -12
bpf_xmit                                    1203    1191     -12
rpl_input                                    497     483     -14
rawv6_send_hdrinc                           1355    1341     -14
ndisc_send_skb                              1030    1016     -14
ipv6_srh_rcv                                1377    1363     -14
ip_send_unicast_reply                       1253    1239     -14
ip_rcv_finish                                226     212     -14
ip6_rcv_finish                               300     286     -14
input_action_end_x_core                      205     191     -14
input_action_end_x                           355     341     -14
input_action_end_t                           205     191     -14
input_action_end_dx6_finish                  127     113     -14
input_action_end_dx4_finish                  373     359     -14
input_action_end_dt4                         426     412     -14
input_action_end_core                        186     172     -14
input_action_end_b6_encap                    292     278     -14
input_action_end_b6                          198     184     -14
igmp6_send                                  1332    1318     -14
ip_sublist_rcv                               864     848     -16
ip6_sublist_rcv                             1091    1075     -16
ipv6_rpl_srh_rcv                            1937    1920     -17
xfrm_policy_queue_process                   1246    1228     -18
seg6_output_core                             903     885     -18
mld_sendpack                                 856     836     -20
NF_HOOK                                      756     736     -20
vti_tunnel_xmit                             1447    1426     -21
input_action_end_dx6                         664     642     -22
input_action_end                            1502    1480     -22
sock_sendmsg_nosec                           134     111     -23
ip6mr_forward2                               388     364     -24
sock_recvmsg_nosec                           134     109     -25
seg6_input_core                              836     810     -26
ip_send_skb                                  172     146     -26
ip_local_out                                 140     114     -26
ip6_local_out                                140     114     -26
__sock_sendmsg                               162     136     -26
__ip_queue_xmit                             1196    1170     -26
__ip_finish_output                           405     379     -26
ipmr_queue_fwd_xmit                          373     346     -27
sock_recvmsg                                 173     145     -28
ip6_xmit                                    1635    1607     -28
xfrm_output_resume                          1418    1389     -29
ip_build_and_send_pkt                        625     591     -34
dst_output                                   504     432     -72
Total: Before=25217686, After=25216724, chg -0.00%

Fixes: 283c16a2df ("indirect call wrappers: helpers to speed-up indirect calls of builtin")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260227172603.1700433-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 12:41:29 +01:00
Lorenzo Bianconi
4e10a730d1 wifi: mt76: Fix possible oob access in mt76_connac2_mac_write_txwi_80211()
Check frame length before accessing the mgmt fields in
mt76_connac2_mac_write_txwi_80211 in order to avoid a possible oob
access.

Fixes: 577dbc6c65 ("mt76: mt7915: enable offloading of sequence number assignment")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260226-mt76-addba-req-oob-access-v1-3-b0f6d1ad4850@kernel.org
[fix check to also cover mgmt->u.action.u.addba_req.capab,
correct Fixes tag]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-03 12:13:36 +01:00
Lorenzo Bianconi
c41a9abd6a wifi: mt76: mt7925: Fix possible oob access in mt7925_mac_write_txwi_80211()
Check frame length before accessing the mgmt fields in
mt7925_mac_write_txwi_80211 in order to avoid a possible oob access.

Fixes: c948b5da6b ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260226-mt76-addba-req-oob-access-v1-2-b0f6d1ad4850@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-03 12:09:45 +01:00
Lorenzo Bianconi
6086284630 wifi: mt76: mt7996: Fix possible oob access in mt7996_mac_write_txwi_80211()
Check frame length before accessing the mgmt fields in
mt7996_mac_write_txwi_80211 in order to avoid a possible oob access.

Fixes: 98686cd216 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260226-mt76-addba-req-oob-access-v1-1-b0f6d1ad4850@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-03 12:09:45 +01:00
Johannes Berg
9003a0e3b6 Merge tag 'ath-current-20260302' of git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath
Jeff Johnson says:
==================
ath.git update for v7.0-rc3

Fix issues with ath12k station statistics requests.
==================

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-03 12:05:21 +01:00
Bart Van Assche
72c6df8f28 wifi: wlcore: Fix a locking bug
Make sure that wl->mutex is locked before it is unlocked. This has been
detected by the Clang thread-safety analyzer.

Fixes: 45aa7f071b ("wlcore: Use generic runtime pm calls for wowlan elp configuration")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260223220102.2158611-26-bart.vanassche@linux.dev
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-03 12:02:05 +01:00
Bart Van Assche
d98c24617a wifi: cw1200: Fix locking in error paths
cw1200_wow_suspend() must only return with priv->conf_mutex locked if it
returns zero. This mutex must be unlocked if an error is returned. Add
mutex_unlock() calls to the error paths from which that call is missing.
This has been detected by the Clang thread-safety analyzer.

Fixes: a910e4a94f ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260223220102.2158611-25-bart.vanassche@linux.dev
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-03 12:00:51 +01:00
Paolo Abeni
699f3b2e51 Merge branch 'avoid-compiler-and-iq-oq-reordering'
Vimlesh Kumar says:

====================
avoid compiler and IQ/OQ reordering

Utilize READ_ONCE and WRITE_ONCE APIs to prevent compiler
optimization and reordering. Ensure IO queue OUT/IN_CNT
registers are flushed. Relocate IQ/OQ IN/OUT_CNTS updates
to occur before NAPI completion, and replace napi_complete
with napi_complete_done.
====================

Link: https://patch.msgid.link/20260227091402.1773833-1-vimleshk@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 11:34:22 +01:00
Vimlesh Kumar
6c73126ecd octeon_ep_vf: avoid compiler and IQ/OQ reordering
Utilize READ_ONCE and WRITE_ONCE APIs for IO queue Tx/Rx
variable access to prevent compiler optimization and reordering.
Additionally, ensure IO queue OUT/IN_CNT registers are flushed
by performing a read-back after writing.

The compiler could reorder reads/writes to pkts_pending, last_pkt_count,
etc., causing stale values to be used when calculating packets to process
or register updates to send to hardware. The Octeon hardware requires a
read-back after writing to OUT_CNT/IN_CNT registers to ensure the write
has been flushed through any posted write buffers before the interrupt
resend bit is set. Without this, we have observed cases where the hardware
didn't properly update its internal state.

wmb/rmb only provides ordering guarantees but doesn't prevent the compiler
from performing optimizations like caching in registers, load tearing etc.

Fixes: 1cd3b40797 ("octeon_ep_vf: add Tx/Rx processing and interrupt support")
Signed-off-by: Sathesh Edara <sedara@marvell.com>
Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
Signed-off-by: Vimlesh Kumar <vimleshk@marvell.com>
Link: https://patch.msgid.link/20260227091402.1773833-5-vimleshk@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 11:34:20 +01:00
Vimlesh Kumar
2ae7d20fb2 octeon_ep_vf: Relocate counter updates before NAPI
Relocate IQ/OQ IN/OUT_CNTS updates to occur before NAPI completion.
Moving the IQ/OQ counter updates before napi_complete_done ensures
1. Counter registers are updated before re-enabling interrupts.
2. Prevents a race where new packets arrive but counters aren't properly
   synchronized.

Fixes: 1cd3b40797 ("octeon_ep_vf: add Tx/Rx processing and interrupt support")
Signed-off-by: Sathesh Edara <sedara@marvell.com>
Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
Signed-off-by: Vimlesh Kumar <vimleshk@marvell.com>
Link: https://patch.msgid.link/20260227091402.1773833-4-vimleshk@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 11:34:20 +01:00
Vimlesh Kumar
43b3160cb6 octeon_ep: avoid compiler and IQ/OQ reordering
Utilize READ_ONCE and WRITE_ONCE APIs for IO queue Tx/Rx
variable access to prevent compiler optimization and reordering.
Additionally, ensure IO queue OUT/IN_CNT registers are flushed
by performing a read-back after writing.

The compiler could reorder reads/writes to pkts_pending, last_pkt_count,
etc., causing stale values to be used when calculating packets to process
or register updates to send to hardware. The Octeon hardware requires a
read-back after writing to OUT_CNT/IN_CNT registers to ensure the write
has been flushed through any posted write buffers before the interrupt
resend bit is set. Without this, we have observed cases where the hardware
didn't properly update its internal state.

wmb/rmb only provides ordering guarantees but doesn't prevent the compiler
from performing optimizations like caching in registers, load tearing etc.

Fixes: 37d79d0596 ("octeon_ep: add Tx/Rx processing and interrupt support")
Signed-off-by: Sathesh Edara <sedara@marvell.com>
Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
Signed-off-by: Vimlesh Kumar <vimleshk@marvell.com>
Link: https://patch.msgid.link/20260227091402.1773833-3-vimleshk@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 11:34:20 +01:00
Vimlesh Kumar
18c04a808c octeon_ep: Relocate counter updates before NAPI
Relocate IQ/OQ IN/OUT_CNTS updates to occur before NAPI completion,
and replace napi_complete with napi_complete_done.

Moving the IQ/OQ counter updates before napi_complete_done ensures
1. Counter registers are updated before re-enabling interrupts.
2. Prevents a race where new packets arrive but counters aren't properly
   synchronized.
napi_complete_done (vs napi_complete) allows for better
interrupt coalescing.

Fixes: 37d79d0596 ("octeon_ep: add Tx/Rx processing and interrupt support")
Signed-off-by: Sathesh Edara <sedara@marvell.com>
Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
Signed-off-by: Vimlesh Kumar <vimleshk@marvell.com>
Link: https://patch.msgid.link/20260227091402.1773833-2-vimleshk@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 11:34:20 +01:00
Paolo Abeni
210fd8f408 Merge branch 'bonding-fix-missing-xdp-compat-check-on-xmit_hash_policy-change'
Jiayuan Chen says:

====================
bonding: fix missing XDP compat check on xmit_hash_policy change

syzkaller reported a bug https://syzkaller.appspot.com/bug?extid=5a287bcdc08104bc3132

When a bond device is in 802.3ad or balance-xor mode, XDP is supported
only when xmit_hash_policy != vlan+srcmac.  This constraint is enforced
in bond_option_mode_set() via bond_xdp_check(), which prevents switching
to an XDP-incompatible mode while a program is loaded.  However, the
symmetric path -- changing xmit_hash_policy while XDP is loaded -- had
no such guard in bond_option_xmit_hash_policy_set().

This means the following sequence silently creates an inconsistent state:

  1. Create a bond in 802.3ad mode with xmit_hash_policy=layer2+3.
  2. Attach a native XDP program to the bond.
  3. Change xmit_hash_policy to vlan+srcmac (no error, not checked).

Now bond->xdp_prog is set but bond_xdp_check() returns false for the
same device.  When the bond is later torn down (e.g. netns deletion),
dev_xdp_uninstall() calls bond_xdp_set(dev, NULL) to remove the
program, which hits the bond_xdp_check() guard and returns -EOPNOTSUPP,
triggering a kernel WARNING:

  bond1 (unregistering): Error: No native XDP support for the current bonding mode
  ------------[ cut here ]------------
  dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)
  WARNING: net/core/dev.c:10361 at dev_xdp_uninstall net/core/dev.c:10361 [inline], CPU#0: kworker/u8:22/11031
  Modules linked in:
  CPU: 0 UID: 0 PID: 11031 Comm: kworker/u8:22 Not tainted syzkaller #0 PREEMPT(full)
  Workqueue: netns cleanup_net
  RIP: 0010:dev_xdp_uninstall net/core/dev.c:10361 [inline]
  RIP: 0010:unregister_netdevice_many_notify+0x1efd/0x2370 net/core/dev.c:12393
  RSP: 0018:ffffc90003b2f7c0 EFLAGS: 00010293
  RAX: ffffffff8971e99c RBX: ffff888052f84c40 RCX: ffff88807896bc80
  RDX: 0000000000000000 RSI: 00000000ffffffa1 RDI: 0000000000000000
  RBP: ffffc90003b2f930 R08: ffffc90003b2f207 R09: 1ffff92000765e40
  R10: dffffc0000000000 R11: fffff52000765e41 R12: 00000000ffffffa1
  R13: ffff888052f84c38 R14: 1ffff1100a5f0988 R15: ffffc9000df67000
  FS:  0000000000000000(0000) GS:ffff8881254ae000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f60871d5d58 CR3: 000000006c41c000 CR4: 00000000003526f0
  Call Trace:
   <TASK>
   ops_exit_rtnl_list net/core/net_namespace.c:187 [inline]
   ops_undo_list+0x3d3/0x940 net/core/net_namespace.c:248
   cleanup_net+0x56b/0x800 net/core/net_namespace.c:704
   process_one_work kernel/workqueue.c:3275 [inline]
   process_scheduled_works+0xaec/0x17a0 kernel/workqueue.c:3358
   worker_thread+0xa50/0xfc0 kernel/workqueue.c:3439
   kthread+0x388/0x470 kernel/kthread.c:467
   ret_from_fork+0x51e/0xb90 arch/x86/kernel/process.c:158
   ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
   </TASK>

Beyond the WARNING itself, when dev_xdp_install() fails during
dev_xdp_uninstall(), bond_xdp_set() returns early without calling
bpf_prog_put() on the old program.  dev_xdp_uninstall() then releases
only the reference held by dev->xdp_state[], while the reference held
by bond->xdp_prog is never dropped, leaking the struct bpf_prog.

The fix refactors the core logic of bond_xdp_check() into a new helper
__bond_xdp_check_mode(mode, xmit_policy) that takes both parameters
explicitly, avoiding the need to read them from the bond struct.
bond_xdp_check() becomes a thin wrapper around it.
bond_option_xmit_hash_policy_set() then uses __bond_xdp_check_mode()
directly, passing the candidate xmit_policy before it is committed,
mirroring exactly what bond_option_mode_set() already does for mode
changes.

Patch 1 adds the kernel fix.
Patch 2 adds a selftest that reproduces the WARNING by attaching native
XDP to a bond in 802.3ad mode, then attempting to change xmit_hash_policy
to vlan+srcmac -- verifying the change is rejected with the fix applied.
====================

Link: https://patch.msgid.link/20260226080306.98766-1-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 10:47:44 +01:00
Jiayuan Chen
181cafbd8a selftests/bpf: add test for xdp_bonding xmit_hash_policy compat
Add a selftest to verify that changing xmit_hash_policy to vlan+srcmac
is rejected when a native XDP program is loaded on a bond in 802.3ad
mode.  Without the fix in bond_option_xmit_hash_policy_set(), the change
succeeds silently, creating an inconsistent state that triggers a kernel
WARNING in dev_xdp_uninstall() when the bond is torn down.

The test attaches native XDP to a bond0 (802.3ad, layer2+3), then
attempts to switch xmit_hash_policy to vlan+srcmac and asserts the
operation fails.  It also verifies the change succeeds after XDP is
detached, confirming the rejection is specific to the XDP-loaded state.

Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260226080306.98766-3-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 10:47:38 +01:00
Jiayuan Chen
479d589b40 bpf/bonding: reject vlan+srcmac xmit_hash_policy change when XDP is loaded
bond_option_mode_set() already rejects mode changes that would make a
loaded XDP program incompatible via bond_xdp_check().  However,
bond_option_xmit_hash_policy_set() has no such guard.

For 802.3ad and balance-xor modes, bond_xdp_check() returns false when
xmit_hash_policy is vlan+srcmac, because the 802.1q payload is usually
absent due to hardware offload.  This means a user can:

1. Attach a native XDP program to a bond in 802.3ad/balance-xor mode
   with a compatible xmit_hash_policy (e.g. layer2+3).
2. Change xmit_hash_policy to vlan+srcmac while XDP remains loaded.

This leaves bond->xdp_prog set but bond_xdp_check() now returning false
for the same device.  When the bond is later destroyed, dev_xdp_uninstall()
calls bond_xdp_set(dev, NULL, NULL) to remove the program, which hits
the bond_xdp_check() guard and returns -EOPNOTSUPP, triggering:

WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL))

Fix this by rejecting xmit_hash_policy changes to vlan+srcmac when an
XDP program is loaded on a bond in 802.3ad or balance-xor mode.

commit 39a0876d59 ("net, bonding: Disallow vlan+srcmac with XDP")
introduced bond_xdp_check() which returns false for 802.3ad/balance-xor
modes when xmit_hash_policy is vlan+srcmac.  The check was wired into
bond_xdp_set() to reject XDP attachment with an incompatible policy, but
the symmetric path -- preventing xmit_hash_policy from being changed to an
incompatible value after XDP is already loaded -- was left unguarded in
bond_option_xmit_hash_policy_set().

Note:
commit 094ee6017e ("bonding: check xdp prog when set bond mode")
later added a similar guard to bond_option_mode_set(), but
bond_option_xmit_hash_policy_set() remained unprotected.

Reported-by: syzbot+5a287bcdc08104bc3132@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6995aff6.050a0220.2eeac1.014e.GAE@google.com/T/
Fixes: 39a0876d59 ("net, bonding: Disallow vlan+srcmac with XDP")
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260226080306.98766-2-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-03-03 10:47:37 +01:00
wangdicheng
7ae0d8f1ab ALSA: hda/senary: Ensure EAPD is enabled during init
The driver sets spec->gen.own_eapd_ctl to take manual control of the
EAPD (External Amplifier). However, senary_init does not turn on the
EAPD, while senary_shutdown turns it off.

Since the generic driver skips EAPD handling when own_eapd_ctl is set,
the EAPD remains off after initialization (e.g., after resume), leaving
the codec in a non-functional state.

Explicitly call senary_auto_turn_eapd in senary_init to ensure the EAPD
is enabled and the codec is functional.

Signed-off-by: wangdicheng <wangdicheng@kylinos.cn>
Link: https://patch.msgid.link/20260303081516.583438-1-wangdich9700@163.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-03 09:58:45 +01:00
Matthew Wilcox
08d9a4580f tee: shm: Remove refcounting of kernel pages
Earlier TEE subsystem assumed to refcount all the memory pages to be
shared with TEE implementation to be refcounted. However, the slab
allocations within the kernel don't allow refcounting kernel pages.

It is rather better to trust the kernel clients to not free pages while
being shared with TEE implementation. Hence, remove refcounting of kernel
pages from register_shm_helper() API.

Fixes: b9c0e49abf ("mm: decline to manipulate the refcount on a slab page")
Reported-by: Marco Felsch <m.felsch@pengutronix.de>
Reported-by: Sven Püschel <s.pueschel@pengutronix.de>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Co-developed-by: Sumit Garg <sumit.garg@oss.qualcomm.com>
Signed-off-by: Sumit Garg <sumit.garg@oss.qualcomm.com>
Tested-by: Sven Püschel <s.pueschel@pengutronix.de>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
2026-03-03 09:03:04 +01:00
Zhao Mengmeng
75ad518259 selftests/sched_ext: Fix peek_dsq.bpf.c compile error for clang 17
When compiling sched_ext selftests using clang 17.0.6, it raised
compiler crash and build error:

Error at line 68: Unsupport signed division for DAG: 0x55b2f9a60240:
i64 = sdiv 0x55b2f9a609b0, Constant:i64<100>, peek_dsq.bpf.c:68:25 @[
peek_dsq.bpf.c:95:4 @[ peek_dsq.bpf.c:169:8 @[ peek
_dsq.bpf.c:140:6 ] ] ]Please convert to unsigned div/mod

After digging, it's not a compiler error, clang supported Signed division
only when using -mcpu=v4, while we use -mcpu=v3 currently, the better way
is to use unsigned div, see [1] for details.

[1] https://github.com/llvm/llvm-project/issues/70433

Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-02 22:00:34 -10:00
Zhao Mengmeng
01a867c2e0 selftests/sched_ext: Add -fms-extensions to bpf build flags
Similar to commit 835a507535 ("selftests/bpf: Add -fms-extensions to
bpf build flags") and commit 639f58a0f4 ("bpftool: Fix build warnings
due to MS extensions")

Fix "declaration does not declare anything" warning by using
-fms-extensions and -Wno-microsoft-anon-tag flags to build bpf programs
that #include "vmlinux.h"

Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-02 22:00:31 -10:00
Zhao Mengmeng
9af832c0a7 tools/sched_ext: Add -fms-extensions to bpf build flags
Similar to commit 835a507535 ("selftests/bpf: Add -fms-extensions to
bpf build flags") and commit 639f58a0f4 ("bpftool: Fix build warnings
due to MS extensions")

The kernel is now built with -fms-extensions, therefore
generated vmlinux.h contains types like:
struct aes_key {
        struct aes_enckey;
        union aes_invkey_arch inv_k;
};

struct ns_common {
	...
        union {
                struct ns_tree;
                struct callback_head ns_rcu;
        };
};

Which raise warning like below when building scx scheduler:

tools/sched_ext/build/include/vmlinux.h:50533:3: warning:
declaration does not declare anything [-Wmissing-declarations]
 50533 |                 struct ns_tree;
       |                 ^
Fix it by using -fms-extensions and -Wno-microsoft-anon-tag flags
to build bpf programs that #include "vmlinux.h"

Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-02 22:00:23 -10:00
zhidao su
3f27958b72 sched_ext: Use READ_ONCE() for plain reads of scx_watchdog_timeout
scx_watchdog_timeout is written with WRITE_ONCE() in scx_enable():

    WRITE_ONCE(scx_watchdog_timeout, timeout);

However, three read-side accesses use plain reads without the matching
READ_ONCE():

    /* check_rq_for_timeouts() - L2824 */
    last_runnable + scx_watchdog_timeout

    /* scx_watchdog_workfn() - L2852 */
    scx_watchdog_timeout / 2

    /* scx_enable() - L5179 */
    scx_watchdog_timeout / 2

The KCSAN documentation requires that if one accessor uses WRITE_ONCE()
to annotate lock-free access, all other accesses must also use the
appropriate accessor. Plain reads alongside WRITE_ONCE() leave the pair
incomplete and can trigger KCSAN warnings.

Note that scx_tick() already uses the correct READ_ONCE() annotation:

    last_check + READ_ONCE(scx_watchdog_timeout)

Fix the three remaining plain reads to match, making all accesses to
scx_watchdog_timeout consistently annotated and KCSAN-clean.

Signed-off-by: zhidao su <suzhidao@xiaomi.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-02 22:00:02 -10:00
Frank Li
398c0c8bbc dt-bindings: auxdisplay: ht16k33: Use unevaluatedProperties to fix common property warning
Change additionalProperties to unevaluatedProperties because it refs to
/schemas/input/matrix-keymap.yaml.

Fix below CHECK_DTBS warnings:
arch/arm/boot/dts/nxp/imx/imx6dl-victgo.dtb: keypad@70 (holtek,ht16k33): 'keypad,num-columns', 'keypad,num-rows' do not match any of the regexes: '^pinctrl-[0-9]+$'
        from schema $id: http://devicetree.org/schemas/auxdisplay/holtek,ht16k33.yaml#

Fixes: f12b457c6b ("dt-bindings: auxdisplay: ht16k33: Convert to json-schema")
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
2026-03-03 08:59:02 +01:00
Isaac J. Manjarres
a116bac871 dma-buf: Include ioctl.h in UAPI header
include/uapi/linux/dma-buf.h uses several macros from ioctl.h to define
its ioctl commands. However, it does not include ioctl.h itself. So,
if userspace source code tries to include the dma-buf.h file without
including ioctl.h, it can result in build failures.

Therefore, include ioctl.h in the dma-buf UAPI header.

Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260303002309.1401849-1-isaacmanjarres@google.com
2026-03-03 08:55:39 +01:00
MeiChia Chiu
8fb54c7307 wifi: mac80211: fix missing ieee80211_eml_params member initialization
The missing initialization causes driver to misinterpret the EML control bitmap,
resulting in incorrect link bitmap handling.

Fixes: 0d95280a2d ("wifi: mac80211: Add eMLSR/eMLMR action frame parsing support")
Signed-off-by: MeiChia Chiu <MeiChia.Chiu@mediatek.com>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260303054725.471548-1-MeiChia.Chiu@mediatek.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-03 08:37:29 +01:00
wangdicheng
83307aebe6 ALSA: hda/senary: Use codec->core.afg for GPIO access
Replace the hardcoded GPIO node ID (0x01) with codec->core.afg.
This follows the standard HDA driver practice and makes the driver
more robust against different hardware configurations.

Signed-off-by: wangdicheng <wangdicheng@kylinos.cn>
Link: https://patch.msgid.link/20260303054242.318062-1-wangdich9700@163.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-03 07:59:47 +01:00
Rong Zhang
93992667d0 ALSA: doc: usb-audio: Add doc for QUIRK_FLAG_SKIP_IFACE_SETUP
QUIRK_FLAG_SKIP_IFACE_SETUP was introduced into usb-audio before without
appropriate documentation, so add it.

Fixes: 38c322068a ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP")
Cc: stable@vger.kernel.org
Signed-off-by: Rong Zhang <i@rong.moe>
Link: https://patch.msgid.link/20260302173300.322673-1-i@rong.moe
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-03 07:35:24 +01:00
Eric Biggers
a300000233 fsverity: add dependency on 64K or smaller pages
Currently, all filesystems that support fsverity (ext4, f2fs, and btrfs)
cache the Merkle tree in the pagecache at a 64K aligned offset after the
end of the file data.  This offset needs to be a multiple of the page
size, which is guaranteed only when the page size is 64K or smaller.

64K was chosen to be the "largest reasonable page size".  But it isn't
the largest *possible* page size: the hexagon and powerpc ports of Linux
support 256K pages, though that configuration is rarely used.

For now, just disable support for FS_VERITY in these odd configurations
to ensure it isn't used in cases where it would have incorrect behavior.

Fixes: 671e67b47e ("fs-verity: add Kconfig and the helper functions for hashing")
Reported-by: Christoph Hellwig <hch@lst.de>
Closes: https://lore.kernel.org/r/20260119063349.GA643@lst.de
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Link: https://lore.kernel.org/r/20260221204525.30426-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-02 21:05:34 -08:00
Arthur Kiyanovski
1939d9816d MAINTAINERS: ena: update AMAZON ETHERNET maintainers
Remove Shay Agroskin and Saeed Bishara.
Promote David Arinzon to maintainer.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Link: https://patch.msgid.link/20260301191652.5916-1-akiyano@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-02 19:03:34 -08:00
Simon Baatz
3f10543c5b selftests/net: packetdrill: restore tcp_rcv_big_endseq.pkt
Commit 1cc93c48b5 ("selftests/net: packetdrill: remove tests for
tcp_rcv_*big") removed the test for the reverted commit 1d2fbaad7c
("tcp: stronger sk_rcvbuf checks") but also the one for commit
9ca48d616e ("tcp: do not accept packets beyond window").

Restore the test with the necessary adaptation: expect a delayed ACK
instead of an immediate one, since tcp_can_ingest() does not fail
anymore for the last data packet.

Signed-off-by: Simon Baatz <gmbnomis@gmail.com>
Link: https://patch.msgid.link/20260301-tcp_rcv_big_endseq-v1-1-86ab7415ab58@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-02 18:47:46 -08:00
Mieczyslaw Nalewaj
7cbe98f7be net: dsa: realtek: rtl8365mb: fix rtl8365mb_phy_ocp_write return value
Function rtl8365mb_phy_ocp_write() always returns 0, even when an error
occurs during register access. This patch fixes the return value to
propagate the actual error code from regmap operations.

Link: https://lore.kernel.org/netdev/a2dfde3c-d46f-434b-9d16-1e251e449068@yahoo.com/
Fixes: 2796728460 ("net: dsa: realtek: rtl8365mb: serialize indirect PHY register access")
Signed-off-by: Mieczyslaw Nalewaj <namiltd@yahoo.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/20260301-realtek_namiltd_fix1-v1-1-43a6bb707f9c@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-02 18:32:40 -08:00
Geert Uytterhoeven
f33ac74f9c crypto: Clean up help text for CRYPTO_CRC32
F2fs and RoCEv2 stopped using this CRC32 implementation in commits
3ca4bec40e ("f2fs: switch to using the crc32 library") and
ccca5e8aa1 ("RDMA/rxe: switch to using the crc32 library").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/0f76ebf05bb1b6ca50db97988f9ac20944534b4c.1772116160.git.geert+renesas@glider.be
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-02 15:35:26 -08:00
Geert Uytterhoeven
0ef6eb10f2 crypto: Clean up help text for CRYPTO_CRC32C
Ext4, jbd2, iSCSI, NVMeoF/TCP, and Btrfs stopped using this CRC32c
implementation in commits f2b4fa1964 ("ext4: switch to using the
crc32c library"), dd348f054b ("jbd2: switch to using the crc32c
library"), 92186c1455 ("scsi: iscsi_tcp: Switch to using the
crc32c library"), 427fff9aff ("nvme-tcp: use crc32c() and
skb_copy_and_crc32c_datagram_iter()"), and fe11ac191c ("btrfs:
switch to library APIs for checksums").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/f567add7840bc612382237b3e76f3a8bdbd671e6.1772116160.git.geert+renesas@glider.be
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-02 15:35:26 -08:00
Geert Uytterhoeven
a9ad29b7ad crypto: Clean up help text for CRYPTO_XXHASH
Btrfs stopped using this xxHash implementation in commit
fe11ac191c ("btrfs: switch to library APIs for checksums").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/3b632975201074ccaa129f4901a66aff87b19742.1772116160.git.geert+renesas@glider.be
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-02 15:35:26 -08:00
Geert Uytterhoeven
a70d9d655f crypto: Clean up help text for CRYPTO_SHA256
NFS, Ceph, SMB, and Btrfs stopped using this SHA-256 implementation in
commits c2c90a8b26 ("nfsd: use SHA-256 library API instead of
crypto_shash API"), 27c0a7b05d ("libceph: Use HMAC-SHA256 library
instead of crypto_shash"), 924067ef18 ("ksmbd: Use HMAC-SHA256
library for message signing and key generation"), and fe11ac191c
("btrfs: switch to library APIs for checksums").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/bf8e1c229b36fc5349e29701e962d0dfd4fd21b6.1772116160.git.geert+renesas@glider.be
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-02 15:35:26 -08:00
Geert Uytterhoeven
5d75c7bcc4 crypto: Clean up help text for CRYPTO_BLAKE2B
Btrfs stopped using this BLAKE2b implementation in commit
fe11ac191c ("btrfs: switch to library APIs for checksums").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/98b983d2f2bddf0e5e8e1c970446c3c64527ef89.1772116160.git.geert+renesas@glider.be
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-02 15:35:22 -08:00
Eric Biggers
20d6f07004 lib/crypto: tests: Add a .kunitconfig file
Add a .kunitconfig file to the lib/crypto/ directory so that the crypto
library tests can be run more easily using kunit.py.  Example with UML:

    tools/testing/kunit/kunit.py run --kunitconfig=lib/crypto

Example with QEMU:

    tools/testing/kunit/kunit.py run --kunitconfig=lib/crypto --arch=arm64 --make_options LLVM=1

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260301040140.490310-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-03-02 15:35:05 -08:00
Mark Brown
70c3054505 ASoC: fsl_easrc: Fix control writes
Merge series from Mark Brown <broonie@kernel.org>:

I noticed that neither of the put() operations for the controls defined
by the fsl_easrc driver was flagging value changes properly, fix that.
2026-03-02 23:11:46 +00:00
Vladimir Yakovlev
3b46d61890 spi: spi-dw-dma: fix print error log when wait finish transaction
If an error occurs, the device may not have a current message. In this
case, the system will crash.

In this case, it's better to use dev from the struct ctlr (struct spi_controller*).

Signed-off-by: Vladimir Yakovlev <vovchkir@gmail.com>
Link: https://patch.msgid.link/20260302222017.992228-2-vovchkir@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-02 22:33:32 +00:00
Dillon Varone
30d937f63b drm/amd/display: Fallback to boot snapshot for dispclk
[WHY & HOW]
If the dentist is unavailable, fallback to reading CLKIP via the boot
snapshot to get the current dispclk.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Dillon Varone <Dillon.Varone@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 2ab77600d1e55a042c02437326d3c7563e853c6c)
Cc: stable@vger.kernel.org
2026-03-02 17:13:52 -05:00
sguttula
389c2024ca drm/amdgpu: Enable DPG support for VCN5
This will set DPG flags for enabling power gating on GFX11_5_4

Signed-off-by: sguttula <suresh.guttula@amd.com>
Reviewed-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit a503c266d70d3363ba6bffb883cd6ecdb092670c)
2026-03-02 17:13:29 -05:00
Alex Hung
a4fa2355e0 drm/amd/display: Enable DEGAMMA and reject COLOR_PIPELINE+DEGAMMA_LUT
[WHAT]
Create DEGAMMA properties even if color pipeline is enabled, and enforce
the mutual exclusion in atomic check by rejecting any commit that
attempts to enable both COLOR_PIPELINE on the plane and DEGAMMA_LUT on
the CRTC simultaneously.

Fixes: 18a4127e93 ("drm/amd/display: Disable CRTC degamma when color pipeline is enabled")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4963
Reviewed-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 196a6aa727f1f15eb54dda5e60a41543ea9397ee)
2026-03-02 17:13:13 -05:00
Alex Hung
c28b3ec3ca drm/amd/display: Use mpc.preblend flag to indicate 3D LUT
[WHAT]
New ASIC's 3D LUT is indicated by mpc.preblend.

Fixes: 0de2b1afea ("drm/amd/display: add 3D LUT colorop")
Reviewed-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 43175f6164d32cb96362d16e357689f74298145c)
2026-03-02 17:12:58 -05:00
Nathan Chancellor
fdb12c8a24 kbuild: Leave objtool binary around with 'make clean'
The difference between 'make clean' and 'make mrproper' is documented in
'make help' as:

  clean     - Remove most generated files but keep the config and
              enough build support to build external modules
  mrproper  - Remove all generated files + config + various backup files

After commit 68b4fe32d7 ("kbuild: Add objtool to top-level clean
target"), running 'make clean' then attempting to build an external
module with the resulting build directory fails with

  $ make ARCH=x86_64 O=build clean

  $ make -C build M=... MO=...
  ...
  /bin/sh: line 1: .../build/tools/objtool/objtool: No such file or directory

as 'make clean' removes the objtool binary.

Split the objtool clean target into mrproper and clean like Kbuild does
and remove all generated artifacts with 'make clean' except for the
objtool binary, which is removed with 'make mrproper'. To avoid a small
race when running the objtool clean target through both objtool_mrproper
and objtool_clean when running 'make mrproper', modify objtool's clean
up find command to avoid using find's '-delete' command by piping the
files into 'xargs rm -f' like the rest of Kbuild does.

Cc: stable@vger.kernel.org
Fixes: 68b4fe32d7 ("kbuild: Add objtool to top-level clean target")
Reported-by: Michal Suchanek <msuchanek@suse.de>
Closes: https://lore.kernel.org/20260225112633.6123-1-msuchanek@suse.de/
Reported-by: Rainer Fiebig <jrf@mailbox.org>
Closes: https://lore.kernel.org/62d12399-76e5-3d40-126a-7490b4795b17@mailbox.org/
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Tested-by: Nicolas Schier <nsc@kernel.org>
Link: https://patch.msgid.link/20260227-avoid-objtool-binary-removal-clean-v1-1-122f3e55eae9@kernel.org
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-03-02 14:22:39 -07:00
Saket Dumbre
800ca7b88a ACPICA: Update the _CPC definition to match ACPI 6.6
Update the _CPC definition to also support return package sub-type of
a Package (with Integer and Buffer) as per ACPI Spec 6.6.

Link: https://github.com/acpica/acpica/commit/17a761944cc2
Signed-off-by: Saket Dumbre <saket.dumbre@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/2829238.mvXUDI8C0e@rafael.j.wysocki
2026-03-02 22:02:35 +01:00
Sean Christopherson
f8211e95df Documentation: KVM: Formalizing taking vcpu->mutex *outside* of kvm->slots_lock
Explicitly document the ordering of vcpu->mutex being taken *outside* of
kvm->slots_lock.  While somewhat unintuitive since vCPUs conceptually have
narrower scope than VMs, the scope of the owning object (vCPU versus VM)
doesn't automatically carry over to the lock.  In this case, vcpu->mutex
has far broader scope than kvm->slots_lock.  As Paolo put it, it's a
"don't worry about multiple ioctls at the same time" mutex that's intended
to be taken at the outer edges of KVM.

More importantly, arm64 and x86 have gained flows that take kvm->slots_lock
inside of vcpu->mutex.  x86's kvm_inhibit_apic_access_page() is particularly
nasty, as slots_lock is taken quite deep within KVM_RUN, i.e. simply
swapping the ordering isn't an option.

Commit to the vcpu->mutex => kvm->slots_lock ordering, as vcpu->mutex
really is intended to be a "top-level" lock, whereas kvm->slots_lock is
"just" a helper lock.

Opportunistically document that vcpu->mutex is also taken outside of
slots_arch_lock, e.g. when allocating shadow roots on x86 (which is the
entire reason slots_arch_lock exists, as shadow roots must be allocated
while holding kvm->srcu)

  kvm_mmu_new_pgd()
  |
  -> kvm_mmu_reload()
     |
     -> kvm_mmu_load()
        |
        -> mmu_alloc_shadow_roots()
           |
           -> mmu_first_shadow_root_alloc()

but also when manipulating memslots in vCPU context, e.g. when inhibiting
the APIC-access page via the aforementioned kvm_inhibit_apic_access_page()

  kvm_inhibit_apic_access_page()
  |
  -> __x86_set_memory_region()
     |
     -> kvm_set_internal_memslot()
        |
        -> kvm_set_memory_region()
           |
           -> kvm_set_memslot()

Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Marc Zyngier <maz@kernel.org>
Link: https://patch.msgid.link/20260302170239.596810-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2026-03-02 09:52:09 -08:00
Jiri Olsa
3ebc98c1ae ftrace: Add missing ftrace_lock to update_ftrace_direct_add/del
Ihor and Kumar reported splat from ftrace_get_addr_curr [1], which happened
because of the missing ftrace_lock in update_ftrace_direct_add/del functions
allowing concurrent access to ftrace internals.

The ftrace_update_ops function must be guarded by ftrace_lock, adding that.

Fixes: 05dc5e9c1f ("ftrace: Add update_ftrace_direct_add function")
Fixes: 8d2c1233f3 ("ftrace: Add update_ftrace_direct_del function")
Reported-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Reported-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Closes: https://lore.kernel.org/bpf/1b58ffb2-92ae-433a-ba46-95294d6edea2@linux.dev/
Tested-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20260302081622.165713-1-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-02 09:51:07 -08:00
Lizhi Hou
6270ee26e1 accel/amdxdna: Fix NULL pointer dereference of mgmt_chann
mgmt_chann may be set to NULL if the firmware returns an unexpected
error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
pointer dereference in aie2_hw_stop().

Fix this by introducing a dedicated helper to destroy mgmt_chann
and by adding proper NULL checks before accessing it.

Fixes: b87f920b93 ("accel/amdxdna: Support hardware mailbox")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260226213857.3068474-1-lizhi.hou@amd.com
2026-03-02 09:43:22 -08:00
David Laight
af4e9ef3d7 uaccess: Fix scoped_user_read_access() for 'pointer to const'
If a 'const struct foo __user *ptr' is used for the address passed to
scoped_user_read_access() then you get a warning/error

  uaccess.h:691:1: error: initialization discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]

for the

  void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl)

assignment.

Fix by using 'auto' for both _tmpptr and the redeclaration of uptr.
Replace the CLASS() with explicit __cleanup() functions on uptr.

Fixes: e497310b4f ("uaccess: Provide scoped user access regions")
Signed-off-by: David Laight <david.laight.linux@gmail.com>
Reviewed-and-tested-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-03-02 09:24:32 -08:00
zhidao su
494eaf4651 sched_ext: Replace naked scx_root dereferences in kobject callbacks
scx_attr_ops_show() and scx_uevent() access scx_root->ops.name directly.
This is problematic for two reasons:

1. The file-level comment explicitly identifies naked scx_root
   dereferences as a temporary measure that needs to be replaced
   with proper per-instance access.

2. scx_attr_events_show(), the neighboring sysfs show function in
   the same group, already uses the correct pattern:

       struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj);

   Having inconsistent access patterns in the same sysfs/uevent
   group is error-prone.

The kobject embedded in struct scx_sched is initialized as:

    kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");

so container_of(kobj, struct scx_sched, kobj) correctly retrieves
the owning scx_sched instance in both callbacks.

Replace the naked scx_root dereferences with container_of()-based
access, consistent with scx_attr_events_show() and in preparation
for proper multi-instance scx_sched support.

Signed-off-by: zhidao su <suzhidao@xiaomi.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-02 07:23:09 -10:00
zhidao su
9adfcef334 sched_ext: Use READ_ONCE() for the read side of dsq->nr update
scx_bpf_dsq_nr_queued() reads dsq->nr via READ_ONCE() without holding
any lock, making dsq->nr a lock-free concurrently accessed variable.
However, dsq_mod_nr(), the sole writer of dsq->nr, only uses
WRITE_ONCE() on the write side without the matching READ_ONCE() on the
read side:

    WRITE_ONCE(dsq->nr, dsq->nr + delta);
                        ^^^^^^^
                        plain read -- KCSAN data race

The KCSAN documentation requires that if one accessor uses READ_ONCE()
or WRITE_ONCE() on a variable to annotate lock-free access, all other
accesses must also use the appropriate accessor. A plain read on the
right-hand side of WRITE_ONCE() leaves the pair incomplete and will
trigger KCSAN warnings.

Fix by using READ_ONCE() for the read side of the update:

    WRITE_ONCE(dsq->nr, READ_ONCE(dsq->nr) + delta);

This is consistent with scx_bpf_dsq_nr_queued() and makes the
concurrent access annotation complete and KCSAN-clean.

Signed-off-by: zhidao su <suzhidao@xiaomi.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-02 07:23:00 -10:00
Arnd Bergmann
b11b9b6751 kunit: reduce stack usage in kunit_run_tests()
Some of the recent changes to the kunit framework caused the stack usage
for kunit_run_tests() to grow higher than most other kernel functions,
which triggers a warning when CONFIG_FRAME_WARN is set to a relatively
low value:

lib/kunit/test.c: In function 'kunit_run_tests':
lib/kunit/test.c:801:1: error: the frame size of 1312 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]

Split out the inner loop into a separate function to ensure that each
function remains under the limit, and pass the kunit_result_stats
structures by reference to avoid excessive copies.

Fixed checkpatch warnings at commit time:
Shuah Khan <skhan@linuxfoundation.org>

Cc: Carlos Llamas <cmllamas@google.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-03-02 10:11:06 -07:00
Linus Torvalds
1b37ac211a Merge tag 'nfsd-7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd fixes from Chuck Lever:

 - Restore previous nfsd thread count reporting behavior

 - Fix credential reference leaks in the NFSD netlink admin protocol

* tag 'nfsd-7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux:
  nfsd: report the requested maximum number of threads instead of number running
  nfsd: Fix cred ref leak in nfsd_nl_listener_set_doit().
  nfsd: Fix cred ref leak in nfsd_nl_threads_set_doit().
2026-03-02 09:05:20 -08:00
Shuvam Pandey
40804c4974 kunit: tool: copy caller args in run_kernel to prevent mutation
run_kernel() appended KUnit flags directly to the caller-provided args
list. When exec_tests() calls run_kernel() repeatedly (e.g. with
--run_isolated), each call mutated the same list, causing later runs
to inherit stale filter_glob values and duplicate kunit.enable flags.

Fix this by copying args at the start of run_kernel(). Add a regression
test that calls run_kernel() twice with the same list and verifies the
original remains unchanged.

Fixes: ff9e09a376 ("kunit: tool: support running each suite/test separately")
Signed-off-by: Shuvam Pandey <shuvampandey1@gmail.com>
Reviewed-by: David Gow <david@davidgow.net>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-03-02 10:02:06 -07:00
Alexandre Courbot
7dd34dfc8d rust: kunit: fix warning when !CONFIG_PRINTK
If `CONFIG_PRINTK` is not set, then the following warnings are issued
during build:

  warning: unused variable: `args`
    --> ../rust/kernel/kunit.rs:16:12
    |
  16 | pub fn err(args: fmt::Arguments<'_>) {
    |            ^^^^ help: if this is intentional, prefix it with an underscore: `_args`
    |
    = note: `#[warn(unused_variables)]` (part of `#[warn(unused)]`) on by default

  warning: unused variable: `args`
    --> ../rust/kernel/kunit.rs:32:13
    |
  32 | pub fn info(args: fmt::Arguments<'_>) {
    |             ^^^^ help: if this is intentional, prefix it with an underscore: `_args`

Fix this by adding a no-op assignment using `args` when `CONFIG_PRINTK`
is not set.

Fixes: a66d733da8 ("rust: support running Rust documentation tests as KUnit ones")
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: David Gow <david@davidgow.net>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-03-02 10:01:15 -07:00
David Gow
03464a48cc MAINTAINERS: Update email address for David Gow
Update my email address for KUnit related things in MAINTAINERS (and add
an entry to .mailmap so nothing gets lost).

Signed-off-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-03-02 09:54:04 -07:00
Thomas Hellström
b570f37a2c mm: Fix a hmm_range_fault() livelock / starvation problem
If hmm_range_fault() fails a folio_trylock() in do_swap_page,
trying to acquire the lock of a device-private folio for migration,
to ram, the function will spin until it succeeds grabbing the lock.

However, if the process holding the lock is depending on a work
item to be completed, which is scheduled on the same CPU as the
spinning hmm_range_fault(), that work item might be starved and
we end up in a livelock / starvation situation which is never
resolved.

This can happen, for example if the process holding the
device-private folio lock is stuck in
   migrate_device_unmap()->lru_add_drain_all()
sinc lru_add_drain_all() requires a short work-item
to be run on all online cpus to complete.

A prerequisite for this to happen is:
a) Both zone device and system memory folios are considered in
   migrate_device_unmap(), so that there is a reason to call
   lru_add_drain_all() for a system memory folio while a
   folio lock is held on a zone device folio.
b) The zone device folio has an initial mapcount > 1 which causes
   at least one migration PTE entry insertion to be deferred to
   try_to_migrate(), which can happen after the call to
   lru_add_drain_all().
c) No or voluntary only preemption.

This all seems pretty unlikely to happen, but indeed is hit by
the "xe_exec_system_allocator" igt test.

Resolve this by waiting for the folio to be unlocked if the
folio_trylock() fails in do_swap_page().

Rename migration_entry_wait_on_locked() to
softleaf_entry_wait_unlock() and update its documentation to
indicate the new use-case.

Future code improvements might consider moving
the lru_add_drain_all() call in migrate_device_unmap() to be
called *after* all pages have migration entries inserted.
That would eliminate also b) above.

v2:
- Instead of a cond_resched() in hmm_range_fault(),
  eliminate the problem by waiting for the folio to be unlocked
  in do_swap_page() (Alistair Popple, Andrew Morton)
v3:
- Add a stub migration_entry_wait_on_locked() for the
  !CONFIG_MIGRATION case. (Kernel Test Robot)
v4:
- Rename migrate_entry_wait_on_locked() to
  softleaf_entry_wait_on_locked() and update docs (Alistair Popple)
v5:
- Add a WARN_ON_ONCE() for the !CONFIG_MIGRATION
  version of softleaf_entry_wait_on_locked().
- Modify wording around function names in the commit message
  (Andrew Morton)

Suggested-by: Alistair Popple <apopple@nvidia.com>
Fixes: 1afaeb8293 ("mm/migrate: Trylock device page in do_swap_page")
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: linux-mm@kvack.org
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.15+
Reviewed-by: John Hubbard <jhubbard@nvidia.com> #v3
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Link: https://patch.msgid.link/20260210115653.92413-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit a69d1ab971a624c6f112cea61536569d579c3215)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-02 11:51:51 -05:00
Nilay Shroff
539d1b47e9 block: break pcpu_alloc_mutex dependency on freeze_lock
While nr_hw_update allocates tagset tags it acquires ->pcpu_alloc_mutex
after ->freeze_lock is acquired or queue is frozen. This potentially
creates a circular dependency involving ->fs_reclaim if reclaim is
triggered simultaneously in a code path which first acquires ->pcpu_
alloc_mutex. As the queue is already frozen while nr_hw_queue update
allocates tagsets, the reclaim can't forward progress and thus it could
cause a potential deadlock as reported in lockdep splat[1].

Fix this by pre-allocating tagset tags before we freeze queue during
nr_hw_queue update. Later the allocated tagset tags could be safely
installed and used after queue is frozen.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1]
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Yu Kuai <yukuai@fnnas.com>
[axboe: fix brace style issue]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-02 09:23:04 -07:00
Pavel Begunkov
c36e28becd io_uring/net: reject SEND_VECTORIZED when unsupported
IORING_SEND_VECTORIZED with registered buffers is not implemented but
could be. Don't silently ignore the flag in this case but reject it with
an error. It only affects sendzc as normal sends don't support
registered buffers.

Fixes: 6f02527729 ("io_uring/net: Allow to do vectorized send")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-02 09:17:04 -07:00
Chaitanya Kulkarni
da46b5dfef blktrace: fix __this_cpu_read/write in preemptible context
tracing_record_cmdline() internally uses __this_cpu_read() and
__this_cpu_write() on the per-CPU variable trace_cmdline_save, and
trace_save_cmdline() explicitly asserts preemption is disabled via
lockdep_assert_preemption_disabled(). These operations are only safe
when preemption is off, as they were designed to be called from the
scheduler context (probe_wakeup_sched_switch() / probe_wakeup()).

__blk_add_trace() was calling tracing_record_cmdline(current) early in
the blk_tracer path, before ring buffer reservation, from process
context where preemption is fully enabled. This triggers the following
using blktests/blktrace/002:

blktrace/002 (blktrace ftrace corruption with sysfs trace)   [failed]
    runtime  0.367s  ...  0.437s
    something found in dmesg:
    [   81.211018] run blktests blktrace/002 at 2026-02-25 22:24:33
    [   81.239580] null_blk: disk nullb1 created
    [   81.357294] BUG: using __this_cpu_read() in preemptible [00000000] code: dd/2516
    [   81.362842] caller is tracing_record_cmdline+0x10/0x40
    [   81.362872] CPU: 16 UID: 0 PID: 2516 Comm: dd Tainted: G                 N  7.0.0-rc1lblk+ #84 PREEMPT(full)
    [   81.362877] Tainted: [N]=TEST
    [   81.362878] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
    [   81.362881] Call Trace:
    [   81.362884]  <TASK>
    [   81.362886]  dump_stack_lvl+0x8d/0xb0
    ...
    (See '/mnt/sda/blktests/results/nodev/blktrace/002.dmesg' for the entire message)

[   81.211018] run blktests blktrace/002 at 2026-02-25 22:24:33
[   81.239580] null_blk: disk nullb1 created
[   81.357294] BUG: using __this_cpu_read() in preemptible [00000000] code: dd/2516
[   81.362842] caller is tracing_record_cmdline+0x10/0x40
[   81.362872] CPU: 16 UID: 0 PID: 2516 Comm: dd Tainted: G                 N  7.0.0-rc1lblk+ #84 PREEMPT(full)
[   81.362877] Tainted: [N]=TEST
[   81.362878] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
[   81.362881] Call Trace:
[   81.362884]  <TASK>
[   81.362886]  dump_stack_lvl+0x8d/0xb0
[   81.362895]  check_preemption_disabled+0xce/0xe0
[   81.362902]  tracing_record_cmdline+0x10/0x40
[   81.362923]  __blk_add_trace+0x307/0x5d0
[   81.362934]  ? lock_acquire+0xe0/0x300
[   81.362940]  ? iov_iter_extract_pages+0x101/0xa30
[   81.362959]  blk_add_trace_bio+0x106/0x1e0
[   81.362968]  submit_bio_noacct_nocheck+0x24b/0x3a0
[   81.362979]  ? lockdep_init_map_type+0x58/0x260
[   81.362988]  submit_bio_wait+0x56/0x90
[   81.363009]  __blkdev_direct_IO_simple+0x16c/0x250
[   81.363026]  ? __pfx_submit_bio_wait_endio+0x10/0x10
[   81.363038]  ? rcu_read_lock_any_held+0x73/0xa0
[   81.363051]  blkdev_read_iter+0xc1/0x140
[   81.363059]  vfs_read+0x20b/0x330
[   81.363083]  ksys_read+0x67/0xe0
[   81.363090]  do_syscall_64+0xbf/0xf00
[   81.363102]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   81.363106] RIP: 0033:0x7f281906029d
[   81.363111] Code: 31 c0 e9 c6 fe ff ff 50 48 8d 3d 66 63 0a 00 e8 59 ff 01 00 66 0f 1f 84 00 00 00 00 00 80 3d 41 33 0e 00 00 74 17 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 5b c3 66 2e 0f 1f 84 00 00 00 00 00 48 83 ec
[   81.363113] RSP: 002b:00007ffca127dd48 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[   81.363120] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f281906029d
[   81.363122] RDX: 0000000000001000 RSI: 0000559f8bfae000 RDI: 0000000000000000
[   81.363123] RBP: 0000000000001000 R08: 0000002863a10a81 R09: 00007f281915f000
[   81.363124] R10: 00007f2818f77b60 R11: 0000000000000246 R12: 0000559f8bfae000
[   81.363126] R13: 0000000000000000 R14: 0000000000000000 R15: 000000000000000a
[   81.363142]  </TASK>

The same BUG fires from blk_add_trace_plug(), blk_add_trace_unplug(),
and blk_add_trace_rq() paths as well.

The purpose of tracing_record_cmdline() is to cache the task->comm for
a given PID so that the trace can later resolve it. It is only
meaningful when a trace event is actually being recorded. Ring buffer
reservation via ring_buffer_lock_reserve() disables preemption, and
preemption remains disabled until the event is committed :-

__blk_add_trace()
       	__trace_buffer_lock_reserve()
       		__trace_buffer_lock_reserve()
       			ring_buffer_lock_reserve()
       				preempt_disable_notrace();  <---

With this fix blktests for blktrace pass:

  blktests (master) # ./check blktrace
  blktrace/001 (blktrace zone management command tracing)      [passed]
      runtime  3.650s  ...  3.647s
  blktrace/002 (blktrace ftrace corruption with sysfs trace)   [passed]
      runtime  0.411s  ...  0.384s

Fixes: 7ffbd48d5c ("tracing: Cache comms only after an event occurred")
Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-03-02 09:14:58 -07:00
Tomasz Lis
99f9b5343c drm/xe/queue: Call fini on exec queue creation fail
Every call to queue init should have a corresponding fini call.
Skipping this would mean skipping removal of the queue from GuC list
(which is part of guc_id allocation). A damaged queue stored in
exec_queue_lookup list would lead to invalid memory reference,
sooner or later.

Call fini to free guc_id. This must be done before any internal
LRCs are freed.

Since the finalization with this extra call became very similar to
__xe_exec_queue_fini(), reuse that. To make this reuse possible,
alter xe_lrc_put() so it can survive NULL parameters, like other
similar functions.

v2: Reuse _xe_exec_queue_fini(). Make xe_lrc_put() aware of NULLs.

Fixes: 3c1fa4aa60 ("drm/xe: Move queue init before LRC creation")
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> (v1)
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patch.msgid.link/20260226212701.2937065-2-tomasz.lis@intel.com
(cherry picked from commit 393e5fea6f7d7054abc2c3d97a4cfe8306cd6079)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-02 11:12:40 -05:00
Shuicheng Lin
e377182f02 drm/xe/configfs: Free ctx_restore_mid_bb in release
ctx_restore_mid_bb memory is allocated in wa_bb_store(), but
xe_config_device_release() only frees ctx_restore_post_bb.

Free ctx_restore_mid_bb[0].cs as well to avoid leaking the allocation
when the configfs device is removed.

Fixes: b30d5de3d4 ("drm/xe/configfs: Add mid context restore bb")
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Nitin Gote <nitin.r.gote@intel.com>
Link: https://patch.msgid.link/20260225013448.3547687-2-shuicheng.lin@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit a235e7d0098337c3f2d1e8f3610c719a589e115f)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-02 11:12:34 -05:00
Matthew Brost
cdc8a1e11f drm/xe: Do not preempt fence signaling CS instructions
If a batch buffer is complete, it makes little sense to preempt the
fence signaling instructions in the ring, as the largest portion of the
work (the batch buffer) is already done and fence signaling consists of
only a few instructions. If these instructions are preempted, the GuC
would need to perform a context switch just to signal the fence, which
is costly and delays fence signaling. Avoid this scenario by disabling
preemption immediately after the BB start instruction and re-enabling it
after executing the fence signaling instructions.

Fixes: dd08ebf6c3 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patch.msgid.link/20260115004546.58060-1-matthew.brost@intel.com
(cherry picked from commit 2bcbf2dcde0c839a73af664a3c77d4e77d58a3eb)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-03-02 11:12:28 -05:00
Julian Orth
2e3649e237 drm/syncobj: Fix handle <-> fd ioctls with dirty stack
Consider the following application:

    #include <fcntl.h>
    #include <string.h>
    #include <drm/drm.h>
    #include <sys/ioctl.h>

    int main(void) {
        int fd = open("/dev/dri/renderD128", O_RDWR);
        struct drm_syncobj_create arg1;
        ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &arg1);
        struct drm_syncobj_handle arg2;
        memset(&arg2, 1, sizeof(arg2)); // simulate dirty stack
        arg2.handle = arg1.handle;
        arg2.flags = 0;
        arg2.fd = 0;
        arg2.pad = 0;
        // arg2.point = 0; // userspace is required to set point to 0
        ioctl(fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &arg2);
    }

The last ioctl returns EINVAL because args->point is not 0. However,
userspace developed against older kernel versions is not aware of the
new point field and might therefore not initialize it.

The correct check would be

    if (args->flags & DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE)
        return -EINVAL;

However, there might already be userspace that relies on this not
returning an error as long as point == 0. Therefore use the more lenient
check.

Fixes: c2d3a73006 ("drm/syncobj: Extend EXPORT_SYNC_FILE for timeline syncobjs")
Signed-off-by: Julian Orth <ju.orth@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260301-point-v1-1-21fc5fd98614@gmail.com
2026-03-02 16:52:02 +01:00
Michael Walle
e176ad7b57 dt-bindings: hwmon: sl28cpld: Drop sa67mcu compatible
I was just informed that this product is discontinued (without being
ever released to the market). Pull the plug and let's not waste any more
maintainers time and revert commit 0f6eae86e6 ("dt-bindings: hwmon:
sl28cpld: add sa67mcu compatible").

Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Michael Walle <mwalle@kernel.org>
Link: https://lore.kernel.org/r/20260302122540.1377444-8-mwalle@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-03-02 07:04:32 -08:00
Franz Schnyder
2d85ecd6fb regulator: pf9453: Respect IRQ trigger settings from firmware
The datasheet specifies, that the IRQ_B pin is pulled low when any
unmasked interrupt bit status is changed, and it is released high once
the application processor reads the INT1 register. As it specifies a
level-low behavior, it should not force a falling-edge interrupt.

Remove the IRQF_TRIGGER_FALLING to not force the falling-edge interrupt
and instead rely on the flag from the device tree.

Fixes: 0959b67063 ("regulator: pf9453: add PMIC PF9453 support")
Cc: stable@vger.kernel.org
Signed-off-by: Franz Schnyder <franz.schnyder@toradex.com>
Link: https://patch.msgid.link/20260218102518.238943-2-fra.schnyder@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-02 14:46:28 +00:00
Sheetal
a8df7892a9 ASoC: dt-bindings: tegra: Add compatible for Tegra238 sound card
Tegra238 requires different PLLA and PLLA_OUT0 clock rates compared to
other Tegra platforms. Add Tegra238 compatible string to the APE
tegra-audio-graph-card bindings.

Signed-off-by: Sheetal <sheetal@nvidia.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260302085323.3139571-2-sheetal@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-02 14:33:45 +00:00
Sebastian Krzyszkowiak
d973b1039c wifi: rsi: Don't default to -EOPNOTSUPP in rsi_mac80211_config
This triggers a WARN_ON in ieee80211_hw_conf_init and isn't the expected
behavior from the driver - other drivers default to 0 too.

Fixes: 0a44dfc070 ("wifi: mac80211: simplify non-chanctx drivers")
Signed-off-by: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
Link: https://patch.msgid.link/20260221-rsi-config-ret-v1-1-9a8f805e2f31@puri.sm
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-03-02 12:06:03 +01:00
Chris Brandt
fb797a7010 drm: renesas: rz-du: mipi_dsi: Set DSI divider
Before the MIPI DSI clock source can be configured, the target divide
ratio needs to be set.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Tested-by: Biju Das <biju.das.jz@bp.renesas.com>
Fixes: 5a4326f2e3 ("clk: renesas: rzg2l: Remove DSI clock rate restrictions")
Link: https://patch.msgid.link/20260227015216.2721504-1-chris.brandt@renesas.com
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
2026-03-02 10:28:38 +00:00
Frank Li
7e1e6d6845 dt-bindings: net: can: nxp,sja1000: add reference to mc-peripheral-props.yaml
Add a reference to mc-peripheral-props.yaml to allow vendor-specific
properties for memory access timings.

Fix below CHECK_DTBS warings:
arch/arm/boot/dts/nxp/imx/imx27-phytec-phycore-rdk.dtb: can@4,0 (nxp,sja1000): Unevaluated properties are not allowed ('fsl,weim-cs-timing' was unexpected)
        from schema $id: http://devicetree.org/schemas/net/can/nxp,sja1000.yaml

Signed-off-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260212163000.1195586-1-Frank.Li@nxp.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:23:40 +01:00
Marc Kleine-Budde
2df6162785 can: gs_usb: gs_can_open(): always configure bitrates before starting device
So far the driver populated the struct can_priv::do_set_bittiming() and
struct can_priv::fd::do_set_data_bittiming() callbacks.

Before bringing up the interface, user space has to configure the bitrates.
With these callbacks the configuration is directly forwarded into the CAN
hardware. Then the interface can be brought up.

An ifdown-ifup cycle (without changing the bit rates) doesn't re-configure
the bitrates in the CAN hardware. This leads to a problem with the
CANable-2.5 [1] firmware, which resets the configured bit rates during
ifdown.

To fix the problem remove both bit timing callbacks and always configure
the bitrates in the struct net_device_ops::ndo_open() callback.

[1] https://github.com/Elmue/CANable-2.5-firmware-Slcan-and-Candlelight

Cc: stable@vger.kernel.org
Fixes: d08e973a77 ("can: gs_usb: Added support for the GS_USB CAN devices")
Link: https://patch.msgid.link/20260219-gs_usb-always-configure-bitrates-v2-1-671f8ba5b0a5@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:09:36 +01:00
Kim Phillips
9073428bb2 x86/sev: Allow IBPB-on-Entry feature for SNP guests
The SEV-SNP IBPB-on-Entry feature does not require a guest-side
implementation. It was added in Zen5 h/w, after the first SNP Zen
implementation, and thus was not accounted for when the initial set of SNP
features were added to the kernel.

In its abundant precaution, commit

  8c29f01654 ("x86/sev: Add SEV-SNP guest feature negotiation support")

included SEV_STATUS' IBPB-on-Entry bit as a reserved bit, thereby masking
guests from using the feature.

Allow guests to make use of IBPB-on-Entry when supported by the hypervisor, as
the bit is now architecturally defined and safe to expose.

Fixes: 8c29f01654 ("x86/sev: Add SEV-SNP guest feature negotiation support")
Signed-off-by: Kim Phillips <kim.phillips@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: stable@kernel.org
Link: https://patch.msgid.link/20260203222405.4065706-2-kim.phillips@amd.com
2026-03-02 11:08:59 +01:00
Tom Lendacky
4ca191cec1 x86/boot/sev: Move SEV decompressor variables into the .data section
As part of the work to remove the dependency on calling into the decompressor
code (startup_64()) for a UEFI boot, a call to rmpadjust() was removed from
sev_enable() in favor of checking the value of the snp_vmpl variable.

When booting through a non-UEFI path and calling startup_64(), the call to
sev_enable() is performed before the BSS section is zeroed. With the removal
of the rmpadjust() call and the corresponding check of the return code, the
snp_vmpl variable is checked.

Since the kernel is running at VMPL0, the snp_vmpl variable will not have been
set and should be the default value of 0.  However, since the call occurs
before the BSS is zeroed, the snp_vmpl variable may not actually be zero,
which will cause the guest boot to fail.

Since the decompressor relocates itself, the BSS would need to be cleared both
before and after the relocation, but this would, in effect, cause all of the
changes to BSS variables before relocation to be lost after relocation.

Instead, move the snp_vmpl variable into the .data section so that it is
initialized and the value made safe during relocation. As a pre-caution
against future changes, move other SEV-related decompressor variables into the
.data section, too.

Fixes: 68a501d7fd ("x86/boot: Drop redundant RMPADJUST in SEV SVSM presence check")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Changyuan Lyu <changyuanl@google.com>
Tested-by: Kevin Hui <kevinhui@meta.com>
Tested-by: Changyuan Lyu <changyuanl@google.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/5648b7de5b0a5d0dfef3785f9582b718678c6448.1770217260.git.thomas.lendacky@amd.com
2026-03-02 11:08:33 +01:00
Marc Kleine-Budde
6dfd65a69e Merge patch series "can: usb: f81604: handle short interrupt urb messages properly"
In this series Greg Kroah-Hartman takes the recent fixes on the gs_usb
driver and applies similar fixes to the f81604 driver.

Link: https://patch.msgid.link/2026022331-opal-evaluator-a928@gregkh
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:04:38 +01:00
Greg Kroah-Hartman
952caa5da1 can: usb: f81604: correctly anchor the urb in the read bulk callback
When submitting an urb, that is using the anchor pattern, it needs to be
anchored before submitting it otherwise it could be leaked if
usb_kill_anchored_urbs() is called.  This logic is correctly done
elsewhere in the driver, except in the read bulk callback so do that
here also.

Cc: Ji-Ze Hong (Peter Hong) <peter_hong@fintek.com.tw>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: Vincent Mailhol <mailhol@kernel.org>
Cc: stable@kernel.org
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026022334-starlight-scaling-2cea@gregkh
Fixes: 88da174369 ("can: usb: f81604: add Fintek F81604 support")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:04:15 +01:00
Greg Kroah-Hartman
51f9478072 can: usb: f81604: handle bulk write errors properly
If a write urb fails then more needs to be done other than just logging
the message, otherwise the transmission could be stalled.  Properly
increment the error counters and wake up the queues so that data will
continue to flow.

Cc: Ji-Ze Hong (Peter Hong) <peter_hong@fintek.com.tw>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: Vincent Mailhol <mailhol@kernel.org>
Cc: stable@kernel.org
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026022334-slackness-dynamic-9195@gregkh
Fixes: 88da174369 ("can: usb: f81604: add Fintek F81604 support")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:04:15 +01:00
Greg Kroah-Hartman
7299b1b39a can: usb: f81604: handle short interrupt urb messages properly
If an interrupt urb is received that is not the correct length, properly
detect it and don't attempt to treat the data as valid.

Cc: Ji-Ze Hong (Peter Hong) <peter_hong@fintek.com.tw>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: Vincent Mailhol <mailhol@kernel.org>
Cc: stable@kernel.org
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026022331-opal-evaluator-a928@gregkh
Fixes: 88da174369 ("can: usb: f81604: add Fintek F81604 support")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:04:14 +01:00
Greg Kroah-Hartman
5eaad4f768 can: usb: etas_es58x: correctly anchor the urb in the read bulk callback
When submitting an urb, that is using the anchor pattern, it needs to be
anchored before submitting it otherwise it could be leaked if
usb_kill_anchored_urbs() is called.  This logic is correctly done
elsewhere in the driver, except in the read bulk callback so do that
here also.

Cc: Vincent Mailhol <mailhol@kernel.org>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: stable@kernel.org
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Vincent Mailhol <mailhol@kernel.org>
Tested-by: Vincent Mailhol <mailhol@kernel.org>
Link: https://patch.msgid.link/2026022320-poser-stiffly-9d84@gregkh
Fixes: 8537257874 ("can: etas_es58x: add core support for ETAS ES58X CAN USB interfaces")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:03:42 +01:00
Greg Kroah-Hartman
1e446fd058 can: ucan: Fix infinite loop from zero-length messages
If a broken ucan device gets a message with the message length field set
to 0, then the driver will loop for forever in
ucan_read_bulk_callback(), hanging the system.  If the length is 0, just
skip the message and go on to the next one.

This has been fixed in the kvaser_usb driver in the past in commit
0c73772cd2 ("can: kvaser_usb: leaf: Fix potential infinite loop in
command parsers"), so there must be some broken devices out there like
this somewhere.

Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: Vincent Mailhol <mailhol@kernel.org>
Cc: stable@kernel.org
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026022319-huff-absurd-6a18@gregkh
Fixes: 9f2d3eae88 ("can: ucan: add driver for Theobroma Systems UCAN devices")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:03:42 +01:00
Greg Kroah-Hartman
38a01c9700 can: ems_usb: ems_usb_read_bulk_callback(): check the proper length of a message
When looking at the data in a USB urb, the actual_length is the size of
the buffer passed to the driver, not the transfer_buffer_length which is
set by the driver as the max size of the buffer.

When parsing the messages in ems_usb_read_bulk_callback() properly check
the size both at the beginning of parsing the message to make sure it is
big enough for the expected structure, and at the end of the message to
make sure we don't overflow past the end of the buffer for the next
message.

Cc: Vincent Mailhol <mailhol@kernel.org>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: stable@kernel.org
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026022316-answering-strainer-a5db@gregkh
Fixes: 702171adee ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:03:42 +01:00
Ziyi Guo
968b098220 can: esd_usb: add endpoint type validation
esd_usb_probe() constructs bulk pipes for two endpoints without
verifying their transfer types:

  - usb_rcvbulkpipe(dev->udev, 1) for RX (version reply, async RX data)
  - usb_sndbulkpipe(dev->udev, 2) for TX (version query, CAN frames)

A malformed USB device can present these endpoints with transfer types
that differ from what the driver assumes, triggering the WARNING in
usb_submit_urb().

Use usb_find_common_endpoints() to discover and validate the first
bulk IN and bulk OUT endpoints at probe time, before any allocation.
Found pipes are saved to struct esd_usb and code uses them directly
instead of making pipes in place.

Similar to
- commit 136bed0bfd ("can: mcba_usb: properly check endpoint type")
  which established the usb_find_common_endpoints() + stored pipes
  pattern for CAN USB drivers.

Fixes: 96d8e90382 ("can: Add driver for esd CAN-USB/2 device")
Suggested-by: Vincent Mailhol <mailhol@kernel.org>
Signed-off-by: Ziyi Guo <n7l8m4@u.northwestern.edu>
Reviewed-by: Vincent Mailhol <mailhol@kernel.org>
Link: https://patch.msgid.link/20260213203927.599163-1-n7l8m4@u.northwestern.edu
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 11:03:32 +01:00
Sheetal
5f4338e563 ALSA: hda/hdmi: Add Tegra238 HDA codec device ID
Add Tegra238 HDA codec device in hda_device_id list.

Signed-off-by: Sheetal <sheetal@nvidia.com>
Link: https://patch.msgid.link/20260302084217.3135982-1-sheetal@nvidia.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-03-02 10:59:47 +01:00
Alban Bedel
ab3f894de2 can: mcp251x: fix deadlock in error path of mcp251x_open
The mcp251x_open() function call free_irq() in its error path with the
mpc_lock mutex held. But if an interrupt already occurred the
interrupt handler will be waiting for the mpc_lock and free_irq() will
deadlock waiting for the handler to finish.

This issue is similar to the one fixed in commit 7dd9c26bd6 ("can:
mcp251x: fix deadlock if an interrupt occurs during mcp251x_open") but
for the error path.

To solve this issue move the call to free_irq() after the lock is
released. Setting `priv->force_quit = 1` beforehand ensure that the IRQ
handler will exit right away once it acquired the lock.

Signed-off-by: Alban Bedel <alban.bedel@lht.dlh.de>
Link: https://patch.msgid.link/20260209144706.2261954-1-alban.bedel@lht.dlh.de
Fixes: bf66f3736a ("can: mcp251x: Move to threaded interrupts instead of workqueues.")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 10:24:41 +01:00
Oliver Hartkopp
c77bfbdd6a can: dummy_can: dummy_can_init(): fix packet statistics
The former implementation was only counting the tx_packets value but not
the tx_bytes as the skb was dropped on driver layer.

Enable CAN echo support (IFF_ECHO) in dummy_can_init(), which activates the
code for setting and retrieving the echo SKB and counts the tx_bytes
correctly.

Fixes: 816cf430e8 ("can: add dummy_can driver")
Cc: Vincent Mailhol <mailhol@kernel.org>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Reviewed-by: Vincent Mailhol <mailhol@kernel.org>
Link: https://patch.msgid.link/20260126104540.21024-1-socketcan@hartkopp.net
[mkl: make commit message imperative]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 10:24:41 +01:00
Oliver Hartkopp
c35636e91e can: bcm: fix locking for bcm_op runtime updates
Commit c2aba69d0c ("can: bcm: add locking for bcm_op runtime updates")
added a locking for some variables that can be modified at runtime when
updating the sending bcm_op with a new TX_SETUP command in bcm_tx_setup().

Usually the RX_SETUP only handles and filters incoming traffic with one
exception: When the RX_RTR_FRAME flag is set a predefined CAN frame is
sent when a specific RTR frame is received. Therefore the rx bcm_op uses
bcm_can_tx() which uses the bcm_tx_lock that was only initialized in
bcm_tx_setup(). Add the missing spin_lock_init() when allocating the
bcm_op in bcm_rx_setup() to handle the RTR case properly.

Fixes: c2aba69d0c ("can: bcm: add locking for bcm_op runtime updates")
Reported-by: syzbot+5b11eccc403dd1cea9f8@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-can/699466e4.a70a0220.2c38d7.00ff.GAE@google.com/
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://patch.msgid.link/20260218-bcm_spin_lock_init-v1-1-592634c8a5b5@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
2026-03-02 10:24:40 +01:00
Vlastimil Babka
48647d3f9a slab: distinguish lock and trylock for sheaf_flush_main()
sheaf_flush_main() can be called from __pcs_replace_full_main() where
it's fine if the trylock fails, and pcs_flush_all() where it's not
expected to and for some flush callers (when destroying the cache or
memory hotremove) it would be actually a problem if it failed and left
the main sheaf not flushed. The flush callers can however safely use
local_lock() instead of trylock.

The trylock failure should not happen in practice on !PREEMPT_RT, but
can happen on PREEMPT_RT. The impact is limited in practice because when
a trylock fails in the kmem_cache_destroy() path, it means someone is
using the cache while destroying it, which is a bug on its own. The memory
hotremove path is unlikely to be employed in a production RT config, but
it's possible.

To fix this, split the function into sheaf_flush_main() (using
local_lock()) and sheaf_try_flush_main() (using local_trylock()) where
both call __sheaf_flush_main_batch() to flush a single batch of objects.
This will also allow lockdep to verify our context assumptions.

The problem was raised in an off-list question by Marcelo.

Fixes: 2d517aa09b ("slab: add opt-in caching layer of percpu sheaves")
Cc: stable@vger.kernel.org
Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Link: https://patch.msgid.link/20260211-b4-sheaf-flush-v1-1-4e7f492f0055@suse.cz
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-03-02 10:04:22 +01:00
Roshan Kumar
0d10393d5e xfrm: iptfs: validate inner IPv4 header length in IPTFS payload
Add validation of the inner IPv4 packet tot_len and ihl fields parsed
from decrypted IPTFS payloads in __input_process_payload(). A crafted
ESP packet containing an inner IPv4 header with tot_len=0 causes an
infinite loop: iplen=0 leads to capturelen=min(0, remaining)=0, so the
data offset never advances and the while(data < tail) loop never
terminates, spinning forever in softirq context.

Reject inner IPv4 packets where tot_len < ihl*4 or ihl*4 < sizeof(struct
iphdr), which catches both the tot_len=0 case and malformed ihl values.
The normal IP stack performs this validation in ip_rcv_core(), but IPTFS
extracts and processes inner packets before they reach that layer.

Reported-by: Roshan Kumar <roshaen09@gmail.com>
Fixes: 6c82d24336 ("xfrm: iptfs: add basic receive packet (tunnel egress) handling")
Cc: stable@vger.kernel.org
Signed-off-by: Roshan Kumar <roshaen09@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-03-02 08:53:00 +01:00
ZhangGuoDong
6f0402539b smb: update some doc references
To make it easier to locate the documentation during development.

Signed-off-by: ZhangGuoDong <zhangguodong@kylinos.cn>
Reviewed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-01 17:59:52 -06:00
ChenXiaoSong
7d0bf050a5 smb/client: make SMB2 maperror KUnit tests a separate module
Build the SMB2 maperror KUnit tests as `smb2maperror_test.ko`.

Link: https://lore.kernel.org/linux-cifs/20260210081040.4156383-1-geert@linux-m68k.org/
Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-03-01 17:59:52 -06:00
Felix Gu
23942b71f0 regulator: mt6363: Fix incorrect and redundant IRQ disposal in probe
In mt6363_regulator_probe(), devm_add_action_or_reset() is used to
automatically dispose of the IRQ mapping if the probe fails or the
device is removed.

The manual call to irq_dispose_mapping() in the error path was redundant
as the reset action already triggers mt6363_irq_remove(). Furthermore,
the manual call incorrectly passed the hardware IRQ number (info->hwirq)
instead of the virtual IRQ mapping (info->virq).

Remove the redundant and incorrect manual disposal.

Fixes: 3c36965df8 ("regulator: Add support for MediaTek MT6363 SPMI PMIC Regulators")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260223-mt6363-v1-1-c99a2e8ac621@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:17 +00:00
Richard Fitzgerald
ca5056f5a7 ASoC: cs35l56: Suppress pointless warning about number of GPIO pulls
In cs35l56_process_xu_onchip_speaker_id() the warning that the number
of pulls != number of GPIOs should only be printed if pulls are defined.

Pull settings are optional because there would normally be an external
resistor providing the pull. The warning would still be true if pulls
are not defined, but in that case is just log noise.

While we're changing that block of code, also fix the indenting of the
arguments to the dev_warn().

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20260226113511.1768838-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:11 +00:00
Simon Trimmer
fd13fc700e ASoC: amd: acp: Add ACP6.3 match entries for Cirrus Logic parts
This adds some match entries for a few system configurations:

cs42l43 link 0 UID 0
cs35l56 link 1 UID 0
cs35l56 link 1 UID 1
cs35l56 link 1 UID 2
cs35l56 link 1 UID 3

cs42l45 link 1 UID 0
cs35l63 link 0 UID 0
cs35l63 link 0 UID 2
cs35l63 link 0 UID 4
cs35l63 link 0 UID 6

cs42l45 link 0 UID 0
cs35l63 link 1 UID 0
cs35l63 link 1 UID 1

cs42l45 link 0 UID 0
cs35l63 link 1 UID 1
cs35l63 link 1 UID 3

cs42l45 link 1 UID 0
cs35l63 link 0 UID 0
cs35l63 link 0 UID 1

cs42l43 link 1 UID 0
cs35l56 link 1 UID 0
cs35l56 link 1 UID 1
cs35l56 link 1 UID 2
cs35l56 link 1 UID 3

cs35l56 link 1 UID 0
cs35l56 link 1 UID 1
cs35l56 link 1 UID 2
cs35l56 link 1 UID 3

cs35l63 link 0 UID 0
cs35l63 link 0 UID 2
cs35l63 link 0 UID 4
cs35l63 link 0 UID 6

cs42l43 link 0 UID 1

cs42l43b link 0 UID 1

cs42l45 link 0 UID 0

cs42l45 link 1 UID 0

Signed-off-by: Simon Trimmer <simont@opensource.cirrus.com>
Link: https://patch.msgid.link/20260224130307.526626-1-simont@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:10 +00:00
Oliver Freyermuth
70eddf6a0a ASoC: Intel: sof_sdw: Add quirk for Alienware Area 51 (2025) 0CCD SKU
This adds the necessary quirk for the Alienware 18 Area 51 (2025).
Complements commit 1b03391d07 ("ASoC: Intel: sof_sdw: Add quirk
for Alienware Area 51 (2025) 0CCC SKU").

Signed-off-by: Oliver Freyermuth <o.freyermuth@googlemail.com>
Tested-by: Oliver Freyermuth <o.freyermuth@googlemail.com>
Link: https://patch.msgid.link/20260224190224.30630-1-o.freyermuth@googlemail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:09 +00:00
Shuming Fan
986841dcad ASoC: rt1321: fix DMIC ch2/3 mask issue
This patch fixed the DMIC ch2/3 mask missing problem.

Signed-off-by: Shuming Fan <shumingf@realtek.com>
Link: https://patch.msgid.link/20260225091210.3648905-1-shumingf@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:08 +00:00
Richard Fitzgerald
9351cf3fd9 ASoC: cs35l56: Only patch ASP registers if the DAI is part of a DAIlink
Move the ASP register patches to a separate struct and apply this from the
ASP DAI probe() function so that the registers are only patched if the DAI
is part of a DAI link.

Some systems use the ASP as a special-purpose interconnect and on these
systems the ASP registers are configured by a third party (the firmware,
the BIOS, or another device using the amp's secondary host control
interface).

If the machine driver does not hook up the ASP DAI then the ASP registers
must be omitted from the patch to prevent overwriting the third party
configuration.

If the machine driver includes the ASP DAI in a DAI link, this implies that
the machine driver and higher components (such as alsa-ucm) are taking
ownership of the ASP. In this case the ASP registers are patched to known
defaults and the machine driver should configure the ASP.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20260226110137.1664562-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:07 +00:00
Mark Brown
31ddc62c1c ASoC: fsl_easrc: Fix event generation in fsl_easrc_iec958_set_reg()
ALSA controls should return 1 if the value in the control changed but the
control put operation fsl_easrc_set_reg() only returns 0 or a negative
error code, causing ALSA to not generate any change events. Add a suitable
check by using regmap_update_bits_check() with the underlying regmap, this
is more clearly and simply correct than trying to verify that one of the
generic ops is exactly equivalent to this one.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20260205-asoc-fsl-easrc-fix-events-v1-2-39d4c766918b@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:02 +00:00
Mark Brown
54a86cf48e ASoC: fsl_easrc: Fix event generation in fsl_easrc_iec958_put_bits()
ALSA controls should return 1 if the value in the control changed but the
control put operation fsl_easrc_iec958_put_bits() unconditionally returns
0, causing ALSA to not generate any change events. This is detected by
mixer-test with large numbers of messages in the form:

    No event generated for Context 3 IEC958 CS5
    Context 3 IEC958 CS5.0 orig 5224 read 5225, is_volatile 0

Add a suitable check.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20260205-asoc-fsl-easrc-fix-events-v1-1-39d4c766918b@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-03-01 23:48:01 +00:00
Linus Torvalds
11439c4635 Linux 7.0-rc2 2026-03-01 15:39:31 -08:00
Linus Torvalds
949d0a46ad Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "Arm:

   - Make sure we don't leak any S1POE state from guest to guest when
     the feature is supported on the HW, but not enabled on the host

   - Propagate the ID registers from the host into non-protected VMs
     managed by pKVM, ensuring that the guest sees the intended feature
     set

   - Drop double kern_hyp_va() from unpin_host_sve_state(), which could
     bite us if we were to change kern_hyp_va() to not being idempotent

   - Don't leak stage-2 mappings in protected mode

   - Correctly align the faulting address when dealing with single page
     stage-2 mappings for PAGE_SIZE > 4kB

   - Fix detection of virtualisation-capable GICv5 IRS, due to the
     maintainer being obviously fat fingered... [his words, not mine]

   - Remove duplication of code retrieving the ASID for the purpose of
     S1 PT handling

   - Fix slightly abusive const-ification in vgic_set_kvm_info()

  Generic:

   - Remove internal Kconfigs that are now set on all architectures

   - Remove per-architecture code to enable KVM_CAP_SYNC_MMU, all
     architectures finally enable it in Linux 7.0"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: always define KVM_CAP_SYNC_MMU
  KVM: remove CONFIG_KVM_GENERIC_MMU_NOTIFIER
  KVM: arm64: Deduplicate ASID retrieval code
  irqchip/gic-v5: Fix inversion of IRS_IDR0.virt flag
  KVM: arm64: Revert accidental drop of kvm_uninit_stage2_mmu() for non-NV VMs
  KVM: arm64: Fix protected mode handling of pages larger than 4kB
  KVM: arm64: vgic: Handle const qualifier from gic_kvm_info allocation type
  KVM: arm64: Remove redundant kern_hyp_va() in unpin_host_sve_state()
  KVM: arm64: Fix ID register initialization for non-protected pKVM guests
  KVM: arm64: Optimise away S1POE handling when not supported by host
  KVM: arm64: Hide S1POE from guests when not supported by the host
2026-03-01 15:34:47 -08:00
Linus Torvalds
e2bd1b1369 Merge tag 'core-debugobjects-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull debugobjects fix from Thomas Gleixner:
 "A single fix for debugobjects.

  The deferred page initialization prevents debug objects from
  allocating slab pages until the initialization is complete. That
  causes depletion of the pool and disabling of debugobjects.

  The reason is that debugobjects uses __GFP_HIGH for allocations as it
  might be invoked from arbitrary contexts. When PREEMPT_COUNT is
  disabled there is no way to know whether the context is safe to set
  __GFP_KSWAPD_RECLAIM.

  This worked until v6.18. Since then allocations w/o a reclaim flag
  cause new_slab() to end up in alloc_frozen_pages_nolock_noprof(),
  which returns early when deferred page initialization has not yet
  completed.

  Work around that when PREEMPT_COUNT is enabled as the preempt counter
  allows debugobjects to add __GFP_KSWAPD_RECLAIM to the GFP flags when
  the context is preemtible. When PREEMPT_COUNT is disabled the context
  is unknown and the reclaim bit can't be set because the caller might
  hold locks which might deadlock in the allocator.

  That makes debugobjects depend on PREEMPT_COUNT ||
  !DEFERRED_STRUCT_PAGE_INIT, which limits the coverage slightly, but
  keeps it functional for most cases"

* tag 'core-debugobjects-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  debugobject: Make it work with deferred page initialization - again
2026-03-01 13:32:32 -08:00
Linus Torvalds
5920da4455 Merge tag 'x86-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:

 - Fix speculative safety in fred_extint()

 - Fix __WARN_printf() trap in early_fixup_exception()

 - Fix clang-build boot bug for unusual alignments, triggered by
   CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B=y

 - Replace the final few __ASSEMBLY__ stragglers that snuck in lately
   into non-UAPI x86 headers and use __ASSEMBLER__ consistently (again)

* tag 'x86-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/headers: Replace __ASSEMBLY__ stragglers with __ASSEMBLER__
  x86/cfi: Fix CFI rewrite for odd alignments
  x86/bug: Handle __WARN_printf() trap in early_fixup_exception()
  x86/fred: Correct speculative safety in fred_extint()
2026-03-01 13:16:35 -08:00
Linus Torvalds
f6542af922 Merge tag 'timers-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fix from Ingo Molnar:
 "Improve the inlining of jiffies_to_msecs() and jiffies_to_usecs(), for
  the common HZ=100, 250 or 1000 cases. Only use a function call for odd
  HZ values like HZ=300 that generate more code.

  The function call overhead showed up in performance tests of the TCP
  code"

* tag 'timers-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  time/jiffies: Inline jiffies_to_msecs() and jiffies_to_usecs()
2026-03-01 12:15:58 -08:00
Linus Torvalds
6170625149 Merge tag 'sched-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:

 - Fix zero_vruntime tracking when there's a single task running

 - Fix slice protection logic

 - Fix the ->vprot logic for reniced tasks

 - Fix lag clamping in mixed slice workloads

 - Fix objtool uaccess warning (and bug) in the
   !CONFIG_RSEQ_SLICE_EXTENSION case caused by unexpected un-inlining,
   which triggers with older compilers

 - Fix a comment in the rseq registration rseq_size bound check code

 - Fix a legacy RSEQ ABI quirk that handled 32-byte area sizes
   differently, which special size we now reached naturally and want to
   avoid. The visible ugliness of the new reserved field will be avoided
   the next time the RSEQ area is extended.

* tag 'sched-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  rseq: slice ext: Ensure rseq feature size differs from original rseq size
  rseq: Clarify rseq registration rseq_size bound check comment
  sched/core: Fix wakeup_preempt's next_class tracking
  rseq: Mark rseq_arm_slice_extension_timer() __always_inline
  sched/fair: Fix lag clamp
  sched/eevdf: Update se->vprot in reweight_entity()
  sched/fair: Only set slice protection at pick time
  sched/fair: Fix zero_vruntime tracking
2026-03-01 11:09:24 -08:00
Linus Torvalds
cb36eabcaf Merge tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events fixes from Ingo Molnar:

 - Fix lock ordering bug found by lockdep in perf_event_wakeup()

 - Fix uncore counter enumeration on Granite Rapids and Sierra Forest

 - Fix perf_mmap() refcount bug found by Syzkaller

 - Fix __perf_event_overflow() vs perf_remove_from_context() race

* tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix __perf_event_overflow() vs perf_remove_from_context() race
  perf/core: Fix refcount bug and potential UAF in perf_mmap
  perf/x86/intel/uncore: Add per-scheduler IMC CAS count events
  perf/core: Fix invalid wait context in ctx_sched_in()
2026-03-01 11:07:20 -08:00
Linus Torvalds
b410220870 Merge tag 'locking-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking fix from Ingo Molnar:
 "Now that LLVM 22 has been released officially, require a release
  version to use the new CONFIG_WARN_CONTEXT_ANALYSIS feature.

  In particular this avoids the widely used Android clang 22.0.1
  pre-release build which is known to be broken for this usecase"

* tag 'locking-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  lib/Kconfig.debug: Require a release version of LLVM 22 for context analysis
2026-03-01 11:00:43 -08:00
Linus Torvalds
afa844360b Merge tag 'irq-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irqchip driver fixes from Ingo Molnar:

 - Fix frozen interrupt bug in the sifive-plic driver

 - Limit per-device MSI interrupts on uncommon gic-v3-its hardware
   variants

 - Address Sparse warning by constifying a variable in the MMP driver

 - Revert broken commit and also fix an error check in the ls-extirq
   driver

* tag 'irq-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  irqchip/ls-extirq: Fix devm_of_iomap() error check
  Revert "irqchip/ls-extirq: Use for_each_of_imap_item iterator"
  irqchip/mmp: Make icu_irq_chip variable static const
  irqchip/gic-v3-its: Limit number of per-device MSIs to the range the ITS supports
  irqchip/sifive-plic: Fix frozen interrupt due to affinity setting
2026-03-01 10:58:16 -08:00
Linus Torvalds
39c6332614 Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI fixes from James Bottomley:
 "All changes in drivers (well technically SES is enclosure services,
  but its change is minor). The biggest is the write combining change in
  lpfc followed by the additional NULL checks in mpi3mr"

* tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi:
  scsi: ufs: core: Fix shift out of bounds when MAXQ=32
  scsi: ufs: core: Move link recovery for hibern8 exit failure to wl_resume
  scsi: ufs: core: Fix possible NULL pointer dereference in ufshcd_add_command_trace()
  scsi: snic: MAINTAINERS: Update snic maintainers
  scsi: snic: Remove unused linkstatus
  scsi: pm8001: Fix use-after-free in pm8001_queue_command()
  scsi: mpi3mr: Add NULL checks when resetting request and reply queues
  scsi: ufs: core: Reset urgent_bkops_lvl to allow runtime PM power mode
  scsi: ses: Fix devices attaching to different hosts
  scsi: ufs: core: Fix RPMB region size detection for UFS 2.2
  scsi: storvsc: Fix scheduling while atomic on PREEMPT_RT
  scsi: lpfc: Properly set WC for DPP mapping
2026-03-01 09:59:29 -08:00
Linus Torvalds
eb71ab2bf7 Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:

 - Fix alignment of arm64 JIT buffer to prevent atomic tearing (Fuad
   Tabba)

 - Fix invariant violation for single value tnums in the verifier
   (Harishankar Vishwanathan, Paul Chaignon)

 - Fix a bunch of issues found by ASAN in selftests/bpf (Ihor Solodrai)

 - Fix race in devmpa and cpumap on PREEMPT_RT (Jiayuan Chen)

 - Fix show_fdinfo of kprobe_multi when cookies are not present (Jiri
   Olsa)

 - Fix race in freeing special fields in BPF maps to prevent memory
   leaks (Kumar Kartikeya Dwivedi)

 - Fix OOB read in dmabuf_collector (T.J. Mercier)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: (36 commits)
  selftests/bpf: Avoid simplification of crafted bounds test
  selftests/bpf: Test refinement of single-value tnum
  bpf: Improve bounds when tnum has a single possible value
  bpf: Introduce tnum_step to step through tnum's members
  bpf: Fix race in devmap on PREEMPT_RT
  bpf: Fix race in cpumap on PREEMPT_RT
  selftests/bpf: Add tests for special fields races
  bpf: Retire rcu_trace_implies_rcu_gp() from local storage
  bpf: Delay freeing fields in local storage
  bpf: Lose const-ness of map in map_check_btf()
  bpf: Register dtor for freeing special fields
  selftests/bpf: Fix OOB read in dmabuf_collector
  selftests/bpf: Fix a memory leak in xdp_flowtable test
  bpf: Fix stack-out-of-bounds write in devmap
  bpf: Fix kprobe_multi cookies access in show_fdinfo callback
  bpf, arm64: Force 8-byte alignment for JIT buffer to prevent atomic tearing
  selftests/bpf: Don't override SIGSEGV handler with ASAN
  selftests/bpf: Check BPFTOOL env var in detect_bpftool_path()
  selftests/bpf: Fix out-of-bounds array access bugs reported by ASAN
  selftests/bpf: Fix array bounds warning in jit_disasm_helpers
  ...
2026-02-28 19:54:28 -08:00
Linus Torvalds
63a43faf6a Merge tag 'driver-core-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core
Pull driver core fixes from Danilo Krummrich:

 - Do not register imx_clk_scu_driver in imx8qxp_clk_probe(); besides
   fixing two other issues, this avoids a deadlock in combination with
   commit dc23806a7c ("driver core: enforce device_lock for
   driver_match_device()")

 - Move secondary node lookup from device_get_next_child_node() to
   fwnode_get_next_child_node(); this avoids issues when users switch
   from the device API to the fwnode API

 - Export io_define_{read,write}!() to avoid unused import warnings when
   CONFIG_PCI=n

* tag 'driver-core-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core:
  clk: scu/imx8qxp: do not register driver in probe()
  rust: io: macro_export io_define_read!() and io_define_write!()
  device property: Allow secondary lookup in fwnode_get_next_child_node()
2026-02-28 19:35:30 -08:00
Eric Biggers
4478e8eeb8 lib/crypto: tests: Depend on library options rather than selecting them
The convention for KUnit tests is to have the test kconfig options
visible only when the code they depend on is already enabled.  This way
only the tests that are relevant to the particular kernel build can be
enabled, either manually or via KUNIT_ALL_TESTS.

Update lib/crypto/tests/Kconfig to follow that convention, i.e. depend
on the corresponding library options rather than selecting them.  This
fixes an issue where enabling KUNIT_ALL_TESTS enabled non-test code.

This does mean that it becomes a bit more difficult to enable *all* the
crypto library tests (which is what I do as a maintainer of the code),
since doing so will now require enabling other options that select the
libraries.  Regardless, we should follow the standard KUnit convention.
I'll also add a .kunitconfig file that does enable all these options.

Note: currently most of the crypto library options are selected by
visible options in crypto/Kconfig, which can be used to enable them
without too much trouble.  If in the future we end up with more cases
like CRYPTO_LIB_CURVE25519 which is selected only by WIREGUARD (thus
making CRYPTO_LIB_CURVE25519_KUNIT_TEST effectively depend on WIREGUARD
after this commit), we could consider adding a new kconfig option that
enables all the library code specifically for testing.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Closes: https://lore.kernel.org/r/CAMuHMdULzMdxuTVfg8_4jdgzbzjfx-PHkcgbGSthcUx_sHRNMg@mail.gmail.com
Fixes: 4dcf6cadda ("lib/crypto: tests: Add KUnit tests for SHA-224 and SHA-256")
Fixes: 571eaeddb6 ("lib/crypto: tests: Add KUnit tests for SHA-384 and SHA-512")
Fixes: 6dd4d9f791 ("lib/crypto: tests: Add KUnit tests for Poly1305")
Fixes: 66b1306079 ("lib/crypto: tests: Add KUnit tests for SHA-1 and HMAC-SHA1")
Fixes: d6b6aac0cd ("lib/crypto: tests: Add KUnit tests for MD5 and HMAC-MD5")
Fixes: afc4e4a5f1 ("lib/crypto: tests: Migrate Curve25519 self-test to KUnit")
Fixes: 6401fd334d ("lib/crypto: tests: Add KUnit tests for BLAKE2b")
Fixes: 15c64c47e4 ("lib/crypto: tests: Add SHA3 kunit tests")
Fixes: b3aed551b3 ("lib/crypto: tests: Add KUnit tests for POLYVAL")
Fixes: ed894faccb ("lib/crypto: tests: Add KUnit tests for ML-DSA verification")
Fixes: 7246fe6cd6 ("lib/crypto: tests: Add KUnit tests for NH")
Cc: stable@vger.kernel.org
Reviewed-by: David Gow <david@davidgow.net>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260226191749.39397-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-02-28 19:17:30 -08:00
Prithvi Tambewagh
14d4ac19d1 scsi: target: Fix recursive locking in __configfs_open_file()
In flush_write_buffer, &p->frag_sem is acquired and then the loaded store
function is called, which, here, is target_core_item_dbroot_store().  This
function called filp_open(), following which these functions were called
(in reverse order), according to the call trace:

  down_read
  __configfs_open_file
  do_dentry_open
  vfs_open
  do_open
  path_openat
  do_filp_open
  file_open_name
  filp_open
  target_core_item_dbroot_store
  flush_write_buffer
  configfs_write_iter

target_core_item_dbroot_store() tries to validate the new file path by
trying to open the file path provided to it; however, in this case, the bug
report shows:

db_root: not a directory: /sys/kernel/config/target/dbroot

indicating that the same configfs file was tried to be opened, on which it
is currently working on. Thus, it is trying to acquire frag_sem semaphore
of the same file of which it already holds the semaphore obtained in
flush_write_buffer(), leading to acquiring the semaphore in a nested manner
and a possibility of recursive locking.

Fix this by modifying target_core_item_dbroot_store() to use kern_path()
instead of filp_open() to avoid opening the file using filesystem-specific
function __configfs_open_file(), and further modifying it to make this fix
compatible.

Reported-by: syzbot+f6e8174215573a84b797@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=f6e8174215573a84b797
Tested-by: syzbot+f6e8174215573a84b797@syzkaller.appspotmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Prithvi Tambewagh <activprithvi@gmail.com>
Reviewed-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Link: https://patch.msgid.link/20260216062002.61937-1-activprithvi@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-28 20:41:52 -05:00
Florian Fuchs
80bf3b28d3 scsi: devinfo: Add BLIST_SKIP_IO_HINTS for Iomega ZIP
The Iomega ZIP 100 (Z100P2) can't process IO Advice Hints Grouping mode
page query. It immediately switches to the status phase 0xb8 after
receiving the subpage code 0x05 of MODE_SENSE_10 command, which fails
imm_out() and turns into DID_ERROR of this command, which leads to unusable
device. This was tested with an Iomega ZIP 100 (Z100P2) connected with a
StarTech PEX1P2 AX99100 PCIe parallel port card.

Prior to this fix, Test Unit Ready fails and the drive can't be used:
        IMM: returned SCSI status b8
        sd 7:0:6:0: [sdh] Test Unit Ready failed: Result: hostbyte=0x01 driverbyte=DRIVER_OK

Signed-off-by: Florian Fuchs <fuchsfl@gmail.com>
Link: https://patch.msgid.link/20260227181823.892932-1-fuchsfl@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-28 18:18:05 -05:00
Ranjan Kumar
dbd53975ed scsi: mpi3mr: Clear reset history on ready and recheck state after timeout
The driver retains reset history even after the IOC has successfully
reached the READY state. That leaves stale reset information active during
normal operation and can mislead recovery and diagnostics.  In addition, if
the IOC becomes READY just as the ready timeout loop exits, the driver
still follows the failure path and may retry or report failure incorrectly.

Clear reset history once READY is confirmed so driver state matches actual
IOC status. After the timeout loop, recheck the IOC state and treat READY
as success instead of failing.

Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://patch.msgid.link/20260225082622.82588-1-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-28 17:40:25 -05:00
Junxiao Bi
1ac22c8eae scsi: core: Fix refcount leak for tagset_refcnt
This leak will cause a hang when tearing down the SCSI host. For example,
iscsid hangs with the following call trace:

[130120.652718] scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured

PID: 2528     TASK: ffff9d0408974e00  CPU: 3    COMMAND: "iscsid"
 #0 [ffffb5b9c134b9e0] __schedule at ffffffff860657d4
 #1 [ffffb5b9c134ba28] schedule at ffffffff86065c6f
 #2 [ffffb5b9c134ba40] schedule_timeout at ffffffff86069fb0
 #3 [ffffb5b9c134bab0] __wait_for_common at ffffffff8606674f
 #4 [ffffb5b9c134bb10] scsi_remove_host at ffffffff85bfe84b
 #5 [ffffb5b9c134bb30] iscsi_sw_tcp_session_destroy at ffffffffc03031c4 [iscsi_tcp]
 #6 [ffffb5b9c134bb48] iscsi_if_recv_msg at ffffffffc0292692 [scsi_transport_iscsi]
 #7 [ffffb5b9c134bb98] iscsi_if_rx at ffffffffc02929c2 [scsi_transport_iscsi]
 #8 [ffffb5b9c134bbf0] netlink_unicast at ffffffff85e551d6
 #9 [ffffb5b9c134bc38] netlink_sendmsg at ffffffff85e554ef

Fixes: 8fe4ce5836 ("scsi: core: Fix a use-after-free")
Cc: stable@vger.kernel.org
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260223232728.93350-1-junxiao.bi@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-28 17:27:09 -05:00
Raju Rangoju
9439a661c2 amd-xgbe: fix MAC_TCR_SS register width for 2.5G and 10M speeds
Extend the MAC_TCR_SS (Speed Select) register field width from 2 bits
to 3 bits to properly support all speed settings.

The MAC_TCR register's SS field encoding requires 3 bits to represent
all supported speeds:
  - 0x00: 10Gbps (XGMII)
  - 0x02: 2.5Gbps (GMII) / 100Mbps
  - 0x03: 1Gbps / 10Mbps
  - 0x06: 2.5Gbps (XGMII) - P100a only

With only 2 bits, values 0x04-0x07 cannot be represented, which breaks
2.5G XGMII mode on newer platforms and causes incorrect speed select
values to be programmed.

Fixes: 07445f3c7c ("amd-xgbe: Add support for 10 Mbps speed")
Co-developed-by: Guruvendra Punugupati <Guruvendra.Punugupati@amd.com>
Signed-off-by: Guruvendra Punugupati <Guruvendra.Punugupati@amd.com>
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20260226170753.250312-1-Raju.Rangoju@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 14:22:34 -08:00
MD Danish Anwar
147792c395 net: ti: icssg-prueth: Fix ping failure after offload mode setup when link speed is not 1G
When both eth interfaces with links up are added to a bridge or hsr
interface, ping fails if the link speed is not 1Gbps (e.g., 100Mbps).

The issue is seen because when switching to offload (bridge/hsr) mode,
prueth_emac_restart() restarts the firmware and clears DRAM with
memset_io(), setting all memory to 0. This includes PORT_LINK_SPEED_OFFSET
which firmware reads for link speed. The value 0 corresponds to
FW_LINK_SPEED_1G (0x00), so for 1Gbps links the default value is correct
and ping works. For 100Mbps links, the firmware needs FW_LINK_SPEED_100M
(0x01) but gets 0 instead, causing ping to fail. The function
emac_adjust_link() is called to reconfigure, but it detects no state change
(emac->link is still 1, speed/duplex match PHY) so new_state remains false
and icssg_config_set_speed() is never called to correct the firmware speed
value.

The fix resets emac->link to 0 before calling emac_adjust_link() in
prueth_emac_common_start(). This forces new_state=true, ensuring
icssg_config_set_speed() is called to write the correct speed value to
firmware memory.

Fixes: 06feac1540 ("net: ti: icssg-prueth: Fix emac link speed handling")
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Link: https://patch.msgid.link/20260226102356.2141871-1-danishanwar@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 13:41:35 -08:00
Kaushlendra Kumar
dff8e3c025 cpupower: fix swapped power/energy unit labels
Fix error where microWatts and microJoules units were interchanged.

Signed-off-by: Kaushlendra Kumar <kaushlendra.kumar@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2026-02-28 13:13:49 -07:00
Linus Torvalds
42eb017830 Merge tag 'v7.0rc1-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6
Pull smb client fixes from Steve French:

 - Two multichannel fixes

 - Locking fix for superblock flags

 - Fix to remove debug message that could log password

 - Cleanup fix for setting credentials

* tag 'v7.0rc1-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6:
  smb: client: Use snprintf in cifs_set_cifscreds
  smb: client: Don't log plaintext credentials in cifs_set_cifscreds
  smb: client: fix broken multichannel with krb5+signing
  smb: client: use atomic_t for mnt_cifs_flags
  smb: client: fix cifs_pick_channel when channels are equally loaded
2026-02-28 10:45:56 -08:00
Takashi Sakamoto
9197e5949a firewire: ohci: initialize page array to use alloc_pages_bulk() correctly
The call of alloc_pages_bulk() skips to fill entries of page array when
the entries already have values. While, 1394 OHCI PCI driver passes the
page array without initializing. It could cause invalid state at PFN
validation in vmap().

Fixes: f2ae92780a ("firewire: ohci: split page allocation from dma mapping")
Reported-by: John Ogness <john.ogness@linutronix.de>
Reported-and-tested-by: Harald Arnesen <linux@skogtun.org>
Reported-and-tested-by: David Gow <david@davidgow.net>
Closes: https://lore.kernel.org/lkml/87tsv1vig5.fsf@jogness.linutronix.de/
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-02-28 10:09:24 -08:00
Jiayuan Chen
101bacb303 atm: lec: fix null-ptr-deref in lec_arp_clear_vccs
syzkaller reported a null-ptr-deref in lec_arp_clear_vccs().
This issue can be easily reproduced using the syzkaller reproducer.

In the ATM LANE (LAN Emulation) module, the same atm_vcc can be shared by
multiple lec_arp_table entries (e.g., via entry->vcc or entry->recv_vcc).
When the underlying VCC is closed, lec_vcc_close() iterates over all
ARP entries and calls lec_arp_clear_vccs() for each matched entry.

For example, when lec_vcc_close() iterates through the hlists in
priv->lec_arp_empty_ones or other ARP tables:

1. In the first iteration, for the first matched ARP entry sharing the VCC,
lec_arp_clear_vccs() frees the associated vpriv (which is vcc->user_back)
and sets vcc->user_back to NULL.
2. In the second iteration, for the next matched ARP entry sharing the same
VCC, lec_arp_clear_vccs() is called again. It obtains a NULL vpriv from
vcc->user_back (via LEC_VCC_PRIV(vcc)) and then attempts to dereference it
via `vcc->pop = vpriv->old_pop`, leading to a null-ptr-deref crash.

Fix this by adding a null check for vpriv before dereferencing
it. If vpriv is already NULL, it means the VCC has been cleared
by a previous call, so we can safely skip the cleanup and just
clear the entry's vcc/recv_vcc pointers.

The entire cleanup block (including vcc_release_async()) is placed inside
the vpriv guard because a NULL vpriv indicates the VCC has already been
fully released by a prior iteration — repeating the teardown would
redundantly set flags and trigger callbacks on an already-closing socket.

The Fixes tag points to the initial commit because the entry->vcc path has
been vulnerable since the original code. The entry->recv_vcc path was later
added by commit 8d9f73c0ad ("atm: fix a memory leak of vcc->user_back")
with the same pattern, and both paths are fixed here.

Reported-by: syzbot+72e3ea390c305de0e259@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68c95a83.050a0220.3c6139.0e5c.GAE@google.com/T/
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Suggested-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260225123250.189289-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 09:33:26 -08:00
Linus Torvalds
2f9339c052 Merge tag 'spi-fix-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
Pull spi fixes from Mark Brown:
 "One fix for the stm32 driver which got broken for DMA chaining cases,
  plus a removal of some straggling bindings for the Bikal SoC which has
  been pulled out of the kernel"

* tag 'spi-fix-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi:
  spi: stm32: fix missing pointer assignment in case of dma chaining
  spi: dt-bindings: snps,dw-abp-ssi: Remove unused bindings
2026-02-28 09:21:18 -08:00
Linus Torvalds
463e133751 Merge tag 'regulator-fix-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
Pull regulator fixes from Mark Brown:
 "A small pile of fixes, none of which are super major - the code fixes
  are improved error handling and fixing a leak of a device node.

  We also have a typo fix and an improvement to make the binding example
  for mt6359 more directly usable"

* tag 'regulator-fix-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator:
  regulator: Kconfig: fix a typo
  regulator: bq257xx: Fix device node reference leak in bq257xx_reg_dt_parse_gpio()
  regulator: fp9931: Fix PM runtime reference leak in fp9931_hwmon_read()
  regulator: tps65185: check devm_kzalloc() result in probe
  regulator: dt-bindings: mt6359: make regulator names unique
2026-02-28 09:18:02 -08:00
Guenter Roeck
74badb9c20 dpaa2-switch: Fix interrupt storm after receiving bad if_id in IRQ handler
Commit 31a7a0bbeb ("dpaa2-switch: add bounds check for if_id in IRQ
handler") introduces a range check for if_id to avoid an out-of-bounds
access. If an out-of-bounds if_id is detected, the interrupt status is
not cleared. This may result in an interrupt storm.

Clear the interrupt status after detecting an out-of-bounds if_id to avoid
the problem.

Found by an experimental AI code review agent at Google.

Fixes: 31a7a0bbeb ("dpaa2-switch: add bounds check for if_id in IRQ handler")
Cc: Junrui Luo <moonafterrain@outlook.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Link: https://patch.msgid.link/20260227055812.1777915-1-linux@roeck-us.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 09:01:41 -08:00
Linus Torvalds
201795a1b7 Merge tag 's390-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 fixes from Vasily Gorbik:

 - Fix guest pfault init to pass a physical address to DIAG 0x258,
   restoring pfault interrupts and avoiding vCPU stalls during host
   page-in

 - Fix kexec/kdump hangs with stack protector by marking
   s390_reset_system() __no_stack_protector; set_prefix(0) switches
   lowcore and the canary no longer matches

 - Fix idle/vtime cputime accounting (idle-exit ordering, vtimer
   double-forwarding) and small cleanups

* tag 's390-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/pfault: Fix virtual vs physical address confusion
  s390/kexec: Disable stack protector in s390_reset_system()
  s390/idle: Remove psw_idle() prototype
  s390/vtime: Use lockdep_assert_irqs_disabled() instead of BUG_ON()
  s390/vtime: Use __this_cpu_read() / get rid of READ_ONCE()
  s390/irq/idle: Remove psw bits early
  s390/idle: Inline update_timer_idle()
  s390/idle: Slightly optimize idle time accounting
  s390/idle: Add comment for non obvious code
  s390/vtime: Fix virtual timer forwarding
  s390/idle: Fix cpu idle exit cpu time accounting
2026-02-28 09:01:33 -08:00
Jakub Kicinski
0eb5965b29 Merge branch 'xsk-fixes-for-af_xdp-fragment-handling'
Nikhil P. Rao says:

====================
xsk: Fixes for AF_XDP fragment handling

This series fixes two issues in AF_XDP zero-copy fragment handling:

Patch 1 fixes a buffer leak caused by incorrect list node handling after
commit b692bf9a75. The list_node field is now reused for both the xskb
pool list and the buffer free list. Using list_del() instead of
list_del_init() causes list_empty() checks in xp_free() to fail, preventing
buffers from being added to the free list.

Patch 2 fixes partial packet delivery to userspace. In the zero-copy path,
if the Rx queue fills up while enqueuing fragments, the remaining fragments
are dropped, causing the application to receive incomplete packets. The fix
ensures the Rx queue has sufficient space for all fragments before starting
to enqueue them.

[1] https://lore.kernel.org/oe-kbuild-all/202602051720.YfZO23pZ-lkp@intel.com/
[2] https://lore.kernel.org/oe-kbuild-all/202602172046.vf9DtpdF-lkp@intel.com/
====================

Link: https://patch.msgid.link/20260225000456.107806-1-nikhil.rao@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 08:55:15 -08:00
Nikhil P. Rao
f7387d6579 xsk: Fix zero-copy AF_XDP fragment drop
AF_XDP should ensure that only a complete packet is sent to application.
In the zero-copy case, if the Rx queue gets full as fragments are being
enqueued, the remaining fragments are dropped.

For the multi-buffer case, add a check to ensure that the Rx queue has
enough space for all fragments of a packet before starting to enqueue
them.

Fixes: 24ea50127e ("xsk: support mbuf on ZC RX")
Signed-off-by: Nikhil P. Rao <nikhil.rao@amd.com>
Link: https://patch.msgid.link/20260225000456.107806-3-nikhil.rao@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 08:55:11 -08:00
Nikhil P. Rao
60abb0ac11 xsk: Fix fragment node deletion to prevent buffer leak
After commit b692bf9a75 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node"),
the list_node field is reused for both the xskb pool list and the buffer
free list, this causes a buffer leak as described below.

xp_free() checks if a buffer is already on the free list using
list_empty(&xskb->list_node). When list_del() is used to remove a node
from the xskb pool list, it doesn't reinitialize the node pointers.
This means list_empty() will return false even after the node has been
removed, causing xp_free() to incorrectly skip adding the buffer to the
free list.

Fix this by using list_del_init() instead of list_del() in all fragment
handling paths, this ensures the list node is reinitialized after removal,
allowing the list_empty() to work correctly.

Fixes: b692bf9a75 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node")
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Nikhil P. Rao <nikhil.rao@amd.com>
Link: https://patch.msgid.link/20260225000456.107806-2-nikhil.rao@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 08:55:11 -08:00
Jakub Kicinski
6df0022b6c Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2026-02-19 (idpf, ice, i40e, ixgbevf, e1000e)

For idpf:
Li Li moves the check for software marker to occur after incrementing
next to clean to avoid re-encountering the same packet. He also adds a
couple of checks to prevent NULL pointer dereferences and NULLs rss_key,
after free, in error path so that later checks are properly evaluated.

Brian Vazquez adjusts IRQ naming to have correlation with netdev naming.

Sreedevi removes validation of action type as part of ntuple rule
deletion.

For ice:
Aaron Ma breaks RDMA initialization into two steps and adjusts calls so
that VSIs are entirely configured before plugging.

Michal Schmidt fixes initialization of loopback VSI to have proper
resources allocated to allow for loopback testing to occur.

For i40e:
Thomas Gleixner fixes a leak of preempt count by replacing get_cpu()
with smp_processor_id().

For ixgbevf:
Jedrzej adds a check for mailbox version before attempting to call an
associated link state call that is supported in that mailbox version.

For e1000e:
Vitaly clears power gating feature for Panther Lake systems to avoid
packet issues.

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  e1000e: clear DPG_EN after reset to avoid autonomous power-gating
  e1000e: introduce new board type for Panther Lake PCH
  ixgbevf: fix link setup issue
  i40e: Fix preempt count leak in napi poll tracepoint
  ice: fix crash in ethtool offline loopback test
  ice: recap the VSI and QoS info after rebuild
  idpf: Fix flow rule delete failure due to invalid validation
  idpf: change IRQ naming to match netdev and ethtool queue numbering
  idpf: nullify pointers after they are freed
  idpf: skip deallocating txq group's txqs if it is NULL
  idpf: skip deallocating bufq_sets from rx_qgrp if it is NULL
  idpf: increment completion queue next_to_clean in sw marker wait routine

====================

Link: https://patch.msgid.link/20260225211546.1949260-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 08:43:56 -08:00
Jakub Kicinski
1cc93c48b5 selftests/net: packetdrill: remove tests for tcp_rcv_*big
Since commit 1d2fbaad7c ("tcp: stronger sk_rcvbuf checks")
has been reverted we need to remove the corresponding tests.

Link: https://lore.kernel.org/20260227003359.2391017-1-kuba@kernel.org
Link: https://patch.msgid.link/20260227033446.2596457-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 07:55:52 -08:00
Jakub Kicinski
026dfef287 tcp: give up on stronger sk_rcvbuf checks (for now)
We hit another corner case which leads to TcpExtTCPRcvQDrop

Connections which send RPCs in the 20-80kB range over loopback
experience spurious drops. The exact conditions for most of
the drops I investigated are that:
 - socket exchanged >1MB of data so its not completely fresh
 - rcvbuf is around 128kB (default, hasn't grown)
 - there is ~60kB of data in rcvq
 - skb > 64kB arrives

The sum of skb->len (!) of both of the skbs (the one already
in rcvq and the arriving one) is larger than rwnd.
My suspicion is that this happens because __tcp_select_window()
rounds the rwnd up to (1 << wscale) if less than half of
the rwnd has been consumed.

Eric suggests that given the number of Fixes we already have
pointing to 1d2fbaad7c it's probably time to give up on it,
until a bigger revamp of rmem management.

Also while we could risk tweaking the rwnd math, there are other
drops on workloads I investigated, after the commit in question,
not explained by this phenomenon.

Suggested-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/20260225122355.585fd57b@kernel.org
Fixes: 1d2fbaad7c ("tcp: stronger sk_rcvbuf checks")
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260227003359.2391017-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 07:55:39 -08:00
Kuniyuki Iwashima
6996a2d2d0 udp: Unhash auto-bound connected sk from 4-tuple hash table when disconnected.
Let's say we bind() an UDP socket to the wildcard address with a
non-zero port, connect() it to an address, and disconnect it from
the address.

bind() sets SOCK_BINDPORT_LOCK on sk->sk_userlocks (but not
SOCK_BINDADDR_LOCK), and connect() calls udp_lib_hash4() to put
the socket into the 4-tuple hash table.

Then, __udp_disconnect() calls sk->sk_prot->rehash(sk).

It computes a new hash based on the wildcard address and moves
the socket to a new slot in the 4-tuple hash table, leaving a
garbage in the chain that no packet hits.

Let's remove such a socket from 4-tuple hash table when disconnected.

Note that udp_sk(sk)->udp_portaddr_hash needs to be udpated after
udp_hash4_dec(hslot2) in udp_unhash4().

Fixes: 78c91ae2c6 ("ipv4/udp: Add 4-tuple hash for connected socket")
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260227035547.3321327-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-28 07:46:24 -08:00
Paolo Bonzini
55365ab85a Merge tag 'kvmarm-fixes-7.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 7.0, take #1

- Make sure we don't leak any S1POE state from guest to guest when
  the feature is supported on the HW, but not enabled on the host

- Propagate the ID registers from the host into non-protected VMs
  managed by pKVM, ensuring that the guest sees the intended feature set

- Drop double kern_hyp_va() from unpin_host_sve_state(), which could
  bite us if we were to change kern_hyp_va() to not being idempotent

- Don't leak stage-2 mappings in protected mode

- Correctly align the faulting address when dealing with single page
  stage-2 mappings for PAGE_SIZE > 4kB

- Fix detection of virtualisation-capable GICv5 IRS, due to the
  maintainer being obviously fat fingered...

- Remove duplication of code retrieving the ASID for the purpose of
  S1 PT handling

- Fix slightly abusive const-ification in vgic_set_kvm_info()
2026-02-28 15:33:34 +01:00
Paolo Bonzini
70295a479d KVM: always define KVM_CAP_SYNC_MMU
KVM_CAP_SYNC_MMU is provided by KVM's MMU notifiers, which are now always
available.  Move the definition from individual architectures to common
code.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-02-28 15:31:35 +01:00
Paolo Bonzini
407fd8b8d8 KVM: remove CONFIG_KVM_GENERIC_MMU_NOTIFIER
All architectures now use MMU notifier for KVM page table management.
Remove the Kconfig symbol and the code that is used when it is
disabled.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2026-02-28 15:31:35 +01:00
Jens Axboe
0ed2e8bf61 io_uring: correct comment for IORING_SETUP_TASKRUN_FLAG
Sync with a recent liburing fix, which corrects the comment explaining
when the IORING_SETUP_TASKRUN_FLAG setup flag is valid to use. May be
use with COOP_TASKRUN or DEFER_TASKRUN, not useful without either of
this task_work mechanisms being used.

Link: https://github.com/axboe/liburing/pull/1543
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-28 04:56:20 -07:00
Takashi Iwai
d4d5633d8b ALSA: firewire: dice: Fix printf warning with W=1
The use of snprintf() may cause a warning with W=1 due to the possibly
truncated string.  As the truncation doesn't really matter (and won't
happen practically) in the case of dice driver, just shut it up by
replacing with scnprintf().

Link: https://patch.msgid.link/20260227155705.1557224-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-28 09:31:13 +01:00
Shenghao Ding
05ac3846ff ALSA: hda/tas2781: A workaround solution to lower-vol issue among lower calibrated-impedance micro-speaker on TAS2781
On TAS2781, if the Speaker calibrated impedance is lower than default
value hard-coded inside the TAS2781, it will cuase vol lower than
normal. In order to fix this issue, the parameter of SineGainI need
updating.

Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Tested-by: Matthew Schwartz <matthew.schwartz@linux.dev>
Link: https://patch.msgid.link/20260227144641.1243-1-shenghao-ding@ti.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-28 09:28:06 +01:00
Zhang Heng
068641bc9d ALSA: hda/realtek: Add quirk for HP Pavilion 15-eh1xxx to enable mute LED
The HP Pavilion 15-eh1xxx series uses the HP mainboard 88D1 with ALC245
and needs the ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT quirk to make the
mute led working.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=215978
Cc: <stable@vger.kernel.org>
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Link: https://patch.msgid.link/20260227121327.3751341-1-zhangheng@kylinos.cn
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-28 09:27:28 +01:00
Lianqin Hu
bd72a37b21 ALSA: usb-audio: Add iface reset and delay quirk for AB13X USB Audio
Setting up the interface when suspended/resumeing fail on this card.
Adding a reset and delay quirk will eliminate this problem.

usb 1-1: New USB device found, idVendor=0624, idProduct=3d3f
usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
usb 1-1: Product: AB13X USB Audio
usb 1-1: Manufacturer: Generic
usb 1-1: SerialNumber: 20210726905926

Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/TYUPR06MB621795D087BF2D594027C235D273A@TYUPR06MB6217.apcprd06.prod.outlook.com
2026-02-28 09:27:02 +01:00
Matt DeVillier
778031e165 ALSA: hda/ca0132: Set HP/Speaker auto-detect default from headphone pin verb
HP/Speaker auto-detect (VNID_HP_ASEL) has been off by default for every
CA0132 device since the driver was added in 2012. vnode_lswitch is
always initialized to 0 in ca0132_init_chip(), and no quirk or other
code path enables it. As a result, headphone jack detection works only
after the user manually turns on "HP/Speaker Auto Detect" in alsamixer,
which is not obvious on laptops with combo jacks (e.g. Google Link,
Alienware).

Change the default to follow the headphone pin config: if the pin verb
has presence detect enabled (no AC_DEFCFG_MISC_NO_PRESENCE) and the
codec supports it (AC_PINCAP_PRES_DETECT), enable HP_ASEL by default.
This lets firmware (coreboot, UEFI, etc.) express whether the headphone
jack supports insertion detection. Devices with combo jacks can default
to auto-detect; devices with fixed/no jack leave it off.

Signed-off-by: Matt DeVillier <matt.devillier@gmail.com>
Link: https://patch.msgid.link/20260226163055.825167-1-matt.devillier@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-28 09:24:21 +01:00
Lizhi Hou
89ff45359a accel/amdxdna: Fill invalid payload for failed command
Newer userspace applications may read the payload of a failed command
to obtain detailed error information. However, the driver and old firmware
versions may not support returning advanced error information.
In this case, initialize the command payload with an invalid value so
userspace can detect that no detailed error information is available.

Fixes: aac243092b ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260227004841.3080241-1-lizhi.hou@amd.com
2026-02-27 23:01:36 -08:00
Thorsten Blum
d240b079a3 crypto: atmel-sha204a - Fix OOM ->tfm_count leak
If memory allocation fails, decrement ->tfm_count to avoid blocking
future reads.

Cc: stable@vger.kernel.org
Fixes: da001fb651 ("crypto: atmel-i2c - add support for SHA204A random number generator")
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2026-02-28 12:53:25 +09:00
Alper Ak
889b0e2721 crypto: ccp - Fix use-after-free on error path
In the error path of sev_tsm_init_locked(), the code dereferences 't'
after it has been freed with kfree(). The pr_err() statement attempts
to access t->tio_en and t->tio_init_done after the memory has been
released.

Move the pr_err() call before kfree(t) to access the fields while the
memory is still valid.

This issue reported by Smatch static analyser

Fixes:4be423572da1 ("crypto/ccp: Implement SEV-TIO PCIe IDE (phase1)")
Signed-off-by: Alper Ak <alperyasinak1@gmail.com>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2026-02-28 12:51:58 +09:00
Ashish Kalra
8168a7b72b crypto: ccp - allow callers to use HV-Fixed page API when SEV is disabled
When SEV is disabled, the HV-Fixed page allocation call fails, which in
turn causes SFS initialization to fail.

Fix the HV-Fixed API so callers (for example, SFS) can use it even when
SEV is disabled by performing normal page allocation and freeing.

Fixes: e09701dcdd ("crypto: ccp - Add new HV-Fixed page allocation/free API")
Cc: stable@vger.kernel.org
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2026-02-28 12:51:58 +09:00
Long Li
dabffd0854 net: mana: Ring doorbell at 4 CQ wraparounds
MANA hardware requires at least one doorbell ring every 8 wraparounds
of the CQ. The driver rings the doorbell as a form of flow control to
inform hardware that CQEs have been consumed.

The NAPI poll functions mana_poll_tx_cq() and mana_poll_rx_cq() can
poll up to CQE_POLLING_BUFFER (512) completions per call. If the CQ
has fewer than 512 entries, a single poll call can process more than
4 wraparounds without ringing the doorbell. The doorbell threshold
check also uses ">" instead of ">=", delaying the ring by one extra
CQE beyond 4 wraparounds. Combined, these issues can cause the driver
to exceed the 8-wraparound hardware limit, leading to missed
completions and stalled queues.

Fix this by capping the number of CQEs polled per call to 4 wraparounds
of the CQ in both TX and RX paths. Also change the doorbell threshold
from ">" to ">=" so the doorbell is rung as soon as 4 wraparounds are
reached.

Cc: stable@vger.kernel.org
Fixes: 58a63729c9 ("net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings")
Signed-off-by: Long Li <longli@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Link: https://patch.msgid.link/20260226192833.1050807-1-longli@microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 19:29:38 -08:00
Valentin Spreckels
15fba71533 net: usb: r8152: add TRENDnet TUC-ET2G
The TRENDnet TUC-ET2G is a RTL8156 based usb ethernet adapter. Add its
vendor and product IDs.

Signed-off-by: Valentin Spreckels <valentin@spreckels.dev>
Link: https://patch.msgid.link/20260226195409.7891-2-valentin@spreckels.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 19:27:31 -08:00
Victor Nogueira
b14e82abf7 selftests/tc-testing: Create tests to exercise act_ct binding restrictions
Add 4 test cases to exercise new act_ct binding restrictions:

- Try to attach act_ct to an ets qdisc
- Attach act_ct to an ingress qdisc
- Attach act_ct to a clsact/egress qdisc
- Attach act_ct to a shared block

Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260225134349.1287037-2-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 19:06:21 -08:00
Victor Nogueira
11cb63b0d1 net/sched: Only allow act_ct to bind to clsact/ingress qdiscs and shared blocks
As Paolo said earlier [1]:

"Since the blamed commit below, classify can return TC_ACT_CONSUMED while
the current skb being held by the defragmentation engine. As reported by
GangMin Kim, if such packet is that may cause a UaF when the defrag engine
later on tries to tuch again such packet."

act_ct was never meant to be used in the egress path, however some users
are attaching it to egress today [2]. Attempting to reach a middle
ground, we noticed that, while most qdiscs are not handling
TC_ACT_CONSUMED, clsact/ingress qdiscs are. With that in mind, we
address the issue by only allowing act_ct to bind to clsact/ingress
qdiscs and shared blocks. That way it's still possible to attach act_ct to
egress (albeit only with clsact).

[1] https://lore.kernel.org/netdev/674b8cbfc385c6f37fb29a1de08d8fe5c2b0fbee.1771321118.git.pabeni@redhat.com/
[2] https://lore.kernel.org/netdev/cc6bfb4a-4a2b-42d8-b9ce-7ef6644fb22b@ovn.org/

Reported-by: GangMin Kim <km.kim1503@gmail.com>
Fixes: 3f14b377d0 ("net/sched: act_ct: fix skb leak and crash on ooo frags")
CC: stable@vger.kernel.org
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260225134349.1287037-1-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 19:06:21 -08:00
Florian Westphal
ba14798653 selftests: netfilter: nft_queue.sh: avoid flakes on debug kernels
Jakub reports test flakes on debug kernels:
 FAIL: test_udp_gro_ct: Expected software segmentation to occur, had 23 and 17

This test assumes that the kernels nfnetlink_queue module sees N GSO
packets, segments them into M skbs and queues them to userspace for
reinjection.

Hence, if M >= N, no segmentation occurred.

However, its possible that this happens:
- nfnetlink_queue gets GSO packet
- segments that into n skbs
- userspace buffer is full, kernel drops the segmented skbs

-> "toqueue" counter incremented by 1, "fromqueue" is unchanged.

If this happens often enough in a single run, M >= N check triggers
incorrectly.

To solve this, allow the nf_queue.c test program to set the FAIL_OPEN
flag so that the segmented skbs bypass the queueing step in the kernel
if the receive buffer is full.

Also, reduce number of sending socat instances, decrease their priority
and increase nice value for the nf_queue program itself to reduce the
probability of overruns happening in the first place.

Fixes: 59ecffa399 ("selftests: netfilter: nft_queue.sh: add udp fraglist gro test case")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20260218184114.0b405b72@kernel.org/
Signed-off-by: Florian Westphal <fw@strlen.de>
Link: https://patch.msgid.link/20260226161920.1205-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 18:36:59 -08:00
Jakub Kicinski
71347b9d8c Merge branch 'net-sched-sch_cake-fixes-for-cake_mq'
Jonas Köppeler says:

====================
net/sched: sch_cake: fixes for cake_mq

This patch contains two fixes for cake_mq:
- do not sync when bandwidth is unlimited
- adjust the rates for all tins during sync
====================

Link: https://patch.msgid.link/20260226-cake-mq-skip-sync-bandwidth-unlimited-v1-0-01830bb4db87@tu-berlin.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 18:35:42 -08:00
Jonas Köppeler
15c2715a52 net/sched: sch_cake: fixup cake_mq rate adjustment for diffserv config
cake_mq's rate adjustment during the sync periods did not adjust the
rates for every tin in a diffserv config. This lead to inconsistencies
of rates between the tins. Fix this by setting the rates for all tins
during synchronization.

Fixes: 1bddd758ba ("net/sched: sch_cake: share shaper state across sub-instances of cake_mq")
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
Link: https://patch.msgid.link/20260226-cake-mq-skip-sync-bandwidth-unlimited-v1-2-01830bb4db87@tu-berlin.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 18:35:40 -08:00
Jonas Köppeler
0b3cd139be net/sched: sch_cake: avoid sync overhead when unlimited
Skip inter-instance sync when no rate limit is configured, as it serves
no purpose and only adds overhead.

Fixes: 1bddd758ba ("net/sched: sch_cake: share shaper state across sub-instances of cake_mq")
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
Link: https://patch.msgid.link/20260226-cake-mq-skip-sync-bandwidth-unlimited-v1-1-01830bb4db87@tu-berlin.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 18:35:40 -08:00
Eric Dumazet
29252397bc inet: annotate data-races around isk->inet_num
UDP/TCP lookups are using RCU, thus isk->inet_num accesses
should use READ_ONCE() and WRITE_ONCE() where needed.

Fixes: 3ab5aee7fe ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260225203545.1512417-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 17:16:59 -08:00
Alexei Starovoitov
b9c0a5c483 Merge branch 'fix-invariant-violation-for-single-value-tnums'
Paul Chaignon says:

====================
Fix invariant violation for single-value tnums

We're hitting an invariant violation in Cilium that sometimes leads to
BPF programs being rejected and Cilium failing to start [1]. As far as
I know this is the first case of invariant violation found in a real
program (i.e., not by a fuzzer). The following extract from verifier
logs shows what's happening:

  from 201 to 236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0
  236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0
  ; if (magic == MARK_MAGIC_HOST || magic == MARK_MAGIC_OVERLAY || magic == MARK_MAGIC_ENCRYPT) @ bpf_host.c:1337
  236: (16) if w9 == 0xe00 goto pc+45   ; R9=scalar(smin=umin=smin32=umin32=3585,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100))
  237: (16) if w9 == 0xf00 goto pc+1
  verifier bug: REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0xe01, 0xe00] s64=[0xe01, 0xe00] u32=[0xe01, 0xe00] s32=[0xe01, 0xe00] var_off=(0xe00, 0x0)

More details are given in the second patch, but in short, the verifier
should be able to detect that the false branch of instruction 237 is
never true. After instruction 236, the u64 range and the tnum overlap
in a single value, 0xf00.

The long-term solution to invariant violation is likely to rely on the
refinement + invariant violation check to detect dead branches, as
started by Eduard. To fix the current issue, we need something with
less refactoring that we can backport to affected kernels.

The solution implemented in the second patch is to improve the bounds
refinement to avoid this case. It relies on a new tnum helper,
tnum_step, first sent as an RFC in [2]. The last two patches extend and
update the selftests.

Link: https://github.com/cilium/cilium/issues/44216 [1]
Link: https://lore.kernel.org/bpf/20251107192328.2190680-2-harishankar.vishwanathan@gmail.com/ [2]

Changes in v3:
  - Fix commit description error spotted by AI bot.
  - Simplify constants in first two tests (Eduard).
  - Rework comment on third test (Eduard).
  - Add two new negative test cases (Eduard).
  - Rebased.
Changes in v2:
  - Add guard suggested by Hari in tnum_step, to avoid undefined
    behavior spotted by AI code review.
  - Add explanation diagrams in code as suggested by Eduard.
  - Rework conditions for readability as suggested by Eduard.
  - Updated reference to SMT formula.
  - Rebased.
====================

Link: https://patch.msgid.link/cover.1772225741.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:11:50 -08:00
Paul Chaignon
024cea2d64 selftests/bpf: Avoid simplification of crafted bounds test
The reg_bounds_crafted tests validate the verifier's range analysis
logic. They focus on the actual ranges and thus ignore the tnum. As a
consequence, they carry the assumption that the tested cases can be
reproduced in userspace without using the tnum information.

Unfortunately, the previous change the refinement logic breaks that
assumption for one test case:

  (u64)2147483648 (u32)<op> [4294967294; 0x100000000]

The tested bytecode is shown below. Without our previous improvement, on
the false branch of the condition, R7 is only known to have u64 range
[0xfffffffe; 0x100000000]. With our improvement, and using the tnum
information, we can deduce that R7 equals 0x100000000.

  19: (bc) w0 = w6                ; R6=0x80000000
  20: (bc) w0 = w7                ; R7=scalar(smin=umin=0xfffffffe,smax=umax=0x100000000,smin32=-2,smax32=0,var_off=(0x0; 0x1ffffffff))
  21: (be) if w6 <= w7 goto pc+3  ; R6=0x80000000 R7=0x100000000

R7's tnum is (0; 0x1ffffffff). On the false branch, regs_refine_cond_op
refines R7's u32 range to [0; 0x7fffffff]. Then, __reg32_deduce_bounds
refines the s32 range to 0 using u32 and finally also sets u32=0.
From this, __reg_bound_offset improves the tnum to (0; 0x100000000).
Finally, our previous patch uses this new tnum to deduce that it only
intersect with u64=[0xfffffffe; 0x100000000] in a single value:
0x100000000.

Because the verifier uses the tnum to reach this constant value, the
selftest is unable to reproduce it by only simulating ranges. The
solution implemented in this patch is to change the test case such that
there is more than one overlap value between u64 and the tnum. The max.
u64 value is thus changed from 0x100000000 to 0x300000000.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/50641c6a7ef39520595dcafa605692427c1006ec.1772225741.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:11:50 -08:00
Paul Chaignon
e6ad477d1b selftests/bpf: Test refinement of single-value tnum
This patch introduces selftests to cover the new bounds refinement
logic introduced in the previous patch. Without the previous patch,
the first two tests fail because of the invariant violation they
trigger. The last test fails because the R10 access is not detected as
dead code. In addition, all three tests fail because of R0 having a
non-constant value in the verifier logs.

In addition, the last two cases are covering the negative cases: when we
shouldn't refine the bounds because the u64 and tnum overlap in at least
two values.

Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/90d880c8cf587b9f7dc715d8961cd1b8111d01a8.1772225741.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:11:50 -08:00
Paul Chaignon
efc11a6678 bpf: Improve bounds when tnum has a single possible value
We're hitting an invariant violation in Cilium that sometimes leads to
BPF programs being rejected and Cilium failing to start [1]. The
following extract from verifier logs shows what's happening:

  from 201 to 236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0
  236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0
  ; if (magic == MARK_MAGIC_HOST || magic == MARK_MAGIC_OVERLAY || magic == MARK_MAGIC_ENCRYPT) @ bpf_host.c:1337
  236: (16) if w9 == 0xe00 goto pc+45   ; R9=scalar(smin=umin=smin32=umin32=3585,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100))
  237: (16) if w9 == 0xf00 goto pc+1
  verifier bug: REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0xe01, 0xe00] s64=[0xe01, 0xe00] u32=[0xe01, 0xe00] s32=[0xe01, 0xe00] var_off=(0xe00, 0x0)

We reach instruction 236 with two possible values for R9, 0xe00 and
0xf00. This is perfectly reflected in the tnum, but of course the ranges
are less accurate and cover [0xe00; 0xf00]. Taking the fallthrough path
at instruction 236 allows the verifier to reduce the range to
[0xe01; 0xf00]. The tnum is however not updated.

With these ranges, at instruction 237, the verifier is not able to
deduce that R9 is always equal to 0xf00. Hence the fallthrough pass is
explored first, the verifier refines the bounds using the assumption
that R9 != 0xf00, and ends up with an invariant violation.

This pattern of impossible branch + bounds refinement is common to all
invariant violations seen so far. The long-term solution is likely to
rely on the refinement + invariant violation check to detect dead
branches, as started by Eduard. To fix the current issue, we need
something with less refactoring that we can backport.

This patch uses the tnum_step helper introduced in the previous patch to
detect the above situation. In particular, three cases are now detected
in the bounds refinement:

1. The u64 range and the tnum only overlap in umin.
   u64:  ---[xxxxxx]-----
   tnum: --xx----------x-

2. The u64 range and the tnum only overlap in the maximum value
   represented by the tnum, called tmax.
   u64:  ---[xxxxxx]-----
   tnum: xx-----x--------

3. The u64 range and the tnum only overlap in between umin (excluded)
   and umax.
   u64:  ---[xxxxxx]-----
   tnum: xx----x-------x-

To detect these three cases, we call tnum_step(tnum, umin), which
returns the smallest member of the tnum greater than umin, called
tnum_next here. We're in case (1) if umin is part of the tnum and
tnum_next is greater than umax. We're in case (2) if umin is not part of
the tnum and tnum_next is equal to tmax. Finally, we're in case (3) if
umin is not part of the tnum, tnum_next is inferior or equal to umax,
and calling tnum_step a second time gives us a value past umax.

This change implements these three cases. With it, the above bytecode
looks as follows:

  0: (85) call bpf_get_prandom_u32#7    ; R0=scalar()
  1: (47) r0 |= 3584                    ; R0=scalar(smin=0x8000000000000e00,umin=umin32=3584,smin32=0x80000e00,var_off=(0xe00; 0xfffffffffffff1ff))
  2: (57) r0 &= 3840                    ; R0=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100))
  3: (15) if r0 == 0xe00 goto pc+2      ; R0=3840
  4: (15) if r0 == 0xf00 goto pc+1
  4: R0=3840
  6: (95) exit

In addition to the new selftests, this change was also verified with
Agni [3]. For the record, the raw SMT is available at [4]. The property
it verifies is that: If a concrete value x is contained in all input
abstract values, after __update_reg_bounds, it will continue to be
contained in all output abstract values.

Link: https://github.com/cilium/cilium/issues/44216 [1]
Link: https://pchaigno.github.io/test-verifier-complexity.html [2]
Link: https://github.com/bpfverif/agni [3]
Link: https://pastebin.com/raw/naCfaqNx [4]
Fixes: 0df1a55afa ("bpf: Warn on internal verifier errors")
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Tested-by: Marco Schirrmeister <mschirrmeister@gmail.com>
Co-developed-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Signed-off-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/ef254c4f68be19bd393d450188946821c588565d.1772225741.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:11:50 -08:00
Harishankar Vishwanathan
76e954155b bpf: Introduce tnum_step to step through tnum's members
This commit introduces tnum_step(), a function that, when given t, and a
number z returns the smallest member of t larger than z. The number z
must be greater or equal to the smallest member of t and less than the
largest member of t.

The first step is to compute j, a number that keeps all of t's known
bits, and matches all unknown bits to z's bits. Since j is a member of
the t, it is already a candidate for result. However, we want our result
to be (minimally) greater than z.

There are only two possible cases:

(1) Case j <= z. In this case, we want to increase the value of j and
make it > z.
(2) Case j > z. In this case, we want to decrease the value of j while
keeping it > z.

(Case 1) j <= z

t = xx11x0x0
z = 10111101 (189)
j = 10111000 (184)
         ^
         k

(Case 1.1) Let's first consider the case where j < z. We will address j
== z later.

Since z > j, there had to be a bit position that was 1 in z and a 0 in
j, beyond which all positions of higher significance are equal in j and
z. Further, this position could not have been unknown in a, because the
unknown positions of a match z. This position had to be a 1 in z and
known 0 in t.

Let k be position of the most significant 1-to-0 flip. In our example, k
= 3 (starting the count at 1 at the least significant bit).  Setting (to
1) the unknown bits of t in positions of significance smaller than
k will not produce a result > z. Hence, we must set/unset the unknown
bits at positions of significance higher than k. Specifically, we look
for the next larger combination of 1s and 0s to place in those
positions, relative to the combination that exists in z. We can achieve
this by concatenating bits at unknown positions of t into an integer,
adding 1, and writing the bits of that result back into the
corresponding bit positions previously extracted from z.

>From our example, considering only positions of significance greater
than k:

t =  xx..x
z =  10..1
    +    1
     -----
     11..0

This is the exact combination 1s and 0s we need at the unknown bits of t
in positions of significance greater than k. Further, our result must
only increase the value minimally above z. Hence, unknown bits in
positions of significance smaller than k should remain 0. We finally
have,

result = 11110000 (240)

(Case 1.2) Now consider the case when j = z, for example

t = 1x1x0xxx
z = 10110100 (180)
j = 10110100 (180)

Matching the unknown bits of the t to the bits of z yielded exactly z.
To produce a number greater than z, we must set/unset the unknown bits
in t, and *all* the unknown bits of t candidates for being set/unset. We
can do this similar to Case 1.1, by adding 1 to the bits extracted from
the masked bit positions of z. Essentially, this case is equivalent to
Case 1.1, with k = 0.

t =  1x1x0xxx
z =  .0.1.100
    +       1
    ---------
     .0.1.101

This is the exact combination of bits needed in the unknown positions of
t. After recalling the known positions of t, we get

result = 10110101 (181)

(Case 2) j > z

t = x00010x1
z = 10000010 (130)
j = 10001011 (139)
	^
	k

Since j > z, there had to be a bit position which was 0 in z, and a 1 in
j, beyond which all positions of higher significance are equal in j and
z. This position had to be a 0 in z and known 1 in t. Let k be the
position of the most significant 0-to-1 flip. In our example, k = 4.

Because of the 0-to-1 flip at position k, a member of t can become
greater than z if the bits in positions greater than k are themselves >=
to z. To make that member *minimally* greater than z, the bits in
positions greater than k must be exactly = z. Hence, we simply match all
of t's unknown bits in positions more significant than k to z's bits. In
positions less significant than k, we set all t's unknown bits to 0
to retain minimality.

In our example, in positions of greater significance than k (=4),
t=x000. These positions are matched with z (1000) to produce 1000. In
positions of lower significance than k, t=10x1. All unknown bits are set
to 0 to produce 1001. The final result is:

result = 10001001 (137)

This concludes the computation for a result > z that is a member of t.

The procedure for tnum_step() in this commit implements the idea
described above. As a proof of correctness, we verified the algorithm
against a logical specification of tnum_step. The specification asserts
the following about the inputs t, z and output res that:

1. res is a member of t, and
2. res is strictly greater than z, and
3. there does not exist another value res2 such that
	3a. res2 is also a member of t, and
	3b. res2 is greater than z
	3c. res2 is smaller than res

We checked the implementation against this logical specification using
an SMT solver. The verification formula in SMTLIB format is available
at [1]. The verification returned an "unsat": indicating that no input
assignment exists for which the implementation and the specification
produce different outputs.

In addition, we also automatically generated the logical encoding of the
C implementation using Agni [2] and verified it against the same
specification. This verification also returned an "unsat", confirming
that the implementation is equivalent to the specification. The formula
for this check is also available at [3].

Link: https://pastebin.com/raw/2eRWbiit [1]
Link: https://github.com/bpfverif/agni [2]
Link: https://pastebin.com/raw/EztVbBJ2 [3]
Co-developed-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Signed-off-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Co-developed-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Link: https://lore.kernel.org/r/93fdf71910411c0f19e282ba6d03b4c65f9c5d73.1772225741.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:11:50 -08:00
Paul Moses
62413a9c3c net/sched: act_gate: snapshot parameters with RCU on replace
The gate action can be replaced while the hrtimer callback or dump path is
walking the schedule list.

Convert the parameters to an RCU-protected snapshot and swap updates under
tcf_lock, freeing the previous snapshot via call_rcu(). When REPLACE omits
the entry list, preserve the existing schedule so the effective state is
unchanged.

Fixes: a51c328df3 ("net: qos: introduce a gate control flow action")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses <p@1g4.org>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260223150512.2251594-2-p@1g4.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-27 16:10:36 -08:00
Alexei Starovoitov
8c0d9e178d Merge branch 'bpf-cpumap-devmap-fix-per-cpu-bulk-queue-races-on-preempt_rt'
Jiayuan Chen says:

====================
bpf: Fix per-CPU bulk queue races on PREEMPT_RT

On PREEMPT_RT kernels, local_bh_disable() only calls migrate_disable()
(when PREEMPT_RT_NEEDS_BH_LOCK is not set) and does not disable
preemption. This means CFS scheduling can preempt a task inside the
per-CPU bulk queue (bq) operations in cpumap and devmap, allowing
another task on the same CPU to concurrently access the same bq,
leading to use-after-free, list corruption, and kernel panics.

Patch 1 fixes the cpumap race in bq_flush_to_queue(), originally
reported by syzbot [1].

Patch 2 fixes the same class of race in devmap's bq_xmit_all(),
identified by code inspection after Sebastian Andrzej Siewior pointed
out that devmap has the same per-CPU bulk queue pattern [2].

Both patches use local_lock_nested_bh() to serialize access to the
per-CPU bq. On non-RT this is a pure lockdep annotation with no
overhead; on PREEMPT_RT it provides a per-CPU sleeping lock.

To reproduce the devmap race, insert an mdelay(100) in bq_xmit_all()
after "cnt = bq->count" and before the actual transmit loop. Then pin
two threads to the same CPU, each running BPF_PROG_TEST_RUN with an XDP
program that redirects to a DEVMAP entry (e.g. a veth pair). CFS
timeslicing during the mdelay window causes interleaving. Without the
fix, KASAN reports null-ptr-deref due to operating on freed frames:

  BUG: KASAN: null-ptr-deref in __build_skb_around+0x22d/0x340
  Write of size 32 at addr 0000000000000d50 by task devmap_race_rep/449

  CPU: 0 UID: 0 PID: 449 Comm: devmap_race_rep Not tainted 6.19.0+ #31 PREEMPT_RT
  Call Trace:
   <TASK>
   __build_skb_around+0x22d/0x340
   build_skb_around+0x25/0x260
   __xdp_build_skb_from_frame+0x103/0x860
   veth_xdp_rcv_bulk_skb.isra.0+0x162/0x320
   veth_xdp_rcv.constprop.0+0x61e/0xbb0
   veth_poll+0x280/0xb50
   __napi_poll.constprop.0+0xa5/0x590
   net_rx_action+0x4b0/0xea0
   handle_softirqs.isra.0+0x1b3/0x780
   __local_bh_enable_ip+0x12a/0x240
   xdp_test_run_batch.constprop.0+0xedd/0x1f60
   bpf_test_run_xdp_live+0x304/0x640
   bpf_prog_test_run_xdp+0xd24/0x1b70
   __sys_bpf+0x61c/0x3e00
   </TASK>

  Kernel panic - not syncing: Fatal exception in interrupt

[1] https://lore.kernel.org/all/69369331.a70a0220.38f243.009d.GAE@google.com/T/
[2] https://lore.kernel.org/bpf/20260212023634.366343-1-jiayuan.chen@linux.dev/

v3 -> v4: https://lore.kernel.org/all/20260213034018.284146-1-jiayuan.chen@linux.dev/
- Move panic trace to cover letter. (Sebastian Andrzej Siewior)
- Add Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> to both patches
  from cover letter.

v2 -> v3: https://lore.kernel.org/bpf/20260212023634.366343-1-jiayuan.chen@linux.dev/
- Fix commit message: remove incorrect "spin_lock() becomes rt_mutex"
claim, the per-CPU bq has no spin_lock at all. (Sebastian Andrzej Siewior)
- Fix commit message: accurately describe local_lock_nested_bh()
behavior instead of referencing local_lock(). (Sebastian Andrzej Siewior)
- Remove incomplete discussion of snapshot alternative.
(Sebastian Andrzej Siewior)
- Remove panic trace from commit message. (Sebastian Andrzej Siewior)
- Add patch 2/2 for devmap, same race pattern. (Sebastian Andrzej Siewior)

v1 -> v2: https://lore.kernel.org/bpf/20260211064417.196401-1-jiayuan.chen@linux.dev/
- Use local_lock_nested_bh()/local_unlock_nested_bh() instead of
local_lock()/local_unlock(), since these paths already run under
local_bh_disable(). (Sebastian Andrzej Siewior)
- Replace "Caller must hold bq->bq_lock" comment with
lockdep_assert_held() in bq_flush_to_queue(). (Sebastian Andrzej Siewior)
- Fix Fixes tag to 3253cb49cb ("softirq: Allow to drop the
softirq-BKL lock on PREEMPT_RT") which is the actual commit that
makes the race possible. (Sebastian Andrzej Siewior)
====================

Link: https://patch.msgid.link/20260225121459.183121-1-jiayuan.chen@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:08:34 -08:00
Jiayuan Chen
1872e75375 bpf: Fix race in devmap on PREEMPT_RT
On PREEMPT_RT kernels, the per-CPU xdp_dev_bulk_queue (bq) can be
accessed concurrently by multiple preemptible tasks on the same CPU.

The original code assumes bq_enqueue() and __dev_flush() run atomically
with respect to each other on the same CPU, relying on
local_bh_disable() to prevent preemption. However, on PREEMPT_RT,
local_bh_disable() only calls migrate_disable() (when
PREEMPT_RT_NEEDS_BH_LOCK is not set) and does not disable
preemption, which allows CFS scheduling to preempt a task during
bq_xmit_all(), enabling another task on the same CPU to enter
bq_enqueue() and operate on the same per-CPU bq concurrently.

This leads to several races:

1. Double-free / use-after-free on bq->q[]: bq_xmit_all() snapshots
   cnt = bq->count, then iterates bq->q[0..cnt-1] to transmit frames.
   If preempted after the snapshot, a second task can call bq_enqueue()
   -> bq_xmit_all() on the same bq, transmitting (and freeing) the
   same frames. When the first task resumes, it operates on stale
   pointers in bq->q[], causing use-after-free.

2. bq->count and bq->q[] corruption: concurrent bq_enqueue() modifying
   bq->count and bq->q[] while bq_xmit_all() is reading them.

3. dev_rx/xdp_prog teardown race: __dev_flush() clears bq->dev_rx and
   bq->xdp_prog after bq_xmit_all(). If preempted between
   bq_xmit_all() return and bq->dev_rx = NULL, a preempting
   bq_enqueue() sees dev_rx still set (non-NULL), skips adding bq to
   the flush_list, and enqueues a frame. When __dev_flush() resumes,
   it clears dev_rx and removes bq from the flush_list, orphaning the
   newly enqueued frame.

4. __list_del_clearprev() on flush_node: similar to the cpumap race,
   both tasks can call __list_del_clearprev() on the same flush_node,
   the second dereferences the prev pointer already set to NULL.

The race between task A (__dev_flush -> bq_xmit_all) and task B
(bq_enqueue -> bq_xmit_all) on the same CPU:

  Task A (xdp_do_flush)          Task B (ndo_xdp_xmit redirect)
  ----------------------         --------------------------------
  __dev_flush(flush_list)
    bq_xmit_all(bq)
      cnt = bq->count  /* e.g. 16 */
      /* start iterating bq->q[] */
    <-- CFS preempts Task A -->
                                   bq_enqueue(dev, xdpf)
                                     bq->count == DEV_MAP_BULK_SIZE
                                     bq_xmit_all(bq, 0)
                                       cnt = bq->count  /* same 16! */
                                       ndo_xdp_xmit(bq->q[])
                                       /* frames freed by driver */
                                       bq->count = 0
    <-- Task A resumes -->
      ndo_xdp_xmit(bq->q[])
      /* use-after-free: frames already freed! */

Fix this by adding a local_lock_t to xdp_dev_bulk_queue and acquiring
it in bq_enqueue() and __dev_flush(). These paths already run under
local_bh_disable(), so use local_lock_nested_bh() which on non-RT is
a pure annotation with no overhead, and on PREEMPT_RT provides a
per-CPU sleeping lock that serializes access to the bq.

Fixes: 3253cb49cb ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT")
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Link: https://lore.kernel.org/r/20260225121459.183121-3-jiayuan.chen@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:08:10 -08:00
Jiayuan Chen
869c63d597 bpf: Fix race in cpumap on PREEMPT_RT
On PREEMPT_RT kernels, the per-CPU xdp_bulk_queue (bq) can be accessed
concurrently by multiple preemptible tasks on the same CPU.

The original code assumes bq_enqueue() and __cpu_map_flush() run
atomically with respect to each other on the same CPU, relying on
local_bh_disable() to prevent preemption. However, on PREEMPT_RT,
local_bh_disable() only calls migrate_disable() (when
PREEMPT_RT_NEEDS_BH_LOCK is not set) and does not disable
preemption, which allows CFS scheduling to preempt a task during
bq_flush_to_queue(), enabling another task on the same CPU to enter
bq_enqueue() and operate on the same per-CPU bq concurrently.

This leads to several races:

1. Double __list_del_clearprev(): after bq->count is reset in
   bq_flush_to_queue(), a preempting task can call bq_enqueue() ->
   bq_flush_to_queue() on the same bq when bq->count reaches
   CPU_MAP_BULK_SIZE. Both tasks then call __list_del_clearprev()
   on the same bq->flush_node, the second call dereferences the
   prev pointer that was already set to NULL by the first.

2. bq->count and bq->q[] races: concurrent bq_enqueue() can corrupt
   the packet queue while bq_flush_to_queue() is processing it.

The race between task A (__cpu_map_flush -> bq_flush_to_queue) and
task B (bq_enqueue -> bq_flush_to_queue) on the same CPU:

  Task A (xdp_do_flush)          Task B (cpu_map_enqueue)
  ----------------------         ------------------------
  bq_flush_to_queue(bq)
    spin_lock(&q->producer_lock)
    /* flush bq->q[] to ptr_ring */
    bq->count = 0
    spin_unlock(&q->producer_lock)
                                   bq_enqueue(rcpu, xdpf)
    <-- CFS preempts Task A -->      bq->q[bq->count++] = xdpf
                                     /* ... more enqueues until full ... */
                                     bq_flush_to_queue(bq)
                                       spin_lock(&q->producer_lock)
                                       /* flush to ptr_ring */
                                       spin_unlock(&q->producer_lock)
                                       __list_del_clearprev(flush_node)
                                         /* sets flush_node.prev = NULL */
    <-- Task A resumes -->
    __list_del_clearprev(flush_node)
      flush_node.prev->next = ...
      /* prev is NULL -> kernel oops */

Fix this by adding a local_lock_t to xdp_bulk_queue and acquiring it
in bq_enqueue() and __cpu_map_flush(). These paths already run under
local_bh_disable(), so use local_lock_nested_bh() which on non-RT is
a pure annotation with no overhead, and on PREEMPT_RT provides a
per-CPU sleeping lock that serializes access to the bq.

To reproduce, insert an mdelay(100) between bq->count = 0 and
__list_del_clearprev() in bq_flush_to_queue(), then run reproducer
provided by syzkaller.

Fixes: 3253cb49cb ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT")
Reported-by: syzbot+2b3391f44313b3983e91@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69369331.a70a0220.38f243.009d.GAE@google.com/T/
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Link: https://lore.kernel.org/r/20260225121459.183121-2-jiayuan.chen@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 16:07:14 -08:00
Alexei Starovoitov
5263e30fff Merge branch 'close-race-in-freeing-special-fields-and-map-value'
Kumar Kartikeya Dwivedi says:

====================
Close race in freeing special fields and map value

There exists a race across various map types where the freeing of
special fields (tw, timer, wq, kptr, etc.) can be done eagerly when a
logical delete operation is done on a map value, such that the program
which continues to have access to such a map value can recreate the
fields and cause them to leak.

The set contains fixes for this case. It is a continuation of Mykyta's
previous attempt in [0], but applies to all fields. A test is included
which reproduces the bug reliably in absence of the fixes.

Local Storage Benchmarks
------------------------
Evaluation Setup: Benchmarked on a dual-socket Intel Xeon Gold 6348 (Ice
Lake) @ 2.60GHz (56 cores / 112 threads), with the CPU governor set to
performance. Bench was pinned to a single NUMA node throughout the test.

Benchmark comes from [1] using the following command:
./bench -p 1 local-storage-create --storage-type <socket,task> --batch-size <16,32,64>

Before the test, 10 runs of all cases ([socket|task] x 3 batch sizes x 7
iterations per batch size) are done to warm up and prime the machine.

Then, 3 runs of all cases are done (with and without the patch, across
reboots).

For each comparison, we have 21 samples, i.e. per batch size (e.g.
socket 16) of a given local storage, we have 3 runs x 7 iterations.

The statistics (mean, median, stddev) and t-test is done for each
scenario (local storage and batch size pair) individually (21 samples
for either case). All values are for local storage creations in thousand
creations / sec (k/s).

	       Baseline (without patch)               With patch                       Delta
     Case      Median        Mean   Std. Dev.   Median        Mean   Std. Dev.   Median       %
---------------------------------------------------------------------------------------------------
socket 16     432.026     431.941    1.047     431.347     431.953    1.635      -0.679    -0.16%
socket 32     432.641     432.818    1.535     432.488     432.302    1.508      -0.153    -0.04%
socket 64     431.504     431.996    1.337     429.145     430.326    2.469      -2.359    -0.55%
  task 16      38.816      39.382    1.456      39.657      39.337    1.831      +0.841    +2.17%
  task 32      38.815      39.644    2.690      38.721      39.122    1.636      -0.094    -0.24%
  task 64      37.562      38.080    1.701      39.554      38.563    1.689      +1.992    +5.30%

The cases for socket are within the range of noise, and improvements in task
local storage are due to high variance (CV ~4%-6% across batch sizes). The only
statistically significant case worth mentioning is socket with batch size 64
with p-value from t-test < 0.05, but the absolute difference is small (~2k/s).

TL;DR there doesn't appear to be any significant regression or improvement.

  [0]: https://lore.kernel.org/bpf/20260216131341.1285427-1-mykyta.yatsenko5@gmail.com
  [1]: https://lore.kernel.org/bpf/20260205222916.1788211-1-ameryhung@gmail.com

Changelog:
----------
v2 -> v3
v2: https://lore.kernel.org/bpf/20260227052031.3988575-1-memxor@gmail.com

 * Add syzbot Tested-by.
 * Add Amery's Reviewed-by.
 * Fix missing rcu_dereference_check() in __bpf_selem_free_rcu. (BPF CI Bot)
 * Remove migrate_disable() in bpf_selem_free_rcu. (Alexei)

v1 -> v2
v1: https://lore.kernel.org/bpf/20260225185121.2057388-1-memxor@gmail.com

 * Add Paul's Reviewed-by.
 * Fix use-after-free in accessing bpf_mem_alloc embedded in map. (syzbot CI)
 * Add benchmark numbers for local storage.
 * Add extra test case for per-cpu hashmap coverage with up to 16 refcount leaks.
 * Target bpf tree.
====================

Link: https://patch.msgid.link/20260227224806.646888-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 15:39:01 -08:00
Kumar Kartikeya Dwivedi
2939d7b3b0 selftests/bpf: Add tests for special fields races
Add a couple of tests to ensure that the refcount drops to zero when we
exercise the race where creation of a special field succeeds the logical
bpf_obj_free_fields done when deleting an element. Prior to previous
changes, the fields would be freed eagerly and repopulate and end up
leaking, causing the reference to not drop down correctly. Running this
test on a kernel without fixes will cause a hang in delete_module, since
the module reference stays active due to the leaked kptr not dropping
it. After the fixes tests succeed as expected.

Reviewed-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 15:39:00 -08:00
Kumar Kartikeya Dwivedi
baa35b3cb6 bpf: Retire rcu_trace_implies_rcu_gp() from local storage
This assumption will always hold going forward, hence just remove the
various checks and assume it is true with a comment for the uninformed
reader.

Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 15:39:00 -08:00
Kumar Kartikeya Dwivedi
f41deee082 bpf: Delay freeing fields in local storage
Currently, when use_kmalloc_nolock is false, the freeing of fields for a
local storage selem is done eagerly before waiting for the RCU or RCU
tasks trace grace period to elapse. This opens up a window where the
program which has access to the selem can recreate the fields after the
freeing of fields is done eagerly, causing memory leaks when the element
is finally freed and returned to the kernel.

Make a few changes to address this. First, delay the freeing of fields
until after the grace periods have expired using a __bpf_selem_free_rcu
wrapper which is eventually invoked after transitioning through the
necessary number of grace period waits. Replace usage of the kfree_rcu
with call_rcu to be able to take a custom callback. Finally, care needs
to be taken to extend the rcu barriers for all cases, and not just when
use_kmalloc_nolock is true, as RCU and RCU tasks trace callbacks can be
in flight for either case and access the smap field, which is used to
obtain the BTF record to walk over special fields in the map value.

While we're at it, drop migrate_disable() from bpf_selem_free_rcu, since
migration should be disabled for RCU callbacks already.

Fixes: 9bac675e63 ("bpf: Postpone bpf_obj_free_fields to the rcu callback")
Reviewed-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 15:39:00 -08:00
Kumar Kartikeya Dwivedi
ae51772b1e bpf: Lose const-ness of map in map_check_btf()
BPF hash map may now use the map_check_btf() callback to decide whether
to set a dtor on its bpf_mem_alloc or not. Unlike C++ where members can
opt out of const-ness using mutable, we must lose the const qualifier on
the callback such that we can avoid the ugly cast. Make the change and
adjust all existing users, and lose the comment in hashtab.c.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 15:39:00 -08:00
Kumar Kartikeya Dwivedi
1df97a7453 bpf: Register dtor for freeing special fields
There is a race window where BPF hash map elements can leak special
fields if the program with access to the map value recreates these
special fields between the check_and_free_fields done on the map value
and its eventual return to the memory allocator.

Several ways were explored prior to this patch, most notably [0] tried
to use a poison value to reject attempts to recreate special fields for
map values that have been logically deleted but still accessible to BPF
programs (either while sitting in the free list or when reused). While
this approach works well for task work, timers, wq, etc., it is harder
to apply the idea to kptrs, which have a similar race and failure mode.

Instead, we change bpf_mem_alloc to allow registering destructor for
allocated elements, such that when they are returned to the allocator,
any special fields created while they were accessible to programs in the
mean time will be freed. If these values get reused, we do not free the
fields again before handing the element back. The special fields thus
may remain initialized while the map value sits in a free list.

When bpf_mem_alloc is retired in the future, a similar concept can be
introduced to kmalloc_nolock-backed kmem_cache, paired with the existing
idea of a constructor.

Note that the destructor registration happens in map_check_btf, after
the BTF record is populated and (at that point) avaiable for inspection
and duplication. Duplication is necessary since the freeing of embedded
bpf_mem_alloc can be decoupled from actual map lifetime due to logic
introduced to reduce the cost of rcu_barrier()s in mem alloc free path in
9f2c6e96c6 ("bpf: Optimize rcu_barrier usage between hash map and bpf_mem_alloc.").

As such, once all callbacks are done, we must also free the duplicated
record. To remove dependency on the bpf_map itself, also stash the key
size of the map to obtain value from htab_elem long after the map is
gone.

  [0]: https://lore.kernel.org/bpf/20260216131341.1285427-1-mykyta.yatsenko5@gmail.com

Fixes: 14a324f6a6 ("bpf: Wire up freeing of referenced kptr")
Fixes: 1bfbc267ec ("bpf: Enable bpf_timer and bpf_wq in any context")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: syzbot@syzkaller.appspotmail.com
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-27 15:39:00 -08:00
Linus Torvalds
4d349ee5c7 Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Will Deacon:
 "The diffstat is dominated by changes to our TLB invalidation errata
  handling and the introduction of a new GCS selftest to catch one of
  the issues that is fixed here relating to PROT_NONE mappings.

   - Fix cpufreq warning due to attempting a cross-call with interrupts
     masked when reading local AMU counters

   - Fix DEBUG_PREEMPT warning from the delay loop when it tries to
     access per-cpu errata workaround state for the virtual counter

   - Re-jig and optimise our TLB invalidation errata workarounds in
     preparation for more hardware brokenness

   - Fix GCS mappings to interact properly with PROT_NONE and to avoid
     corrupting the pte on CPUs with FEAT_LPA2

   - Fix ioremap_prot() to extract only the memory attributes from the
     user pte and ignore all the other 'prot' bits"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: topology: Fix false warning in counters_read_on_cpu() for same-CPU reads
  arm64: Fix sampling the "stable" virtual counter in preemptible section
  arm64: tlb: Optimize ARM64_WORKAROUND_REPEAT_TLBI
  arm64: tlb: Allow XZR argument to TLBI ops
  kselftest: arm64: Check access to GCS after mprotect(PROT_NONE)
  arm64: gcs: Honour mprotect(PROT_NONE) on shadow stack mappings
  arm64: gcs: Do not set PTE_SHARED on GCS mappings if FEAT_LPA2 is enabled
  arm64: io: Extract user memory type in ioremap_prot()
  arm64: io: Rename ioremap_prot() to __ioremap_prot()
2026-02-27 13:40:30 -08:00
Linus Torvalds
1c63df24be Merge tag 'pci-v7.0-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
Pull pci fixes from Bjorn Helgaas:

 - Update MAINTAINERS email address (Shawn Guo)

 - Refresh cached Endpoint driver MSI Message Address to fix a v7.0
   regression when kernel changes the address after firmware has
   configured it (Niklas Cassel)

 - Flush Endpoint MSI-X writes so they complete before the outbound ATU
   entry is unmapped (Niklas Cassel)

 - Correct the PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value, which broke VMM use
   of PCI capabilities (Bjorn Helgaas)

* tag 'pci-v7.0-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci:
  PCI: Correct PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value
  PCI: dwc: ep: Flush MSI-X write before unmapping its ATU entry
  PCI: dwc: ep: Refresh MSI Message Address cache on change
  MAINTAINERS: Update Shawn Guo's address for HiSilicon PCIe controller driver
2026-02-27 13:32:52 -08:00
Christian Brauner
10047142d6 Merge patch series "tighten nstree visibility checks"
Christian Brauner <brauner@kernel.org> says:

Listing various namespaces is currently only scoped on owning namespace.
We can make this more fine-grained so that we scope visibility even
tighter. To make it possible to change behavior restrict visibility for
now. This shouldn't be a big deal as there aren't actual large users out
there and paves the way to make this even cleaner in the future.

* patches from https://patch.msgid.link/20260226-work-visibility-fixes-v1-0-d2c2853313bd@kernel.org:
  selftests: fix mntns iteration selftests
  nstree: tighten permission checks for listing
  nsfs: tighten permission checks for handle opening
  nsfs: tighten permission checks for ns iteration ioctls

Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-0-d2c2853313bd@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-27 22:00:19 +01:00
Christian Brauner
4c7b2ec23c selftests: fix mntns iteration selftests
Now that we changed permission checking make sure that we reflect that
in the selftests.

Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-4-d2c2853313bd@kernel.org
Fixes: 9d87b10673 ("selftests: add tests for mntns iteration")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@kernel.org # v6.14+
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-27 22:00:11 +01:00
Christian Brauner
8d76afe84f nstree: tighten permission checks for listing
Even privileged services should not necessarily be able to see other
privileged service's namespaces so they can't leak information to each
other. Use may_see_all_namespaces() helper that centralizes this policy
until the nstree adapts.

Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-3-d2c2853313bd@kernel.org
Fixes: 76b6f5dfb3 ("nstree: add listns()")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@kernel.org # v6.19+
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-27 22:00:11 +01:00
Christian Brauner
d2324a9317 nsfs: tighten permission checks for handle opening
Even privileged services should not necessarily be able to see other
privileged service's namespaces so they can't leak information to each
other. Use may_see_all_namespaces() helper that centralizes this policy
until the nstree adapts.

Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-2-d2c2853313bd@kernel.org
Fixes: 5222470b2f ("nsfs: support file handles")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@kernel.org # v6.18+
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-27 22:00:11 +01:00
Christian Brauner
e6b899f080 nsfs: tighten permission checks for ns iteration ioctls
Even privileged services should not necessarily be able to see other
privileged service's namespaces so they can't leak information to each
other. Use may_see_all_namespaces() helper that centralizes this policy
until the nstree adapts.

Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-1-d2c2853313bd@kernel.org
Fixes: a1d220d9da ("nsfs: iterate through mount namespaces")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@kernel.org # v6.12+
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-27 22:00:08 +01:00
Anna Schumaker
4529e00154 NFS: Fix NFS KConfig typos
Two issues were noticed after the NFS v4.0 KConfig changes were merged
upstream. First, the text of CONFIG_NFS_V4 should not encourage people
to select it if they are unsure. Second, the new CONFIG_NFS_V4_0 option
should default to "on" instead of "off" to avoid breaking people's
setups if they are using NFS v4.0.

Reported-by: Niklas Cassel <cassel@kernel.org>
Reported-by: Geert Uytterhoeven <geert+renesas@glider.be>
Fixes: 4e02693525 ("NFS: Add a way to disable NFS v4.0 via KConfig")
Fixes: 7537db2480 ("NFS: Merge CONFIG_NFS_V4_1 with CONFIG_NFS_V4")
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2026-02-27 15:42:14 -05:00
Eric Badger
7b6275c80a xprtrdma: Decrement re_receiving on the early exit paths
In the event that rpcrdma_post_recvs() fails to create a work request
(due to memory allocation failure, say) or otherwise exits early, we
should decrement ep->re_receiving before returning. Otherwise we will
hang in rpcrdma_xprt_drain() as re_receiving will never reach zero and
the completion will never be triggered.

On a system with high memory pressure, this can appear as the following
hung task:

    INFO: task kworker/u385:17:8393 blocked for more than 122 seconds.
          Tainted: G S          E       6.19.0 #3
    "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
    task:kworker/u385:17 state:D stack:0     pid:8393  tgid:8393  ppid:2      task_flags:0x4248060 flags:0x00080000
    Workqueue: xprtiod xprt_autoclose [sunrpc]
    Call Trace:
     <TASK>
     __schedule+0x48b/0x18b0
     ? ib_post_send_mad+0x247/0xae0 [ib_core]
     schedule+0x27/0xf0
     schedule_timeout+0x104/0x110
     __wait_for_common+0x98/0x180
     ? __pfx_schedule_timeout+0x10/0x10
     wait_for_completion+0x24/0x40
     rpcrdma_xprt_disconnect+0x444/0x460 [rpcrdma]
     xprt_rdma_close+0x12/0x40 [rpcrdma]
     xprt_autoclose+0x5f/0x120 [sunrpc]
     process_one_work+0x191/0x3e0
     worker_thread+0x2e3/0x420
     ? __pfx_worker_thread+0x10/0x10
     kthread+0x10d/0x230
     ? __pfx_kthread+0x10/0x10
     ret_from_fork+0x273/0x2b0
     ? __pfx_kthread+0x10/0x10
     ret_from_fork_asm+0x1a/0x30

Fixes: 15788d1d10 ("xprtrdma: Do not refresh Receive Queue while it is draining")
Signed-off-by: Eric Badger <ebadger@purestorage.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2026-02-27 15:42:14 -05:00
Jakub Kicinski
3d17d76d1f io_uring/zcrx: don't set rx_page_size when not requested
The rx_buf_len parameter was recently added to the Rx zero-copy
implementation. The expectation is that when not set system will
maintain previous behavior and use the default buffer size (PAGE_SIZE).

This works correctly at the iouring level, but we don't preserve
the same "zero means default" semantics when registering the memory
provider on the netdev. mp_param.rx_page_size is unconditionally
set to PAGE_SIZE. This causes __net_mp_open_rxq() to check for
QCFG_RX_PAGE_SIZE support in the driver, and return -EOPNOTSUPP
for drivers that don't advertise it -- even though the user never
asked for large buffers.

Only set mp_param.rx_page_size when rx_buf_len was explicitly provided,
so that the default page size path works on all zcrx-capable drivers.
mlx5 and fbnic only support 4kB pages in the current release.

Fixes: 795663b4d1 ("io_uring/zcrx: implement large rx buffer support")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-27 12:32:49 -07:00
David Carlier
032e084f0d tools/sched_ext: fix strtoul() misuse in scx_hotplug_seq()
scx_hotplug_seq() uses strtoul() but validates the result with a
negative check (val < 0), which can never be true for an unsigned
return value.

Use the endptr mechanism to verify the entire string was consumed,
and check errno == ERANGE for overflow detection.

Signed-off-by: David Carlier <devnexen@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-27 09:17:44 -10:00
Linus Torvalds
aed968f8a6 Merge tag 'cxl-fixes-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl fixes from Dave Jiang:

 - Fix incorrect usages of decoder flags

 - Validate payload size before accessing contents

 - Fix race condition when creating nvdimm objects

 - Fix deadlock on attach failure

* tag 'cxl-fixes-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl/region: Test CXL_DECODER_F_NORMALIZED_ADDRESSING as a bitmask
  cxl: Test CXL_DECODER_F_LOCK as a bitmask
  cxl/mbox: validate payload size before accessing contents in cxl_payload_from_user_allowed()
  cxl: Fix race of nvdimm_bus object when creating nvdimm objects
  cxl: Move devm_cxl_add_nvdimm_bridge() to cxl_pmem.ko
  cxl/port: Hold port host lock during dport adding.
  cxl/port: Introduce port_to_host() helper
  cxl/memdev: fix deadlock in cxl_memdev_autoremove() on attach failure
2026-02-27 10:52:57 -08:00
Linus Torvalds
962336b9ff Merge tag 'mmc-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc
Pull MMC fixes from Ulf Hansson:
 "MMC core:
   - Avoid bitfield RMW for claim/retune flags

  MMC host:
   - dw_mmc-rockchip: Fix runtime PM support for internal phase support
   - mmci: Fix device_node reference leak in of_get_dml_pipe_index()
   - sdhci-brcmstb: Use correct register offset for V1 pin_sel restore"

* tag 'mmc-v7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc:
  mmc: core: Avoid bitfield RMW for claim/retune flags
  mmc: sdhci-brcmstb: use correct register offset for V1 pin_sel restore
  mmc: dw_mmc-rockchip: Fix runtime PM support for internal phase support
  mmc: mmci: Fix device_node reference leak in of_get_dml_pipe_index()
2026-02-27 10:49:54 -08:00
Linus Torvalds
78d964c47f Merge tag 'block-7.0-20260227' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block fixes from Jens Axboe:
 "Two sets of fixes, one for drbd, and one for the zoned loop driver"

* tag 'block-7.0-20260227' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  zloop: check for spurious options passed to remove
  zloop: advertise a volatile write cache
  drbd: fix null-pointer dereference on local read error
  drbd: Replace deprecated strcpy with strscpy
  drbd: fix "LOGIC BUG" in drbd_al_begin_io_nonblock()
2026-02-27 10:42:02 -08:00
Linus Torvalds
530b0b61df Merge tag 'io_uring-7.0-20260227' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:
 "Just two minor patches in here, ensuring the use of READ_ONCE() for
  sqe field reading is consistent across the codebase. There were two
  missing cases, now they are covered too"

* tag 'io_uring-7.0-20260227' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/timeout: READ_ONCE sqe->addr
  io_uring/cmd_net: use READ_ONCE() for ->addr3 read
2026-02-27 10:39:11 -08:00
Linus Torvalds
764a167f54 Merge tag 'xfs-fixes-7.0-rc2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino:
 "Nothing reeeally stands out here: a few bug fixes, some refactoring to
  easily fit the bug fixes, and a couple cosmetic changes"

* tag 'xfs-fixes-7.0-rc2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: add static size checks for ioctl UABI
  xfs: remove duplicate static size checks
  xfs: Add comments for usages of some macros.
  xfs: Update lazy counters in xfs_growfs_rt_bmblock()
  xfs: Add a comment in xfs_log_sb()
  xfs: Fix xfs_last_rt_bmblock()
  xfs: don't report half-built inodes to fserror
  xfs: don't report metadata inodes to fserror
  xfs: fix potential pointer access race in xfs_healthmon_get
  xfs: fix xfs_group release bug in xfs_dax_notify_dev_failure
  xfs: fix xfs_group release bug in xfs_verify_report_losses
  xfs: fix copy-paste error in previous fix
  xfs: Fix error pointer dereference
  xfs: remove metafile inodes from the active inode stat
  xfs: cleanup inode counter stats
  xfs: fix code alignment issues in xfs_ondisk.c
  xfs: Replace &rtg->rtg_group with rtg_group()
  xfs: Refactoring the nagcount and delta calculation
  xfs: Replace ASSERT with XFS_IS_CORRUPT in xfs_rtcopy_summary()
2026-02-27 10:21:06 -08:00
Romain Sioen
e31b556c0b HID: mcp2221: cancel last I2C command on read error
When an I2C SMBus read operation fails, the MCP2221 internal state machine
may not reset correctly, causing subsequent transactions to fail.

By adding a short delay and explicitly cancelling the last command,
we ensure the device is ready for the next operation.

Fix an issue where i2cdetect was not able to detect all devices correctly
on the bus.

Signed-off-by: Romain Sioen <romain.sioen@microchip.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-27 18:54:09 +01:00
Linus Torvalds
3feb464fb7 Merge tag 'slab-for-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab fixes from Vlastimil Babka:

 - Fix for spurious page allocation warnings on sheaf refill (Harry Yoo)

 - Fix for CONFIG_MEM_ALLOC_PROFILING_DEBUG warnings (Suren
   Baghdasaryan)

 - Fix for kernel-doc warning on ksize() (Sanjay Chitroda)

 - Fix to avoid setting slab->stride later than on slab allocation.
   Doesn't yet fix the reports from powerpc; debugging is making
   progress (Harry Yoo)

* tag 'slab-for-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab: initialize slab->stride early to avoid memory ordering issues
  mm/slub: drop duplicate kernel-doc for ksize()
  mm/slab: mark alloc tags empty for sheaves allocated with __GFP_NO_OBJ_EXT
  mm/slab: pass __GFP_NOWARN to refill_sheaf() if fallback is available
2026-02-27 09:54:02 -08:00
Linus Torvalds
d5a8e4be46 Merge tag 'gpio-fixes-for-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux
Pull gpio fixes from Bartosz Golaszewski:

 - fix memory leaks in shared GPIO management

 - normalize the return values of gpio_chip::get() in GPIO core on
   behalf of drivers that return invalid values (this is done because
   adding stricter sanitization of callback retvals led to breakages in
   existing users, we'll revert that once all are fixed)

* tag 'gpio-fixes-for-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux:
  gpiolib: normalize the return value of gc->get() on behalf of buggy drivers
  gpio: shared: fix memory leaks
2026-02-27 09:42:17 -08:00
Linus Torvalds
bbcb2cd675 Merge tag 'sound-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
Pull sound fixes from Takashi Iwai:
 "A bunch of small device-specific fixes. Mostly quirks and fix-ups for
  USB- and HD-audio at this time, in addition to a couple of ASoC AMD
  and Cirrus fixes"

* tag 'sound-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound: (24 commits)
  ASoC: SDCA: Fix comments for sdca_irq_request()
  ALSA: us144mkii: Drop kernel-doc markers
  ALSA: usb: qcom: Correct parameter comment for uaudio_transfer_buffer_setup()
  ALSA: usb-audio: Drop superfluous kernel-doc markers
  ALSA: hda: cs35l56: Remove unnecessary struct cs_dsp_client_ops
  ALSA: hda: cs35l56: Fix signedness error in cs35l56_hda_posture_put()
  ALSA: usb-audio: Use correct version for UAC3 header validation
  ALSA: hda/realtek: add quirk for Acer Nitro ANV15-51
  ALSA: hda/intel: increase default bdl_pos_adj for Nvidia controllers
  ALSA: usb-audio: Use inclusive terms
  ALSA: usb-audio: Avoid implicit feedback mode on DIYINHK USB Audio 2.0
  ALSA: usb-audio: Check max frame size for implicit feedback mode, too
  ALSA: usb-audio: Cap the packet size pre-calculations
  ASoC: amd: yc: Add ASUS EXPERTBOOK BM1503CDA to quirk table
  ASoC: cs42l43: Report insert for exotic peripherals
  ALSA: usb-audio: Skip clock selector for Focusrite devices
  ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP
  ALSA: usb-audio: Remove VALIDATE_RATES quirk for Focusrite devices
  ALSA: usb-audio: Improve Focusrite sample rate filtering
  ALSA: hda/realtek: add quirk for Samsung Galaxy Book Flex (NT950QCT-A38A)
  ...
2026-02-27 09:34:02 -08:00
Maíra Canal
550bae2c09 pmdomain: bcm: bcm2835-power: Fix broken reset status read
bcm2835_reset_status() has a misplaced parenthesis on every PM_READ()
call. Since PM_READ(reg) expands to readl(power->base + (reg)), the
expression:

    PM_READ(PM_GRAFX & PM_V3DRSTN)

computes the bitwise AND of the register offset PM_GRAFX with the
bitmask PM_V3DRSTN before using the result as a register offset, reading
from the wrong MMIO address instead of the intended PM_GRAFX register.
The same issue affects the PM_IMAGE cases.

Fix by moving the closing parenthesis so PM_READ() receives only the
register offset, and the bitmask is applied to the value returned by
the read.

Fixes: 670c672608 ("soc: bcm: bcm2835-pm: Add support for power domains under a new binding.")
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Stefan Wahren <wahrenst@gmx.net>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-02-27 18:02:35 +01:00
Linus Torvalds
466d6175e3 Merge tag 'drm-fixes-2026-02-27' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "Regular fixes pull, amdxdna and amdgpu are the main ones, with a
  couple of intel fixes, then a scattering of fixes across drivers,
  nothing too major.

  i915/display:
   - Fix Panel Replay stuck with X during mode transitions on Panther
     Lake

  xe:
   - W/a fix for multi-cast registers
   - Fix xe_sync initialization issues

  amdgpu:
   - UserQ fixes
   - DC fix
   - RAS fixes
   - VCN 5 fix
   - Slot reset fix
   - Remove MES workaround that's no longer needed

  amdxdna:
   - deadlock fix
   - NULL ptr deref fix
   - suspend failure fix
   - OOB access fix
   - buffer overflow fix
   - input sanitiation fix
   - firmware loading fix

  dw-dp:
   - An error handling fix

  ethosu:
   - A binary shift overflow fix

  imx:
   - An error handling fix

  logicvc:
   - A dt node reference leak fix

  nouveau:
   - A WARN_ON removal

  samsung-dsim:
   - A memory leak fix

  tiny:
   - sharp-memory: NULL pointer deref fix

  vmwgfx:
   - A reference count and error handling fix"

* tag 'drm-fixes-2026-02-27' of https://gitlab.freedesktop.org/drm/kernel: (39 commits)
  drm/amd: Disable MES LR compute W/A
  drm/amdgpu: Fix error handling in slot reset
  drm/amdgpu/vcn5: Add SMU dpm interface type
  drm/amdgpu: Fix locking bugs in error paths
  drm/amdgpu: Unlock a mutex before destroying it
  drm/amd/display: Use GFP_ATOMIC in dc_create_stream_for_sink
  drm/amdgpu: add upper bound check on user inputs in wait ioctl
  drm/amdgpu: add upper bound check on user inputs in signal ioctl
  drm/amdgpu/userq: Do not allow userspace to trivially triger kernel warnings
  drm/amdgpu/userq: Fix reference leak in amdgpu_userq_wait_ioctl
  accel/amdxdna: Use a different name for latest firmware
  drm/client: Do not destroy NULL modes
  drm/gpusvm: Fix drm_gpusvm_pages_valid_unlocked() kernel-doc
  drm/xe/sync: Fix user fence leak on alloc failure
  drm/xe/sync: Cleanup partially initialized sync on parse failure
  drm/xe/wa: Steer RMW of MCR registers while building default LRC
  accel/amdxdna: Validate command buffer payload count
  accel/amdxdna: Prevent ubuf size overflow
  accel/amdxdna: Fix out-of-bounds memset in command slot handling
  accel/amdxdna: Fix command hang on suspended hardware context
  ...
2026-02-27 08:56:07 -08:00
Bjorn Helgaas
39195990e4 PCI: Correct PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value
fb82437fdd ("PCI: Change capability register offsets to hex") incorrectly
converted the PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value from decimal 52 to hex
0x32:

  -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52      /* v2 endpoints with link end here */
  +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32    /* end of v2 EPs w/ link */

This broke PCI capabilities in a VMM because subsequent ones weren't
DWORD-aligned.

Change PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to the correct value of 0x34.

fb82437fdd was from Baruch Siach <baruch@tkos.co.il>, but this was not
Baruch's fault; it's a mistake I made when applying the patch.

Fixes: fb82437fdd ("PCI: Change capability register offsets to hex")
Reported-by: David Woodhouse <dwmw2@infradead.org>
Closes: https://lore.kernel.org/all/3ae392a0158e9d9ab09a1d42150429dd8ca42791.camel@infradead.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
2026-02-27 10:24:25 -06:00
Thorsten Blum
f505a45776 smb: client: Use snprintf in cifs_set_cifscreds
Replace unbounded sprintf() calls with the safer snprintf(). Avoid using
magic numbers and use strlen() to calculate the key descriptor buffer
size. Save the size in a local variable and reuse it for the bounded
snprintf() calls. Remove CIFSCREDS_DESC_SIZE.

Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-02-27 10:19:54 -06:00
Keith Busch
0f5197ea9a nvme-multipath: fix leak on try_module_get failure
We need to fall back to the synchronous removal if we can't get a
reference on the module needed for the deferred removal.

Fixes: 62188639ec ("nvme-multipath: introduce delayed removal of the multipath head node")
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-27 07:35:15 -08:00
Arnd Bergmann
663eb8763c i3c: simplify combined i3c/i2c dependencies
All combined i2c/i3c drivers appear to suffer from the same link
time problem when CONFIG_I3C is set to 'm':

arm-linux-gnueabi-ld: drivers/iio/magnetometer/mmc5633.o: in function `mmc5633_i3c_driver_init':
mmc5633.c:(.init.text+0x30): undefined reference to `i3c_driver_register_with_owner'

This was previously fixed every time by marking individual
drivers as 'depends on I2C; depends on I3C || !I3C', but this gets
tedious and is somewhat confusing.

Add a Kconfig symbol 'I3C_OR_I2C' to help replace those dependencies,
and use this in all the existing drivers that had already fixed it
as well as the new mmc5633 driver.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260204164216.544409-1-arnd@kernel.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
2026-02-27 16:33:07 +01:00
Harry Yoo
e9217ca77d mm/slab: initialize slab->stride early to avoid memory ordering issues
When alloc_slab_obj_exts() is called later (instead of during slab
allocation and initialization), slab->stride and slab->obj_exts are
updated after the slab is already accessible by multiple CPUs.

The current implementation does not enforce memory ordering between
slab->stride and slab->obj_exts. For correctness, slab->stride must be
visible before slab->obj_exts. Otherwise, concurrent readers may observe
slab->obj_exts as non-zero while stride is still stale.

With stale slab->stride, slab_obj_ext() could return the wrong obj_ext.
This could cause two problems:

  - obj_cgroup_put() is called on the wrong objcg, leading to
    a use-after-free due to incorrect reference counting [1] by
    decrementing the reference count more than it was incremented.

  - refill_obj_stock() is called on the wrong objcg, leading to
    a page_counter overflow [2] by uncharging more memory than charged.

Fix this by unconditionally initializing slab->stride in
alloc_slab_obj_exts_early(), before the need_slab_obj_exts() check.
In the case of SLAB_OBJ_EXT_IN_OBJ, it is overridden in the function.

This ensures updates to slab->stride become visible before the slab
can be accessed by other CPUs via the per-node partial slab list
(protected by spinlock with acquire/release semantics).

Thanks to Shakeel Butt for pointing out this issue [3].

[vbabka@kernel.org: the bug reports [1] and [2] are not yet fully fixed,
 with investigation ongoing, but it is nevertheless a step in the right
 direction to only set stride once after allocating the slab and not
 change it later ]

Fixes: 7a8e71bc61 ("mm/slab: use stride to access slabobj_ext")
Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Link: https://lore.kernel.org/lkml/ca241daa-e7e7-4604-a48d-de91ec9184a5@linux.ibm.com [1]
Link: https://lore.kernel.org/all/ddff7c7d-c0c3-4780-808f-9a83268bbf0c@linux.ibm.com [2]
Link: https://lore.kernel.org/linux-mm/aZu9G9mVIVzSm6Ft@hyeyoo [3]
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-02-27 16:22:57 +01:00
Abel Vesa
81af9e40e2 phy: qcom: qmp-ufs: Fix SM8650 PCS table for Gear 4
According to internal documentation, on SM8650, when the PHY is configured
in Gear 4, the QPHY_V6_PCS_UFS_PLL_CNTL register needs to have the same
value as for Gear 5.

At the moment, there is no board that comes with a UFS 3.x device, so
this issue doesn't show up, but with the new Eliza SoC, which uses the
same init sequence as SM8650, on the MTP board, the link startup fails
with the current Gear 4 PCS table.

So fix that by moving the entry into the PCS generic table instead,
while keeping the value from Gear 5 configuration.

Cc: stable@vger.kernel.org # v6.10
Fixes: b9251e64a9 ("phy: qcom: qmp-ufs: update SM8650 tables for Gear 4 & 5")
Suggested-by: Nitin Rawat <nitin.rawat@oss.qualcomm.com>
Signed-off-by: Abel Vesa <abel.vesa@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8650-HDK
Link: https://patch.msgid.link/20260219-phy-qcom-qmp-ufs-fix-sm8650-pcs-g4-table-v1-1-f136505b57f6@oss.qualcomm.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-27 20:46:57 +05:30
Felix Gu
584b457f41 phy: ti: j721e-wiz: Fix device node reference leak in wiz_get_lane_phy_types()
The serdes device_node is obtained using of_get_child_by_name(),
which increments the reference count. However, it is never put,
leading to a reference leak.

Add the missing of_node_put() calls to ensure the reference count is
properly balanced.

Fixes: 7ae14cf581 ("phy: ti: j721e-wiz: Implement DisplayPort mode to the wiz driver")
Suggested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://patch.msgid.link/20260212-wiz-v2-1-6e8bd4cc7a4a@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-27 20:43:29 +05:30
Yixun Lan
f0cf0a882a phy: k1-usb: add disconnect function support
A disconnect status BIT of USB2 PHY need to be cleared, otherwise
it will fail to work properly during next connection when devices
connect to roothub directly.

Fixes: fe4bc1a086 ("phy: spacemit: support K1 USB2.0 PHY controller")
Signed-off-by: Yixun Lan <dlan@kernel.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://patch.msgid.link/20260216152653.25244-1-dlan@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-27 20:27:00 +05:30
Tvrtko Ursulin
6e3f4514e3 drm/ttm: Fix ttm_pool_beneficial_order() return type
Fix a nasty copy and paste bug, where the incorrect boolean return type of
the ttm_pool_beneficial_order() helper had a consequence of avoiding
direct reclaim too eagerly for drivers which use this feature (currently
amdgpu).

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Fixes: 7e9c548d37 ("drm/ttm: Allow drivers to specify maximum beneficial TTM pool size")
Cc: Christian König <christian.koenig@amd.com>
Cc: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Cc: dri-devel@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v6.19+
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Link: https://lore.kernel.org/r/20260227124901.3177-1-tvrtko.ursulin@igalia.com
2026-02-27 14:54:31 +00:00
Vladimir Oltean
a258d843a3 phy: lynx-28g: skip CDR lock workaround for lanes disabled in the device tree
The blamed commit introduced support for specifying individual lanes as
OF nodes in the device, and these can have status = "disabled".

When that happens, for_each_available_child_of_node() skips them and
lynx_28g_probe_lane() -> devm_phy_create() is not called, so lane->phy
will be NULL. Yet it will be dereferenced in lynx_28g_cdr_lock_check(),
resulting in a crash.

This used to be well handled in v3 of that patch:
https://lore.kernel.org/linux-phy/20250926180505.760089-14-vladimir.oltean@nxp.com/
but until v5 was merged, the logic to support per-lane OF nodes was
split into a separate change, and the per-SoC compatible strings patch
was deferred to a "part 2" set. The splitting was done improperly, and
that handling of NULL lane->phy pointers was not integrated into the
proper commit.

Fixes: 7df7d58abb ("phy: lynx-28g: support individual lanes as OF PHY providers")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260226182853.1103616-1-vladimir.oltean@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-27 19:21:01 +05:30
Vladimir Oltean
48fafffcf2 phy: make PHY_COMMON_PROPS Kconfig symbol conditionally user-selectable
Geert reports that enabling CONFIG_KUNIT_ALL_TESTS shouldn't enable
features that aren't enabled without it. That isn't what "*all* tests"
means, but as the prompt puts it, "All KUnit tests with satisfied
dependencies".

The impact is that enabling CONFIG_KUNIT_ALL_TESTS brings features which
cannot be disabled as built-in into the kernel.

Keep the pattern where consumer drivers have to "select PHY_COMMON_PROPS",
but if KUNIT_ALL_TESTS is enabled, also make PHY_COMMON_PROPS user
selectable, so it can be turned off.

Modify PHY_COMMON_PROPS_TEST to depend on PHY_COMMON_PROPS rather than
select it.

Fixes: e7556b59ba ("phy: add phy_get_rx_polarity() and phy_get_tx_polarity()")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Closes: https://lore.kernel.org/linux-phy/CAMuHMdUBaoYKNj52gn8DQeZFZ42Cvm6xT6fvo0-_twNv1k3Jhg@mail.gmail.com/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260226153315.3530378-1-vladimir.oltean@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-27 19:20:39 +05:30
Denis Benato
377f8e7889 HID: asus: add xg mobile 2023 external hardware support
XG mobile stations have the 0x5a endpoint and has to be initialized:
add them to hid-asus.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-27 14:07:20 +01:00
Randy Dunlap
3350c2b3f2 platform_data/mlxreg: mlxreg.h: fix all kernel-doc warnings
Use the correct kernel-doc format & notation to eliminate
kernel-doc warnings:

Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE1' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE2' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE3' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:37 bad line:
 PHYs ready / unready state;
Warning: include/linux/platform_data/mlxreg.h:153 struct member 'np'
 not described in 'mlxreg_core_data'
Warning: include/linux/platform_data/mlxreg.h:153 struct member 'hpdev'
 not described in 'mlxreg_core_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260226051232.549537-1-rdunlap@infradead.org
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:11:56 +02:00
Denis Benato
c55b84fa56 platform/x86: asus-armoury: add support for FA401UM
Add TDP data for laptop model FA401UM.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Link: https://patch.msgid.link/20260226141944.352923-3-denis.benato@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:11:00 +02:00
Denis Benato
622cc8d078 platform/x86: asus-armoury: add support for GX650RX
Add TDP data for laptop model GX650RX.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Link: https://patch.msgid.link/20260226141944.352923-2-denis.benato@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:10:58 +02:00
Mario Limonciello
916727cfdb platform/x86: hp-bioscfg: Support allocations of larger data
Some systems have much larger amounts of enumeration attributes
than have been previously encountered. This can lead to page allocation
failures when using kcalloc().  Switch over to using kvcalloc() to
allow larger allocations.

Fixes: 6b2770bfd6 ("platform/x86: hp-bioscfg: enum-attributes")
Cc: stable@vger.kernel.org
Reported-by: Paul Kerry <p.kerry@sheffield.ac.uk>
Tested-by: Paul Kerry <p.kerry@sheffield.ac.uk>
Closes: https://bugs.debian.org/1127612
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/20260225210646.59381-1-mario.limonciello@amd.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:10:16 +02:00
Antheas Kapenekakis
cd0883055b platform/x86: oxpec: Add support for Aokzoe A2 Pro
Aokzoe A2 Pro is an older device that the oxpec driver is missing the
quirk for. It has the same behavior as the AOKZOE A1 devices. Add a
quirk for it to the oxpec driver.

Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Link: https://patch.msgid.link/20260223183004.2696892-5-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:09:47 +02:00
Antheas Kapenekakis
2a3b4a8c10 platform/x86: oxpec: Add support for OneXPlayer X1 Air
X1 Air is an X1 variant with a newer Intel chipset. It uses the same
registers as the X1. Add a quirk for it to the oxpec driver.

Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Link: https://patch.msgid.link/20260223183004.2696892-4-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:09:45 +02:00
Antheas Kapenekakis
4049c46edb platform/x86: oxpec: Add support for OneXPlayer X1z
X1z is a variant of OneXPlayer X1 A with 8840U. It seems that only one
user has this one. Add a quirk for it to the oxpec driver.

Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Link: https://patch.msgid.link/20260223183004.2696892-3-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:09:44 +02:00
Antheas Kapenekakis
3385ea97c1 platform/x86: oxpec: Add support for OneXPlayer APEX
OneXPlayer Apex is a new Strix Halo handheld. It uses the same registers
as the OneXPlayer Fly devices. Add a quirk for it to the oxpec driver.

Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Link: https://patch.msgid.link/20260223183004.2696892-2-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-27 13:09:42 +02:00
Ariel Silver
24d8771272 media: dvb-net: fix OOB access in ULE extension header tables
The ule_mandatory_ext_handlers[] and ule_optional_ext_handlers[] tables
in handle_one_ule_extension() are declared with 255 elements (valid
indices 0-254), but the index htype is derived from network-controlled
data as (ule_sndu_type & 0x00FF), giving a range of 0-255. When
htype equals 255, an out-of-bounds read occurs on the function pointer
table, and the OOB value may be called as a function pointer.

Add a bounds check on htype against the array size before either table
is accessed. Out-of-range values now cause the SNDU to be discarded.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Reported-by: Ariel Silver <arielsilver77@gmail.com>
Signed-off-by: Ariel Silver <arielsilver77@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
2026-02-27 10:57:48 +01:00
Alice Ryhl
a0b9b0f143 rust_binder: use lock_vma_under_rcu() in use_page_slow()
There's no reason to lock the whole mm when we are doing operations on
the vma if we can help it, so to reduce contention, use the
lock_vma_under_rcu() abstraction.

Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Link: https://patch.msgid.link/20260218-binder-vma-rcu-v1-1-8bd45b2b1183@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-26 21:34:14 -08:00
Alice Ryhl
2e303f0feb rust_binder: call set_notification_done() without proc lock
Consider the following sequence of events on a death listener:
1. The remote process dies and sends a BR_DEAD_BINDER message.
2. The local process invokes the BC_CLEAR_DEATH_NOTIFICATION command.
3. The local process then invokes the BC_DEAD_BINDER_DONE.
Then, the kernel will reply to the BC_DEAD_BINDER_DONE command with a
BR_CLEAR_DEATH_NOTIFICATION_DONE reply using push_work_if_looper().

However, this can result in a deadlock if the current thread is not a
looper. This is because dead_binder_done() still holds the proc lock
during set_notification_done(), which called push_work_if_looper().
Normally, push_work_if_looper() takes the thread lock, which is fine to
take under the proc lock. But if the current thread is not a looper,
then it falls back to delivering the reply to the process work queue,
which involves taking the proc lock. Since the proc lock is already
held, this is a deadlock.

Fix this by releasing the proc lock during set_notification_done(). It
was not intentional that it was held during that function to begin with.

I don't think this ever happens in Android because BC_DEAD_BINDER_DONE
is only invoked in response to BR_DEAD_BINDER messages, and the kernel
always delivers BR_DEAD_BINDER to a looper. So there's no scenario where
Android userspace will call BC_DEAD_BINDER_DONE on a non-looper thread.

Cc: stable <stable@kernel.org>
Fixes: eafedbc7c0 ("rust_binder: add Rust Binder driver")
Reported-by: syzbot+c8287e65a57a89e7fb72@syzkaller.appspotmail.com
Tested-by: syzbot+c8287e65a57a89e7fb72@syzkaller.appspotmail.com
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Link: https://patch.msgid.link/20260224-binder-dead-binder-done-proc-lock-v1-1-bbe1b8a6e74a@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-26 21:33:03 -08:00
Alice Ryhl
4cb9e13fec rust_binder: avoid reading the written value in offsets array
When sending a transaction, its offsets array is first copied into the
target proc's vma, and then the values are read back from there. This is
normally fine because the vma is a read-only mapping, so the target
process cannot change the value under us.

However, if the target process somehow gains the ability to write to its
own vma, it could change the offset before it's read back, causing the
kernel to misinterpret what the sender meant. If the sender happens to
send a payload with a specific shape, this could in the worst case lead
to the receiver being able to privilege escalate into the sender.

The intent is that gaining the ability to change the read-only vma of
your own process should not be exploitable, so remove this TOCTOU read
even though it's unexploitable without another Binder bug.

Cc: stable <stable@kernel.org>
Fixes: eafedbc7c0 ("rust_binder: add Rust Binder driver")
Reported-by: Jann Horn <jannh@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Link: https://patch.msgid.link/20260218-binder-vma-check-v2-2-60f9d695a990@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-26 21:32:59 -08:00
Alice Ryhl
8ef2c15aea rust_binder: check ownership before using vma
When installing missing pages (or zapping them), Rust Binder will look
up the vma in the mm by address, and then call vm_insert_page (or
zap_page_range_single). However, if the vma is closed and replaced with
a different vma at the same address, this can lead to Rust Binder
installing pages into the wrong vma.

By installing the page into a writable vma, it becomes possible to write
to your own binder pages, which are normally read-only. Although you're
not supposed to be able to write to those pages, the intent behind the
design of Rust Binder is that even if you get that ability, it should not
lead to anything bad. Unfortunately, due to another bug, that is not the
case.

To fix this, store a pointer in vm_private_data and check that the vma
returned by vma_lookup() has the right vm_ops and vm_private_data before
trying to use the vma. This should ensure that Rust Binder will refuse
to interact with any other VMA. The plan is to introduce more vma
abstractions to avoid this unsafe access to vm_ops and vm_private_data,
but for now let's start with the simplest possible fix.

C Binder performs the same check in a slightly different way: it
provides a vm_ops->close that sets a boolean to true, then checks that
boolean after calling vma_lookup(), but this is more fragile
than the solution in this patch. (We probably still want to do both, but
the vm_ops->close callback will be added later as part of the follow-up
vma API changes.)

It's still possible to remap the vma so that pages appear in the right
vma, but at the wrong offset, but this is a separate issue and will be
fixed when Rust Binder gets a vm_ops->close callback.

Cc: stable <stable@kernel.org>
Fixes: eafedbc7c0 ("rust_binder: add Rust Binder driver")
Reported-by: Jann Horn <jannh@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Link: https://patch.msgid.link/20260218-binder-vma-check-v2-1-60f9d695a990@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-26 21:32:55 -08:00
Carlos Llamas
4fc87c240b rust_binder: fix oneway spam detection
The spam detection logic in TreeRange was executed before the current
request was inserted into the tree. So the new request was not being
factored in the spam calculation. Fix this by moving the logic after
the new range has been inserted.

Also, the detection logic for ArrayRange was missing altogether which
meant large spamming transactions could get away without being detected.
Fix this by implementing an equivalent low_oneway_space() in ArrayRange.

Note that I looked into centralizing this logic in RangeAllocator but
iterating through 'state' and 'size' got a bit too complicated (for me)
and I abandoned this effort.

Cc: stable <stable@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Fixes: eafedbc7c0 ("rust_binder: add Rust Binder driver")
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Link: https://patch.msgid.link/20260210232949.3770644-1-cmllamas@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-26 21:31:50 -08:00
Chintan Vankar
be11a53722 net: ethernet: ti: am65-cpsw-nuss/cpsw-ale: Fix multicast entry handling in ALE table
In the current implementation, flushing multicast entries in MAC mode
incorrectly deletes entries for all ports instead of only the target port,
disrupting multicast traffic on other ports. The cause is adding multicast
entries by setting only host port bit, and not setting the MAC port bits.

Fix this by setting the MAC port's bit in the port mask while adding the
multicast entry. Also fix the flush logic to preserve the host port bit
during removal of MAC port and free ALE entries when mask contains only
host port.

Fixes: 5c50a856d5 ("drivers: net: ethernet: cpsw: add multicast address to ALE table")
Signed-off-by: Chintan Vankar <c-vankar@ti.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260224181359.2055322-1-c-vankar@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-26 19:43:54 -08:00
Jakub Kicinski
7e5b450c49 Merge branch 'bridge-check-relevant-options-in-vlan-range-grouping'
Danielle Ratson says:

====================
bridge: Check relevant options in VLAN range grouping

The br_vlan_opts_eq_range() function determines if consecutive VLANs can
be grouped together in a range for compact netlink notifications. It
currently checks state, tunnel info, and multicast router configuration,
but misses two categories of per-VLAN options that affect the output:
1. User-visible priv_flags (neigh_suppress, mcast_enabled)
2. Port multicast context options (mcast_max_groups, mcast_n_groups)

When VLANs have different settings for these options, they are incorrectly
grouped into ranges, causing netlink notifications to report only one
VLAN's settings for the entire range.

Fix by checking priv_flags equality, but only for flags that affect netlink
output (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED and BR_VLFLAG_MCAST_ENABLED),
and comparing multicast context options (mcast_max_groups, mcast_n_groups).

Add a test with four test cases for each option, to ensure that VLANs with
different values are not grouped into ranges and VLANs with matching
values are properly grouped together.
====================

Link: https://patch.msgid.link/20260225143956.3995415-1-danieller@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-26 19:24:33 -08:00
Danielle Ratson
13540021be selftests: net: Add bridge VLAN range grouping tests
Add a new test file bridge_vlan_dump.sh with four test cases that verify
VLANs with different per-VLAN options are not incorrectly grouped into
ranges in the dump output.

The tests verify the kernel's br_vlan_opts_eq_range() function correctly
prevents VLAN range grouping when neigh_suppress, mcast_max_groups,
mcast_n_groups, or mcast_enabled options differ.

Each test verifies that VLANs with different option values appear as
individual entries rather than ranges, and that VLANs with matching
values are properly grouped together.

Example output:

$ ./bridge_vlan_dump.sh
TEST: VLAN range grouping with neigh_suppress                       [ OK ]
TEST: VLAN range grouping with mcast_max_groups                     [ OK ]
TEST: VLAN range grouping with mcast_n_groups                       [ OK ]
TEST: VLAN range grouping with mcast_enabled                        [ OK ]

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/20260225143956.3995415-3-danieller@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-26 19:24:29 -08:00
Danielle Ratson
93c9475c04 bridge: Check relevant per-VLAN options in VLAN range grouping
The br_vlan_opts_eq_range() function determines if consecutive VLANs can
be grouped together in a range for compact netlink notifications. It
currently checks state, tunnel info, and multicast router configuration,
but misses two categories of per-VLAN options that affect the output:
1. User-visible priv_flags (neigh_suppress, mcast_enabled)
2. Port multicast context (mcast_max_groups, mcast_n_groups)

When VLANs have different settings for these options, they are incorrectly
grouped into ranges, causing netlink notifications to report only one
VLAN's settings for the entire range.

Fix by checking priv_flags equality, but only for flags that affect netlink
output (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED and BR_VLFLAG_MCAST_ENABLED),
and comparing multicast context (mcast_max_groups and mcast_n_groups).

Example showing the bugs before the fix:

$ bridge vlan set vid 10 dev dummy1 neigh_suppress on
$ bridge vlan set vid 11 dev dummy1 neigh_suppress off
$ bridge -d vlan show dev dummy1
  port             vlan-id
  dummy1           10-11
                      ... neigh_suppress on

$ bridge vlan set vid 10 dev dummy1 mcast_max_groups 100
$ bridge vlan set vid 11 dev dummy1 mcast_max_groups 200
$ bridge -d vlan show dev dummy1
  port             vlan-id
  dummy1           10-11
                      ... mcast_max_groups 100

After the fix, VLANs 10 and 11 are shown as separate entries with their
correct individual settings.

Fixes: a1aee20d5d ("net: bridge: Add netlink knobs for number / maximum MDB entries")
Fixes: 83f6d60079 ("bridge: vlan: Allow setting VLAN neighbor suppression state")
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260225143956.3995415-2-danieller@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-26 19:24:29 -08:00
Eric Dumazet
2ef2b20cf4 net: annotate data-races around sk->sk_{data_ready,write_space}
skmsg (and probably other layers) are changing these pointers
while other cpus might read them concurrently.

Add corresponding READ_ONCE()/WRITE_ONCE() annotations
for UDP, TCP and AF_UNIX.

Fixes: 604326b41a ("bpf, sockmap: convert to generic sk_msg interface")
Reported-by: syzbot+87f770387a9e5dc6b79b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/699ee9fc.050a0220.1cd54b.0009.GAE@google.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260225131547.1085509-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-26 19:23:03 -08:00
Jakub Kicinski
754a3d081a Merge tag 'batadv-net-pullrequest-20260225' of https://git.open-mesh.org/linux-merge
Simon Wunderlich says:

====================
Here is a batman-adv bugfix:

 - Avoid double-rtnl_lock ELP metric worker, by Sven Eckelmann

* tag 'batadv-net-pullrequest-20260225' of https://git.open-mesh.org/linux-merge:
  batman-adv: Avoid double-rtnl_lock ELP metric worker
====================

Link: https://patch.msgid.link/20260225084614.229077-1-sw@simonwunderlich.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-26 19:15:09 -08:00
Davide Caratti
e35626f610 net/sched: ets: fix divide by zero in the offload path
Offloading ETS requires computing each class' WRR weight: this is done by
averaging over the sums of quanta as 'q_sum' and 'q_psum'. Using unsigned
int, the same integer size as the individual DRR quanta, can overflow and
even cause division by zero, like it happened in the following splat:

 Oops: divide error: 0000 [#1] SMP PTI
 CPU: 13 UID: 0 PID: 487 Comm: tc Tainted: G            E       6.19.0-virtme #45 PREEMPT(full)
 Tainted: [E]=UNSIGNED_MODULE
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 RIP: 0010:ets_offload_change+0x11f/0x290 [sch_ets]
 Code: e4 45 31 ff eb 03 41 89 c7 41 89 cb 89 ce 83 f9 0f 0f 87 b7 00 00 00 45 8b 08 31 c0 45 01 cc 45 85 c9 74 09 41 6b c4 64 31 d2 <41> f7 f2 89 c2 44 29 fa 45 89 df 41 83 fb 0f 0f 87 c7 00 00 00 44
 RSP: 0018:ffffd0a180d77588 EFLAGS: 00010246
 RAX: 00000000ffffff38 RBX: ffff8d3d482ca000 RCX: 0000000000000000
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffd0a180d77660
 RBP: ffffd0a180d77690 R08: ffff8d3d482ca2d8 R09: 00000000fffffffe
 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffffe
 R13: ffff8d3d472f2000 R14: 0000000000000003 R15: 0000000000000000
 FS:  00007f440b6c2740(0000) GS:ffff8d3dc9803000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000000003cdd2000 CR3: 0000000007b58002 CR4: 0000000000172ef0
 Call Trace:
  <TASK>
  ets_qdisc_change+0x870/0xf40 [sch_ets]
  qdisc_create+0x12b/0x540
  tc_modify_qdisc+0x6d7/0xbd0
  rtnetlink_rcv_msg+0x168/0x6b0
  netlink_rcv_skb+0x5c/0x110
  netlink_unicast+0x1d6/0x2b0
  netlink_sendmsg+0x22e/0x470
  ____sys_sendmsg+0x38a/0x3c0
  ___sys_sendmsg+0x99/0xe0
  __sys_sendmsg+0x8a/0xf0
  do_syscall_64+0x111/0xf80
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
 RIP: 0033:0x7f440b81c77e
 Code: 4d 89 d8 e8 d4 bc 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 <c9> c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa
 RSP: 002b:00007fff951e4c10 EFLAGS: 00000202 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 0000000000481820 RCX: 00007f440b81c77e
 RDX: 0000000000000000 RSI: 00007fff951e4cd0 RDI: 0000000000000003
 RBP: 00007fff951e4c20 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff951f4fa8
 R13: 00000000699ddede R14: 00007f440bb01000 R15: 0000000000486980
  </TASK>
 Modules linked in: sch_ets(E) netdevsim(E)
 ---[ end trace 0000000000000000 ]---
 RIP: 0010:ets_offload_change+0x11f/0x290 [sch_ets]
 Code: e4 45 31 ff eb 03 41 89 c7 41 89 cb 89 ce 83 f9 0f 0f 87 b7 00 00 00 45 8b 08 31 c0 45 01 cc 45 85 c9 74 09 41 6b c4 64 31 d2 <41> f7 f2 89 c2 44 29 fa 45 89 df 41 83 fb 0f 0f 87 c7 00 00 00 44
 RSP: 0018:ffffd0a180d77588 EFLAGS: 00010246
 RAX: 00000000ffffff38 RBX: ffff8d3d482ca000 RCX: 0000000000000000
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffd0a180d77660
 RBP: ffffd0a180d77690 R08: ffff8d3d482ca2d8 R09: 00000000fffffffe
 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffffe
 R13: ffff8d3d472f2000 R14: 0000000000000003 R15: 0000000000000000
 FS:  00007f440b6c2740(0000) GS:ffff8d3dc9803000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000000003cdd2000 CR3: 0000000007b58002 CR4: 0000000000172ef0
 Kernel panic - not syncing: Fatal exception
 Kernel Offset: 0x30000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
 ---[ end Kernel panic - not syncing: Fatal exception ]---

Fix this using 64-bit integers for 'q_sum' and 'q_psum'.

Cc: stable@vger.kernel.org
Fixes: d35eb52bd2 ("net: sch_ets: Make the ETS qdisc offloadable")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/28504887df314588c7255e9911769c36f751edee.1771964872.git.dcaratti@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-26 18:28:47 -08:00
Thorsten Blum
2f37dc436d smb: client: Don't log plaintext credentials in cifs_set_cifscreds
When debug logging is enabled, cifs_set_cifscreds() logs the key
payload and exposes the plaintext username and password. Remove the
debug log to avoid exposing credentials.

Fixes: 8a8798a5ff ("cifs: fetch credentials out of keyring for non-krb5 auth multiuser mounts")
Cc: stable@vger.kernel.org
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-02-26 18:17:20 -06:00
Paulo Alcantara
d9d1e319b3 smb: client: fix broken multichannel with krb5+signing
When mounting a share with 'multichannel,max_channels=n,sec=krb5i',
the client was duplicating signing key for all secondary channels,
thus making the server fail all commands sent from secondary channels
due to bad signatures.

Every channel has its own signing key, so when establishing a new
channel with krb5 auth, make sure to use the new session key as the
derived key to generate channel's signing key in SMB2_auth_kerberos().

Repro:

$ mount.cifs //srv/share /mnt -o multichannel,max_channels=4,sec=krb5i
$ sleep 5
$ umount /mnt
$ dmesg
  ...
  CIFS: VFS: sign fail cmd 0x5 message id 0x2
  CIFS: VFS: \\srv SMB signature verification returned error = -13
  CIFS: VFS: sign fail cmd 0x5 message id 0x2
  CIFS: VFS: \\srv SMB signature verification returned error = -13
  CIFS: VFS: sign fail cmd 0x4 message id 0x2
  CIFS: VFS: \\srv SMB signature verification returned error = -13

Reported-by: Xiaoli Feng <xifeng@redhat.com>
Reviewed-by: Enzo Matsumiya <ematsumiya@suse.de>
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Cc: David Howells <dhowells@redhat.com>
Cc: linux-cifs@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-02-26 18:17:20 -06:00
Paulo Alcantara
4fc3a433c1 smb: client: use atomic_t for mnt_cifs_flags
Use atomic_t for cifs_sb_info::mnt_cifs_flags as it's currently
accessed locklessly and may be changed concurrently in mount/remount
and reconnect paths.

Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Cc: linux-cifs@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-02-26 18:17:08 -06:00
Linus Torvalds
a75cb869a8 Merge tag 'v7.0-rc1-ksmbd-server-fixes' of git://git.samba.org/ksmbd
Pull smb server fixes from Steve French:

 - auth security improvement

 - fix potential buffer overflow in smbdirect negotiation

* tag 'v7.0-rc1-ksmbd-server-fixes' of git://git.samba.org/ksmbd:
  ksmbd: fix signededness bug in smb_direct_prepare_negotiation()
  ksmbd: Compare MACs in constant time
2026-02-26 16:01:18 -08:00
Greg Kroah-Hartman
994d5dfa4e Merge tag 'iio-fixes-for-7.0a' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/jic23/iio into work-linus
Jonathan writes:

IIO: 1st set of fixes for the 7.0 cycle

Usual mixed bag of ancient bugs that have been discovered and more
recent stuff.

core
- Cleanup a wait_queue if a driver is removed at exacty the wrong
  moment.
adi,adf4377
- Check correct masks when waiting for reset to complete.
adi,adis
- Fix a NULL pointer dereference if ops not provided to adis_init()
bosch,bme680
- Fix typo in value used to calculate measurement wait duration.
infineon,tlv493d
- Drop incorrect shifting of some bits for x-axis
invensense,icm42600
- Fix corner case of output data rate being set to the value it already
  has which resulted in waiting for ever for a flag to say the update was
  completed.
- Fix a case where the buffer is turned off whilst ODR switch is in progress.
invensense,icm45600
- Interrupt 1 drive bit was inverted.
- Fix a underflow for regulator put warning if probe fails
invensense,mpu9150
- Work around a hardware quirk where reading from irq status is not sufficient
  to acknowledge an interrupt.
maxim,ds4424
- Reject -128 as a possible raw value as it's out of range with the sign
  / magnitude encoding used by this chip.
microchip,mcp4131
- Shift the wiper value only once.
rohm,bh1780
- Fix a runtime reference count issue on an error path.
sensiron,sps30
- Fix two buffer size issues due to sizeof() wrong thing.
tyhx,hx9023s
- Ensure count used by __counted_by is set before accessing the buffer.
- Avoid a potential division by zero.

* tag 'iio-fixes-for-7.0a' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/jic23/iio:
  iio: imu: adis: Fix NULL pointer dereference in adis_init
  iio: imu: inv_icm45600: fix regulator put warning when probe fails
  iio: buffer: Fix wait_queue not being removed
  iio: gyro: mpu3050-core: fix pm_runtime error handling
  iio: gyro: mpu3050-i2c: fix pm_runtime error handling
  iio: adc: ad7768-1: Fix ERR_PTR dereference in ad7768_fill_scale_tbl
  iio: chemical: sps30_serial: fix buffer size in sps30_serial_read_meas()
  iio: chemical: sps30_i2c: fix buffer size in sps30_i2c_read_meas()
  iio: magnetometer: tlv493d: remove erroneous shift in X-axis data
  iio: proximity: hx9023s: Protect against division by zero in set_samp_freq
  iio: proximity: hx9023s: fix assignment order for __counted_by
  iio: chemical: bme680: Fix measurement wait duration calculation
  iio: dac: ds4424: reject -128 RAW value
  iio: imu: inv_icm45600: fix INT1 drive bit inverted
  iio: potentiometer: mcp4131: fix double application of wiper shift
  iio: imu: inv-mpu9150: fix irq ack preventing irq storms
  iio: frequency: adf4377: Fix duplicated soft reset mask
  iio: light: bh1780: fix PM runtime leak on error path
  iio: imu: inv_icm42600: fix odr switch when turning buffer off
  iio: imu: inv_icm42600: fix odr switch to the same value
2026-02-26 15:48:29 -08:00
Greg Kroah-Hartman
dceddeecce Merge tag 'stratix10_rsu_fix_for_v7.0' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux into work-linus
Dinh writes:

firmware: stratix10-rsu: fix NULL pointer dereference when RSU is disabled
- Fix a kernel panic that happens in the driver when the First Stage Boot Loader
  has not enabled the Remote System Update(RSU).

* tag 'stratix10_rsu_fix_for_v7.0' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux:
  firmware: stratix10-rsu: Fix NULL pointer dereference when RSU is disabled
2026-02-26 15:47:15 -08:00
Linus Torvalds
69062f234a Merge tag 'mm-hotfixes-stable-2026-02-26-14-14' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "12 hotfixes.  7 are cc:stable.  8 are for MM.

  All are singletons - please see the changelogs for details"

* tag 'mm-hotfixes-stable-2026-02-26-14-14' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  MAINTAINERS: update Yosry Ahmed's email address
  mailmap: add entry for Daniele Alessandrelli
  mm: fix NULL NODE_DATA dereference for memoryless nodes on boot
  mm/tracing: rss_stat: ensure curr is false from kthread context
  mm/kfence: fix KASAN hardware tag faults during late enablement
  mm/damon/core: disallow non-power of two min_region_sz
  Squashfs: check metadata block offset is within range
  MAINTAINERS, mailmap: update e-mail address for Vlastimil Babka
  liveupdate: luo_file: remember retrieve() status
  mm: thp: deny THP for files on anonymous inodes
  mm: change vma_alloc_folio_noprof() macro to inline function
  mm/kfence: disable KFENCE upon KASAN HW tags enablement
2026-02-26 15:27:41 -08:00
Dave Airlie
103d53eb6f Merge tag 'amd-drm-fixes-7.0-2026-02-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.0-2026-02-26:

amdgpu:
- UserQ fixes
- DC fix
- RAS fixes
- VCN 5 fix
- Slot reset fix
- Remove MES workaround that's no longer needed

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260226161330.3549393-1-alexander.deucher@amd.com
2026-02-27 09:19:47 +10:00
Linus Torvalds
944e15f200 Merge tag 'dma-mapping-7.0-2026-02-26' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux
Pull dma-mapping fixes from Marek Szyprowski:
 "Two DMA-mapping fixes for the recently merged API rework (Jiri Pirko
  and Stian Halseth)"

* tag 'dma-mapping-7.0-2026-02-26' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux:
  sparc: Fix page alignment in dma mapping
  dma-mapping: avoid random addr value print out on error path
2026-02-26 15:19:16 -08:00
Alain Volmat
e96493229a spi: stm32: fix missing pointer assignment in case of dma chaining
Commit c4f2c05ab0 ("spi: stm32: fix pointer-to-pointer variables usage")
introduced a regression since dma descriptors generated as part of the
stm32_spi_prepare_rx_dma_mdma_chaining function are not well propagated
to the caller function, leading to mdma-dma chaining being no more
functional.

Fixes: c4f2c05ab0 ("spi: stm32: fix pointer-to-pointer variables usage")
Signed-off-by: Alain Volmat <alain.volmat@foss.st.com>
Acked-by: Antonio Quartulli <antonio@mandelbit.com>
Link: https://patch.msgid.link/20260224-spi-stm32-chaining-fix-v1-1-5da7a4851b66@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-26 23:16:12 +00:00
Dave Airlie
82a499d2ca Merge tag 'drm-xe-fixes-2026-02-26' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
- W/a fix for multi-cast registers (Roper)
- Fix xe_sync initialization issues (Shuicheng)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/aaBGHy_0RLGGIBP5@intel.com
2026-02-27 09:13:58 +10:00
Linus Torvalds
e094883b50 Merge tag 'acpi-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI fixes from Rafael Wysocki:
 "New platform quirks for two systems:

   - Add a quirk for Lenovo G70-35 to save the ACPI NVS memory on system
     suspend (Piotr Mazek)

   - Add a DMI quirk for Acer Aspire One D255 to work around a backlight
     issue by returning false to _OSI("Windows 2009") (Sofia Schneider)"

* tag 'acpi-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  ACPI: OSI: Add DMI quirk for Acer Aspire One D255
  ACPI: PM: Save NVS memory on Lenovo G70-35
2026-02-26 14:45:29 -08:00
Andy Shevchenko
b6c3af46c2 pinctrl: cy8c95x0: Don't miss reading the last bank registers
When code had been changed to use for_each_set_clump8(), it mistakenly
switched from chip->nport to chip->tpin since the cy8c9540 and cy8c9560
have a 4-pin gap. This, in particular, led to the missed read of
the last bank interrupt status register and hence missing interrupts
on those pins. Restore the upper limit in for_each_set_clump8() to take
into consideration that gap.

Fixes: 83e29a7a1f ("pinctrl: cy8c95x0; Switch to use for_each_set_clump8()")
Cc: stable@vger.kernel.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-26 23:41:04 +01:00
Linus Torvalds
db5781c407 Merge tag 'pm-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki:
 "These fix two intel_pstate driver issues causing it to crash on sysfs
  attribute accesses when some CPUs in the system are offline, finalize
  changes related to turning pm_runtime_put() into a void function, and
  update Daniel Lezcano's contact information:

   - Fix two issues in the intel_pstate driver causing it to crash when
     its sysfs interface is used on a system with some offline CPUs
     (David Arcari, Srinivas Pandruvada)

   - Update the last user of the pm_runtime_put() return value to
     discard it and turn pm_runtime_put() into a void function (Rafael
     Wysocki)

   - Update Daniel Lezcano's contact information in MAINTAINERS and
     .mailmap (Daniel Lezcano)"

* tag 'pm-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  MAINTAINERS: Update contact with the kernel.org address
  cpufreq: intel_pstate: Fix crash during turbo disable
  cpufreq: intel_pstate: Fix NULL pointer dereference in update_cpu_qos_request()
  PM: runtime: Change pm_runtime_put() return type to void
  pmdomain: imx: gpcv2: Discard pm_runtime_put() return value
2026-02-26 14:40:21 -08:00
Justin Tee
dd677d0598 nvmet-fcloop: Check remoteport port_state before calling done callback
In nvme_fc_handle_ls_rqst_work, the lsrsp->done callback is only set when
remoteport->port_state is FC_OBJSTATE_ONLINE.  Otherwise, the
nvme_fc_xmt_ls_rsp's LLDD call to lport->ops->xmt_ls_rsp is expected to
fail and the nvme-fc transport layer itself will directly call
nvme_fc_xmt_ls_rsp_free instead of relying on LLDD's done callback to free
the lsrsp resources.

Update the fcloop_t2h_xmt_ls_rsp routine to check remoteport->port_state.
If online, then lsrsp->done callback will free the lsrsp.  Else, return
-ENODEV to signal the nvme-fc transport to handle freeing lsrsp.

Cc: Ewan D. Milne <emilne@redhat.com>
Tested-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Reviewed-by: Daniel Wagner <dwagner@suse.de>
Closes: https://lore.kernel.org/linux-nvme/21255200-a271-4fa0-b099-97755c8acd4c@work/
Fixes: 10c165af35 ("nvmet-fcloop: call done callback even when remote port is gone")
Signed-off-by: Justin Tee <justintee8345@gmail.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-26 14:35:32 -08:00
Linus Torvalds
c45be7c420 Merge tag 'for-linus-7.0-1' of https://github.com/cminyard/linux-ipmi
Pull IPMI driver fixes from Corey Minyard:
 "This mostly revolves around getting the driver to behave when the IPMI
  device misbehaves. Past attempts have not worked very well because I
  didn't have hardware I could make do this, and AI was fairly useless
  for help on this.

  So I modified qemu and my test suite so I could reproduce a
  misbehaving IPMI device, and with that I was able to fix the issues"

* tag 'for-linus-7.0-1' of https://github.com/cminyard/linux-ipmi:
  ipmi:si: Fix check for a misbehaving BMC
  ipmi:msghandler: Handle error returns from the SMI sender
  ipmi:si: Don't block module unload if the BMC is messed up
  ipmi:si: Use a long timeout when the BMC is misbehaving
  ipmi:si: Handle waiting messages when BMC failure detected
  ipmi:ls2k: Make ipmi_ls2k_platform_driver static
  ipmi: ipmb: initialise event handler read bytes
  ipmi: Consolidate the run to completion checking for xmit msgs lock
  ipmi: Fix use-after-free and list corruption on sender error
2026-02-26 14:34:21 -08:00
David Carlier
749989b2d9 sched_ext: Fix SCX_EFLAG_INITIALIZED being a no-op flag
SCX_EFLAG_INITIALIZED is the sole member of enum scx_exit_flags with no
explicit value, so the compiler assigns it 0. This makes the bitwise OR
in scx_ops_init() a no-op:

    sch->exit_info->flags |= SCX_EFLAG_INITIALIZED; /* |= 0 */

As a result, BPF schedulers cannot distinguish whether ops.init()
completed successfully by inspecting exit_info->flags.

Assign the value 1LLU << 0 so the flag is actually set.

Fixes: f3aec2adce ("sched_ext: Add SCX_EFLAG_INITIALIZED to indicate successful ops.init()")
Signed-off-by: David Carlier <devnexen@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-26 12:03:24 -10:00
Thomas Weißschuh
459cb3c054 kbuild: install-extmod-build: Package resolve_btfids if necessary
When CONFIG_DEBUG_INFO_BTF_MODULES is enabled and vmlinux is available,
Makefile.modfinal and gen-btf.sh will try to use resolve_btfids on the
module .ko. install-extmod-build currently does not package
resolve_btfids, so that step fails.

Package resolve_btfids if it may be used.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Link: https://patch.msgid.link/20260226-kbuild-resolve_btfids-v1-1-2bf38b93dfe7@linutronix.de
[nathan: Small commit message tweaks]
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-02-26 14:49:34 -07:00
Rafael J. Wysocki
3d9b8b00da Merge branch 'acpi-pm'
Add a quirk for Lenovo G70-35 to save the ACPI NVS memory on system
suspend (Piotr Mazek)

* acpi-pm:
  ACPI: PM: Save NVS memory on Lenovo G70-35
2026-02-26 21:51:33 +01:00
Rafael J. Wysocki
b78030d0f1 Merge branches 'pm-cpufreq' and 'pm-runtime'
Merge cpufreq and runtime PM updates for 7.0-rc2:

 - Fix two issues in the intel_pstate driver causing it to crash when
   its sysfs interface is used on a system with some offline CPUs (David
   Arcari, Srinivas Pandruvada)

 - Update the last user of the pm_runtime_put() return value to discard
   it and turn pm_runtime_put() into a void function (Rafael Wysocki)

* pm-cpufreq:
  cpufreq: intel_pstate: Fix crash during turbo disable
  cpufreq: intel_pstate: Fix NULL pointer dereference in update_cpu_qos_request()

* pm-runtime:
  PM: runtime: Change pm_runtime_put() return type to void
  pmdomain: imx: gpcv2: Discard pm_runtime_put() return value
2026-02-26 21:35:47 +01:00
Dave Airlie
5e061aac4c Merge tag 'drm-misc-fixes-2026-02-26' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
Several fixes for:
  - amdxdna: Fix for a deadlock, a NULL pointer dereference, a suspend
    failure, a hang, an out-of-bounds access, a buffer overflow, input
    sanitization and other minor fixes.
  - dw-dp: An error handling fix
  - ethosu: A binary shift overflow fix
  - imx: An error handling fix
  - logicvc: A dt node reference leak fix
  - nouveau: A WARN_ON removal
  - samsung-dsim: A memory leak fix
  - sharp-memory: A NULL pointer dereference fix
  - vmgfx: A reference count and error handling fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patch.msgid.link/20260226-heretic-stimulating-swine-6a2f27@penduick
2026-02-27 05:49:50 +10:00
Dave Airlie
f91d5e9422 Merge tag 'drm-intel-fixes-2026-02-25' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-fixes
- Fix #7153: Panel Replay stuck with X during mode transitions on Panther Lake

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patch.msgid.link/aZ8JxQkN5oMxXsT6@jlahtine-mobl
2026-02-27 05:43:38 +10:00
T.J. Mercier
6881af27f9 selftests/bpf: Fix OOB read in dmabuf_collector
Dmabuf name allocations can be less than DMA_BUF_NAME_LEN characters,
but bpf_probe_read_kernel always tries to read exactly that many bytes.
If a name is less than DMA_BUF_NAME_LEN characters,
bpf_probe_read_kernel will read past the end. bpf_probe_read_kernel_str
stops at the first NUL terminator so use it instead, like
iter_dmabuf_for_each already does.

Fixes: ae5d2c59ec ("selftests/bpf: Add test for dmabuf_iter")
Reported-by: Jerome Lee <jaewookl@quicinc.com>
Signed-off-by: T.J. Mercier <tjmercier@google.com>
Link: https://lore.kernel.org/r/20260225003349.113746-1-tjmercier@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-26 11:28:04 -08:00
Ihor Solodrai
60e3cbef35 selftests/bpf: Fix a memory leak in xdp_flowtable test
test_progs run with ASAN reported [1]:

  ==126==ERROR: LeakSanitizer: detected memory leaks

  Direct leak of 32 byte(s) in 1 object(s) allocated from:
      #0 0x7f1ff3cfa340 in calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:77
      #1 0x5610c15bb520 in bpf_program_attach_fd /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/lib/bpf/libbpf.c:13164
      #2 0x5610c15bb740 in bpf_program__attach_xdp /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/lib/bpf/libbpf.c:13204
      #3 0x5610c14f91d3 in test_xdp_flowtable /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c:138
      #4 0x5610c1533566 in run_one_test /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/testing/selftests/bpf/test_progs.c:1406
      #5 0x5610c1537fb0 in main /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/testing/selftests/bpf/test_progs.c:2097
      #6 0x7f1ff25df1c9  (/lib/x86_64-linux-gnu/libc.so.6+0x2a1c9) (BuildId: 8e9fd827446c24067541ac5390e6f527fb5947bb)
      #7 0x7f1ff25df28a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2a28a) (BuildId: 8e9fd827446c24067541ac5390e6f527fb5947bb)
      #8 0x5610c0bd3180 in _start (/tmp/work/vmtest/vmtest/selftests/bpf/test_progs+0x593180) (BuildId: cdf9f103f42307dc0a2cd6cfc8afcbc1366cf8bd)

Fix by properly destroying bpf_link on exit in xdp_flowtable test.

[1] https://github.com/kernel-patches/vmtest/actions/runs/22361085418/job/64716490680

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Reviewed-by: Subbaraya Sundeep <sbhatta@marvell.com>
Link: https://lore.kernel.org/r/20260225003351.465104-1-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-26 11:27:08 -08:00
Kohei Enju
b7bf516c3e bpf: Fix stack-out-of-bounds write in devmap
get_upper_ifindexes() iterates over all upper devices and writes their
indices into an array without checking bounds.

Also the callers assume that the max number of upper devices is
MAX_NEST_DEV and allocate excluded_devices[1+MAX_NEST_DEV] on the stack,
but that assumption is not correct and the number of upper devices could
be larger than MAX_NEST_DEV (e.g., many macvlans), causing a
stack-out-of-bounds write.

Add a max parameter to get_upper_ifindexes() to avoid the issue.
When there are too many upper devices, return -EOVERFLOW and abort the
redirect.

To reproduce, create more than MAX_NEST_DEV(8) macvlans on a device with
an XDP program attached using BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS.
Then send a packet to the device to trigger the XDP redirect path.

Reported-by: syzbot+10cc7f13760b31bd2e61@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/698c4ce3.050a0220.340abe.000b.GAE@google.com/T/
Fixes: aeea1b86f9 ("bpf, devmap: Exclude XDP broadcast to master device")
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Kohei Enju <kohei@enjuk.jp>
Link: https://lore.kernel.org/r/20260225053506.4738-1-kohei@enjuk.jp
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-26 11:25:53 -08:00
Jiri Olsa
ad6fface76 bpf: Fix kprobe_multi cookies access in show_fdinfo callback
We don't check if cookies are available on the kprobe_multi link
before accessing them in show_fdinfo callback, we should.

Cc: stable@vger.kernel.org
Fixes: da7e9c0a7f ("bpf: Add show_fdinfo for kprobe_multi")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20260225111249.186230-1-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-26 11:23:57 -08:00
Fuad Tabba
ef06fd16d4 bpf, arm64: Force 8-byte alignment for JIT buffer to prevent atomic tearing
struct bpf_plt contains a u64 target field. Currently, the BPF JIT
allocator requests an alignment of 4 bytes (sizeof(u32)) for the JIT
buffer.

Because the base address of the JIT buffer can be 4-byte aligned (e.g.,
ending in 0x4 or 0xc), the relative padding logic in build_plt() fails
to ensure that target lands on an 8-byte boundary.

This leads to two issues:
1. UBSAN reports misaligned-access warnings when dereferencing the
   structure.
2. More critically, target is updated concurrently via WRITE_ONCE() in
   bpf_arch_text_poke() while the JIT'd code executes ldr. On arm64,
   64-bit loads/stores are only guaranteed to be single-copy atomic if
   they are 64-bit aligned. A misaligned target risks a torn read,
   causing the JIT to jump to a corrupted address.

Fix this by increasing the allocation alignment requirement to 8 bytes
(sizeof(u64)) in bpf_jit_binary_pack_alloc(). This anchors the base of
the JIT buffer to an 8-byte boundary, allowing the relative padding math
in build_plt() to correctly align the target field.

Fixes: b2ad54e153 ("bpf, arm64: Implement bpf_arch_text_poke() for arm64")
Signed-off-by: Fuad Tabba <tabba@google.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20260226075525.233321-1-tabba@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-26 11:19:00 -08:00
Nathan Chancellor
d2395bb194 genksyms: Fix parsing a declarator with a preceding attribute
After commit 07919126ec ("netfilter: annotate NAT helper hook pointers
with __rcu"), genksyms fails to parse the __rcu annotation when building
with CONFIG_DEBUG_INFO_BTF=y, CONFIG_PAHOLE_HAS_BTF_TAG=y, and a version
of clang that supports btf_type_tag.

  $ clang --version | head -1
  ClangBuiltLinux clang version 22.1.0 (https://github.com/llvm/llvm-project.git 4434dabb69916856b824f68a64b029c67175e532)

  $ cat kernel/configs/repro.config
  CONFIG_BPF_SYSCALL=y
  CONFIG_MODVERSIONS=y
  # CONFIG_DEBUG_INFO_NONE is not set
  CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
  CONFIG_DEBUG_INFO_BTF=y

  $ make -skj"$(nproc)" ARCH=x86_64 LLVM=1 mrproper defconfig repro.config all
  WARNING: modpost: EXPORT symbol "nf_nat_ftp_hook" [vmlinux] version generation failed, symbol will not be versioned.
  ...
  WARNING: modpost: EXPORT symbol "nf_nat_irc_hook" [vmlinux] version generation failed, symbol will not be versioned.
  ...

genksyms falls over parsing the __rcu attribute in the declarator:

  # Kernel reproducer
  $ make -skj"$(nproc)" ARCH=x86_64 KCFLAGS=-D__GENKSYMS__ LLVM=1 net/netfilter/nf_conntrack_ftp.i

  $ scripts/genksyms/genksyms -w <net/netfilter/nf_conntrack_ftp.i &| rg 'syntax error'
  include/linux/netfilter/nf_conntrack_ftp.h:29: syntax error
  net/netfilter/nf_conntrack_ftp.c:46: syntax error

  # Trivial reproducer
  $ cat test.c
  int (*func)(void *foo, int bar);
  int (__attribute__((btf_type_tag("rcu"))) *func_with_attr)(void *foo, int bar);

  $ scripts/genksyms/genksyms -w <test.c
  <stdin>:2: syntax error

Optionally allow an attribute to precede a declarator to resolve this
error and properly generate symbol versions.

Fixes: 07919126ec ("netfilter: annotate NAT helper hook pointers with __rcu")
Link: https://patch.msgid.link/20260225-genksyms-fix-attribute-declarator-v1-1-1b21478663fb@kernel.org
Tested-by: Nicolas Schier <nsc@kernel.org>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-02-26 11:53:05 -07:00
Nathan Chancellor
8678591b47 kbuild: Split .modinfo out from ELF_DETAILS
Commit 3e86e4d74c ("kbuild: keep .modinfo section in
vmlinux.unstripped") added .modinfo to ELF_DETAILS while removing it
from COMMON_DISCARDS, as it was needed in vmlinux.unstripped and
ELF_DETAILS was present in all architecture specific vmlinux linker
scripts. While this shuffle is fine for vmlinux, ELF_DETAILS and
COMMON_DISCARDS may be used by other linker scripts, such as the s390
and x86 compressed boot images, which may not expect to have a .modinfo
section. In certain circumstances, this could result in a bootloader
failing to load the compressed kernel [1].

Commit ddc6cbef3e ("s390/boot/vmlinux.lds.S: Ensure bzImage ends with
SecureBoot trailer") recently addressed this for the s390 bzImage but
the same bug remains for arm, parisc, and x86. The presence of .modinfo
in the x86 bzImage was the root cause of the issue worked around with
commit d50f210913 ("kbuild: align modinfo section for Secureboot
Authenticode EDK2 compat"). misc.c in arch/x86/boot/compressed includes
lib/decompress_unzstd.c, which in turn includes lib/xxhash.c and its
MODULE_LICENSE / MODULE_DESCRIPTION macros due to the STATIC definition.

Split .modinfo out from ELF_DETAILS into its own macro and handle it in
all vmlinux linker scripts. Discard .modinfo in the places where it was
previously being discarded from being in COMMON_DISCARDS, as it has
never been necessary in those uses.

Cc: stable@vger.kernel.org
Fixes: 3e86e4d74c ("kbuild: keep .modinfo section in vmlinux.unstripped")
Reported-by: Ed W <lists@wildgooses.com>
Closes: https://lore.kernel.org/587f25e0-a80e-46a5-9f01-87cb40cfa377@wildgooses.com/ [1]
Tested-by: Ed W <lists@wildgooses.com> # x86_64
Link: https://patch.msgid.link/20260225-separate-modinfo-from-elf-details-v1-1-387ced6baf4b@kernel.org
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
2026-02-26 11:50:19 -07:00
Sumit Gupta
df6e4ab654 arm64: topology: Fix false warning in counters_read_on_cpu() for same-CPU reads
The counters_read_on_cpu() function warns when called with IRQs
disabled to prevent deadlock in smp_call_function_single(). However,
this warning is spurious when reading counters on the current CPU,
since no IPI is needed for same CPU reads.

Commit 12eb8f4fff24 ("cpufreq: CPPC: Update FIE arch_freq_scale in
ticks for non-PCC regs") changed the CPPC Frequency Invariance Engine
to read AMU counters directly from the scheduler tick for non-PCC
register spaces (like FFH), instead of deferring to a kthread. This
means counters_read_on_cpu() is now called with IRQs disabled from the
tick handler, triggering the warning.

Fix this by restructuring the logic: when IRQs are disabled (tick
context), call the function directly for same-CPU reads. Otherwise
use smp_call_function_single().

Fixes: 997c021abc ("cpufreq: CPPC: Update FIE arch_freq_scale in ticks for non-PCC regs")
Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-26 18:27:15 +00:00
Marc Zyngier
e5cb94ba5f arm64: Fix sampling the "stable" virtual counter in preemptible section
Ben reports that when running with CONFIG_DEBUG_PREEMPT, using
__arch_counter_get_cntvct_stable() results in well deserves warnings,
as we access a per-CPU variable without preemption disabled.

Fix the issue by disabling preemption on reading the counter. We can
probably do a lot better by not disabling preemption on systems that
do not require horrible workarounds to return a valid counter value,
but this plugs the issue for the time being.

Fixes: 29cc0f3aa7 ("arm64: Force the use of CNTVCT_EL0 in __delay()")
Reported-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/aZw3EGs4rbQvbAzV@e134344.arm.com
Tested-by: Ben Horgan <ben.horgan@arm.com>
Tested-by: André Draszik <andre.draszik@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-26 18:12:48 +00:00
Linus Torvalds
3f4a08e644 Merge tag 'kmalloc_obj-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull kmalloc_obj fixes from Kees Cook:

 - Fix pointer-to-array allocation types for ubd and kcsan

 - Force size overflow helpers to __always_inline

 - Bump __builtin_counted_by_ref to Clang 22.1 from 22.0 (Nathan Chancellor)

* tag 'kmalloc_obj-v7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  kcsan: test: Adjust "expect" allocation type for kmalloc_obj
  overflow: Make sure size helpers are always inlined
  init/Kconfig: Adjust fixed clang version for __builtin_counted_by_ref
  ubd: Use pointer-to-pointers for io_thread_req arrays
2026-02-26 10:05:15 -08:00
Kees Cook
795469820c kcsan: test: Adjust "expect" allocation type for kmalloc_obj
The call to kmalloc_obj(observed.lines) returns "char (*)[3][512]",
a pointer to the whole 2D array. But "expect" wants to be "char (*)[512]",
the decayed pointer type, as if it were observed.lines itself (though
without the "3" bounds). This produces the following build error:

../kernel/kcsan/kcsan_test.c: In function '__report_matches':
../kernel/kcsan/kcsan_test.c:171:16: error: assignment to 'char (*)[512]' from incompatible pointer type 'char (*)[3][512]'
[-Wincompatible-pointer-types]
  171 |         expect = kmalloc_obj(observed.lines);
      |                ^

Instead of changing the "expect" type to "char (*)[3][512]" and
requiring a dereference at each use (e.g. "(expect*)[0]"), just
explicitly cast the return to the desired type.

Note that I'm intentionally not switching back to byte-based "kmalloc"
here because I cannot find a way for the Coccinelle script (which will
be used going forward to catch future conversions) to exclude this case.

Tested with:

$ ./tools/testing/kunit/kunit.py run \
	--kconfig_add CONFIG_DEBUG_KERNEL=y \
	--kconfig_add CONFIG_KCSAN=y \
	--kconfig_add CONFIG_KCSAN_KUNIT_TEST=y \
	--arch=x86_64 --qemu_args '-smp 2' kcsan

Reported-by: Nathan Chancellor <nathan@kernel.org>
Fixes: 69050f8d6d ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types")
Signed-off-by: Kees Cook <kees@kernel.org>
2026-02-26 09:54:08 -08:00
Takashi Iwai
71c1978ab6 ASoC: SDCA: Fix comments for sdca_irq_request()
The kernel-doc comments for sdca_irq_request() contained some typos
that lead to build warnings with W=1.  Let's correct them.

Fixes: b126394d9e ("ASoC: SDCA: Generic interrupt support")
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20260226154753.1083320-1-tiwai@suse.de
2026-02-26 17:57:37 +01:00
Linus Torvalds
e3c81bae4f Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe:
 "Seems bigger than usual, a number of things were posted near/during
  the merg window:

   - Fix some compilation regressions related to the new DMABUF code

   - Close a race with ib_register_device() vs netdev events that causes
     GID table corruption

   - Compilation warnings with some compilers in bng_re

   - Correct error unwind in bng_re and the umem pinned dmabuf

   - Avoid NULL pointer crash in ionic during query_port()

   - Check the size for uAPI validation checks in EFA

   - Several system call stack leaks in drivers found with AI

   - Fix the new restricted_node_type so it works with wildcard listens
     too"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/uverbs: Import DMA-BUF module in uverbs_std_types_dmabuf file
  RDMA/umem: Fix double dma_buf_unpin in failure path
  RDMA/core: Check id_priv->restricted_node_type in cma_listen_on_dev()
  RDMA/ionic: Fix kernel stack leak in ionic_create_cq()
  RDMA/irdma: Fix kernel stack leak in irdma_create_user_ah()
  IB/mthca: Add missed mthca_unmap_user_db() for mthca_create_srq()
  RDMA/efa: Fix typo in efa_alloc_mr()
  RDMA/ionic: Fix potential NULL pointer dereference in ionic_query_port
  RDMA/bng_re: Unwind bng_re_dev_init properly
  RDMA/bng_re: Remove unnessary validity checks
  RDMA/core: Fix stale RoCE GIDs during netdev events at registration
  RDMA/uverbs: select CONFIG_DMA_SHARED_BUFFER
2026-02-26 08:37:18 -08:00
Sanjay Chitroda
2b351ea428 mm/slub: drop duplicate kernel-doc for ksize()
The implementation of ksize() was updated with kernel-doc by commit
fab0694646 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c")
However, the public header still contains a kernel-doc comment
attached to the ksize() prototype.

Having documentation both in the header and next to the implementation
causes Sphinx to treat the function as being documented twice,
resulting in the warning:

  WARNING: Duplicate C declaration, also defined at core-api/mm-api:521
  Declaration is '.. c:function:: size_t ksize(const void *objp)'

Kernel-doc guidelines recommend keeping the documentation with the
function implementation. Therefore remove the redundant kernel-doc
block from include/linux/slab.h so that the implementation in slub.c
remains the canonical source for documentation.

No functional change.

Fixes: fab0694646 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c")
Signed-off-by: Sanjay Chitroda <sanjayembeddedse@gmail.com>
Link: https://patch.msgid.link/20260226054712.3610744-1-sanjayembedded@gmail.com
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-02-26 17:30:32 +01:00
Suren Baghdasaryan
f3ec502b67 mm/slab: mark alloc tags empty for sheaves allocated with __GFP_NO_OBJ_EXT
alloc_empty_sheaf() allocates sheaves from SLAB_KMALLOC caches using
__GFP_NO_OBJ_EXT to avoid recursion, however it does not mark their
allocation tags empty before freeing, which results in a warning when
CONFIG_MEM_ALLOC_PROFILING_DEBUG is set. Fix this by marking allocation
tags for such sheaves as empty.

The problem was technically introduced in commit 4c0a17e283 but only
becomes possible to hit with commit 913ffd3a1b.

Fixes: 4c0a17e283 ("slab: prevent recursive kmalloc() in alloc_empty_sheaf()")
Fixes: 913ffd3a1b ("slab: handle kmalloc sheaves bootstrap")
Reported-by: David Wang <00107082@163.com>
Closes: https://lore.kernel.org/all/20260223155128.3849-1-00107082@163.com/
Analyzed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Tested-by: Harry Yoo <harry.yoo@oracle.com>
Tested-by: David Wang <00107082@163.com>
Link: https://patch.msgid.link/20260225163407.2218712-1-surenb@google.com
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-02-26 17:30:32 +01:00
Harry Yoo
021ca6b670 mm/slab: pass __GFP_NOWARN to refill_sheaf() if fallback is available
When refill_sheaf() is called, failing to refill the sheaf doesn't
necessarily mean the allocation will fail because a fallback path
might be available and serve the allocation request.

Suppress spurious warnings by passing __GFP_NOWARN along with
__GFP_NOMEMALLOC whenever a fallback path is available.

When the caller is alloc_full_sheaf() or __pcs_replace_empty_main(),
the kernel always falls back to the slowpath (__slab_alloc_node()).
For __prefill_sheaf_pfmemalloc(), the fallback path is available
only when gfp_pfmemalloc_allowed() returns true.

Reported-and-tested-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Closes: https://lore.kernel.org/linux-mm/aZt2-oS9lkmwT7Ch@debian.local
Fixes: 1ce20c28ea ("slab: handle pfmemalloc slabs properly with sheaves")
Link: https://lore.kernel.org/linux-mm/aZwSreGj9-HHdD-j@hyeyoo
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Link: https://patch.msgid.link/20260223133322.16705-1-harry.yoo@oracle.com
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
2026-02-26 17:30:06 +01:00
Werner Sembach
ec3070f01f HID: multitouch: Keep latency normal on deactivate for reactivation gesture
Uniwill devices have a built in gesture in the touchpad to de- and
reactivate it by double taping the upper left corner. This gesture stops
working when latency is set to high, so this patch keeps the latency on
normal.

Cc: stable@vger.kernel.org
Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
[jkosina@suse.com: change bit from 24 to 25]
[jkosina@suse.com: update shortlog]
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-26 17:10:22 +01:00
Linus Torvalds
b9c8fc2cae Merge tag 'net-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
 "Including fixes from IPsec, Bluetooth and netfilter

  Current release - regressions:

   - wifi: fix dev_alloc_name() return value check

   - rds: fix recursive lock in rds_tcp_conn_slots_available

  Current release - new code bugs:

   - vsock: lock down child_ns_mode as write-once

  Previous releases - regressions:

   - core:
      - do not pass flow_id to set_rps_cpu()
      - consume xmit errors of GSO frames

   - netconsole: avoid OOB reads, msg is not nul-terminated

   - netfilter: h323: fix OOB read in decode_choice()

   - tcp: re-enable acceptance of FIN packets when RWIN is 0

   - udplite: fix null-ptr-deref in __udp_enqueue_schedule_skb().

   - wifi: brcmfmac: fix potential kernel oops when probe fails

   - phy: register phy led_triggers during probe to avoid AB-BA deadlock

   - eth:
      - bnxt_en: fix deleting of Ntuple filters
      - wan: farsync: fix use-after-free bugs caused by unfinished tasklets
      - xscale: check for PTP support properly

  Previous releases - always broken:

   - tcp: fix potential race in tcp_v6_syn_recv_sock()

   - kcm: fix zero-frag skb in frag_list on partial sendmsg error

   - xfrm:
      - fix race condition in espintcp_close()
      - always flush state and policy upon NETDEV_UNREGISTER event

   - bluetooth:
      - purge error queues in socket destructors
      - fix response to L2CAP_ECRED_CONN_REQ

   - eth:
      - mlx5:
         - fix circular locking dependency in dump
         - fix "scheduling while atomic" in IPsec MAC address query
      - gve: fix incorrect buffer cleanup for QPL
      - team: avoid NETDEV_CHANGEMTU event when unregistering slave
      - usb: validate USB endpoints"

* tag 'net-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (72 commits)
  netfilter: nf_conntrack_h323: fix OOB read in decode_choice()
  dpaa2-switch: validate num_ifs to prevent out-of-bounds write
  net: consume xmit errors of GSO frames
  vsock: document write-once behavior of the child_ns_mode sysctl
  vsock: lock down child_ns_mode as write-once
  selftests/vsock: change tests to respect write-once child ns mode
  net/mlx5e: Fix "scheduling while atomic" in IPsec MAC address query
  net/mlx5: Fix missing devlink lock in SRIOV enable error path
  net/mlx5: E-switch, Clear legacy flag when moving to switchdev
  net/mlx5: LAG, disable MPESW in lag_disable_change()
  net/mlx5: DR, Fix circular locking dependency in dump
  selftests: team: Add a reference count leak test
  team: avoid NETDEV_CHANGEMTU event when unregistering slave
  net: mana: Fix double destroy_workqueue on service rescan PCI path
  MAINTAINERS: Update maintainer entry for QUALCOMM ETHQOS ETHERNET DRIVER
  dpll: zl3073x: Remove redundant cleanup in devm_dpll_init()
  selftests/net: packetdrill: Verify acceptance of FIN packets when RWIN is 0
  tcp: re-enable acceptance of FIN packets when RWIN is 0
  vsock: Use container_of() to get net namespace in sysctl handlers
  net: usb: kaweth: validate USB endpoints
  ...
2026-02-26 08:00:13 -08:00
Takashi Iwai
dc9786a06d ALSA: us144mkii: Drop kernel-doc markers
We don't process this driver code for kernel-doc, and the "/**" marker
leads to warnings with W=1 builds.  Drop the superfluous markers, and
also fix the invalid mark up, too.

Link: https://patch.msgid.link/20260226155456.1092186-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-26 16:56:36 +01:00
Takashi Iwai
1d6452a0ce ALSA: usb: qcom: Correct parameter comment for uaudio_transfer_buffer_setup()
At fixing the memory leak of xfer buffer, we forgot to update the
corresponding comment, too.  This resulted in a kernel-doc warning
with W=1.  Let's correct it.

Fixes: 5c7ef50012 ("ALSA: qc_audio_offload: avoid leaking xfer_buf allocation")
Link: https://patch.msgid.link/20260226154414.1081568-4-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-26 16:51:32 +01:00
Takashi Iwai
3540cc453f ALSA: usb-audio: Drop superfluous kernel-doc markers
We don't process USB-audio driver code for kernel-doc, and the "/**"
marker leads to warnings with W=1 builds.  Drop the superfluous
markers.

Link: https://patch.msgid.link/20260226154414.1081568-3-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-26 16:51:32 +01:00
Baochen Qiang
7259b1a0e5 wifi: ath12k: fix station lookup failure when disconnecting from AP
In ath12k_wmi_tlv_fw_stats_data_parse() and
ath12k_wmi_tlv_rssi_chain_parse(), the driver uses
ieee80211_find_sta_by_ifaddr() to look up the station associated with the
incoming firmware statistics. This works under normal conditions but fails
during AP disconnection, resulting in log messages like:

 wlan0: deauthenticating from xxxxxx by local choice (Reason: 3=DEAUTH_LEAVING)
 wlan0: moving STA xxxxxx to state 3
 wlan0: moving STA xxxxxx to state 2
 wlan0: moving STA xxxxxx to state 1
 ath12k_pci 0000:02:00.0: not found station bssid xxxxxx for vdev stat
 ath12k_pci 0000:02:00.0: not found station of bssid xxxxxx for rssi chain
 ath12k_pci 0000:02:00.0: failed to pull fw stats: -71
 ath12k_pci 0000:02:00.0: time out while waiting for get fw stats
 wlan0: Removed STA xxxxxx
 wlan0: Destroyed STA xxxxxx

The failure happens because the station has already been removed from
ieee80211_local::sta_hash by the time firmware statistics are requested
through drv_sta_statistics().

Switch the lookup to ath12k_link_sta_find_by_addr(), which searches the
driver's link station hash table that still has the station recorded
at that time.  This also implicitly fixes another issue: the current code
always uses deflink regardless of which link the statistics belong to,
which is incorrect in MLO scenarios. The new helper returns the correct
link station.

Additionally, raise the log level on lookup failures. With the updated
helper, such failures should no longer occur under normal conditions.

Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3

Fixes: 79e7b04b53 ("wifi: ath12k: report station mode signal strength")
Fixes: 6af5bc381b ("wifi: ath12k: report station mode per-chain signal strength")
Signed-off-by: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Reviewed-by: Vasanthakumar Thiagarajan <vasanthakumar.thiagarajan@oss.qualcomm.com>
Link: https://patch.msgid.link/20260129-ath12k-fw-stats-fixes-v1-2-55d66064f4d5@oss.qualcomm.com
Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
2026-02-26 07:18:32 -08:00
Baochen Qiang
8f153eb745 wifi: ath12k: use correct pdev id when requesting firmware stats
To get firmware statistics, currently ar->pdev->pdev_id is passed as an
argument to ath12k_mac_get_fw_stats() in ath12k_mac_op_sta_statistics().
For single pdev device like WCN7850, its value is 0 which represents the
SoC pdev id. As a result, WCN7850 firmware sends the same reply to host
twice, which further results in memory leak:

  unreferenced object 0xffff88812e286000 (size 192):
  comm "softirq", pid 0, jiffies 4294981997
  hex dump (first 32 bytes):
    10 a5 40 11 81 88 ff ff 10 a5 40 11 81 88 ff ff  ..@.......@.....
    00 00 00 00 00 00 00 00 80 ff ff ff 33 05 00 00  ............3...
  backtrace (crc cecc8c82):
    __kmalloc_cache_noprof
    ath12k_wmi_tlv_fw_stats_parse
    ath12k_wmi_tlv_iter
    ath12k_wmi_op_rx
    ath12k_htc_rx_completion_handler
    ath12k_ce_per_engine_service
    ath12k_pci_ce_workqueue
    process_one_work
    bh_worker
    tasklet_action
    handle_softirqs

Detailed explanation is:

  1. ath12k_mac_get_fw_stats() called in ath12k_mac_op_sta_statistics() to
     get vdev statistics, making the caller thread wait.
  2. firmware sends the first reply, ath12k_wmi_tlv_fw_stats_data_parse()
     allocates buffers to cache necessary information. Following that, in
     ath12k_wmi_fw_stats_process() if events of all started vdev haved been
     received, is_end flag is set hence the waiting thread gets waken up by
     the ar->fw_stats_done/->fw_stats_complete signals.
  3. ath12k_mac_get_fw_stats() wakes up and returns successfully.
     ath12k_mac_op_sta_statistics() saves required parameters and calls
     ath12k_fw_stats_reset() to free buffers allocated earlier.
  4. firmware sends the second reply. As usual, buffers are allocated and
     attached to the ar->fw_stats.vdevs list. Note this time there is no
     thread waiting, therefore no chance to free those buffers.
  5. ath12k module gets unloaded. If there has been no more firmware
     statistics request made since step 4, or if the request fails (see
     the example in the following patch), there is no chance to call
     ath12k_fw_stats_reset(). Consequently those buffers leak.

Actually for single pdev device, using SoC pdev id in
ath12k_mac_op_sta_statistics() is wrong, because the purpose is to get
statistics of a specific station, which is mapped to a specific pdev. That
said, the id of actual individual pdev should be fetched and used instead.
The helper ath12k_mac_get_target_pdev_id() serves for this purpose, hence
use it to fix this issue. Note it also works for other devices as well due
to the single_pdev_only check inside.

The same applies to ath12k_mac_op_get_txpower() and
ath12k_mac_op_link_sta_statistics() as well.

Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3

Fixes: 79e7b04b53 ("wifi: ath12k: report station mode signal strength")
Fixes: e92c658b05 ("wifi: ath12k: add get_txpower mac ops")
Fixes: ebebe66ec2 ("wifi: ath12k: fill link station statistics for MLO")
Signed-off-by: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Reviewed-by: Vasanthakumar Thiagarajan <vasanthakumar.thiagarajan@oss.qualcomm.com>
Link: https://patch.msgid.link/20260129-ath12k-fw-stats-fixes-v1-1-55d66064f4d5@oss.qualcomm.com
Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
2026-02-26 07:18:32 -08:00
Takashi Iwai
7c698de0dc HID: apple: Add EPOMAKER TH87 to the non-apple keyboards list
EPOMAKER TH87 has the very same ID as Apple Aluminum keyboard
(05ac:024f) although it doesn't work as expected in compatible way.

Put three entries to the non-apple keyboards list to exclude this
device: one for BT ("TH87"), one for USB ("HFD Epomaker TH87") and one
for dongle ("2.4G Wireless Receiver").

Link: https://bugzilla.suse.com/show_bug.cgi?id=1258455
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-26 16:00:49 +01:00
Zhang Lixu
22f8bcec5a HID: intel-ish-hid: ipc: Add Nova Lake-H/S PCI device IDs
Add device IDs of Nova Lake-H and Nova Lake-S into ishtp support list.

Signed-off-by: Zhang Lixu <lixu.zhang@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-26 15:53:12 +01:00
Mark Harmstone
f8db8009ea btrfs: check block group lookup in remove_range_from_remap_tree()
Add a check in remove_range_from_remap_tree() after we call
btrfs_lookup_block_group(), to check if it is NULL. This shouldn't
happen, but if it does we at least get an error rather than a segfault.

Reported-by: Chris Mason <clm@fb.com>
Link: https://lore.kernel.org/linux-btrfs/20260125125129.2245240-1-clm@meta.com/
Fixes: 979e1dc3d6 ("btrfs: handle deletions from remapped block group")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:29 +01:00
Mark Harmstone
7885ca40c3 btrfs: fix transaction handle leaks in btrfs_last_identity_remap_gone()
btrfs_abort_transaction(), unlike btrfs_commit_transaction(), doesn't
also free the transaction handle. Fix the instances in
btrfs_last_identity_remap_gone() where we're also leaking the
transaction on abort.

Reported-by: Chris Mason <clm@fb.com>
Link: https://lore.kernel.org/linux-btrfs/20260125125129.2245240-1-clm@meta.com/
Fixes: 979e1dc3d6 ("btrfs: handle deletions from remapped block group")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:29 +01:00
Mark Harmstone
54b9395b18 btrfs: fix chunk map leak in btrfs_map_block() after btrfs_translate_remap()
If the call to btrfs_translate_remap() in btrfs_map_block() returns an
error code, we were leaking the chunk map. Fix it by jumping to out
rather than returning directly.

Reported-by: Chris Mason <clm@fb.com>
Link: https://lore.kernel.org/linux-btrfs/20260125125830.2352988-1-clm@meta.com/
Fixes: 18ba649928 ("btrfs: redirect I/O for remapped block groups")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:29 +01:00
Mark Harmstone
f15fb3d415 btrfs: fix chunk map leak in btrfs_map_block() after btrfs_chunk_map_num_copies()
Fix a chunk map leak in btrfs_map_block(): if we return early with -EINVAL,
we're not freeing the chunk map that we've just looked up.

Fixes: 0ae653fbec ("btrfs: reduce chunk_map lookups in btrfs_map_block()")
CC: stable@vger.kernel.org # 6.12+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:29 +01:00
Mark Harmstone
587bb33b10 btrfs: fix compat mask in error messages in btrfs_check_features()
Commit d7f67ac9a9 ("btrfs: relax block-group-tree feature dependency
checks") introduced a regression when it comes to handling unsupported
incompat or compat_ro flags. Beforehand we only printed the flags that
we didn't recognize, afterwards we printed them all, which is less
useful. Fix the error handling so it behaves like it used to.

Fixes: d7f67ac9a9 ("btrfs: relax block-group-tree feature dependency checks")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:28 +01:00
Mark Harmstone
1c7e9111f4 btrfs: print correct subvol num if active swapfile prevents deletion
Fix the error message in btrfs_delete_subvolume() if we can't delete a
subvolume because it has an active swapfile: we were printing the number
of the parent rather than the target.

Fixes: 60021bd754 ("btrfs: prevent subvol with swapfile from being deleted")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:28 +01:00
Mark Harmstone
44e2fda664 btrfs: fix warning in scrub_verify_one_metadata()
Commit b471965fdb ("btrfs: fix replace/scrub failure with
metadata_uuid") fixed the comparison in scrub_verify_one_metadata() to
use metadata_uuid rather than fsid, but left the warning as it was. Fix
it so it matches what we're doing.

Fixes: b471965fdb ("btrfs: fix replace/scrub failure with metadata_uuid")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:28 +01:00
Mark Harmstone
a101727805 btrfs: fix objectid value in error message in check_extent_data_ref()
Fix a copy-paste error in check_extent_data_ref(): we're printing root
as in the message above, we should be printing objectid.

Fixes: f333a3c7e8 ("btrfs: tree-checker: validate dref root and objectid")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:28 +01:00
Mark Harmstone
511dc8912a btrfs: fix incorrect key offset in error message in check_dev_extent_item()
Fix the error message in check_dev_extent_item(), when an overlapping
stripe is encountered. For dev extents, objectid is the disk number and
offset the physical address, so prev_key->objectid should actually be
prev_key->offset.

(I can't take any credit for this one - this was discovered by Chris and
his friend Claude.)

Reported-by: Chris Mason <clm@fb.com>
Fixes: 008e2512dc ("btrfs: tree-checker: add dev extent item checks")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:28 +01:00
Mark Harmstone
3cf0f35779 btrfs: fix error message order of parameters in btrfs_delete_delayed_dir_index()
Fix the error message in btrfs_delete_delayed_dir_index() if
__btrfs_add_delayed_item() fails: the message says root, inode, index,
error, but we're actually passing index, root, inode, error.

Fixes: adc1ef55dc ("btrfs: add details to error messages at btrfs_delete_delayed_dir_index()")
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:27 +01:00
Miquel Sabaté Solà
a752653312 btrfs: don't commit the super block when unmounting a shutdown filesystem
When unmounting a filesystem we will try, among many other things, to
commit the super block. On a filesystem that was shutdown, though, this
will always fail with -EROFS as writes are forbidden on this context;
and an error will be reported.

Don't commit the super block on this situation, which should be fine as
the filesystem is frozen before shutdown and, therefore, it should be at
a consistent state.

Signed-off-by: Miquel Sabaté Solà <mssola@mssola.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:27 +01:00
Miquel Sabaté Solà
3f501412f2 btrfs: free pages on error in btrfs_uring_read_extent()
In this function the 'pages' object is never freed in the hopes that it is
picked up by btrfs_uring_read_finished() whenever that executes in the
future. But that's just the happy path. Along the way previous
allocations might have gone wrong, or we might not get -EIOCBQUEUED from
btrfs_encoded_read_regular_fill_pages(). In all these cases, we go to a
cleanup section that frees all memory allocated by this function without
assuming any deferred execution, and this also needs to happen for the
'pages' allocation.

Fixes: 34310c442e ("btrfs: add io_uring command for encoded reads (ENCODED_READ ioctl)")
Signed-off-by: Miquel Sabaté Solà <mssola@mssola.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:27 +01:00
Boris Burkov
2ab2244642 btrfs: fix referenced/exclusive check in squota_check_parent_usage()
We compared rfer_cmpr against excl_cmpr_sum instead of rfer_cmpr_sum
which is confusing.

I expect that
rfer_cmpr == excl_cmpr in squota, but it is much better to be consistent
in case of any surprises or bugs.

Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-btrfs/cover.1764796022.git.boris@bur.io/T/#mccb231643ffd290b44a010d4419474d280be5537
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:27 +01:00
Filipe Manana
8ac7fad32b btrfs: remove pointless WARN_ON() in cache_save_setup()
This WARN_ON(ret) is never executed since the previous if statement makes
us jump into the 'out_put' label when ret is not zero. The existing
transaction abort inside the if statement also gives us a stack trace,
so we don't need to move the WARN_ON(ret) into the if statement either.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:27 +01:00
Filipe Manana
912db40655 btrfs: convert log messages to error level in btrfs_replay_log()
We are logging messages as warnings but they should really have an error
level instead, as if the respective conditions are met the mount will
fail. So convert them to error level and also log the error code returned
by read_tree_block().

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:27 +01:00
Filipe Manana
4db8d56c6f btrfs: remove btrfs_handle_fs_error() after failure to recover log trees
There is no need to call btrfs_handle_fs_error() (which we are trying to
deprecate) if we fail to recover log trees:

1) Such a failure results in failing the mount immediately;

2) If the recovery started a transaction before failing, it has already
   aborted the transaction down in the call chain.

So remove the btrfs_handle_fs_error() call, replace it with an error
message and assert that the FS is in error state (so that no partial
updates are committed due to a transaction that was not aborted).

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:26 +01:00
Filipe Manana
64def7d7d6 btrfs: remove redundant warning message in btrfs_check_uuid_tree()
If we fail to start the UUID rescan kthread, btrfs_check_uuid_tree() logs
an error message and returns the error to the single caller, open_ctree().

This however is redundant since the caller already logs an error message,
which is also more informative since it logs the error code. Some remove
the warning message from btrfs_check_uuid_tree() as it doesn't add any
value.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:26 +01:00
Filipe Manana
0649355303 btrfs: change warning messages to error level in open_ctree()
Failure to read the fs root results in a mount error, but we log a warning
message. Same goes for checking the UUID tree, an error results in a mount
failure but we log a warning message. Change the level of the logged
messages from warning to error.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:26 +01:00
Qu Wenruo
a4fe134fc1 btrfs: fix a double release on reserved extents in cow_one_range()
[BUG]
Commit c28214bde6 ("btrfs: refactor the main loop of
cow_file_range()") refactored the handling of COWing one range.

However it changed the error handling of the reserved extent.

The old cleanup looks like this:

out_drop_extent_cache:
	btrfs_drop_extent_map_range(inode, start, start + cur_alloc_size - 1, false);
out_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, true);
	[...]
	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
		     EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
	page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
	/*
	 * For the range (2). If we reserved an extent for our delalloc range
	 * (or a subrange) and failed to create the respective ordered extent,
	 * then it means that when we reserved the extent we decremented the
	 * extent's size from the data space_info's bytes_may_use counter and
	 * incremented the space_info's bytes_reserved counter by the same
	 * amount. We must make sure extent_clear_unlock_delalloc() does not try
	 * to decrement again the data space_info's bytes_may_use counter,
	 * therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV.
	 */
	if (cur_alloc_size) {
	        extent_clear_unlock_delalloc(inode, start,
	                                     start + cur_alloc_size - 1,
	                                     locked_folio, &cached, clear_bits,
	                                     page_ops);
	        btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
	}

Which only calls EXTENT_CLEAR_META_RESV.
As the reserved extent is properly handled by
btrfs_free_reserved_extent().

However the new cleanup is:

	extent_clear_unlock_delalloc(inode, file_offset, cur_end, locked_folio, cached,
				     EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW |
				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
				     PAGE_UNLOCK | PAGE_START_WRITEBACK |
				     PAGE_END_WRITEBACK);
	btrfs_qgroup_free_data(inode, NULL, file_offset, cur_len, NULL);
	btrfs_dec_block_group_reservations(fs_info, ins->objectid);
	btrfs_free_reserved_extent(fs_info, ins->objectid, ins->offset, true);

The flag EXTENT_DO_ACCOUNTING implies both EXTENT_CLEAR_META_RESV and
EXTENT_CLEAR_DATA_RESV, which will release the bytes_may_use, which
later btrfs_free_reserved_extent() will do again, causing incorrect
double release (and may underflow bytes_may_use).

[FIX]
Use EXTENT_CLEAR_META_RESV to replace EXTENT_DO_ACCOUNTING, and add back
the comments on why we only use EXTENT_CLEAR_META_RESV.

Fixes: c28214bde6 ("btrfs: refactor the main loop of cow_file_range()")
Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-btrfs/20260208184920.1102719-1-clm@meta.com/
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:26 +01:00
Jingkai Tan
2970525f78 btrfs: handle discard errors in in btrfs_finish_extent_commit()
Coverity (ID: 1226842) reported that the return value of
btrfs_discard_extent() is assigned to ret but is immediately
overwritten by unpin_extent_range() without being checked.

Use the same error handling that is done later in the same function.

Signed-off-by: Jingkai Tan <contact@jingk.ai>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2026-02-26 15:03:26 +01:00
David Howells
a0b4c7a491 netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence
Fix netfslib such that when it's making an unbuffered or DIO write, to make
sure that it sends each subrequest strictly sequentially, waiting till the
previous one is 'committed' before sending the next so that we don't have
pieces landing out of order and potentially leaving a hole if an error
occurs (ENOSPC for example).

This is done by copying in just those bits of issuing, collecting and
retrying subrequests that are necessary to do one subrequest at a time.
Retrying, in particular, is simpler because if the current subrequest needs
retrying, the source iterator can just be copied again and the subrequest
prepped and issued again without needing to be concerned about whether it
needs merging with the previous or next in the sequence.

Note that the issuing loop waits for a subrequest to complete right after
issuing it, but this wait could be moved elsewhere allowing preparatory
steps to be performed whilst the subrequest is in progress.  In particular,
once content encryption is available in netfslib, that could be done whilst
waiting, as could cleanup of buffers that have been completed.

Fixes: 153a9961b5 ("netfs: Implement unbuffered/DIO write support")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/58526.1772112753@warthog.procyon.org.uk
Tested-by: Steve French <sfrench@samba.org>
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-26 14:44:32 +01:00
Richard Fitzgerald
786ea2b694 ALSA: hda: cs35l56: Remove unnecessary struct cs_dsp_client_ops
Since commit af37511305 ("firmware: cs_dsp: Don't require client to
provide a struct cs_dsp_client_ops") the client doesn't have to provide
a struct cs_dsp_client_ops. So remove the dummy cs_dsp_client_ops.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20260226124115.1811187-1-rf@opensource.cirrus.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-26 14:05:24 +01:00
Francesco Lavra
36d9579fed drm/solomon: Fix page start when updating rectangle in page addressing mode
In page addressing mode, the pixel values of a dirty rectangle must be sent
to the display controller one page at a time. The range of pages
corresponding to a given rectangle is being incorrectly calculated as if
the Y value of the top left coordinate of the rectangle was 0. This can
result in rectangle updates being displayed on wrong parts of the screen.

Fix the above issue by consolidating the start page calculation in a single
place at the beginning of the update_rect function, and using the
calculated value for all addressing modes.

Fixes: b0daaa5cfa ("drm/ssd130x: Support page addressing mode")
Signed-off-by: Francesco Lavra <flavra@baylibre.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patch.msgid.link/20260210180932.736502-1-flavra@baylibre.com
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
2026-02-26 13:42:09 +01:00
Takashi Iwai
1a7ba00901 Merge tag 'asoc-fix-v7.0-rc1' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v7.0

One quirk and a fix for handling of exotic peripherals on cs42l43.
2026-02-26 13:17:44 +01:00
Vahagn Vardanian
baed0d9ba9 netfilter: nf_conntrack_h323: fix OOB read in decode_choice()
In decode_choice(), the boundary check before get_len() uses the
variable `len`, which is still 0 from its initialization at the top of
the function:

    unsigned int type, ext, len = 0;
    ...
    if (ext || (son->attr & OPEN)) {
        BYTE_ALIGN(bs);
        if (nf_h323_error_boundary(bs, len, 0))  /* len is 0 here */
            return H323_ERROR_BOUND;
        len = get_len(bs);                        /* OOB read */

When the bitstream is exactly consumed (bs->cur == bs->end), the check
nf_h323_error_boundary(bs, 0, 0) evaluates to (bs->cur + 0 > bs->end),
which is false.  The subsequent get_len() call then dereferences
*bs->cur++, reading 1 byte past the end of the buffer.  If that byte
has bit 7 set, get_len() reads a second byte as well.

This can be triggered remotely by sending a crafted Q.931 SETUP message
with a User-User Information Element containing exactly 2 bytes of
PER-encoded data ({0x08, 0x00}) to port 1720 through a firewall with
the nf_conntrack_h323 helper active.  The decoder fully consumes the
PER buffer before reaching this code path, resulting in a 1-2 byte
heap-buffer-overflow read confirmed by AddressSanitizer.

Fix this by checking for 2 bytes (the maximum that get_len() may read)
instead of the uninitialized `len`.  This matches the pattern used at
every other get_len() call site in the same file, where the caller
checks for 2 bytes of available data before calling get_len().

Fixes: ec8a8f3c31 ("netfilter: nf_ct_h323: Extend nf_h323_error_boundary to work on bits as well")
Signed-off-by: Vahagn Vardanian <vahagn@redrays.io>
Signed-off-by: Florian Westphal <fw@strlen.de>
Link: https://patch.msgid.link/20260225130619.1248-2-fw@strlen.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-26 12:50:42 +01:00
Richard Fitzgerald
003ce8c9b2 ALSA: hda: cs35l56: Fix signedness error in cs35l56_hda_posture_put()
In cs35l56_hda_posture_put() assign ucontrol->value.integer.value[0] to
a long instead of an unsigned long. ucontrol->value.integer.value[0] is
a long.

This fixes the sparse warning:

sound/hda/codecs/side-codecs/cs35l56_hda.c:256:20: warning: unsigned value
that used to be signed checked against zero?
sound/hda/codecs/side-codecs/cs35l56_hda.c:252:29: signed value source

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Fixes: 73cfbfa9ca ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier")
Link: https://patch.msgid.link/20260226111728.1700431-1-rf@opensource.cirrus.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-26 12:42:59 +01:00
Junrui Luo
8a5752c6dc dpaa2-switch: validate num_ifs to prevent out-of-bounds write
The driver obtains sw_attr.num_ifs from firmware via dpsw_get_attributes()
but never validates it against DPSW_MAX_IF (64). This value controls
iteration in dpaa2_switch_fdb_get_flood_cfg(), which writes port indices
into the fixed-size cfg->if_id[DPSW_MAX_IF] array. When firmware reports
num_ifs >= 64, the loop can write past the array bounds.

Add a bound check for num_ifs in dpaa2_switch_init().

dpaa2_switch_fdb_get_flood_cfg() appends the control interface (port
num_ifs) after all matched ports. When num_ifs == DPSW_MAX_IF and all
ports match the flood filter, the loop fills all 64 slots and the control
interface write overflows by one entry.

The check uses >= because num_ifs == DPSW_MAX_IF is also functionally
broken.

build_if_id_bitmap() silently drops any ID >= 64:
      if (id[i] < DPSW_MAX_IF)
          bmap[id[i] / 64] |= ...

Fixes: 539dda3c5d ("staging: dpaa2-switch: properly setup switching domains")
Signed-off-by: Junrui Luo <moonafterrain@outlook.com>
Reviewed-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Link: https://patch.msgid.link/SYBPR01MB78812B47B7F0470B617C408AAF74A@SYBPR01MB7881.ausprd01.prod.outlook.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-26 12:37:21 +01:00
Jakub Kicinski
7aa767d0d3 net: consume xmit errors of GSO frames
udpgro_frglist.sh and udpgro_bench.sh are the flakiest tests
currently in NIPA. They fail in the same exact way, TCP GRO
test stalls occasionally and the test gets killed after 10min.

These tests use veth to simulate GRO. They attach a trivial
("return XDP_PASS;") XDP program to the veth to force TSO off
and NAPI on.

Digging into the failure mode we can see that the connection
is completely stuck after a burst of drops. The sender's snd_nxt
is at sequence number N [1], but the receiver claims to have
received (rcv_nxt) up to N + 3 * MSS [2]. Last piece of the puzzle
is that senders rtx queue is not empty (let's say the block in
the rtx queue is at sequence number N - 4 * MSS [3]).

In this state, sender sends a retransmission from the rtx queue
with a single segment, and sequence numbers N-4*MSS:N-3*MSS [3].
Receiver sees it and responds with an ACK all the way up to
N + 3 * MSS [2]. But sender will reject this ack as TCP_ACK_UNSENT_DATA
because it has no recollection of ever sending data that far out [1].
And we are stuck.

The root cause is the mess of the xmit return codes. veth returns
an error when it can't xmit a frame. We end up with a loss event
like this:

  -------------------------------------------------
  |   GSO super frame 1   |   GSO super frame 2   |
  |-----------------------------------------------|
  | seg | seg | seg | seg | seg | seg | seg | seg |
  |  1  |  2  |  3  |  4  |  5  |  6  |  7  |  8  |
  -------------------------------------------------
     x    ok    ok    <ok>|  ok    ok    ok   <x>
                          \\
			   snd_nxt

"x" means packet lost by veth, and "ok" means it went thru.
Since veth has TSO disabled in this test it sees individual segments.
Segment 1 is on the retransmit queue and will be resent.

So why did the sender not advance snd_nxt even tho it clearly did
send up to seg 8? tcp_write_xmit() interprets the return code
from the core to mean that data has not been sent at all. Since
TCP deals with GSO super frames, not individual segment the crux
of the problem is that loss of a single segment can be interpreted
as loss of all. TCP only sees the last return code for the last
segment of the GSO frame (in <> brackets in the diagram above).

Of course for the problem to occur we need a setup or a device
without a Qdisc. Otherwise Qdisc layer disconnects the protocol
layer from the device errors completely.

We have multiple ways to fix this.

 1) make veth not return an error when it lost a packet.
    While this is what I think we did in the past, the issue keeps
    reappearing and it's annoying to debug. The game of whack
    a mole is not great.

 2) fix the damn return codes
    We only talk about NETDEV_TX_OK and NETDEV_TX_BUSY in the
    documentation, so maybe we should make the return code from
    ndo_start_xmit() a boolean. I like that the most, but perhaps
    some ancient, not-really-networking protocol would suffer.

 3) make TCP ignore the errors
    It is not entirely clear to me what benefit TCP gets from
    interpreting the result of ip_queue_xmit()? Specifically once
    the connection is established and we're pushing data - packet
    loss is just packet loss?

 4) this fix
    Ignore the rc in the Qdisc-less+GSO case, since it's unreliable.
    We already always return OK in the TCQ_F_CAN_BYPASS case.
    In the Qdisc-less case let's be a bit more conservative and only
    mask the GSO errors. This path is taken by non-IP-"networks"
    like CAN, MCTP etc, so we could regress some ancient thing.
    This is the simplest, but also maybe the hackiest fix?

Similar fix has been proposed by Eric in the past but never committed
because original reporter was working with an OOT driver and wasn't
providing feedback (see Link).

Link: https://lore.kernel.org/CANn89iJcLepEin7EtBETrZ36bjoD9LrR=k4cfwWh046GB+4f9A@mail.gmail.com
Fixes: 1f59533f9c ("qdisc: validate frames going through the direct_xmit path")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260223235100.108939-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-26 11:35:00 +01:00
Paolo Abeni
f0a2f2aadb Merge branch 'vsock-add-write-once-semantics-to-child_ns_mode'
Bobby Eshleman says:

====================
vsock: add write-once semantics to child_ns_mode

Two administrator processes may race when setting child_ns_mode: one
sets it to "local" and creates a namespace, but another changes it to
"global" in between. The first process ends up with a namespace in the
wrong mode. Make child_ns_mode write-once so that a namespace manager
can set it once, check the value, and be guaranteed it won't change
before creating its namespaces. Writing a different value after the
first write returns -EBUSY.

One patch for the implementation, one for docs, and one for tests.

v2: https://lore.kernel.org/r/20260218-vsock-ns-write-once-v2-0-19e4c50d509a@meta.com
v1: https://lore.kernel.org/r/20260217-vsock-ns-write-once-v1-1-a1fb30f289a9@meta.com
====================

Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-0-c0cde6959923@meta.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-26 11:10:06 +01:00
Bobby Eshleman
b6302e057f vsock: document write-once behavior of the child_ns_mode sysctl
Update the vsock child_ns_mode documentation to include the new
write-once semantics of setting child_ns_mode. The semantics are
implemented in a preceding patch in this series.

Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-3-c0cde6959923@meta.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-26 11:10:03 +01:00
Bobby Eshleman
102eab95f0 vsock: lock down child_ns_mode as write-once
Two administrator processes may race when setting child_ns_mode as one
process sets child_ns_mode to "local" and then creates a namespace, but
another process changes child_ns_mode to "global" between the write and
the namespace creation. The first process ends up with a namespace in
"global" mode instead of "local". While this can be detected after the
fact by reading ns_mode and retrying, it is fragile and error-prone.

Make child_ns_mode write-once so that a namespace manager can set it
once and be sure it won't change. Writing a different value after the
first write returns -EBUSY. This applies to all namespaces, including
init_net, where an init process can write "local" to lock all future
namespaces into local mode.

Fixes: eafb64f40c ("vsock: add netns to vsock core")
Suggested-by: Daan De Meyer <daan.j.demeyer@gmail.com>
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
Co-developed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-2-c0cde6959923@meta.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-26 11:10:03 +01:00
Bobby Eshleman
a382a34276 selftests/vsock: change tests to respect write-once child ns mode
The child_ns_mode sysctl parameter becomes write-once in a future patch
in this series, which breaks existing tests. This patch updates the
tests to respect this new policy. No additional tests are added.

Add "global-parent" and "local-parent" namespaces as intermediaries to
spawn namespaces in the given modes. This avoids the need to change
"child_ns_mode" in the init_ns. nsenter must be used because ip netns
unshares the mount namespace so nested "ip netns add" breaks exec calls
from the init ns. Adds nsenter to the deps check.

Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-1-c0cde6959923@meta.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-26 11:10:03 +01:00
Leon Romanovsky
7c2889af82 RDMA/uverbs: Import DMA-BUF module in uverbs_std_types_dmabuf file
Fix the following compilation error:

ERROR: modpost: module ib_uverbs uses symbol dma_buf_move_notify
	from namespace DMA_BUF, but does not import it.

Fixes: 0ac6f4056c ("RDMA/uverbs: Add DMABUF object type and operations")
Link: https://patch.msgid.link/20260225-fix-uverbs-compilation-v1-1-acf7b3d0f9fa@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
2026-02-26 04:58:24 -05:00
Christian Brauner
28aaa9c399 kthread: consolidate kthread exit paths to prevent use-after-free
Guillaume reported crashes via corrupted RCU callback function pointers
during KUnit testing. The crash was traced back to the pidfs rhashtable
conversion which replaced the 24-byte rb_node with an 8-byte rhash_head
in struct pid, shrinking it from 160 to 144 bytes.

struct kthread (without CONFIG_BLK_CGROUP) is also 144 bytes. With
CONFIG_SLAB_MERGE_DEFAULT and SLAB_HWCACHE_ALIGN both round up to
192 bytes and share the same slab cache. struct pid.rcu.func and
struct kthread.affinity_node both sit at offset 0x78.

When a kthread exits via make_task_dead() it bypasses kthread_exit() and
misses the affinity_node cleanup. free_kthread_struct() frees the memory
while the node is still linked into the global kthread_affinity_list. A
subsequent list_del() by another kthread writes through dangling list
pointers into the freed and reused memory, corrupting the pid's
rcu.func pointer.

Instead of patching free_kthread_struct() to handle the missed cleanup,
consolidate all kthread exit paths. Turn kthread_exit() into a macro
that calls do_exit() and add kthread_do_exit() which is called from
do_exit() for any task with PF_KTHREAD set. This guarantees that
kthread-specific cleanup always happens regardless of the exit path -
make_task_dead(), direct do_exit(), or kthread_exit().

Replace __to_kthread() with a new tsk_is_kthread() accessor in the
public header. Export do_exit() since module code using the
kthread_exit() macro now needs it directly.

Reported-by: Guillaume Tucker <gtucker@gtucker.io>
Tested-by: Guillaume Tucker <gtucker@gtucker.io>
Tested-by: Mark Brown <broonie@kernel.org>
Tested-by: David Gow <davidgow@google.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/all/20260224-mittlerweile-besessen-2738831ae7f6@brauner
Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 4d13f4304f ("kthread: Implement preferred affinity")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-26 10:45:49 +01:00
Darrick J. Wong
cd3c877d04 iomap: don't report direct-io retries to fserror
iomap's directio implementation has two magic errno codes that it uses
to signal callers -- ENOTBLK tells the filesystem that it should retry
a write with the pagecache; and EAGAIN tells the caller that pagecache
flushing or invalidation failed and that it should try again.

Neither of these indicate data loss, so let's not report them.

Fixes: a9d573ee88 ("iomap: report file I/O errors to the VFS")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://patch.msgid.link/20260224154637.GD2390381@frogsfrogsfrogs
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-26 09:23:22 +01:00
Jun Seo
54f9d645a5 ALSA: usb-audio: Use correct version for UAC3 header validation
The entry of the validators table for UAC3 AC header descriptor is
defined with the wrong protocol version UAC_VERSION_2, while it should
have been UAC_VERSION_3.  This results in the validator never matching
for actual UAC3 devices (protocol == UAC_VERSION_3), causing their
header descriptors to bypass validation entirely.  A malicious USB
device presenting a truncated UAC3 header could exploit this to cause
out-of-bounds reads when the driver later accesses unvalidated
descriptor fields.

The bug was introduced in the same commit as the recently fixed UAC3
feature unit sub-type typo, and appears to be from the same copy-paste
error when the UAC3 section was created from the UAC2 section.

Fixes: 57f8770620 ("ALSA: usb-audio: More validations of descriptor units")
Cc: <stable@vger.kernel.org>
Signed-off-by: Jun Seo <jun.seo.93@proton.me>
Link: https://patch.msgid.link/20260226010820.36529-1-jun.seo.93@proton.me
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-26 07:37:29 +01:00
Zhang Heng
aa4876fe2d ALSA: hda/realtek: add quirk for Acer Nitro ANV15-51
fix mute/micmute LEDs and headset microphone for Acer Nitro ANV15-51.

[ The headset microphone issue is solved by Kailang]

Link: https://bugzilla.kernel.org/show_bug.cgi?id=220279
Cc: stable@vger.kernel.org
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20260209134149.3076957-1-zhangheng@kylinos.cn
2026-02-26 07:33:32 +01:00
Jakub Kicinski
97f87e5788 Merge branch 'mlx5-misc-fixes-2026-02-24'
Tariq Toukan says:

====================
mlx5 misc fixes 2026-02-24

This patchset provides misc bug fixes from the team to the mlx5
core and Eth drivers.
====================

Link: https://patch.msgid.link/20260224114652.1787431-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 20:01:53 -08:00
Jianbo Liu
859380694f net/mlx5e: Fix "scheduling while atomic" in IPsec MAC address query
Fix a "scheduling while atomic" bug in mlx5e_ipsec_init_macs() by
replacing mlx5_query_mac_address() with ether_addr_copy() to get the
local MAC address directly from netdev->dev_addr.

The issue occurs because mlx5_query_mac_address() queries the hardware
which involves mlx5_cmd_exec() that can sleep, but it is called from
the mlx5e_ipsec_handle_event workqueue which runs in atomic context.

The MAC address is already available in netdev->dev_addr, so no need
to query hardware. This avoids the sleeping call and resolves the bug.

Call trace:
  BUG: scheduling while atomic: kworker/u112:2/69344/0x00000200
  __schedule+0x7ab/0xa20
  schedule+0x1c/0xb0
  schedule_timeout+0x6e/0xf0
  __wait_for_common+0x91/0x1b0
  cmd_exec+0xa85/0xff0 [mlx5_core]
  mlx5_cmd_exec+0x1f/0x50 [mlx5_core]
  mlx5_query_nic_vport_mac_address+0x7b/0xd0 [mlx5_core]
  mlx5_query_mac_address+0x19/0x30 [mlx5_core]
  mlx5e_ipsec_init_macs+0xc1/0x720 [mlx5_core]
  mlx5e_ipsec_build_accel_xfrm_attrs+0x422/0x670 [mlx5_core]
  mlx5e_ipsec_handle_event+0x2b9/0x460 [mlx5_core]
  process_one_work+0x178/0x2e0
  worker_thread+0x2ea/0x430

Fixes: cee137a634 ("net/mlx5e: Handle ESN update events")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260224114652.1787431-6-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 20:01:44 -08:00
Shay Drory
60253042c0 net/mlx5: Fix missing devlink lock in SRIOV enable error path
The cited commit miss to add locking in the error path of
mlx5_sriov_enable(). When pci_enable_sriov() fails,
mlx5_device_disable_sriov() is called to clean up. This cleanup function
now expects to be called with the devlink instance lock held.

Add the missing devl_lock(devlink) and devl_unlock(devlink)

Fixes: 84a433a40d ("net/mlx5: Lock mlx5 devlink reload callbacks")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260224114652.1787431-5-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 20:01:44 -08:00
Shay Drory
d7073e8b97 net/mlx5: E-switch, Clear legacy flag when moving to switchdev
The cited commit introduced MLX5_PRIV_FLAGS_SWITCH_LEGACY to identify
when a transition to legacy mode is requested via devlink.  However, the
logic failed to clear this flag if the mode was subsequently changed
back to MLX5_ESWITCH_OFFLOADS (switchdev).  Consequently, if a user
toggled from legacy to switchdev, the flag remained set, leaving the
driver with wrong state indicating

Fix this by explicitly clearing the MLX5_PRIV_FLAGS_SWITCH_LEGACY bit
when the requested mode is MLX5_ESWITCH_OFFLOADS.

Fixes: 2a4f56fbcc ("net/mlx5e: Keep netdev when leave switchdev for devlink set legacy only")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260224114652.1787431-4-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 20:01:44 -08:00
Shay Drory
bd7b9f83fb net/mlx5: LAG, disable MPESW in lag_disable_change()
mlx5_lag_disable_change() unconditionally called mlx5_disable_lag() when
LAG was active, which is incorrect for MLX5_LAG_MODE_MPESW.
Hnece, call mlx5_disable_mpesw() when running in MPESW mode.

Fixes: a32327a3a0 ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260224114652.1787431-3-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 20:01:44 -08:00
Shay Drory
2700b7e603 net/mlx5: DR, Fix circular locking dependency in dump
Fix a circular locking dependency between dbg_mutex and the domain
rx/tx mutexes that could lead to a deadlock.

The dump path in dr_dump_domain_all() was acquiring locks in the order:
  dbg_mutex -> rx.mutex -> tx.mutex

While the table/matcher creation paths acquire locks in the order:
  rx.mutex -> tx.mutex -> dbg_mutex

This inverted lock ordering creates a circular dependency. Fix this by
changing dr_dump_domain_all() to acquire the domain lock before
dbg_mutex, matching the order used in mlx5dr_table_create() and
mlx5dr_matcher_create().

Lockdep splat:
 ======================================================
 WARNING: possible circular locking dependency detected
 6.19.0-rc6net_next_e817c4e #1 Not tainted
 ------------------------------------------------------
 sos/30721 is trying to acquire lock:
 ffff888102df5900 (&dmn->info.rx.mutex){+.+.}-{4:4}, at:
dr_dump_start+0x131/0x450 [mlx5_core]

 but task is already holding lock:
 ffff888102df5bc0 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}, at:
dr_dump_start+0x10b/0x450 [mlx5_core]

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #2 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}:
        __mutex_lock+0x91/0x1060
        mlx5dr_matcher_create+0x377/0x5e0 [mlx5_core]
        mlx5_cmd_dr_create_flow_group+0x62/0xd0 [mlx5_core]
        mlx5_create_flow_group+0x113/0x1c0 [mlx5_core]
        mlx5_chains_create_prio+0x453/0x2290 [mlx5_core]
        mlx5_chains_get_table+0x2e2/0x980 [mlx5_core]
        esw_chains_create+0x1e6/0x3b0 [mlx5_core]
        esw_create_offloads_fdb_tables.cold+0x62/0x63f [mlx5_core]
        esw_offloads_enable+0x76f/0xd20 [mlx5_core]
        mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core]
        mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core]
        devlink_nl_eswitch_set_doit+0x67/0xe0
        genl_family_rcv_msg_doit+0xe0/0x130
        genl_rcv_msg+0x188/0x290
        netlink_rcv_skb+0x4b/0xf0
        genl_rcv+0x24/0x40
        netlink_unicast+0x1ed/0x2c0
        netlink_sendmsg+0x210/0x450
        __sock_sendmsg+0x38/0x60
        __sys_sendto+0x119/0x180
        __x64_sys_sendto+0x20/0x30
        do_syscall_64+0x70/0xd00
        entry_SYSCALL_64_after_hwframe+0x4b/0x53

 -> #1 (&dmn->info.tx.mutex){+.+.}-{4:4}:
        __mutex_lock+0x91/0x1060
        mlx5dr_table_create+0x11d/0x530 [mlx5_core]
        mlx5_cmd_dr_create_flow_table+0x62/0x140 [mlx5_core]
        __mlx5_create_flow_table+0x46f/0x960 [mlx5_core]
        mlx5_create_flow_table+0x16/0x20 [mlx5_core]
        esw_create_offloads_fdb_tables+0x136/0x240 [mlx5_core]
        esw_offloads_enable+0x76f/0xd20 [mlx5_core]
        mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core]
        mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core]
        devlink_nl_eswitch_set_doit+0x67/0xe0
        genl_family_rcv_msg_doit+0xe0/0x130
        genl_rcv_msg+0x188/0x290
        netlink_rcv_skb+0x4b/0xf0
        genl_rcv+0x24/0x40
        netlink_unicast+0x1ed/0x2c0
        netlink_sendmsg+0x210/0x450
        __sock_sendmsg+0x38/0x60
        __sys_sendto+0x119/0x180
        __x64_sys_sendto+0x20/0x30
        do_syscall_64+0x70/0xd00
        entry_SYSCALL_64_after_hwframe+0x4b/0x53

 -> #0 (&dmn->info.rx.mutex){+.+.}-{4:4}:
        __lock_acquire+0x18b6/0x2eb0
        lock_acquire+0xd3/0x2c0
        __mutex_lock+0x91/0x1060
        dr_dump_start+0x131/0x450 [mlx5_core]
        seq_read_iter+0xe3/0x410
        seq_read+0xfb/0x130
        full_proxy_read+0x53/0x80
        vfs_read+0xba/0x330
        ksys_read+0x65/0xe0
        do_syscall_64+0x70/0xd00
        entry_SYSCALL_64_after_hwframe+0x4b/0x53

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&dmn->dump_info.dbg_mutex);
                                lock(&dmn->info.tx.mutex);
                                lock(&dmn->dump_info.dbg_mutex);
   lock(&dmn->info.rx.mutex);

                   *** DEADLOCK ***

Fixes: 9222f0b27d ("net/mlx5: DR, Add support for dumping steering info")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Reviewed-by: Alex Vesker <valex@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260224114652.1787431-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 20:01:43 -08:00
Jakub Kicinski
6668c6f2dd Merge tag 'wireless-2026-02-25' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:

====================
A good number of fixes:
 - cfg80211:
   - cancel rfkill work appropriately
   - fix radiotap parsing to correctly reject field 18
   - fix wext (yes...) off-by-one for IGTK key ID
 - mac80211:
   - fix for mesh NULL pointer dereference
   - fix for stack out-of-bounds (2 bytes) write on
     specific multi-link action frames
   - set default WMM parameters for all links
 - mwifiex: check dev_alloc_name() return value correctly
 - libertas: fix potential timer use-after-free
 - brcmfmac: fix crash on probe failure

* tag 'wireless-2026-02-25' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
  wifi: mac80211: fix NULL pointer dereference in mesh_rx_csa_frame()
  wifi: mac80211: bounds-check link_id in ieee80211_ml_reconfiguration
  wifi: mac80211: set default WMM parameters on all links
  wifi: libertas: fix use-after-free in lbs_free_adapter()
  wifi: mwifiex: Fix dev_alloc_name() return value check
  wifi: brcmfmac: Fix potential kernel oops when probe fails
  wifi: radiotap: reject radiotap with unknown bits
  wifi: cfg80211: cancel rfkill_block work in wiphy_unregister()
  wifi: cfg80211: wext: fix IGTK key ID off-by-one
====================

Link: https://patch.msgid.link/20260225113159.360574-3-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:54:28 -08:00
Jakub Kicinski
77da71283c Merge branch 'team-fix-reference-count-leak-when-changing-port-netns'
Ido Schimmel says:

====================
team: Fix reference count leak when changing port netns

Patch #1 fixes a reference count leak that was reported by syzkaller.
The leak happens when a net device that is member in a team is changing
netns. The fix is to align the team driver with the bond driver and have
it suppress NETDEV_CHANGEMTU events for a net device that is being
unregistered.

Without this change, the NETDEV_CHANGEMTU event causes inetdev_event()
to recreate an inet device for this net device in its original netns,
after it was previously destroyed upon NETDEV_UNREGISTER. Later on, when
inetdev_event() receives a NETDEV_REGISTER event for this net device in
the new nents, it simply leaks the reference:

case NETDEV_REGISTER:
        pr_debug("%s: bug\n", __func__);
        RCU_INIT_POINTER(dev->ip_ptr, NULL);
        break;

addrconf_notify() handles this differently and reuses the existing inet6
device if one exists when a NETDEV_REGISTER event is received. This
creates a different problem where it is possible for a net device to
reference an inet6 device that was created in a previous netns.

A more generic fix that we can try in net-next is to revert the changes
in the bond and team drivers and instead have IPv4 and IPv6 destroy and
recreate an inet device if one already exists upon NETDEV_REGISTER.

Patch #2 adds a selftest that passes with the fix and hangs without it.
====================

Link: https://patch.msgid.link/20260224125709.317574-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:17:12 -08:00
Ido Schimmel
58f8ef625e selftests: team: Add a reference count leak test
Add a test for the issue that was fixed in "team: avoid NETDEV_CHANGEMTU
event when unregistering slave".

The test hangs due to a reference count leak without the fix:

 # make -C tools/testing/selftests TARGETS="drivers/net/team" TEST_PROGS=refleak.sh TEST_GEN_PROGS="" run_tests
 [...]
 TAP version 13
 1..1
 # timeout set to 45
 # selftests: drivers/net/team: refleak.sh
 [   50.681299][  T496] unregister_netdevice: waiting for dummy1 to become free. Usage count = 3
 [   71.185325][  T496] unregister_netdevice: waiting for dummy1 to become free. Usage count = 3

And passes with the fix:

 # make -C tools/testing/selftests TARGETS="drivers/net/team" TEST_PROGS=refleak.sh TEST_GEN_PROGS="" run_tests
 [...]
 TAP version 13
 1..1
 # timeout set to 45
 # selftests: drivers/net/team: refleak.sh
 ok 1 selftests: drivers/net/team: refleak.sh

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260224125709.317574-3-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:17:05 -08:00
Tetsuo Handa
bb4c698633 team: avoid NETDEV_CHANGEMTU event when unregistering slave
syzbot is reporting

  unregister_netdevice: waiting for netdevsim0 to become free. Usage count = 3
  ref_tracker: netdev@ffff88807dcf8618 has 1/2 users at
       __netdev_tracker_alloc include/linux/netdevice.h:4400 [inline]
       netdev_hold include/linux/netdevice.h:4429 [inline]
       inetdev_init+0x201/0x4e0 net/ipv4/devinet.c:286
       inetdev_event+0x251/0x1610 net/ipv4/devinet.c:1600
       notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85
       call_netdevice_notifiers_mtu net/core/dev.c:2318 [inline]
       netif_set_mtu_ext+0x5aa/0x800 net/core/dev.c:9886
       netif_set_mtu+0xd7/0x1b0 net/core/dev.c:9907
       dev_set_mtu+0x126/0x260 net/core/dev_api.c:248
       team_port_del+0xb07/0xcb0 drivers/net/team/team_core.c:1333
       team_del_slave drivers/net/team/team_core.c:1936 [inline]
       team_device_event+0x207/0x5b0 drivers/net/team/team_core.c:2929
       notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85
       call_netdevice_notifiers_extack net/core/dev.c:2281 [inline]
       call_netdevice_notifiers net/core/dev.c:2295 [inline]
       __dev_change_net_namespace+0xcb7/0x2050 net/core/dev.c:12592
       do_setlink+0x2ce/0x4590 net/core/rtnetlink.c:3060
       rtnl_changelink net/core/rtnetlink.c:3776 [inline]
       __rtnl_newlink net/core/rtnetlink.c:3935 [inline]
       rtnl_newlink+0x15a9/0x1be0 net/core/rtnetlink.c:4072
       rtnetlink_rcv_msg+0x7d5/0xbe0 net/core/rtnetlink.c:6958
       netlink_rcv_skb+0x232/0x4b0 net/netlink/af_netlink.c:2550
       netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline]
       netlink_unicast+0x80f/0x9b0 net/netlink/af_netlink.c:1344
       netlink_sendmsg+0x813/0xb40 net/netlink/af_netlink.c:1894

problem. Ido Schimmel found steps to reproduce

  ip link add name team1 type team
  ip link add name dummy1 mtu 1499 master team1 type dummy
  ip netns add ns1
  ip link set dev dummy1 netns ns1
  ip -n ns1 link del dev dummy1

and also found that the same issue was fixed in the bond driver in
commit f51048c3e0 ("bonding: avoid NETDEV_CHANGEMTU event when
unregistering slave").

Let's do similar thing for the team driver, with commit ad7c7b2172 ("net:
hold netdev instance lock during sysfs operations") and commit 303a8487a6
("net: s/__dev_set_mtu/__netif_set_mtu/") also applied.

Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84
Suggested-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Fixes: 3d249d4ca7 ("net: introduce ethernet teaming device")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260224125709.317574-2-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:17:05 -08:00
Dipayaan Roy
f975a09552 net: mana: Fix double destroy_workqueue on service rescan PCI path
While testing corner cases in the driver, a use-after-free crash
was found on the service rescan PCI path.

When mana_serv_reset() calls mana_gd_suspend(), mana_gd_cleanup()
destroys gc->service_wq. If the subsequent mana_gd_resume() fails
with -ETIMEDOUT or -EPROTO, the code falls through to
mana_serv_rescan() which triggers pci_stop_and_remove_bus_device().
This invokes the PCI .remove callback (mana_gd_remove), which calls
mana_gd_cleanup() a second time, attempting to destroy the already-
freed workqueue. Fix this by NULL-checking gc->service_wq in
mana_gd_cleanup() and setting it to NULL after destruction.

Call stack of issue for reference:
[Sat Feb 21 18:53:48 2026] Call Trace:
[Sat Feb 21 18:53:48 2026]  <TASK>
[Sat Feb 21 18:53:48 2026]  mana_gd_cleanup+0x33/0x70 [mana]
[Sat Feb 21 18:53:48 2026]  mana_gd_remove+0x3a/0xc0 [mana]
[Sat Feb 21 18:53:48 2026]  pci_device_remove+0x41/0xb0
[Sat Feb 21 18:53:48 2026]  device_remove+0x46/0x70
[Sat Feb 21 18:53:48 2026]  device_release_driver_internal+0x1e3/0x250
[Sat Feb 21 18:53:48 2026]  device_release_driver+0x12/0x20
[Sat Feb 21 18:53:48 2026]  pci_stop_bus_device+0x6a/0x90
[Sat Feb 21 18:53:48 2026]  pci_stop_and_remove_bus_device+0x13/0x30
[Sat Feb 21 18:53:48 2026]  mana_do_service+0x180/0x290 [mana]
[Sat Feb 21 18:53:48 2026]  mana_serv_func+0x24/0x50 [mana]
[Sat Feb 21 18:53:48 2026]  process_one_work+0x190/0x3d0
[Sat Feb 21 18:53:48 2026]  worker_thread+0x16e/0x2e0
[Sat Feb 21 18:53:48 2026]  kthread+0xf7/0x130
[Sat Feb 21 18:53:48 2026]  ? __pfx_worker_thread+0x10/0x10
[Sat Feb 21 18:53:48 2026]  ? __pfx_kthread+0x10/0x10
[Sat Feb 21 18:53:48 2026]  ret_from_fork+0x269/0x350
[Sat Feb 21 18:53:48 2026]  ? __pfx_kthread+0x10/0x10
[Sat Feb 21 18:53:48 2026]  ret_from_fork_asm+0x1a/0x30
[Sat Feb 21 18:53:48 2026]  </TASK>

Fixes: 505cc26bca ("net: mana: Add support for auxiliary device servicing events")
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/aZ2bzL64NagfyHpg@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:16:09 -08:00
Mohd Ayaan Anwar
916864e5ed MAINTAINERS: Update maintainer entry for QUALCOMM ETHQOS ETHERNET DRIVER
Replace Vinod Koul with Mohd Ayaan Anwar as the maintainer of the
QUALCOMM ETHQOS ETHERNET DRIVER. Vinod confirmed he is no longer
active in this area and agreed to be removed.

Acked-by: Vinod Koul <vkoul@kernel.org>
Suggested-by: Russell King (Oracle) <linux@armlinux.org.uk>
Signed-off-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/20260224-qcom_ethqos_maintainer-v1-1-24e02701ea52@oss.qualcomm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:15:06 -08:00
Felix Gu
676c7af91f dpll: zl3073x: Remove redundant cleanup in devm_dpll_init()
The devm_add_action_or_reset() function already executes the cleanup
action on failure before returning an error, so the explicit goto error
and subsequent zl3073x_dev_dpll_fini() call causes double cleanup.

Fixes: ebb1031c51 ("dpll: zl3073x: Refactor DPLL initialization")
Reviewed-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260224-dpll-v2-1-d7786414a830@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:13:20 -08:00
Jakub Kicinski
7c9db1a1cd Merge branch 'tcp-re-enable-acceptance-of-fin-packets-when-rwin-is-0'
Simon Baatz says:

====================
tcp: re-enable acceptance of FIN packets when RWIN is 0

this series restores the ability to accept in‑sequence FIN packets
even when the advertised receive window is zero, and adds a
packetdrill test to guard the behavior.
====================

Link: https://patch.msgid.link/20260224-fix_zero_wnd_fin-v2-0-a16677ea7cea@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:07:07 -08:00
Simon Baatz
7451133230 selftests/net: packetdrill: Verify acceptance of FIN packets when RWIN is 0
Add a packetdrill test that verifies we accept bare FIN packets when
the advertised receive window is zero.

Signed-off-by: Simon Baatz <gmbnomis@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260224-fix_zero_wnd_fin-v2-2-a16677ea7cea@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:07:02 -08:00
Simon Baatz
1e3bb184e9 tcp: re-enable acceptance of FIN packets when RWIN is 0
Commit 2bd99aef1b ("tcp: accept bare FIN packets under memory
pressure") allowed accepting FIN packets in tcp_data_queue() even when
the receive window was closed, to prevent ACK/FIN loops with broken
clients.

Such a FIN packet is in sequence, but because the FIN consumes a
sequence number, it extends beyond the window. Before commit
9ca48d616e ("tcp: do not accept packets beyond window"),
tcp_sequence() only required the seq to be within the window. After
that change, the entire packet (including the FIN) must fit within the
window. As a result, such FIN packets are now dropped and the handling
path is no longer reached.

Be more lenient by not counting the sequence number consumed by the
FIN when calling tcp_sequence(), restoring the previous behavior for
cases where only the FIN extends beyond the window.

Fixes: 9ca48d616e ("tcp: do not accept packets beyond window")
Signed-off-by: Simon Baatz <gmbnomis@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260224-fix_zero_wnd_fin-v2-1-a16677ea7cea@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 19:07:02 -08:00
Greg Kroah-Hartman
5cc619583c vsock: Use container_of() to get net namespace in sysctl handlers
current->nsproxy is should not be accessed directly as syzbot has found
that it could be NULL at times, causing crashes.  Fix up the af_vsock
sysctl handlers to use container_of() to deal with the current net
namespace instead of attempting to rely on current.

This is the same type of change done in commit 7f5611cbc4 ("rds:
sysctl: rds_tcp_{rcv,snd}buf: avoid using current->nsproxy")

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Bobby Eshleman <bobbyeshleman@meta.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Fixes: eafb64f40c ("vsock: add netns to vsock core")
Link: https://patch.msgid.link/2026022318-rearview-gallery-ae13@gregkh
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 18:59:18 -08:00
Greg Kroah-Hartman
4b063c002c net: usb: kaweth: validate USB endpoints
The kaweth driver should validate that the device it is probing has the
proper number and types of USB endpoints it is expecting before it binds
to it.  If a malicious device were to not have the same urbs the driver
will crash later on when it blindly accesses these endpoints.

Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Link: https://patch.msgid.link/2026022305-substance-virtual-c728@gregkh
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 18:58:05 -08:00
Greg Kroah-Hartman
c58b6c29a4 net: usb: kalmia: validate USB endpoints
The kalmia driver should validate that the device it is probing has the
proper number and types of USB endpoints it is expecting before it binds
to it.  If a malicious device were to not have the same urbs the driver
will crash later on when it blindly accesses these endpoints.

Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Fixes: d40261236e ("net/usb: Add Samsung Kalmia driver for Samsung GT-B3730")
Link: https://patch.msgid.link/2026022326-shack-headstone-ef6f@gregkh
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 18:56:43 -08:00
Greg Kroah-Hartman
11de1d3ae5 net: usb: pegasus: validate USB endpoints
The pegasus driver should validate that the device it is probing has the
proper number and types of USB endpoints it is expecting before it binds
to it.  If a malicious device were to not have the same urbs the driver
will crash later on when it blindly accesses these endpoints.

Cc: Petko Manolov <petkan@nucleusys.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2026022347-legibly-attest-cc5c@gregkh
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 18:52:49 -08:00
Greg Kroah-Hartman
12133a483d nfc: pn533: properly drop the usb interface reference on disconnect
When the device is disconnected from the driver, there is a "dangling"
reference count on the usb interface that was grabbed in the probe
callback.  Fix this up by properly dropping the reference after we are
done with it.

Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Fixes: c46ee38620 ("NFC: pn533: add NXP pn533 nfc device driver")
Link: https://patch.msgid.link/2026022329-flashing-ought-7573@gregkh
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-25 18:51:37 -08:00
Linus Torvalds
f4d0ec0aa2 Merge tag 'erofs-for-7.0-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs fixes from Gao Xiang:

 - Do not share the page cache if the real @aops differs

 - Fix the incomplete condition for interlaced plain extents

 - Get rid of more unnecessary #ifdefs

* tag 'erofs-for-7.0-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: fix interlaced plain identification for encoded extents
  erofs: remove more unnecessary #ifdefs
  erofs: allow sharing page cache with the same aops only
2026-02-25 16:39:25 -08:00
David Carlier
2a064262eb sched_ext: Fix out-of-bounds access in scx_idle_init_masks()
scx_idle_node_masks is allocated with num_possible_nodes() elements but
indexed by NUMA node IDs via for_each_node(). On systems with
non-contiguous NUMA node numbering (e.g. nodes 0 and 4), node IDs can
exceed the array size, causing out-of-bounds memory corruption.

Use nr_node_ids instead, which represents the maximum node ID range and
is the correct size for arrays indexed by node ID.

Fixes: 7c60329e3521 ("sched_ext: Add NUMA-awareness to the default idle selection policy")
Signed-off-by: David Carlier <devnexen@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-25 13:12:28 -10:00
Mario Limonciello
6b0d812971 drm/amd: Disable MES LR compute W/A
A workaround was introduced in commit 1fb710793c ("drm/amdgpu: Enable
MES lr_compute_wa by default") to help with some hangs observed in gfx1151.

This WA didn't fully fix the issue.  It was actually fixed by adjusting
the VGPR size to the correct value that matched the hardware in commit
b42f3bf953 ("drm/amdkfd: bump minimum vgpr size for gfx1151").

There are reports of instability on other products with newer GC microcode
versions, and I believe they're caused by this workaround. As we don't
need the workaround any more, remove it.

Fixes: b42f3bf953 ("drm/amdkfd: bump minimum vgpr size for gfx1151")
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 9973e64bd6ee7642860a6f3b6958cbf14e89cabd)
Cc: stable@vger.kernel.org
2026-02-25 17:58:06 -05:00
Lijo Lazar
b57c4ec98c drm/amdgpu: Fix error handling in slot reset
If the device has not recovered after slot reset is called, it goes to
out label for error handling. There it could make decision based on
uninitialized hive pointer and could result in accessing an uninitialized
list.

Initialize the list and hive properly so that it handles the error
situation and also releases the reset domain lock which is acquired
during error_detected callback.

Fixes: 732c6cefc1 ("drm/amdgpu: Replace tmp_adev with hive in amdgpu_pci_slot_reset")
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Ce Sun <cesun102@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit bb71362182e59caa227e4192da5a612b09349696)
2026-02-25 17:57:55 -05:00
sguttula
a5fe1a5451 drm/amdgpu/vcn5: Add SMU dpm interface type
This will set AMDGPU_VCN_SMU_DPM_INTERFACE_* smu_type
based on soc type and fixing ring timeout issue seen
for DPM enabled case.

Signed-off-by: sguttula <suresh.guttula@amd.com>
Reviewed-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit f0f23c315b38c55e8ce9484cf59b65811f350630)
2026-02-25 17:57:06 -05:00
Bart Van Assche
480ad5f6ea drm/amdgpu: Fix locking bugs in error paths
Do not unlock psp->ras_context.mutex if it has not been locked. This has
been detected by the Clang thread-safety analyzer.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: YiPeng Chai <YiPeng.Chai@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Fixes: b3fb79cda5 ("drm/amdgpu: add mutex to protect ras shared memory")
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 6fa01b4335978051d2cd80841728fd63cc597970)
2026-02-25 17:56:50 -05:00
Bart Van Assche
5e0bcc7b88 drm/amdgpu: Unlock a mutex before destroying it
Mutexes must be unlocked before these are destroyed. This has been detected
by the Clang thread-safety analyzer.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Yang Wang <kevinyang.wang@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Fixes: f5e4cc8461 ("drm/amdgpu: implement RAS ACA driver framework")
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 270258ba320beb99648dceffb67e86ac76786e55)
2026-02-25 17:56:43 -05:00
Natalie Vock
28dfe43175 drm/amd/display: Use GFP_ATOMIC in dc_create_stream_for_sink
This can be called while preemption is disabled, for example by
dcn32_internal_validate_bw which is called with the FPU active.

Fixes "BUG: scheduling while atomic" messages I encounter on my Navi31
machine.

Signed-off-by: Natalie Vock <natalie.vock@gmx.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit b42dae2ebc5c84a68de63ec4ffdfec49362d53f1)
Cc: stable@vger.kernel.org
2026-02-25 17:56:22 -05:00
Sunil Khatri
64ac7c09fc drm/amdgpu: add upper bound check on user inputs in wait ioctl
Huge input values in amdgpu_userq_wait_ioctl can lead to a OOM and
could be exploited.

So check these input value against AMDGPU_USERQ_MAX_HANDLES
which is big enough value for genuine use cases and could
potentially avoid OOM.

v2: squash in Srini's fix

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit fcec012c664247531aed3e662f4280ff804d1476)
Cc: stable@vger.kernel.org
2026-02-25 17:54:57 -05:00
Sunil Khatri
ea78f8c68f drm/amdgpu: add upper bound check on user inputs in signal ioctl
Huge input values in amdgpu_userq_signal_ioctl can lead to a OOM and
could be exploited.

So check these input value against AMDGPU_USERQ_MAX_HANDLES
which is big enough value for genuine use cases and could
potentially avoid OOM.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit be267e15f99bc97cbe202cd556717797cdcf79a5)
Cc: stable@vger.kernel.org
2026-02-25 17:49:48 -05:00
Tvrtko Ursulin
7b7d7693a5 drm/amdgpu/userq: Do not allow userspace to trivially triger kernel warnings
Userspace can either deliberately pass in the too small num_fences, or the
required number can legitimately grow between the two calls to the userq
wait ioctl. In both cases we do not want the emit the kernel warning
backtrace since nothing is wrong with the kernel and userspace will simply
get an errno reported back. So lets simply drop the WARN_ONs.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Fixes: a292fdecd7 ("drm/amdgpu: Implement userqueue signal/wait IOCTL")
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 2c333ea579de6cc20ea7bc50e9595ef72863e65c)
2026-02-25 17:49:28 -05:00
Tvrtko Ursulin
49abfa8126 drm/amdgpu/userq: Fix reference leak in amdgpu_userq_wait_ioctl
Drop reference to syncobj and timeline fence when aborting the ioctl due
output array being too small.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Fixes: a292fdecd7 ("drm/amdgpu: Implement userqueue signal/wait IOCTL")
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 68951e9c3e6bb22396bc42ef2359751c8315dd27)
Cc: <stable@vger.kernel.org> # v6.16+
2026-02-25 17:49:02 -05:00
Lizhi Hou
75c151ceaa accel/amdxdna: Use a different name for latest firmware
Using legacy driver with latest firmware causes a power off issue.

Fix this by assigning a different filename (npu_7.sbin) to the latest
firmware. The driver attempts to load the latest firmware first and falls
back to the previous firmware version if loading fails.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5009
Fixes: f1eac46fe5 ("accel/amdxdna: Update firmware version check for latest firmware")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260225204752.2711734-1-lizhi.hou@amd.com
2026-02-25 13:51:31 -08:00
Niklas Cassel
c22533c66c PCI: dwc: ep: Flush MSI-X write before unmapping its ATU entry
Endpoint drivers use dw_pcie_ep_raise_msix_irq() to raise an MSI-X
interrupt to the host using a writel(), which generates a PCI posted write
transaction.  There's no completion for posted writes, so the writel() may
return before the PCI write completes.  dw_pcie_ep_raise_msix_irq() also
unmaps the outbound ATU entry used for the PCI write, so the write races
with the unmap.

If the PCI write loses the race with the ATU unmap, the write may corrupt
host memory or cause IOMMU errors, e.g., these when running fio with a
larger queue depth against nvmet-pci-epf:

  arm-smmu-v3 fc900000.iommu:      0x0000010000000010
  arm-smmu-v3 fc900000.iommu:      0x0000020000000000
  arm-smmu-v3 fc900000.iommu:      0x000000090000f040
  arm-smmu-v3 fc900000.iommu:      0x0000000000000000
  arm-smmu-v3 fc900000.iommu: event: F_TRANSLATION client: 0000:01:00.0 sid: 0x100 ssid: 0x0 iova: 0x90000f040 ipa: 0x0
  arm-smmu-v3 fc900000.iommu: unpriv data write s1 "Input address caused fault" stag: 0x0

Flush the write by performing a readl() of the same address to ensure that
the write has reached the destination before the ATU entry is unmapped.

The same problem was solved for dw_pcie_ep_raise_msi_irq() in commit
8719c64e76 ("PCI: dwc: ep: Cache MSI outbound iATU mapping"), but there
it was solved by dedicating an outbound iATU only for MSI. We can't do the
same for MSI-X because each vector can have a different msg_addr and the
msg_addr may be changed while the vector is masked.

Fixes: beb4641a78 ("PCI: dwc: Add MSI-X callbacks handler")
Signed-off-by: Niklas Cassel <cassel@kernel.org>
[bhelgaas: commit log]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260211175540.105677-2-cassel@kernel.org
2026-02-25 15:44:20 -06:00
Niklas Cassel
468711a40d PCI: dwc: ep: Refresh MSI Message Address cache on change
Endpoint drivers use dw_pcie_ep_raise_msi_irq() to raise MSI interrupts to
the host.  After 8719c64e76 ("PCI: dwc: ep: Cache MSI outbound iATU
mapping"), dw_pcie_ep_raise_msi_irq() caches the Message Address from the
MSI Capability in ep->msi_msg_addr.  But that Message Address is controlled
by the host, and it may change.  For example, if:

  - firmware on the host configures the Message Address and triggers an
    MSI,

  - a driver on the Endpoint raises the MSI via dw_pcie_ep_raise_msi_irq(),
    which caches the Message Address,

  - a kernel on the host reconfigures the Message Address and the host
    kernel driver triggers another MSI,

dw_pcie_ep_raise_msi_irq() notices that the Message Address no longer
matches the cached ep->msi_msg_addr, warns about it, and returns error
instead of raising the MSI.  The host kernel may hang because it never
receives the MSI.

This was seen with the nvmet_pci_epf_driver: the host UEFI performs NVMe
commands, e.g. Identify Controller to get the name of the controller,
nvmet-pci-epf posts the completion queue entry and raises an IRQ using
dw_pcie_ep_raise_msi_irq().  When the host boots Linux, we see a
WARN_ON_ONCE() from dw_pcie_ep_raise_msi_irq(), and the host kernel hangs
because the nvme driver never gets an IRQ.

Remove the warning when dw_pcie_ep_raise_msi_irq() notices that Message
Address has changed, remap using the new address, and update the
ep->msi_msg_addr cache.

Fixes: 8719c64e76 ("PCI: dwc: ep: Cache MSI outbound iATU mapping")
Signed-off-by: Niklas Cassel <cassel@kernel.org>
[bhelgaas: commit log]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Tested-by: Koichiro Den <den@valinux.co.jp>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://patch.msgid.link/20260210181225.3926165-2-cassel@kernel.org
2026-02-25 15:41:43 -06:00
Mark Rutland
a8f78680ee arm64: tlb: Optimize ARM64_WORKAROUND_REPEAT_TLBI
The ARM64_WORKAROUND_REPEAT_TLBI workaround is used to mitigate several
errata where broadcast TLBI;DSB sequences don't provide all the
architecturally required synchronization. The workaround performs more
work than necessary, and can have significant overhead. This patch
optimizes the workaround, as explained below.

The workaround was originally added for Qualcomm Falkor erratum 1009 in
commit:

  d9ff80f83e ("arm64: Work around Falkor erratum 1009")

As noted in the message for that commit, the workaround is applied even
in cases where it is not strictly necessary.

The workaround was later reused without changes for:

* Arm Cortex-A76 erratum #1286807
  SDEN v33: https://developer.arm.com/documentation/SDEN-885749/33-0/

* Arm Cortex-A55 erratum #2441007
  SDEN v16: https://developer.arm.com/documentation/SDEN-859338/1600/

* Arm Cortex-A510 erratum #2441009
  SDEN v19: https://developer.arm.com/documentation/SDEN-1873351/1900/

The important details to note are as follows:

1. All relevant errata only affect the ordering and/or completion of
   memory accesses which have been translated by an invalidated TLB
   entry. The actual invalidation of TLB entries is unaffected.

2. The existing workaround is applied to both broadcast and local TLB
   invalidation, whereas for all relevant errata it is only necessary to
   apply a workaround for broadcast invalidation.

3. The existing workaround replaces every TLBI with a TLBI;DSB;TLBI
   sequence, whereas for all relevant errata it is only necessary to
   execute a single additional TLBI;DSB sequence after any number of
   TLBIs are completed by a DSB.

   For example, for a sequence of batched TLBIs:

       TLBI <op1>[, <arg1>]
       TLBI <op2>[, <arg2>]
       TLBI <op3>[, <arg3>]
       DSB ISH

   ... the existing workaround will expand this to:

       TLBI <op1>[, <arg1>]
       DSB ISH                  // additional
       TLBI <op1>[, <arg1>]     // additional
       TLBI <op2>[, <arg2>]
       DSB ISH                  // additional
       TLBI <op2>[, <arg2>]     // additional
       TLBI <op3>[, <arg3>]
       DSB ISH                  // additional
       TLBI <op3>[, <arg3>]     // additional
       DSB ISH

   ... whereas it is sufficient to have:

       TLBI <op1>[, <arg1>]
       TLBI <op2>[, <arg2>]
       TLBI <op3>[, <arg3>]
       DSB ISH
       TLBI <opX>[, <argX>]     // additional
       DSB ISH                  // additional

   Using a single additional TBLI and DSB at the end of the sequence can
   have significantly lower overhead as each DSB which completes a TLBI
   must synchronize with other PEs in the system, with potential
   performance effects both locally and system-wide.

4. The existing workaround repeats each specific TLBI operation, whereas
   for all relevant errata it is sufficient for the additional TLBI to
   use *any* operation which will be broadcast, regardless of which
   translation regime or stage of translation the operation applies to.

   For example, for a single TLBI:

       TLBI ALLE2IS
       DSB ISH

   ... the existing workaround will expand this to:

       TLBI ALLE2IS
       DSB ISH
       TLBI ALLE2IS             // additional
       DSB ISH                  // additional

   ... whereas it is sufficient to have:

       TLBI ALLE2IS
       DSB ISH
       TLBI VALE1IS, XZR        // additional
       DSB ISH                  // additional

   As the additional TLBI doesn't have to match a specific earlier TLBI,
   the additional TLBI can be implemented in separate code, with no
   memory of the earlier TLBIs. The additional TLBI can also use a
   cheaper TLBI operation.

5. The existing workaround is applied to both Stage-1 and Stage-2 TLB
   invalidation, whereas for all relevant errata it is only necessary to
   apply a workaround for Stage-1 invalidation.

   Architecturally, TLBI operations which invalidate only Stage-2
   information (e.g. IPAS2E1IS) are not required to invalidate TLB
   entries which combine information from Stage-1 and Stage-2
   translation table entries, and consequently may not complete memory
   accesses translated by those combined entries. In these cases,
   completion of memory accesses is only guaranteed after subsequent
   invalidation of Stage-1 information (e.g. VMALLE1IS).

Taking the above points into account, this patch reworks the workaround
logic to reduce overhead:

* New __tlbi_sync_s1ish() and __tlbi_sync_s1ish_hyp() functions are
  added and used in place of any dsb(ish) which is used to complete
  broadcast Stage-1 TLB maintenance. When the
  ARM64_WORKAROUND_REPEAT_TLBI workaround is enabled, these helpers will
  execute an additional TLBI;DSB sequence.

  For consistency, it might make sense to add __tlbi_sync_*() helpers
  for local and stage 2 maintenance. For now I've left those with
  open-coded dsb() to keep the diff small.

* The duplication of TLBIs in __TLBI_0() and __TLBI_1() is removed. This
  is no longer needed as the necessary synchronization will happen in
  __tlbi_sync_s1ish() or __tlbi_sync_s1ish_hyp().

* The additional TLBI operation is chosen to have minimal impact:

  - __tlbi_sync_s1ish() uses "TLBI VALE1IS, XZR". This is only used at
    EL1 or at EL2 with {E2H,TGE}=={1,1}, where it will target an unused
    entry for the reserved ASID in the kernel's own translation regime,
    and have no adverse affect.

  - __tlbi_sync_s1ish_hyp() uses "TLBI VALE2IS, XZR". This is only used
    in hyp code, where it will target an unused entry in the hyp code's
    TTBR0 mapping, and should have no adverse effect.

* As __TLBI_0() and __TLBI_1() no longer replace each TLBI with a
  TLBI;DSB;TLBI sequence, batching TLBIs is worthwhile, and there's no
  need for arch_tlbbatch_should_defer() to consider
  ARM64_WORKAROUND_REPEAT_TLBI.

When building defconfig with GCC 15.1.0, compared to v6.19-rc1, this
patch saves ~1KiB of text, makes the vmlinux ~42KiB smaller, and makes
the resulting Image 64KiB smaller:

| [mark@lakrids:~/src/linux]% size vmlinux-*
|    text    data     bss     dec     hex filename
| 21179831        19660919         708216 41548966        279fca6 vmlinux-after
| 21181075        19660903         708216 41550194        27a0172 vmlinux-before
| [mark@lakrids:~/src/linux]% ls -l vmlinux-*
| -rwxr-xr-x 1 mark mark 157771472 Feb  4 12:05 vmlinux-after
| -rwxr-xr-x 1 mark mark 157815432 Feb  4 12:05 vmlinux-before
| [mark@lakrids:~/src/linux]% ls -l Image-*
| -rw-r--r-- 1 mark mark 41007616 Feb  4 12:05 Image-after
| -rw-r--r-- 1 mark mark 41073152 Feb  4 12:05 Image-before

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-25 21:37:44 +00:00
Mark Rutland
bfd9c931d1 arm64: tlb: Allow XZR argument to TLBI ops
The TLBI instruction accepts XZR as a register argument, and for TLBI
operations with a register argument, there is no functional difference
between using XZR or another GPR which contains zeroes. Operations
without a register argument are encoded as if XZR were used.

Allow the __TLBI_1() macro to use XZR when a register argument is all
zeroes.

Today this only results in a trivial code saving in
__do_compat_cache_op()'s workaround for Neoverse-N1 erratum #1542419. In
subsequent patches this pattern will be used more generally.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-25 21:37:44 +00:00
Catalin Marinas
9d1a7c4a45 kselftest: arm64: Check access to GCS after mprotect(PROT_NONE)
A GCS mapping should not be accessible after mprotect(PROT_NONE). Add a
kselftest for this scenario.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-25 19:53:58 +00:00
Catalin Marinas
47a8aad135 arm64: gcs: Honour mprotect(PROT_NONE) on shadow stack mappings
vm_get_page_prot() short-circuits the protection_map[] lookup for a
VM_SHADOW_STACK mapping since it uses a different PIE index from the
typical read/write/exec permissions. However, the side effect is that it
also ignores mprotect(PROT_NONE) by creating an accessible PTE.

Special-case the !(vm_flags & VM_ACCESS_FLAGS) flags to use the
protection_map[VM_NONE] permissions instead. No GCS attributes are
required for an inaccessible PTE.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Fixes: 6497b66ba6 ("arm64/mm: Map pages for guarded control stack")
Cc: stable@vger.kernel.org
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-25 19:53:58 +00:00
Catalin Marinas
8a85b31312 arm64: gcs: Do not set PTE_SHARED on GCS mappings if FEAT_LPA2 is enabled
When FEAT_LPA2 is enabled, bits 8-9 of the PTE replace the
shareability attribute with bits 50-51 of the output address. The
_PAGE_GCS{,_RO} definitions include the PTE_SHARED bits as 0b11 (this
matches the other _PAGE_* definitions) but using this macro directly
leads to the following panic when enabling GCS on a system/model with
LPA2:

  Unable to handle kernel paging request at virtual address fffff1ffc32d8008
  Mem abort info:
    ESR = 0x0000000096000004
    EC = 0x25: DABT (current EL), IL = 32 bits
    SET = 0, FnV = 0
    EA = 0, S1PTW = 0
    FSC = 0x04: level 0 translation fault
  Data abort info:
    ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
    CM = 0, WnR = 0, TnD = 0, TagAccess = 0
    GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
  swapper pgtable: 4k pages, 52-bit VAs, pgdp=0000000060f4d000
  [fffff1ffc32d8008] pgd=100000006184b003, p4d=0000000000000000
  Internal error: Oops: 0000000096000004 [#1]  SMP
  CPU: 0 UID: 0 PID: 513 Comm: gcs_write_fault Tainted: G   M                7.0.0-rc1 #1 PREEMPT
  Tainted: [M]=MACHINE_CHECK
  Hardware name: QEMU QEMU Virtual Machine, BIOS 2025.02-8+deb13u1 11/08/2025
  pstate: 03402005 (nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
  pc : zap_huge_pmd+0x168/0x468
  lr : zap_huge_pmd+0x2c/0x468
  sp : ffff800080beb660
  x29: ffff800080beb660 x28: fff00000c2058180 x27: ffff800080beb898
  x26: fff00000c2058180 x25: ffff800080beb820 x24: 00c800010b600f41
  x23: ffffc1ffc30af1a8 x22: fff00000c2058180 x21: 0000ffff8dc00000
  x20: fff00000c2bc6370 x19: ffff800080beb898 x18: ffff800080bebb60
  x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000007
  x14: 000000000000000a x13: 0000aaaacbbbffff x12: 0000000000000000
  x11: 0000ffff8ddfffff x10: 00000000000001fe x9 : 0000ffff8ddfffff
  x8 : 0000ffff8de00000 x7 : 0000ffff8da00000 x6 : fff00000c2bc6370
  x5 : 0000ffff8da00000 x4 : 000000010b600000 x3 : ffffc1ffc0000000
  x2 : fff00000c2058180 x1 : fffff1ffc32d8000 x0 : 000000c00010b600
  Call trace:
   zap_huge_pmd+0x168/0x468 (P)
   unmap_page_range+0xd70/0x1560
   unmap_single_vma+0x48/0x80
   unmap_vmas+0x90/0x180
   unmap_region+0x88/0xe4
   vms_complete_munmap_vmas+0xf8/0x1e0
   do_vmi_align_munmap+0x158/0x180
   do_vmi_munmap+0xac/0x160
   __vm_munmap+0xb0/0x138
   vm_munmap+0x14/0x20
   gcs_free+0x70/0x80
   mm_release+0x1c/0xc8
   exit_mm_release+0x28/0x38
   do_exit+0x190/0x8ec
   do_group_exit+0x34/0x90
   get_signal+0x794/0x858
   arch_do_signal_or_restart+0x11c/0x3e0
   exit_to_user_mode_loop+0x10c/0x17c
   el0_da+0x8c/0x9c
   el0t_64_sync_handler+0xd0/0xf0
   el0t_64_sync+0x198/0x19c
  Code: aa1603e2 d34cfc00 cb813001 8b011861 (f9400420)

Similarly to how the kernel handles protection_map[], use a
gcs_page_prot variable to store the protection bits and clear PTE_SHARED
if LPA2 is enabled.

Also remove the unused PAGE_GCS{,_RO} macros.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Fixes: 6497b66ba6 ("arm64/mm: Map pages for guarded control stack")
Reported-by: Emanuele Rocca <emanuele.rocca@arm.com>
Cc: stable@vger.kernel.org
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-25 19:53:57 +00:00
Will Deacon
8f09803713 arm64: io: Extract user memory type in ioremap_prot()
The only caller of ioremap_prot() outside of the generic ioremap()
implementation is generic_access_phys(), which passes a 'pgprot_t' value
determined from the user mapping of the target 'pfn' being accessed by
the kernel. On arm64, the 'pgprot_t' contains all of the non-address
bits from the pte, including the permission controls, and so we end up
returning a new user mapping from ioremap_prot() which faults when
accessed from the kernel on systems with PAN:

  | Unable to handle kernel read from unreadable memory at virtual address ffff80008ea89000
  | ...
  | Call trace:
  |   __memcpy_fromio+0x80/0xf8
  |   generic_access_phys+0x20c/0x2b8
  |   __access_remote_vm+0x46c/0x5b8
  |   access_remote_vm+0x18/0x30
  |   environ_read+0x238/0x3e8
  |   vfs_read+0xe4/0x2b0
  |   ksys_read+0xcc/0x178
  |   __arm64_sys_read+0x4c/0x68

Extract only the memory type from the user 'pgprot_t' in ioremap_prot()
and assert that we're being passed a user mapping, to protect us against
any changes in future that may require additional handling. To avoid
falsely flagging users of ioremap(), provide our own ioremap() macro
which simply wraps __ioremap_prot().

Cc: Zeng Heng <zengheng4@huawei.com>
Cc: Jinjiang Tu <tujinjiang@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Fixes: 893dea9ccd ("arm64: Add HAVE_IOREMAP_PROT support")
Reported-by: Jinjiang Tu <tujinjiang@huawei.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-25 19:49:52 +00:00
Will Deacon
f6bf47ab32 arm64: io: Rename ioremap_prot() to __ioremap_prot()
Rename our ioremap_prot() implementation to __ioremap_prot() and convert
all arch-internal callers over to the new function.

ioremap_prot() remains as a #define to __ioremap_prot() for
generic_access_phys() and will be subsequently extended to handle user
permissions in 'prot'.

Cc: Zeng Heng <zengheng4@huawei.com>
Cc: Jinjiang Tu <tujinjiang@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2026-02-25 19:49:51 +00:00
Vitaly Lifshits
0942fc6d32 e1000e: clear DPG_EN after reset to avoid autonomous power-gating
Panther Lake systems introduced an autonomous power gating feature for
the integrated Gigabit Ethernet in shutdown state (S5) state. As part of
it, the reset value of DPG_EN bit was changed to 1. Clear this bit after
performing hardware reset to avoid errors such as Tx/Rx hangs, or packet
loss/corruption.

Fixes: 0c9183ce61 ("e1000e: Add support for the next LOM generation")
Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Vitaly Lifshits
5b644464ee e1000e: introduce new board type for Panther Lake PCH
Add new board type for Panther Lake devices for separating device-specific
features and flows.
Additionally, remove the deprecated device IDs 0x57B5 and 0x57B6, which
are not used by any existing devices.

Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Jedrzej Jagielski
feae40a6a1 ixgbevf: fix link setup issue
It may happen that VF spawned for E610 adapter has problem with setting
link up. This happens when ixgbevf supporting mailbox API 1.6 cooperates
with PF driver which doesn't support this version of API, and hence
doesn't support new approach for getting PF link data.

In that case VF asks PF to provide link data but as PF doesn't support
it, returns -EOPNOTSUPP what leads to early bail from link configuration
sequence.

Avoid such situation by using legacy VFLINKS approach whenever negotiated
API version is less than 1.6.

To reproduce the issue just create VF and set its link up - adapter must
be any from the E610 family, ixgbevf must support API 1.6 or higher while
ixgbevf must not.

Fixes: 53f0eb62b4 ("ixgbevf: fix getting link speed data for E610 devices")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Cc: stable@vger.kernel.org
Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Thomas Gleixner
4b3d54a85b i40e: Fix preempt count leak in napi poll tracepoint
Using get_cpu() in the tracepoint assignment causes an obvious preempt
count leak because nothing invokes put_cpu() to undo it:

  softirq: huh, entered softirq 3 NET_RX with preempt_count 00000100, exited with 00000101?

This clearly has seen a lot of testing in the last 3+ years...

Use smp_processor_id() instead.

Fixes: 6d4d584a7e ("i40e: Add i40e_napi_poll tracepoint")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Cc: intel-wired-lan@lists.osuosl.org
Cc: netdev@vger.kernel.org
Reviewed-by: Joe Damato <joe@dama.to>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Michal Schmidt
a9c354e656 ice: fix crash in ethtool offline loopback test
Since the conversion of ice to page pool, the ethtool loopback test
crashes:

 BUG: kernel NULL pointer dereference, address: 000000000000000c
 #PF: supervisor write access in kernel mode
 #PF: error_code(0x0002) - not-present page
 PGD 1100f1067 P4D 0
 Oops: Oops: 0002 [#1] SMP NOPTI
 CPU: 23 UID: 0 PID: 5904 Comm: ethtool Kdump: loaded Not tainted 6.19.0-0.rc7.260128g1f97d9dcf5364.49.eln154.x86_64 #1 PREEMPT(lazy)
 Hardware name: [...]
 RIP: 0010:ice_alloc_rx_bufs+0x1cd/0x310 [ice]
 Code: 83 6c 24 30 01 66 41 89 47 08 0f 84 c0 00 00 00 41 0f b7 dc 48 8b 44 24 18 48 c1 e3 04 41 bb 00 10 00 00 48 8d 2c 18 8b 04 24 <89> 45 0c 41 8b 4d 00 49 d3 e3 44 3b 5c 24 24 0f 83 ac fe ff ff 44
 RSP: 0018:ff7894738aa1f768 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
 RDX: 0000000000000000 RSI: 0000000000000700 RDI: 0000000000000000
 RBP: 0000000000000000 R08: ff16dcae79880200 R09: 0000000000000019
 R10: 0000000000000001 R11: 0000000000001000 R12: 0000000000000000
 R13: 0000000000000000 R14: 0000000000000000 R15: ff16dcae6c670000
 FS:  00007fcf428850c0(0000) GS:ff16dcb149710000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000000000000000c CR3: 0000000121227005 CR4: 0000000000773ef0
 PKRU: 55555554
 Call Trace:
  <TASK>
  ice_vsi_cfg_rxq+0xca/0x460 [ice]
  ice_vsi_cfg_rxqs+0x54/0x70 [ice]
  ice_loopback_test+0xa9/0x520 [ice]
  ice_self_test+0x1b9/0x280 [ice]
  ethtool_self_test+0xe5/0x200
  __dev_ethtool+0x1106/0x1a90
  dev_ethtool+0xbe/0x1a0
  dev_ioctl+0x258/0x4c0
  sock_do_ioctl+0xe3/0x130
  __x64_sys_ioctl+0xb9/0x100
  do_syscall_64+0x7c/0x700
  entry_SYSCALL_64_after_hwframe+0x76/0x7e
  [...]

It crashes because we have not initialized libeth for the rx ring.

Fix it by treating ICE_VSI_LB VSIs slightly more like normal PF VSIs and
letting them have a q_vector. It's just a dummy, because the loopback
test does not use interrupts, but it contains a napi struct that can be
passed to libeth_rx_fq_create() called from ice_vsi_cfg_rxq() ->
ice_rxq_pp_create().

Fixes: 93f53db9f9 ("ice: switch to Page Pool")
Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Aaron Ma
6aa07e23dd ice: recap the VSI and QoS info after rebuild
Fix IRDMA hardware initialization timeout (-110) after resume by
separating VSI-dependent configuration from RDMA resource allocation,
ensuring VSI is rebuilt before IRDMA accesses it.

After resume from suspend, IRDMA hardware initialization fails:
  ice: IRDMA hardware initialization FAILED init_state=4 status=-110

Separate RDMA initialization into two phases:
1. ice_init_rdma() - Allocate resources only (no VSI/QoS access, no plug)
2. ice_rdma_finalize_setup() - Assign VSI/QoS info and plug device

This allows:
- ice_init_rdma() to stay in ice_resume() (mirrors ice_deinit_rdma()
  in ice_suspend())
- VSI assignment deferred until after ice_vsi_rebuild() completes
- QoS info updated after ice_dcb_rebuild() completes
- Device plugged only when control queues, VSI, and DCB are all ready

Fixes: bc69ad7486 ("ice: avoid IRQ collision to fix init failure on ACPI S3 resume")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Aaron Ma <aaron.ma@canonical.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Sreedevi Joshi
2c31557336 idpf: Fix flow rule delete failure due to invalid validation
When deleting a flow rule using "ethtool -N <dev> delete <location>",
idpf_sideband_action_ena() incorrectly validates fsp->ring_cookie even
though ethtool doesn't populate this field for delete operations. The
uninitialized ring_cookie may randomly match RX_CLS_FLOW_DISC or
RX_CLS_FLOW_WAKE, causing validation to fail and preventing legitimate
rule deletions. Remove the unnecessary sideband action enable check and
ring_cookie validation during delete operations since action validation
is not required when removing existing rules.

Fixes: ada3e24b84 ("idpf: add flow steering support")
Signed-off-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Brian Vazquez
1500a8662d idpf: change IRQ naming to match netdev and ethtool queue numbering
The code uses the vidx for the IRQ name but that doesn't match ethtool
reporting nor netdev naming, this makes it hard to tune the device and
associate queues with IRQs. Sequentially requesting irqs starting from
'0' makes the output consistent.

This commit changes the interrupt numbering but preserves the name
format, maintaining ABI compatibility. Existing tools relying on the old
numbering are already non-functional, as they lack a useful correlation
to the interrupts.

Before:

ethtool -L eth1 tx 1 combined 3

grep . /proc/irq/*/*idpf*/../smp_affinity_list
/proc/irq/67/idpf-Mailbox-0/../smp_affinity_list:0-55,112-167
/proc/irq/68/idpf-eth1-TxRx-1/../smp_affinity_list:0
/proc/irq/70/idpf-eth1-TxRx-3/../smp_affinity_list:1
/proc/irq/71/idpf-eth1-TxRx-4/../smp_affinity_list:2
/proc/irq/72/idpf-eth1-Tx-5/../smp_affinity_list:3

ethtool -S eth1 | grep -v ': 0'
NIC statistics:
     tx_q-0_pkts: 1002
     tx_q-1_pkts: 2679
     tx_q-2_pkts: 1113
     tx_q-3_pkts: 1192 <----- tx_q-3 vs idpf-eth1-Tx-5
     rx_q-0_pkts: 1143
     rx_q-1_pkts: 3172
     rx_q-2_pkts: 1074

After:

ethtool -L eth1 tx 1 combined 3

grep . /proc/irq/*/*idpf*/../smp_affinity_list

/proc/irq/67/idpf-Mailbox-0/../smp_affinity_list:0-55,112-167
/proc/irq/68/idpf-eth1-TxRx-0/../smp_affinity_list:0
/proc/irq/70/idpf-eth1-TxRx-1/../smp_affinity_list:1
/proc/irq/71/idpf-eth1-TxRx-2/../smp_affinity_list:2
/proc/irq/72/idpf-eth1-Tx-3/../smp_affinity_list:3

ethtool -S eth1 | grep -v ': 0'
NIC statistics:
     tx_q-0_pkts: 118
     tx_q-1_pkts: 134
     tx_q-2_pkts: 228
     tx_q-3_pkts: 138 <--- tx_q-3 matches idpf-eth1-Tx-3
     rx_q-0_pkts: 111
     rx_q-1_pkts: 366
     rx_q-2_pkts: 120

Fixes: d4d5587182 ("idpf: initialize interrupts and enable vport")
Signed-off-by: Brian Vazquez <brianvv@google.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Li Li
bc3b319773 idpf: nullify pointers after they are freed
rss_data->rss_key needs to be nullified after it is freed.
Checks like "if (!rss_data->rss_key)" in the code could fail
if it is not nullified.

Tested: built and booted the kernel.

Fixes: 83f38f210b ("idpf: Fix RSS LUT NULL pointer crash on early ethtool operations")
Signed-off-by: Li Li <boolli@google.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:57 -08:00
Li Li
e403bf4013 idpf: skip deallocating txq group's txqs if it is NULL
In idpf_txq_group_alloc(), if any txq group's txqs failed to
allocate memory:

	for (j = 0; j < tx_qgrp->num_txq; j++) {
		tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]),
					   GFP_KERNEL);
		if (!tx_qgrp->txqs[j])
			goto err_alloc;
	}

It would cause a NULL ptr kernel panic in idpf_txq_group_rel():

	for (j = 0; j < txq_grp->num_txq; j++) {
		if (flow_sch_en) {
			kfree(txq_grp->txqs[j]->refillq);
			txq_grp->txqs[j]->refillq = NULL;
		}

		kfree(txq_grp->txqs[j]);
		txq_grp->txqs[j] = NULL;
	}

[    6.532461] BUG: kernel NULL pointer dereference, address: 0000000000000058
...
[    6.534433] RIP: 0010:idpf_txq_group_rel+0xc9/0x110
...
[    6.538513] Call Trace:
[    6.538639]  <TASK>
[    6.538760]  idpf_vport_queues_alloc+0x75/0x550
[    6.538978]  idpf_vport_open+0x4d/0x3f0
[    6.539164]  idpf_open+0x71/0xb0
[    6.539324]  __dev_open+0x142/0x260
[    6.539506]  netif_open+0x2f/0xe0
[    6.539670]  dev_open+0x3d/0x70
[    6.539827]  bond_enslave+0x5ed/0xf50
[    6.540005]  ? rcutree_enqueue+0x1f/0xb0
[    6.540193]  ? call_rcu+0xde/0x2a0
[    6.540375]  ? barn_get_empty_sheaf+0x5c/0x80
[    6.540594]  ? __kfree_rcu_sheaf+0xb6/0x1a0
[    6.540793]  ? nla_put_ifalias+0x3d/0x90
[    6.540981]  ? kvfree_call_rcu+0xb5/0x3b0
[    6.541173]  ? kvfree_call_rcu+0xb5/0x3b0
[    6.541365]  do_set_master+0x114/0x160
[    6.541547]  do_setlink+0x412/0xfb0
[    6.541717]  ? security_sock_rcv_skb+0x2a/0x50
[    6.541931]  ? sk_filter_trim_cap+0x7c/0x320
[    6.542136]  ? skb_queue_tail+0x20/0x50
[    6.542322]  ? __nla_validate_parse+0x92/0xe50
ro[o t   t o6 .d5e4f2a5u4l0t]-  ? security_capable+0x35/0x60
[    6.542792]  rtnl_newlink+0x95c/0xa00
[    6.542972]  ? __rtnl_unlock+0x37/0x70
[    6.543152]  ? netdev_run_todo+0x63/0x530
[    6.543343]  ? allocate_slab+0x280/0x870
[    6.543531]  ? security_capable+0x35/0x60
[    6.543722]  rtnetlink_rcv_msg+0x2e6/0x340
[    6.543918]  ? __pfx_rtnetlink_rcv_msg+0x10/0x10
[    6.544138]  netlink_rcv_skb+0x16a/0x1a0
[    6.544328]  netlink_unicast+0x20a/0x320
[    6.544516]  netlink_sendmsg+0x304/0x3b0
[    6.544748]  __sock_sendmsg+0x89/0xb0
[    6.544928]  ____sys_sendmsg+0x167/0x1c0
[    6.545116]  ? ____sys_recvmsg+0xed/0x150
[    6.545308]  ___sys_sendmsg+0xdd/0x120
[    6.545489]  ? ___sys_recvmsg+0x124/0x1e0
[    6.545680]  ? rcutree_enqueue+0x1f/0xb0
[    6.545867]  ? rcutree_enqueue+0x1f/0xb0
[    6.546055]  ? call_rcu+0xde/0x2a0
[    6.546222]  ? evict+0x286/0x2d0
[    6.546389]  ? rcutree_enqueue+0x1f/0xb0
[    6.546577]  ? kmem_cache_free+0x2c/0x350
[    6.546784]  __x64_sys_sendmsg+0x72/0xc0
[    6.546972]  do_syscall_64+0x6f/0x890
[    6.547150]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[    6.547393] RIP: 0033:0x7fc1a3347bd0
...
[    6.551375] RIP: 0010:idpf_txq_group_rel+0xc9/0x110
...
[    6.578856] Rebooting in 10 seconds..

We should skip deallocating txqs[j] if it is NULL in the first place.

Tested: with this patch, the kernel panic no longer appears.

Fixes: 1c325aac10 ("idpf: configure resources for TX queues")
Signed-off-by: Li Li <boolli@google.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:56 -08:00
Li Li
d11e5da2d6 idpf: skip deallocating bufq_sets from rx_qgrp if it is NULL
In idpf_rxq_group_alloc(), if rx_qgrp->splitq.bufq_sets failed to get
allocated:

	rx_qgrp->splitq.bufq_sets = kcalloc(vport->num_bufqs_per_qgrp,
					    sizeof(struct idpf_bufq_set),
					    GFP_KERNEL);
	if (!rx_qgrp->splitq.bufq_sets) {
		err = -ENOMEM;
		goto err_alloc;
	}

idpf_rxq_group_rel() would attempt to deallocate it in
idpf_rxq_sw_queue_rel(), causing a kernel panic:

```
[    7.967242] early-network-sshd-n-rexd[3148]: knetbase: Info: [    8.127804] BUG: kernel NULL pointer dereference, address: 00000000000000c0
...
[    8.129779] RIP: 0010:idpf_rxq_group_rel+0x101/0x170
...
[    8.133854] Call Trace:
[    8.133980]  <TASK>
[    8.134092]  idpf_vport_queues_alloc+0x286/0x500
[    8.134313]  idpf_vport_open+0x4d/0x3f0
[    8.134498]  idpf_open+0x71/0xb0
[    8.134668]  __dev_open+0x142/0x260
[    8.134840]  netif_open+0x2f/0xe0
[    8.135004]  dev_open+0x3d/0x70
[    8.135166]  bond_enslave+0x5ed/0xf50
[    8.135345]  ? nla_put_ifalias+0x3d/0x90
[    8.135533]  ? kvfree_call_rcu+0xb5/0x3b0
[    8.135725]  ? kvfree_call_rcu+0xb5/0x3b0
[    8.135916]  do_set_master+0x114/0x160
[    8.136098]  do_setlink+0x412/0xfb0
[    8.136269]  ? security_sock_rcv_skb+0x2a/0x50
[    8.136509]  ? sk_filter_trim_cap+0x7c/0x320
[    8.136714]  ? skb_queue_tail+0x20/0x50
[    8.136899]  ? __nla_validate_parse+0x92/0xe50
[    8.137112]  ? security_capable+0x35/0x60
[    8.137304]  rtnl_newlink+0x95c/0xa00
[    8.137483]  ? __rtnl_unlock+0x37/0x70
[    8.137664]  ? netdev_run_todo+0x63/0x530
[    8.137855]  ? allocate_slab+0x280/0x870
[    8.138044]  ? security_capable+0x35/0x60
[    8.138235]  rtnetlink_rcv_msg+0x2e6/0x340
[    8.138431]  ? __pfx_rtnetlink_rcv_msg+0x10/0x10
[    8.138650]  netlink_rcv_skb+0x16a/0x1a0
[    8.138840]  netlink_unicast+0x20a/0x320
[    8.139028]  netlink_sendmsg+0x304/0x3b0
[    8.139217]  __sock_sendmsg+0x89/0xb0
[    8.139399]  ____sys_sendmsg+0x167/0x1c0
[    8.139588]  ? ____sys_recvmsg+0xed/0x150
[    8.139780]  ___sys_sendmsg+0xdd/0x120
[    8.139960]  ? ___sys_recvmsg+0x124/0x1e0
[    8.140152]  ? rcutree_enqueue+0x1f/0xb0
[    8.140341]  ? rcutree_enqueue+0x1f/0xb0
[    8.140528]  ? call_rcu+0xde/0x2a0
[    8.140695]  ? evict+0x286/0x2d0
[    8.140856]  ? rcutree_enqueue+0x1f/0xb0
[    8.141043]  ? kmem_cache_free+0x2c/0x350
[    8.141236]  __x64_sys_sendmsg+0x72/0xc0
[    8.141424]  do_syscall_64+0x6f/0x890
[    8.141603]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[    8.141841] RIP: 0033:0x7f2799d21bd0
...
[    8.149905] Kernel panic - not syncing: Fatal exception
[    8.175940] Kernel Offset: 0xf800000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[    8.176425] Rebooting in 10 seconds..
```

Tested: With this patch, the kernel panic no longer appears.

Fixes: 95af467d9a ("idpf: configure resources for RX queues")
Signed-off-by: Li Li <boolli@google.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:56 -08:00
Li Li
712896ac4b idpf: increment completion queue next_to_clean in sw marker wait routine
Currently, in idpf_wait_for_sw_marker_completion(), when an
IDPF_TXD_COMPLT_SW_MARKER packet is found, the routine breaks out of
the for loop and does not increment the next_to_clean counter. This
causes the subsequent NAPI polls to run into the same
IDPF_TXD_COMPLT_SW_MARKER packet again and print out the following:

    [   23.261341] idpf 0000:05:00.0 eth1: Unknown TX completion type: 5

Instead, we should increment next_to_clean regardless when an
IDPF_TXD_COMPLT_SW_MARKER packet is found.

Tested: with the patch applied, we do not see the errors above from NAPI
polls anymore.

Fixes: 9d39447051 ("idpf: remove SW marker handling from NAPI")
Signed-off-by: Li Li <boolli@google.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2026-02-25 11:43:56 -08:00
Anirudh Rayabharam (Microsoft)
622d68772d mshv: add arm64 support for doorbell & intercept SINTs
On x86, the HYPERVISOR_CALLBACK_VECTOR is used to receive synthetic
interrupts (SINTs) from the hypervisor for doorbells and intercepts.
There is no such vector reserved for arm64.

On arm64, the hypervisor exposes a synthetic register that can be read
to find the INTID that should be used for SINTs. This INTID is in the
PPI range.

To better unify the code paths, introduce mshv_sint_vector_init() that
either reads the synthetic register and obtains the INTID (arm64) or
just uses HYPERVISOR_CALLBACK_VECTOR as the interrupt vector (x86).

Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Signed-off-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-02-25 19:09:49 +00:00
Anirudh Rayabharam (Microsoft)
5a674ef871 mshv: refactor synic init and cleanup
Rename mshv_synic_init() to mshv_synic_cpu_init() and
mshv_synic_cleanup() to mshv_synic_cpu_exit() to better reflect that
these functions handle per-cpu synic setup and teardown.

Use mshv_synic_init/cleanup() to perform init/cleanup that is not per-cpu.
Move all the synic related setup from mshv_parent_partition_init.

Move the reboot notifier to mshv_synic.c because it currently only
operates on the synic cpuhp state.

Move out synic_pages from the global mshv_root since its use is now
completely local to mshv_synic.c.

This is in preparation for adding more stuff to mshv_synic_init().

No functional change.

Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-02-25 19:09:24 +00:00
Linus Torvalds
d9d32e5bd5 Merge tag 'ata-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
Pull ata fixes from Niklas Cassel:

 - The newly introduced feature that issues a deferred (non-NCQ) command
   from a workqueue, forgot to consider the case where the deferred QC
   times out. Fix the code to take timeouts into consideration, which
   avoids a use after free (Damien)

 - The newly introduced feature that issues a deferred (non-NCQ) command
   from a workqueue, when unloading the module, calls cancel_work_sync(),
   a function that can sleep, while holding a spin lock. Move the function
   call outside the lock (Damien)

* tag 'ata-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux:
  ata: libata-core: fix cancellation of a port deferred qc work
  ata: libata-eh: correctly handle deferred qc timeouts
2026-02-25 10:41:14 -08:00
Linus Torvalds
0e335a7745 Merge tag 'vfs-7.0-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:

 - Fix an uninitialized variable in file_getattr().

   The flags_valid field wasn't initialized before calling
   vfs_fileattr_get(), triggering KMSAN uninit-value reports in fuse

 - Fix writeback wakeup and logging timeouts when DETECT_HUNG_TASK is
   not enabled.

   sysctl_hung_task_timeout_secs is 0 in that case causing spurious
   "waiting for writeback completion for more than 1 seconds" warnings

 - Fix a null-ptr-deref in do_statmount() when the mount is internal

 - Add missing kernel-doc description for the @private parameter in
   iomap_readahead()

 - Fix mount namespace creation to hold namespace_sem across the mount
   copy in create_new_namespace().

   The previous drop-and-reacquire pattern was fragile and failed to
   clean up mount propagation links if the real rootfs was a shared or
   dependent mount

 - Fix /proc mount iteration where m->index wasn't updated when
   m->show() overflows, causing a restart to repeatedly show the same
   mount entry in a rapidly expanding mount table

 - Return EFSCORRUPTED instead of ENOSPC in minix_new_inode() when the
   inode number is out of range

 - Fix unshare(2) when CLONE_NEWNS is set and current->fs isn't shared.

   copy_mnt_ns() received the live fs_struct so if a subsequent
   namespace creation failed the rollback would leave pwd and root
   pointing to detached mounts. Always allocate a new fs_struct when
   CLONE_NEWNS is requested

 - fserror bug fixes:

    - Remove the unused fsnotify_sb_error() helper now that all callers
      have been converted to fserror_report_metadata

    - Fix a lockdep splat in fserror_report() where igrab() takes
      inode::i_lock which can be held in IRQ context.

      Replace igrab() with a direct i_count bump since filesystems
      should not report inodes that are about to be freed or not yet
      exposed

 - Handle error pointer in procfs for try_lookup_noperm()

 - Fix an integer overflow in ep_loop_check_proc() where recursive calls
   returning INT_MAX would overflow when +1 is added, breaking the
   recursion depth check

 - Fix a misleading break in pidfs

* tag 'vfs-7.0-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  pidfs: avoid misleading break
  eventpoll: Fix integer overflow in ep_loop_check_proc()
  proc: Fix pointer error dereference
  fserror: fix lockdep complaint when igrabbing inode
  fsnotify: drop unused helper
  unshare: fix unshare_fs() handling
  minix: Correct errno in minix_new_inode
  namespace: fix proc mount iteration
  mount: hold namespace_sem across copy in create_new_namespace()
  iomap: Describe @private in iomap_readahead()
  statmount: Fix the null-ptr-deref in do_statmount()
  writeback: Fix wakeup and logging timeouts for !DETECT_HUNG_TASK
  fs: init flags_valid before calling vfs_fileattr_get
2026-02-25 10:34:23 -08:00
Ziyi Guo
1be3b77de4 usb: image: mdc800: kill download URB on timeout
mdc800_device_read() submits download_urb and waits for completion.
If the timeout fires and the device has not responded, the function
returns without killing the URB, leaving it active.

A subsequent read() resubmits the same URB while it is still
in-flight, triggering the WARN in usb_submit_urb():

  "URB submitted while active"

Check the return value of wait_event_timeout() and kill the URB if
it indicates timeout, ensuring the URB is complete before its status
is inspected or the URB is resubmitted.

Similar to
- commit 372c931319 ("USB: yurex: fix control-URB timeout handling")
- commit b98d5000c5 ("media: rc: iguanair: handle timeouts")

Signed-off-by: Ziyi Guo <n7l8m4@u.northwestern.edu>
Cc: stable <stable@kernel.org>
Link: https://patch.msgid.link/20260209151937.2247202-1-n7l8m4@u.northwestern.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-25 10:27:01 -08:00
Oliver Neukum
2d6d260e9a usb: mdc800: handle signal and read racing
If a signal arrives after a read has partially completed,
we need to return the number of bytes read. -EINTR is correct
only if that number is zero.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Cc: stable <stable@kernel.org>
Link: https://patch.msgid.link/20260209142048.1503791-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-25 10:26:46 -08:00
Chen Ridong
085f067389 cgroup/cpuset: fix null-ptr-deref in rebuild_sched_domains_cpuslocked
A null-pointer-dereference bug was reported by syzbot:

Oops: general protection fault, probably for address 0xdffffc0000000000:
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
RIP: 0010:bitmap_subset include/linux/bitmap.h:433 [inline]
RIP: 0010:cpumask_subset include/linux/cpumask.h:836 [inline]
RIP: 0010:rebuild_sched_domains_locked kernel/cgroup/cpuset.c:967
RSP: 0018:ffffc90003ecfbc0 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000020
RDX: ffff888028de0000 RSI: ffffffff8200f003 RDI: ffffffff8df14f28
RBP: 0000000000000000 R08: 0000000000000cc0 R09: 00000000ffffffff
R10: ffffffff8e7d95b3 R11: 0000000000000001 R12: 0000000000000000
R13: 00000000000f4240 R14: dffffc0000000000 R15: 0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001b2f463fff CR3: 000000003704c000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 rebuild_sched_domains_cpuslocked kernel/cgroup/cpuset.c:983 [inline]
 rebuild_sched_domains+0x21/0x40 kernel/cgroup/cpuset.c:990
 sched_rt_handler+0xb5/0xe0 kernel/sched/rt.c:2911
 proc_sys_call_handler+0x47f/0x5a0 fs/proc/proc_sysctl.c:600
 new_sync_write fs/read_write.c:595 [inline]
 vfs_write+0x6ac/0x1070 fs/read_write.c:688
 ksys_write+0x12a/0x250 fs/read_write.c:740
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0x106/0xf80 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

The issue occurs when generate_sched_domains() returns ndoms = 1 and
doms = NULL due to a kmalloc failure. This leads to a null-pointer
dereference when accessing doms in rebuild_sched_domains_locked().

Fix this by adding a NULL check for doms before accessing it.

Fixes: 6ee43047e8 ("cpuset: Remove unnecessary checks in rebuild_sched_domains_locked")
Reported-by: syzbot+460792609a79c085f79f@syzkaller.appspotmail.com
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-25 07:39:04 -10:00
Daniel Lezcano
3ea20672d2 MAINTAINERS: Update contact with the kernel.org address
Use the kernel.org address as a unified single entry to send patches
to. At the same time, update mailmap to group all past contributions.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2026-02-25 18:03:19 +01:00
Chen Ni
0410e1a4c5 mtd: rawnand: cadence: Fix error check for dma_alloc_coherent() in cadence_nand_init()
Fix wrong variable used for error checking after dma_alloc_coherent()
call. The function checks cdns_ctrl->dma_cdma_desc instead of
cdns_ctrl->cdma_desc, which could lead to incorrect error handling.

Fixes: ec4ba01e89 ("mtd: rawnand: Add new Cadence NAND driver to MTD subsystem")
Cc: stable@vger.kernel.org
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Reviewed-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-02-25 17:48:52 +01:00
Finn Thain
8e2f802027 mtd: Avoid boot crash in RedBoot partition table parser
Given CONFIG_FORTIFY_SOURCE=y and a recent compiler,
commit 439a1bcac6 ("fortify: Use __builtin_dynamic_object_size() when
available") produces the warning below and an oops.

    Searching for RedBoot partition table in 50000000.flash at offset 0x7e0000
    ------------[ cut here ]------------
    WARNING: lib/string_helpers.c:1035 at 0xc029e04c, CPU#0: swapper/0/1
    memcmp: detected buffer overflow: 15 byte read of buffer size 14
    Modules linked in:
    CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.19.0 #1 NONE

As Kees said, "'names' is pointing to the final 'namelen' many bytes
of the allocation ... 'namelen' could be basically any length at all.
This fortify warning looks legit to me -- this code used to be reading
beyond the end of the allocation."

Since the size of the dynamic allocation is calculated with strlen()
we can use strcmp() instead of memcmp() and remain within bounds.

Cc: Kees Cook <kees@kernel.org>
Cc: stable@vger.kernel.org
Cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/all/202602151911.AD092DFFCD@keescook/
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Suggested-by: Kees Cook <kees@kernel.org>
Signed-off-by: Finn Thain <fthain@linux-m68k.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-02-25 17:47:30 +01:00
Tudor Ambarus
82d938d526 MAINTAINERS: add Takahiro Kuwano as SPI NOR reviewer
Takahiro has been an active contributor to the SPI NOR subsystem,
providing valuable patches and reviews. Add him as a designated
reviewer to help facilitate patch processing and maintenance.

Cc: Takahiro Kuwano <takahiro.kuwano@infineon.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Acked-by: Takahiro Kuwano <takahiro.kuwano@infineon.com>
Acked-by: Michael Walle <mwalle@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-02-25 17:43:09 +01:00
Tudor Ambarus
2b476739f9 MAINTAINERS: remove Tudor Ambarus as SPI NOR maintainer
I have not been actively involved in SPI NOR development recently and
would like to step down to focus on my current day-to-day work.

The subsystem remains in good hands with Pratyush and Michael.

Signed-off-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Acked-by: Michael Walle <mwalle@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2026-02-25 17:43:09 +01:00
Alexander Gordeev
d879ac6756 s390/pfault: Fix virtual vs physical address confusion
When Linux is running as guest, runs a user space process and the
user space process accesses a page that the host has paged out,
the guest gets a pfault interrupt and schedules a different process.
Without this mechanism the host would have to suspend the whole
virtual CPU until the page has been paged in.

To setup the pfault interrupt the real address of parameter list
should be passed to DIAGNOSE 0x258, but a virtual address is passed
instead.

That has a performance impact, since the pfault setup never succeeds,
the interrupt is never delivered to a guest and the whole virtual CPU
is suspended as result.

Cc: stable@vger.kernel.org
Fixes: c98d2ecae0 ("s390/mm: Uncouple physical vs virtual address spaces")
Reported-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 17:00:25 +01:00
Vasily Gorbik
1623a554c6 s390/kexec: Disable stack protector in s390_reset_system()
s390_reset_system() calls set_prefix(0), which switches back to the
absolute lowcore. At that point the stack protector canary no longer
matches the canary from the lowcore the function was entered with, so
the stack check fails.

Mark s390_reset_system() __no_stack_protector. This is safe here since
its callers (__do_machine_kdump() and __do_machine_kexec()) are
effectively no-return and fall back to disabled_wait() on failure.

Fixes: f5730d44e0 ("s390: Add stackprotector support")
Reported-by: Nikita Dubrovskii <nikita@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 17:00:25 +01:00
Vasily Gorbik
05c58a6605 Merge branch 'idle-vtime-fixes-cleanups' into fixes
Pull a small idle/vtime series with two cputime accounting fixes plus
related cleanups and minor improvements.

* idle-vtime-fixes-cleanups:
  s390/idle: Remove psw_idle() prototype
  s390/vtime: Use lockdep_assert_irqs_disabled() instead of BUG_ON()
  s390/vtime: Use __this_cpu_read() / get rid of READ_ONCE()
  s390/irq/idle: Remove psw bits early
  s390/idle: Inline update_timer_idle()
  s390/idle: Slightly optimize idle time accounting
  s390/idle: Add comment for non obvious code
  s390/vtime: Fix virtual timer forwarding
  s390/idle: Fix cpu idle exit cpu time accounting

Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:59:16 +01:00
Heiko Carstens
acbc0437ca s390/idle: Remove psw_idle() prototype
psw_idle() does not exist anymore. Remove its prototype.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
80f63fd09e s390/vtime: Use lockdep_assert_irqs_disabled() instead of BUG_ON()
Use lockdep_assert_irqs_disabled() instead of BUG_ON(). This avoids
crashing the kernel, and generates better code if CONFIG_PROVE_LOCKING
is disabled.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
e282ccd308 s390/vtime: Use __this_cpu_read() / get rid of READ_ONCE()
do_account_vtime() runs always with interrupts disabled, therefore use
__this_cpu_read() instead of this_cpu_read() to get rid of a pointless
preempt_disable() / preempt_enable() pair.

Also there are no concurrent writers to the cpu time accounting fields
in lowcore. Therefore get rid of READ_ONCE() usages.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
d8b5cf9c63 s390/irq/idle: Remove psw bits early
Remove wait, io, external interrupt bits early in do_io_irq()/do_ext_irq()
when previous context was idle. This saves one conditional branch and is
closer to the original old assembly code.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
257c14e5a1 s390/idle: Inline update_timer_idle()
Inline update_timer_idle() again to avoid an extra function call. This
way the generated code is close to old assembler version again.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
00d8b035eb s390/idle: Slightly optimize idle time accounting
Slightly optimize account_idle_time_irq() and update_timer_idle():

- Use fast single instruction __atomic64() primitives to update per
  cpu idle_time and idle_count, instead of READ_ONCE() / WRITE_ONCE()
  pairs

- stcctm() is an inline assembly with a full memory barrier. This
  leads to a not necessary extra dereference of smp_cpu_mtid in
  update_timer_idle(). Avoid this and read smp_cpu_mtid into a
  variable

- Use __this_cpu_add() instead of this_cpu_add() to avoid disabling /
  enabling of preemption several times in a loop in update_timer_idle().

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
aefa6ec890 s390/idle: Add comment for non obvious code
Add a comment to update_timer_idle() which describes why wall time (not
steal time) is added to steal_timer. This is not obvious and was reported
by Frederic Weisbecker.

Reported-by: Frederic Weisbecker <frederic@kernel.org>
Closes: https://lore.kernel.org/all/aXEVM-04lj0lntMr@localhost.localdomain/
Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
dbc0fb3567 s390/vtime: Fix virtual timer forwarding
Since delayed accounting of system time [1] the virtual timer is
forwarded by do_account_vtime() but also vtime_account_kernel(),
vtime_account_softirq(), and vtime_account_hardirq(). This leads
to double accounting of system, guest, softirq, and hardirq time.

Remove accounting from the vtime_account*() family to restore old behavior.

There is only one user of the vtimer interface, which might explain
why nobody noticed this so far.

Fixes: b7394a5f4c ("sched/cputime, s390: Implement delayed accounting of system time") [1]
Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Heiko Carstens
0d785e2c32 s390/idle: Fix cpu idle exit cpu time accounting
With the conversion to generic entry [1] cpu idle exit cpu time accounting
was converted from assembly to C. This introduced an reversed order of cpu
time accounting.

On cpu idle exit the current accounting happens with the following call
chain:

-> do_io_irq()/do_ext_irq()
 -> irq_enter_rcu()
  -> account_hardirq_enter()
   -> vtime_account_irq()
    -> vtime_account_kernel()

vtime_account_kernel() accounts the passed cpu time since last_update_timer
as system time, and updates last_update_timer to the current cpu timer
value.

However the subsequent call of

 -> account_idle_time_irq()

will incorrectly subtract passed cpu time from timer_idle_enter to the
updated last_update_timer value from system_timer. Then last_update_timer
is updated to a sys_enter_timer, which means that last_update_timer goes
back in time.

Subsequently account_hardirq_exit() will account too much cpu time as
hardirq time. The sum of all accounted cpu times is still correct, however
some cpu time which was previously accounted as system time is now
accounted as hardirq time, plus there is the oddity that last_update_timer
goes back in time.

Restore previous behavior by extracting cpu time accounting code from
account_idle_time_irq() into a new update_timer_idle() function and call it
before irq_enter_rcu().

Fixes: 56e62a7370 ("s390: convert to generic entry") [1]
Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2026-02-25 16:46:07 +01:00
Pavel Begunkov
85f6c439a6 io_uring/timeout: READ_ONCE sqe->addr
We should use READ_ONCE when reading from a SQE, make sure timeout gets
a stable timespec address.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-25 08:36:05 -07:00
Panagiotis Foliadis
e9fb2028f1 ALSA: hda/intel: increase default bdl_pos_adj for Nvidia controllers
The default bdl_pos_adj of 32 for Nvidia HDA controllers is
insufficient on GA102 (and likely other recent Nvidia GPUs) after S3
suspend/resume. The controller's DMA timing degrades after resume,
causing premature IRQ detection in azx_position_ok() which results in
silent HDMI/DP audio output despite userspace reporting a valid
playback state and correct ELD data.

Increase bdl_pos_adj to 64 for AZX_DRIVER_NVIDIA, matching the value
already used by Intel Apollo Lake for the same class of timing issue.

Cc: stable@vger.kernel.org
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221069
Suggested-by: Charalampos Mitrodimas <charmitro@posteo.net>
Signed-off-by: Panagiotis Foliadis <pfoliadis@posteo.net>
Link: https://patch.msgid.link/20260225-nvidia-audio-fix-v1-1-b1383c37ec49@posteo.net
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-25 15:56:57 +01:00
Nathan Chancellor
ab6088e7a9 lib/Kconfig.debug: Require a release version of LLVM 22 for context analysis
Using a prerelease version as a minimum supported version for
CONFIG_WARN_CONTEXT_ANALYSIS was reasonable to do while LLVM 22 was the
development version so that people could immediately build from main and
start testing and validating this in their own code. However, it can be
problematic when using prerelease versions of LLVM 22, such as Android
clang 22.0.1 (the current android mainline compiler) or when bisecting
LLVM between llvmorg-22-init and llvmorg-23-init, to build the kernel,
as all compiler fixes for the context analysis may not be present,
potentially resulting in warnings that can easily turn into errors.

Now that LLVM 22 is released as 22.1.0, upgrade the check to require at
least this version to ensure that a user's toolchain actually has all
the changes needed for a smooth experience with context analysis.

Fixes: 3269701cb2 ("compiler-context-analysis: Add infrastructure for Context Analysis with Clang")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Marco Elver <elver@google.com>
Link: https://patch.msgid.link/20260224-bump-clang-ver-context-analysis-v1-1-16cc7a90a040@kernel.org
2026-02-25 15:36:02 +01:00
Peter Zijlstra
c9bc1753b3 perf: Fix __perf_event_overflow() vs perf_remove_from_context() race
Make sure that __perf_event_overflow() runs with IRQs disabled for all
possible callchains. Specifically the software events can end up running
it with only preemption disabled.

This opens up a race vs perf_event_exit_event() and friends that will go
and free various things the overflow path expects to be present, like
the BPF program.

Fixes: 592903cdcb ("perf_counter: add an event_list")
Reported-by: Simond Hu <cmdhh1767@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Simond Hu <cmdhh1767@gmail.com>
Link: https://patch.msgid.link/20260224122909.GV1395416@noisy.programming.kicks-ass.net
2026-02-25 15:02:34 +01:00
Srinivas Pandruvada
6b050482ec cpufreq: intel_pstate: Fix crash during turbo disable
When the system is booted with kernel command line argument "nosmt" or
"maxcpus" to limit the number of CPUs, disabling turbo via:

 echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo

results in a crash:

 PF: supervisor read access in kernel mode
 PF: error_code(0x0000) - not-present page
 PGD 0 P4D 0
 Oops: Oops: 0000 [#1] SMP PTI
 ...
 RIP: 0010:store_no_turbo+0x100/0x1f0
 ...

This occurs because for_each_possible_cpu() returns CPUs even if they
are not online. For those CPUs, all_cpu_data[] will be NULL. Since
commit 973207ae3d ("cpufreq: intel_pstate: Rearrange max frequency
updates handling code"), all_cpu_data[] is dereferenced even for CPUs
which are not online, causing the NULL pointer dereference.

To fix that, pass CPU number to intel_pstate_update_max_freq() and use
all_cpu_data[] for those CPUs for which there is a valid cpufreq policy.

Fixes: 973207ae3d ("cpufreq: intel_pstate: Rearrange max frequency updates handling code")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221068
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: 6.16+ <stable@vger.kernel.org> # 6.16+
Link: https://patch.msgid.link/20260225001752.890164-1-srinivas.pandruvada@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-02-25 14:39:19 +01:00
Wilfred Mallawa
650b774cf9 xfs: add static size checks for ioctl UABI
The ioctl structures in libxfs/xfs_fs.h are missing static size checks.
It is useful to have static size checks for these structures as adding
new fields to them could cause issues (e.g. extra padding that may be
inserted by the compiler). So add these checks to xfs/xfs_ondisk.h.

Due to different padding/alignment requirements across different
architectures, to avoid build failures, some structures are ommited from
the size checks. For example, structures with "compat_" definitions in
xfs/xfs_ioctl32.h are ommited.

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:50 +01:00
Wilfred Mallawa
e97cbf863d xfs: remove duplicate static size checks
In libxfs/xfs_ondisk.h, remove some duplicate entries of
XFS_CHECK_STRUCT_SIZE().

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Nirjhar Roy (IBM)
9a654a8fa3 xfs: Add comments for usages of some macros.
Add comments explaining when to use XFS_IS_CORRUPT() and ASSERT()

Suggested-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Nirjhar Roy (IBM)
c2368fc89a xfs: Update lazy counters in xfs_growfs_rt_bmblock()
Update lazy counters in xfs_growfs_rt_bmblock() similar to the way it
is done xfs_growfs_data_private(). This is because the lazy counters are
not always updated and synching the counters will avoid inconsistencies
between frexents and rtextents(total realtime extent count). This will
be more useful once realtime shrink is implemented as this will prevent
some transient state to occur where frexents might be greater than
total rtextents.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Nirjhar Roy (IBM)
ac1d977096 xfs: Add a comment in xfs_log_sb()
Add a comment explaining why the sb_frextents are updated outside the
if (xfs_has_lazycount(mp) check even though it is a lazycounter.
RT groups are supported only in v5 filesystems which always have
lazycounter enabled - so putting it inside the if(xfs_has_lazycount(mp)
check is redundant.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Nirjhar Roy (IBM)
8baa9bccc0 xfs: Fix xfs_last_rt_bmblock()
Bug description:

If the size of the last rtgroup i.e, the rtg passed to
xfs_last_rt_bmblock() is such that the last rtextent falls in 0th word
offset of a bmblock of the bitmap file tracking this (last) rtgroup,
then in that case xfs_last_rt_bmblock() incorrectly returns the next
bmblock number instead of the current/last used bmblock number.
When xfs_last_rt_bmblock() incorrectly returns the next bmblock,
the loop to grow/modify the bmblocks in xfs_growfs_rtg() doesn't
execute and xfs_growfs basically does a nop in certain cases.

xfs_growfs will do a nop when the new size of the fs will have the same
number of rtgroups i.e, we are only growing the last rtgroup.

Reproduce:
$ mkfs.xfs -m metadir=0 -r rtdev=/dev/loop1 /dev/loop0 \
	-r size=32769b -f
$ mount -o rtdev=/dev/loop1 /dev/loop0 /mnt/scratch
$ xfs_growfs -R $(( 32769 + 1 )) /mnt/scratch
$ xfs_info /mnt/scratch | grep rtextents
$ # We can see that rtextents hasn't changed

Fix:
Fix this by returning the current/last used bmblock when the last
rtgroup size is not a multiple xfs_rtbitmap_rtx_per_rbmblock()
and the next bmblock when the rtgroup size is a multiple of
xfs_rtbitmap_rtx_per_rbmblock() i.e, the existing blocks are
completely used up.
Also, I have renamed xfs_last_rt_bmblock() to
xfs_last_rt_bmblock_to_extend() to signify that this function
returns the bmblock number to extend and NOT always the last used
bmblock number.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Darrick J. Wong
115ea07b94 xfs: don't report half-built inodes to fserror
Sam Sun apparently found a syzbot way to fuzz a filesystem such that
xfs_iget_cache_miss would free the inode before the fserror code could
catch up.  Frustratingly he doesn't use the syzbot dashboard so there's
no C reproducer and not even a full error report, so I'm guessing that:

Inodes that are being constructed or torn down inside XFS are not
visible to the VFS.  They should never be reported to fserror.
Also, any inode that has been freshly allocated in _cache_miss should be
marked INEW immediately because, well, it's an incompletely constructed
inode that isn't yet visible to the VFS.

Reported-by: Sam Sun <samsun1006219@gmail.com>
Fixes: 5eb4cb18e4 ("xfs: convey metadata health events to the health monitor")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Darrick J. Wong
75690e5fdd xfs: don't report metadata inodes to fserror
Internal metadata inodes are not exposed to userspace programs, so it
makes no sense to pass them to the fserror functions (aka fsnotify).
Instead, report metadata file problems as general filesystem corruption.

Fixes: 5eb4cb18e4 ("xfs: convey metadata health events to the health monitor")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Darrick J. Wong
94014a23e9 xfs: fix potential pointer access race in xfs_healthmon_get
Pankaj Raghav asks about this code in xfs_healthmon_get:

  hm = mp->m_healthmon;
  if (hm && !refcount_inc_not_zero(&hm->ref))
    hm = NULL;
  rcu_read_unlock();
  return hm;

(slightly edited to compress a mailing list thread)

"Nit: Should we do a READ_ONCE(mp->m_healthmon) here to avoid any
compiler tricks that can result in an undefined behaviour? I am not sure
if I am being paranoid here.

"So this is my understanding: RCU guarantees that we get a valid object
(actual data of m_healthmon) but does not guarantee the compiler will
not reread the pointer between checking if hm is !NULL and accessing the
pointer as we are doing it lockless.

"So just a barrier() call in rcu_read_lock is enough to make sure this
doesn't happen and probably adding a READ_ONCE() is not needed?"

After some initial confusion I concluded that he's correct.  The
compiler could very well eliminate the hm variable in favor of walking
the pointers twice, turning the code into:

  if (mp->m_healthmon && !refcount_inc_not_zero(&mp->m_healthmon->ref))

If this happens, then xfs_healthmon_detach can sneak in between the
two sides of the && expression and set mp->m_healthmon to NULL, and
thereby cause a null pointer dereference crash.  Fix this by using the
rcu pointer assignment and dereference functions, which ensure that the
proper reordering barriers are in place.

Practically speaking, gcc seems to allocate an actual variable for hm
and only reads mp->m_healthmon once (as intended), but we ought to be
more explicit about requiring this.

Reported-by: Pankaj Raghav <pankaj.raghav@linux.dev>
Fixes: a48373e7d3 ("xfs: start creating infrastructure for health monitoring")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Darrick J. Wong
eb8550fb75 xfs: fix xfs_group release bug in xfs_dax_notify_dev_failure
Chris Mason reports that his AI tools noticed that we were using
xfs_perag_put and xfs_group_put to release the group reference returned
by xfs_group_next_range.  However, the iterator function returns an
object with an active refcount, which means that we must use the correct
function to release the active refcount, which is _rele.

Cc: <stable@vger.kernel.org> # v6.0
Fixes: 6f643c57d5 ("xfs: implement ->notify_failure() for XFS")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:49 +01:00
Darrick J. Wong
161456987a xfs: fix xfs_group release bug in xfs_verify_report_losses
Chris Mason reports that his AI tools noticed that we were using
xfs_perag_put and xfs_group_put to release the group reference returned
by xfs_group_next_range.  However, the iterator function returns an
object with an active refcount, which means that we must use the correct
function to release the active refcount, which is _rele.

Fixes: b8accfd65d ("xfs: add media verification ioctl")
Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-xfs/20260206030527.2506821-1-clm@meta.com/
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Darrick J. Wong
e764dd439d xfs: fix copy-paste error in previous fix
Chris Mason noticed that there is a copy-paste error in a recent change
to xrep_dir_teardown that nulls out pointers after freeing the
resources.

Fixes: ba408d299a ("xfs: only call xf{array,blob}_destroy if we have a valid pointer")
Link: https://lore.kernel.org/linux-xfs/20260205194211.2307232-1-clm@meta.com/
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Ethan Tidmore
cddfa648f1 xfs: Fix error pointer dereference
The function try_lookup_noperm() can return an error pointer and is not
checked for one.

Add checks for error pointer in xrep_adoption_check_dcache() and
xrep_adoption_zap_dcache().

Detected by Smatch:
fs/xfs/scrub/orphanage.c:449 xrep_adoption_check_dcache() error:
'd_child' dereferencing possible ERR_PTR()

fs/xfs/scrub/orphanage.c:485 xrep_adoption_zap_dcache() error:
'd_child' dereferencing possible ERR_PTR()

Fixes: 73597e3e42 ("xfs: ensure dentry consistency when the orphanage adopts a file")
Cc: stable@vger.kernel.org # v6.16
Signed-off-by: Ethan Tidmore <ethantidmore06@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Christoph Hellwig
47553dd60b xfs: remove metafile inodes from the active inode stat
The active inode (or active vnode until recently) stat can get much larger
than expected on file systems with a lot of metafile inodes like zoned
file systems on SMR hard disks with 10.000s of rtg rmap inodes.

Remove all metafile inodes from the active counter to make it more useful
to track actual workloads and add a separate counter for active metafile
inodes.

This fixes xfs/177 on SMR hard drives.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Christoph Hellwig
03a6d6c4c8 xfs: cleanup inode counter stats
Most of them are unused, so mark them as such.  Give the remaining ones
names that match their use instead of the historic IRIX ones based on
vnodes.  Note that the names are purely internal to the XFS code, the
user interface is based on section names and arrays of counters.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Wilfred Mallawa
fd81d3fd01 xfs: fix code alignment issues in xfs_ondisk.c
Fixup some code alignment issues in xfs_ondisk.c

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Nirjhar Roy (IBM)
4ad85e633b xfs: Replace &rtg->rtg_group with rtg_group()
Use the already existing rtg_group() wrapper instead of directly
accessing the struct xfs_group member in struct xfs_rtgroup.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
[cem: Conflict resolution against 06873dbd94]
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Nirjhar Roy (IBM)
a49b7ff63f xfs: Refactoring the nagcount and delta calculation
Introduce xfs_growfs_compute_delta() to calculate the nagcount
and delta blocks and refactor the code from xfs_growfs_data_private().
No functional changes.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Nirjhar Roy (IBM)
18c16f602a xfs: Replace ASSERT with XFS_IS_CORRUPT in xfs_rtcopy_summary()
Replace ASSERT(sum > 0) with an XFS_IS_CORRUPT() and place it just
after the call to xfs_rtget_summary() so that we don't end up using
an illegal value of sum.

Signed-off-by: Nirjhar Roy (IBM) <nirjhar.roy.lists@gmail.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2026-02-25 13:58:48 +01:00
Jacob Moroni
104016eb67 RDMA/umem: Fix double dma_buf_unpin in failure path
In ib_umem_dmabuf_get_pinned_with_dma_device(), the call to
ib_umem_dmabuf_map_pages() can fail. If this occurs, the dmabuf
is immediately unpinned but the umem_dmabuf->pinned flag is still
set. Then, when ib_umem_release() is called, it calls
ib_umem_dmabuf_revoke() which will call dma_buf_unpin() again.

Fix this by removing the immediate unpin upon failure and just let
the ib_umem_release/revoke path handle it. This also ensures the
proper unmap-unpin unwind ordering if the dmabuf_map_pages call
happened to fail due to dma_resv_wait_timeout (and therefore has
a non-NULL umem_dmabuf->sgt).

Fixes: 1e4df4a21c ("RDMA/umem: Allow pinned dmabuf umem usage")
Signed-off-by: Jacob Moroni <jmoroni@google.com>
Link: https://patch.msgid.link/20260224234153.1207849-1-jmoroni@google.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-25 07:50:58 -05:00
Stefan Metzmacher
93a4a9b732 RDMA/core: Check id_priv->restricted_node_type in cma_listen_on_dev()
When listening on wildcard addresses we have a global list for the application
layer rdma_cm_id and for any existing device or any device added in future we
try to listen on any wildcard listener.

When the listener has a restricted_node_type we should prevent listening on
devices with a different node type.

While there fix the documentation comment of rdma_restrict_node_type()
to include rdma_resolve_addr() instead of having rdma_bind_addr() twice.

Fixes: a760e80e90 ("RDMA/core: introduce rdma_restrict_node_type()")
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Steve French <smfrench@gmail.com>
Cc: Namjae Jeon <linkinjeon@kernel.org>
Cc: Tom Talpey <tom@talpey.com>
Cc: Long Li <longli@microsoft.com>
Cc: linux-rdma@vger.kernel.org
Cc: linux-cifs@vger.kernel.org
Cc: samba-technical@lists.samba.org
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Link: https://patch.msgid.link/20260224165951.3582093-2-metze@samba.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-25 07:50:10 -05:00
Marc Zyngier
54e367cb94 KVM: arm64: Deduplicate ASID retrieval code
We currently have three versions of the ASID retrieval code, one
in the S1 walker, and two in the VNCR handling (although the last
two are limited to the EL2&0 translation regime).

Make this code common, and take this opportunity to also simplify
the code a bit while switching over to the TTBRx_EL1_ASID macro.

Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260225104718.14209-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-25 12:19:33 +00:00
Vinicius Costa Gomes
ee66bc2957 dmaengine: idxd: Fix leaking event log memory
During the device remove process, the device is reset, causing the
configuration registers to go back to their default state, which is
zero. As the driver is checking if the event log support was enabled
before deallocating, it will fail if a reset happened before.

Do not check if the support was enabled, the check for 'idxd->evl'
being valid (only allocated if the HW capability is available) is
enough.

Fixes: 244da66cda ("dmaengine: idxd: setup event log configuration")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-10-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:18 +05:30
Vinicius Costa Gomes
c311f5e924 dmaengine: idxd: Fix freeing the allocated ida too late
It can happen that when the cdev .release() is called, the driver
already called ida_destroy(). Move ida_free() to the _del() path.

We see with DEBUG_KOBJECT_RELEASE enabled and forcing an early PCI
unbind.

Fixes: 04922b7445 ("dmaengine: idxd: fix cdev setup and free device lifetime issues")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-9-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:18 +05:30
Vinicius Costa Gomes
d9cfb5193a dmaengine: idxd: Fix memory leak when a wq is reset
idxd_wq_disable_cleanup() which is called from the reset path for a
workqueue, sets the wq type to NONE, which for other parts of the
driver mean that the wq is empty (all its resources were released).

Only set the wq type to NONE after its resources are released.

Fixes: da32b28c95 ("dmaengine: idxd: cleanup workqueue config after disabling")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-8-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:18 +05:30
Vinicius Costa Gomes
3d33de353b dmaengine: idxd: Fix not releasing workqueue on .release()
The workqueue associated with an DSA/IAA device is not released when
the object is freed.

Fixes: 47c16ac27d ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-7-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:18 +05:30
Vinicius Costa Gomes
4fd3c4679f dmaengine: idxd: Wait for submitted operations on .device_synchronize()
When the dmaengine "core" asks the driver to synchronize, send a Drain
operation to the device workqueue, which will wait for the already
submitted operations to finish.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-6-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:18 +05:30
Vinicius Costa Gomes
2a93f5747d dmaengine: idxd: Flush all pending descriptors
When used as a dmaengine, the DMA "core" might ask the driver to
terminate all pending requests, when that happens, flush all pending
descriptors.

In this context, flush means removing the requests from the pending
lists, so even if they are completed after, the user is not notified.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-5-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:18 +05:30
Vinicius Costa Gomes
f019d7814b dmaengine: idxd: Flush kernel workqueues on Function Level Reset
When a Function Level Reset (FLR) happens, terminate the pending
descriptors that were issued by in-kernel users and disable the
interrupts associated with those. They will be re-enabled after FLR
finishes.

idxd_wq_flush_desc() is declared on idxd.h because it's going to be
used in by the DMA backend in a future patch.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-4-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:17 +05:30
Vinicius Costa Gomes
d6077df7b7 dmaengine: idxd: Fix possible invalid memory access after FLR
In the case that the first Function Level Reset (FLR) concludes
correctly, but in the second FLR the scratch area for the saved
configuration cannot be allocated, it's possible for a invalid memory
access to happen.

Always set the deallocated scratch area to NULL after FLR completes.

Fixes: 98d187a989 ("dmaengine: idxd: Enable Function Level Reset (FLR) for halt")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-3-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:17 +05:30
Vinicius Costa Gomes
52d2edea0d dmaengine: idxd: Fix crash when the event log is disabled
If reporting errors to the event log is not supported by the hardware,
and an error that causes Function Level Reset (FLR) is received, the
driver will try to restore the event log even if it was not allocated.

Also, only try to free the event log if it was properly allocated.

Fixes: 6078a315ae ("dmaengine: idxd: Add idxd_device_config_save() and idxd_device_config_restore() helpers")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-2-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:17 +05:30
Vinicius Costa Gomes
caf91cdf2d dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config()
Move the check for IDXD_FLAG_CONFIGURABLE and the locking to "inside"
idxd_device_config(), as this is common to all callers, and the one
that wasn't holding the lock was an error (that was causing the
lockdep warning).

Suggested-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-1-7ed70658a9d1@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:39:17 +05:30
Mike Rapoport (Microsoft)
a4b0bf6a40 x86/efi: defer freeing of boot services memory
efi_free_boot_services() frees memory occupied by EFI_BOOT_SERVICES_CODE
and EFI_BOOT_SERVICES_DATA using memblock_free_late().

There are two issue with that: memblock_free_late() should be used for
memory allocated with memblock_alloc() while the memory reserved with
memblock_reserve() should be freed with free_reserved_area().

More acutely, with CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
efi_free_boot_services() is called before deferred initialization of the
memory map is complete.

Benjamin Herrenschmidt reports that this causes a leak of ~140MB of
RAM on EC2 t3a.nano instances which only have 512MB or RAM.

If the freed memory resides in the areas that memory map for them is
still uninitialized, they won't be actually freed because
memblock_free_late() calls memblock_free_pages() and the latter skips
uninitialized pages.

Using free_reserved_area() at this point is also problematic because
__free_page() accesses the buddy of the freed page and that again might
end up in uninitialized part of the memory map.

Delaying the entire efi_free_boot_services() could be problematic
because in addition to freeing boot services memory it updates
efi.memmap without any synchronization and that's undesirable late in
boot when there is concurrency.

More robust approach is to only defer freeing of the EFI boot services
memory.

Split efi_free_boot_services() in two. First efi_unmap_boot_services()
collects ranges that should be freed into an array then
efi_free_boot_services() later frees them after deferred init is complete.

Link: https://lore.kernel.org/all/ec2aaef14783869b3be6e3c253b2dcbf67dbc12a.camel@kernel.crashing.org
Fixes: 916f676f8d ("x86, efi: Retain boot service code until after switching to virtual mode")
Cc: <stable@vger.kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2026-02-25 12:02:48 +01:00
Shenghui Shi
77b19d053a dmaengine: dw-edma: fix MSI data programming for multi-IRQ case
When using MSI (not MSI-X) with multiple IRQs, the MSI data value
must be unique per vector to ensure correct interrupt delivery.
Currently, the driver fails to increment the MSI data per vector,
causing interrupts to be misrouted.

Fix this by caching the base MSI data and adjusting each vector's
data accordingly during IRQ setup.

Fixes: e63d79d1ff04 ("dmaengine: dw-edma: Add Synopsys DesignWare eDMA IP core driver")
Signed-off-by: Shenghui Shi <brody.shi@m2semi.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260209103726.414-1-brody.shi@m2semi.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 16:32:11 +05:30
Gao Xiang
4a2d046e4b erofs: fix interlaced plain identification for encoded extents
Only plain data whose start position and on-disk physical length are
both aligned to the block size should be classified as interlaced
plain extents. Otherwise, it must be treated as shifted plain extents.

This issue was found by syzbot using a crafted compressed image
containing plain extents with unaligned physical lengths, which can
cause OOB read in z_erofs_transform_plain().

Reported-and-tested-by: syzbot+d988dc155e740d76a331@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/699d5714.050a0220.cdd3c.03e7.GAE@google.com
Fixes: 1d191b4ca5 ("erofs: implement encoded extent metadata")
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2026-02-25 17:40:58 +08:00
Joy Zou
2e7b5cf72e dmaengine: fsl-edma: fix channel parameter config for fixed channel requests
Configure only the requested channel when a fixed channel is specified
to avoid modifying other channels unintentionally.

Fix parameter configuration when a fixed DMA channel is requested on
i.MX9 AON domain and i.MX8QM/QXP/DXL platforms. When a client requests
a fixed channel (e.g., channel 6), the driver traverses channels 0-5
and may unintentionally modify their configuration if they are unused.

This leads to issues such as setting the `is_multi_fifo` flag unexpectedly,
causing memcpy tests to fail when using the dmatest tool.

Only affect edma memcpy test when the channel is fixed.

Fixes: 72f5801a4e ("dmaengine: fsl-edma: integrate v3 support")
Signed-off-by: Joy Zou <joy.zou@nxp.com>
Cc: stable@vger.kernel.org
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20250917-b4-edma-chanconf-v1-1-886486e02e91@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
2026-02-25 14:55:10 +05:30
Takashi Iwai
4e9113c533 ALSA: usb-audio: Use inclusive terms
Replace the remaining with inclusive terms; it's only this function
name we overlooked at the previous conversion.

Fixes: 53837b4ac2 ("ALSA: usb-audio: Replace slave/master terms")
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20260225085233.316306-5-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-25 10:17:28 +01:00
Takashi Iwai
c5bf24c8ab ALSA: usb-audio: Avoid implicit feedback mode on DIYINHK USB Audio 2.0
Although DIYINHK USB Audio 2.0 (ID 20b1:2009) shows the implicit
feedback source for the capture stream, this would cause several
problems for the playback.  Namely, the device can get wMaxPackSize
1024 for 24/32 bit format with 6 channels, and when a high sample rate
like 352.8kHz or 384kHz is played, the packet size overflows the max
limit.  Also, the device has another two playback altsets, and those
aren't properly handled with the implicit feedback.

Since the device has been working well even before introducing the
implicit feedback, we can assume that it works fine in the async mode.
This patch adds the explicit skip of the implicit fb detection to make
the playback running in the async mode.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221076
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20260225085233.316306-4-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-25 10:17:28 +01:00
Takashi Iwai
7cb2a5422f ALSA: usb-audio: Check max frame size for implicit feedback mode, too
When the packet sizes are taken from the capture stream in the
implicit feedback mode, the sizes might be larger than the upper
boundary defined by the descriptor.  As already done for other
transfer modes, we have to cap the sizes accordingly at sending,
otherwise this would lead to an error in USB core at submission of
URBs.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221076
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20260225085233.316306-3-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-25 10:17:28 +01:00
Takashi Iwai
7fe8dec3f6 ALSA: usb-audio: Cap the packet size pre-calculations
We calculate the possible packet sizes beforehand for adaptive and
synchronous endpoints, but we didn't take care of the max frame size
for those pre-calculated values.  When a device or a bus limits the
packet size, a high sample rate or a high number of channels may lead
to the packet sizes that are larger than the given limit, which
results in an error from the USB core at submitting URBs.

As a simple workaround, just add the sanity checks of pre-calculated
packet sizes to have the upper boundary of ep->maxframesize.

Fixes: f0bd62b640 ("ALSA: usb-audio: Improve frames size computation")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=221076
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20260225085233.316306-2-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-25 10:17:27 +01:00
Sascha Bischoff
29c8b85adb irqchip/gic-v5: Fix inversion of IRS_IDR0.virt flag
It appears that a !! became ! during a cleanup, resulting in inverted
logic when detecting if a host GICv5 implementation is capable of
virtualization.

Re-add the missing !, fixing the behaviour.

Fixes: 3227c3a89d ("irqchip/gic-v5: Check if impl is virt capable")
Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
Link: https://patch.msgid.link/20260225083130.3378490-1-sascha.bischoff@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-25 08:55:59 +00:00
Sabrina Dubroca
0c0eef8ccd esp: fix skb leak with espintcp and async crypto
When the TX queue for espintcp is full, esp_output_tail_tcp will
return an error and not free the skb, because with synchronous crypto,
the common xfrm output code will drop the packet for us.

With async crypto (esp_output_done), we need to drop the skb when
esp_output_tail_tcp returns an error.

Fixes: e27cca96cd ("xfrm: add espintcp (RFC 8229)")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-02-25 09:11:40 +01:00
Sabrina Dubroca
7d2fc41f91 xfrm: call xdo_dev_state_delete during state update
When we update an SA, we construct a new state and call
xdo_dev_state_add, but never insert it. The existing state is updated,
then we immediately destroy the new state. Since we haven't added it,
we don't go through the standard state delete code, and we're skipping
removing it from the device (but xdo_dev_state_free will get called
when we destroy the temporary state).

This is similar to commit c5d4d7d831 ("xfrm: Fix deletion of
offloaded SAs on failure.").

Fixes: d77e38e612 ("xfrm: Add an IPsec hardware offloading API")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-02-25 09:11:33 +01:00
Sabrina Dubroca
b57defcf8f xfrm: fix the condition on x->pcpu_num in xfrm_sa_len
pcpu_num = 0 is a valid value. The marker for "unset pcpu_num" which
makes copy_to_user_state_extra not add the XFRMA_SA_PCPU attribute is
UINT_MAX.

Fixes: 1ddf9916ac ("xfrm: Add support for per cpu xfrm state handling.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-02-25 09:11:26 +01:00
Sabrina Dubroca
aa8a3f3c67 xfrm: add missing extack for XFRMA_SA_PCPU in add_acquire and allocspi
We're returning an error caused by invalid user input without setting
an extack. Add one.

Fixes: 1ddf9916ac ("xfrm: Add support for per cpu xfrm state handling.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-02-25 09:11:04 +01:00
Tejun Heo
83236b2e43 sched_ext: Disable preemption between scx_claim_exit() and kicking helper work
scx_claim_exit() atomically sets exit_kind, which prevents scx_error() from
triggering further error handling. After claiming exit, the caller must kick
the helper kthread work which initiates bypass mode and teardown.

If the calling task gets preempted between claiming exit and kicking the
helper work, and the BPF scheduler fails to schedule it back (since error
handling is now disabled), the helper work is never queued, bypass mode
never activates, tasks stop being dispatched, and the system wedges.

Disable preemption across scx_claim_exit() and the subsequent work kicking
in all callers - scx_disable() and scx_vexit(). Add
lockdep_assert_preemption_disabled() to scx_claim_exit() to enforce the
requirement.

Fixes: f0e1a0643a ("sched_ext: Implement BPF extensible scheduler class")
Cc: stable@vger.kernel.org # v6.12+
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-24 21:39:58 -10:00
Jonathan Cavitt
c601fd5414 drm/client: Do not destroy NULL modes
'modes' in drm_client_modeset_probe may fail to kcalloc.  If this
occurs, we jump to 'out', calling modes_destroy on it, which
dereferences it.  This may result in a NULL pointer dereference in the
error case.  Prevent that.

Fixes: 3039cc0c06 ("drm/client: Make copies of modes")
Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patch.msgid.link/20260224221227.69126-2-jonathan.cavitt@intel.com
2026-02-25 09:31:54 +02:00
Liwei Song
c45f726310 firmware: stratix10-rsu: Fix NULL pointer dereference when RSU is disabled
When the Remote System Update (RSU) isn't enabled in the First Stage
Boot Loader (FSBL), the driver encounters a NULL pointer dereference when
excute svc_normal_to_secure_thread() thread, resulting in a kernel panic:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
Mem abort info:
...
Data abort info:
...
[0000000000000008] user address but active_mm is swapper
Internal error: Oops: 0000000096000004 [#1]  SMP
Modules linked in:
CPU: 0 UID: 0 PID: 79 Comm: svc_smc_hvc_thr Not tainted 6.19.0-rc8-yocto-standard+ #59 PREEMPT
Hardware name: SoCFPGA Stratix 10 SoCDK (DT)
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : svc_normal_to_secure_thread+0x38c/0x990
lr : svc_normal_to_secure_thread+0x144/0x990
...
Call trace:
 svc_normal_to_secure_thread+0x38c/0x990 (P)
 kthread+0x150/0x210
 ret_from_fork+0x10/0x20
Code: 97cfc113 f9400260 aa1403e1 f9400400 (f9400402)
---[ end trace 0000000000000000 ]---

The issue occurs because rsu_send_async_msg() fails when RSU is not enabled
in firmware, causing the channel to be freed via stratix10_svc_free_channel().
However, the probe function continues execution and registers
svc_normal_to_secure_thread(), which subsequently attempts to access the
already-freed channel, triggering the NULL pointer dereference.

Fix this by properly cleaning up the async client and returning early on
failure, preventing the thread from being used with an invalid channel.

Fixes: 15847537b6 ("firmware: stratix10-rsu: Migrate RSU driver to use stratix10 asynchronous framework.")
Cc: stable@kernel.org
Signed-off-by: Liwei Song <liwei.song@windriver.com>
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
2026-02-24 21:40:06 -06:00
Russell King (Oracle)
2f61f38a21 net: stmmac: fix timestamping configuration after suspend/resume
When stmmac_init_timestamping() is called, it clears the receive and
transmit path booleans that allow timestamps to be read. These are
never re-initialised until after userspace requests timestamping
features to be enabled.

However, our copy of the timestamp configuration is not cleared, which
means we return the old configuration to userspace when requested.
This is inconsistent. Fix this by clearing the timestamp configuration.

Fixes: d6228b7cdd ("net: stmmac: implement the SIOCGHWTSTAMP ioctl")
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/E1vuUu4-0000000Afea-0j9B@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-24 17:46:15 -08:00
Kees Cook
4b44cbb264 overflow: Make sure size helpers are always inlined
With kmalloc_obj() performing implicit size calculations, the embedded
size_mul() calls, while marked inline, were not always being inlined.
I noticed a couple places where allocations were making a call out for
things that would otherwise be compile-time calculated. Force the
compilers to always inline these calculations.

Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20260224232451.work.614-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
2026-02-24 15:46:31 -08:00
Eric Biggers
201ceb94aa kunit: irq: Ensure timer doesn't fire too frequently
Fix a bug where kunit_run_irq_test() could hang if the system is too
slow.  This was noticed with the crypto library tests in certain VMs.

Specifically, if kunit_irq_test_timer_func() and the associated hrtimer
code took over 5us to run, then the CPU would spend all its time
executing that code in hardirq context.  As a result, the task executing
kunit_run_irq_test() never had a chance to run, exit the loop, and
cancel the timer.

To fix it, make kunit_irq_test_timer_func() increase the timer interval
when the other contexts aren't having a chance to run.

Fixes: 950a81224e ("lib/crypto: tests: Add hash-test-template.h and gen-hash-testvecs.py")
Cc: stable@vger.kernel.org
Reviewed-by: David Gow <david@davidgow.net>
Link: https://lore.kernel.org/r/20260224033751.97615-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2026-02-24 14:44:21 -08:00
Christoph Hellwig
3c4617117a zloop: check for spurious options passed to remove
Zloop uses a command option parser for all control commands,
but most options are only valid for adding a new device.  Check
for incorrectly specified options in the remove handler.

Fixes: eb0570c7df ("block: new zoned loop block device driver")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-24 14:50:18 -07:00
Christoph Hellwig
6acf7860dc zloop: advertise a volatile write cache
Zloop is file system backed and thus needs to sync the underlying file
system to persist data.  Set BLK_FEAT_WRITE_CACHE so that the block
layer actually send flush commands, and fix the flush implementation
as sync_filesystem requires s_umount to be held and the code currently
misses that.

Fixes: eb0570c7df ("block: new zoned loop block device driver")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-24 14:50:18 -07:00
Jens Axboe
bfbc0b5b32 media: dvb-core: fix wrong reinitialization of ringbuffer on reopen
dvb_dvr_open() calls dvb_ringbuffer_init() when a new reader opens the
DVR device.  dvb_ringbuffer_init() calls init_waitqueue_head(), which
reinitializes the waitqueue list head to empty.

Since dmxdev->dvr_buffer.queue is a shared waitqueue (all opens of the
same DVR device share it), this orphans any existing waitqueue entries
from io_uring poll or epoll, leaving them with stale prev/next pointers
while the list head is reset to {self, self}.

The waitqueue and spinlock in dvr_buffer are already properly
initialized once in dvb_dmxdev_init().  The open path only needs to
reset the buffer data pointer, size, and read/write positions.

Replace the dvb_ringbuffer_init() call in dvb_dvr_open() with direct
assignment of data/size and a call to dvb_ringbuffer_reset(), which
properly resets pread, pwrite, and error with correct memory ordering
without touching the waitqueue or spinlock.

Cc: stable@vger.kernel.org
Fixes: 34731df288 ("V4L/DVB (3501): Dmxdev: use dvb_ringbuffer")
Reported-by: syzbot+ab12f0c08dd7ab8d057c@syzkaller.appspotmail.com
Tested-by: syzbot+ab12f0c08dd7ab8d057c@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/698a26d3.050a0220.3b3015.007d.GAE@google.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-02-24 12:39:00 -08:00
wangshuaiwei
2f38fd99c0 scsi: ufs: core: Fix shift out of bounds when MAXQ=32
According to JESD223F, the maximum number of queues (MAXQ) is 32. When MCQ
is enabled and ESI is disabled, nr_hw_queues=32 causes a shift overflow
problem.

Fix this by using 64-bit intermediate values to handle the nr_hw_queues=32
case safely.

Signed-off-by: wangshuaiwei <wangshuaiwei1@xiaomi.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260224063228.50112-1-wangshuaiwei1@xiaomi.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-24 14:23:07 -05:00
Yosry Ahmed
410aed670c MAINTAINERS: update Yosry Ahmed's email address
Use my kernel.org email address.

Link: https://lkml.kernel.org/r/20260223160027.122307-1-yosry@kernel.org
Signed-off-by: Yosry Ahmed <yosry@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:28 -08:00
Daniele Alessandrelli
37a012c5c1 mailmap: add entry for Daniele Alessandrelli
My Intel email is going to bounce soon.  Map it to my personal Gmail
address.

Link: https://lkml.kernel.org/r/20260223170905.278956-1-daniele.alessandrelli@intel.com
Signed-off-by: Daniele Alessandrelli <daniele.alessandrelli@intel.com>
Cc: Daniele Alessandrelli <daniele.alessandrelli@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:28 -08:00
Ming Lei
a4ab97e34b mm: fix NULL NODE_DATA dereference for memoryless nodes on boot
Commit d49004c5f0 ("arch, mm: consolidate initialization of nodes, zones
and memory map") moved free_area_init() from setup_arch() to
mm_core_init_early(), which runs after setup_arch() returns.

This changed the ordering relative to init_cpu_to_node() on x86.  Before
the commit, free_area_init() ran during paging_init() (called from
setup_arch()) *before* init_cpu_to_node().  After the commit, it runs
*after* init_cpu_to_node().

On machines with memoryless NUMA nodes (e.g., node 0 has CPUs but no
memory), this causes a NULL pointer dereference:

 1. numa_register_nodes() skips memoryless nodes: no alloc_node_data()
    and no node_set_online() for them.
 2. init_cpu_to_node() sets memoryless nodes online (they have CPUs)
    but does not allocate NODE_DATA.
 3. free_area_init() checks "if (!node_online(nid))" to decide whether
    to call alloc_offline_node_data(). Since the memoryless node is now
    online, the allocation is skipped, leaving NODE_DATA(nid) == NULL.
 4. The immediate "pgdat = NODE_DATA(nid)" dereferences NULL.

The crash happens before console_init(), so no output is visible without
earlyprintk.  With earlyprintk enabled, the following panic is observed:

 BUG: unable to handle page fault for address: 000000000002a1e0
 Oops: Oops: 0000 [#1] SMP NOPTI
 RIP: 0010:free_area_init_node+0x3a/0x540
 Call Trace:
  <TASK>
  free_area_init+0x331/0x4e0
  start_kernel+0x69/0x4a0
  x86_64_start_reservations+0x24/0x30
  x86_64_start_kernel+0x125/0x130
  common_startup_64+0x13e/0x148
  </TASK>
 Kernel panic - not syncing: Attempted to kill the idle task!

Fix this by checking "if (!NODE_DATA(nid))" instead of "if
(!node_online(nid))".  This directly tests whether the per-node data
structure needs to be allocated, regardless of the node's online status. 
This change is also safe for non-x86 architectures as they all allocate
NODE_DATA for every node including memoryless ones, so the check simply
evaluates to false with no change in behavior.

Link: https://lkml.kernel.org/r/20260222115702.3659-1-ming.lei@redhat.com
Fixes: d49004c5f0 ("arch, mm: consolidate initialization of nodes, zones and memory map")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:28 -08:00
Kalesh Singh
079c24d569 mm/tracing: rss_stat: ensure curr is false from kthread context
The rss_stat trace event allows userspace tools, like Perfetto [1], to
inspect per-process RSS metric changes over time.

The curr field was introduced to rss_stat in commit e4dcad204d
("rss_stat: add support to detect RSS updates of external mm").  Its
intent is to indicate whether the RSS update is for the mm_struct of the
current execution context; and is set to false when operating on a remote
mm_struct (e.g., via kswapd or a direct reclaimer).

However, an issue arises when a kernel thread temporarily adopts a user
process's mm_struct.  Kernel threads do not have their own mm_struct and
normally have current->mm set to NULL.  To operate on user memory, they
can "borrow" a memory context using kthread_use_mm(), which sets
current->mm to the user process's mm.

This can be observed, for example, in the USB Function Filesystem (FFS)
driver.  The ffs_user_copy_worker() handles AIO completions and uses
kthread_use_mm() to copy data to a user-space buffer.  If a page fault
occurs during this copy, the fault handler executes in the kthread's
context.

At this point, current is the kthread, but current->mm points to the user
process's mm.  Since the rss_stat event (from the page fault) is for that
same mm, the condition current->mm == mm becomes true, causing curr to be
incorrectly set to true when the trace event is emitted.

This is misleading because it suggests the mm belongs to the kthread,
confusing userspace tools that track per-process RSS changes and
corrupting their mm_id-to-process association.

Fix this by ensuring curr is always false when the trace event is emitted
from a kthread context by checking for the PF_KTHREAD flag.

Link: https://lkml.kernel.org/r/20260219233708.1971199-1-kaleshsingh@google.com
Link: https://perfetto.dev/ [1]
Fixes: e4dcad204d ("rss_stat: add support to detect RSS updates of external mm")
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: "David Hildenbrand (Arm)" <david@kernel.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: <stable@vger.kernel.org>	[5.10+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:27 -08:00
Alexander Potapenko
d155aab90f mm/kfence: fix KASAN hardware tag faults during late enablement
When KASAN hardware tags are enabled, re-enabling KFENCE late (via
/sys/module/kfence/parameters/sample_interval) causes KASAN faults.

This happens because the KFENCE pool and metadata are allocated via the
page allocator, which tags the memory, while KFENCE continues to access it
using untagged pointers during initialization.

Use __GFP_SKIP_KASAN for late KFENCE pool and metadata allocations to
ensure the memory remains untagged, consistent with early allocations from
memblock.  To support this, add __GFP_SKIP_KASAN to the allowlist in
__alloc_contig_verify_gfp_mask().

Link: https://lkml.kernel.org/r/20260220144940.2779209-1-glider@google.com
Fixes: 0ce20dd840 ("mm: add Kernel Electric-Fence infrastructure")
Signed-off-by: Alexander Potapenko <glider@google.com>
Suggested-by: Ernesto Martinez Garcia <ernesto.martinezgarcia@tugraz.at>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Kees Cook <kees@kernel.org>
Cc: Marco Elver <elver@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:27 -08:00
SeongJae Park
c80f46ac22 mm/damon/core: disallow non-power of two min_region_sz
DAMON core uses min_region_sz parameter value as the DAMON region
alignment.  The alignment is made using ALIGN() and ALIGN_DOWN(), which
support only the power of two alignments.  But DAMON core API callers can
set min_region_sz to an arbitrary number.  Users can also set it
indirectly, using addr_unit.

When the alignment is not properly set, DAMON behavior becomes difficult
to expect and understand, makes it effectively broken.  It doesn't cause a
kernel crash-like significant issue, though.

Fix the issue by disallowing min_region_sz input that is not a power of
two.  Add the check to damon_commit_ctx(), as all DAMON API callers who
set min_region_sz uses the function.

This can be a sort of behavioral change, but it does not break users, for
the following reasons.  As the symptom is making DAMON effectively broken,
it is not reasonable to believe there are real use cases of non-power of
two min_region_sz.  There is no known use case or issue reports from the
setup, either.

In future, if we find real use cases of non-power of two alignments and we
can support it with low enough overhead, we can consider moving the
restriction.  But, for now, simply disallowing the corner case should be
good enough as a hot fix.

Link: https://lkml.kernel.org/r/20260214214124.87689-1-sj@kernel.org
Fixes: d8f867fa08 ("mm/damon: add damon_ctx->min_sz_region")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Quanmin Yan <yanquanmin1@huawei.com>
Cc: <stable@vger.kernel.org>	[6.18+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:27 -08:00
Phillip Lougher
fdb24a820a Squashfs: check metadata block offset is within range
Syzkaller reports a "general protection fault in squashfs_copy_data"

This is ultimately caused by a corrupted index look-up table, which
produces a negative metadata block offset.

This is subsequently passed to squashfs_copy_data (via
squashfs_read_metadata) where the negative offset causes an out of bounds
access.

The fix is to check that the offset is within range in
squashfs_read_metadata.  This will trap this and other cases.

Link: https://lkml.kernel.org/r/20260217050955.138351-1-phillip@squashfs.org.uk
Fixes: f400e12656 ("Squashfs: cache operations")
Reported-by: syzbot+a9747fe1c35a5b115d3f@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/699234e2.a70a0220.2c38d7.00e2.GAE@google.com/
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:27 -08:00
Vlastimil Babka (SUSE)
319d0bff22 MAINTAINERS, mailmap: update e-mail address for Vlastimil Babka
Hopefully improve e-mail performance.

Link: https://lkml.kernel.org/r/20260217102151.10425-2-vbabka@kernel.org
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:26 -08:00
Pratyush Yadav (Google)
f85b1c6af5 liveupdate: luo_file: remember retrieve() status
LUO keeps track of successful retrieve attempts on a LUO file.  It does so
to avoid multiple retrievals of the same file.  Multiple retrievals cause
problems because once the file is retrieved, the serialized data
structures are likely freed and the file is likely in a very different
state from what the code expects.

The retrieve boolean in struct luo_file keeps track of this, and is passed
to the finish callback so it knows what work was already done and what it
has left to do.

All this works well when retrieve succeeds.  When it fails,
luo_retrieve_file() returns the error immediately, without ever storing
anywhere that a retrieve was attempted or what its error code was.  This
results in an errored LIVEUPDATE_SESSION_RETRIEVE_FD ioctl to userspace,
but nothing prevents it from trying this again.

The retry is problematic for much of the same reasons listed above.  The
file is likely in a very different state than what the retrieve logic
normally expects, and it might even have freed some serialization data
structures.  Attempting to access them or free them again is going to
break things.

For example, if memfd managed to restore 8 of its 10 folios, but fails on
the 9th, a subsequent retrieve attempt will try to call
kho_restore_folio() on the first folio again, and that will fail with a
warning since it is an invalid operation.

Apart from the retry, finish() also breaks.  Since on failure the
retrieved bool in luo_file is never touched, the finish() call on session
close will tell the file handler that retrieve was never attempted, and it
will try to access or free the data structures that might not exist, much
in the same way as the retry attempt.

There is no sane way of attempting the retrieve again.  Remember the error
retrieve returned and directly return it on a retry.  Also pass this
status code to finish() so it can make the right decision on the work it
needs to do.

This is done by changing the bool to an integer.  A value of 0 means
retrieve was never attempted, a positive value means it succeeded, and a
negative value means it failed and the error code is the value.

Link: https://lkml.kernel.org/r/20260216132221.987987-1-pratyush@kernel.org
Fixes: 7c722a7f44 ("liveupdate: luo_file: implement file systems callbacks")
Signed-off-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:26 -08:00
Deepanshu Kartikey
dd085fe9a8 mm: thp: deny THP for files on anonymous inodes
file_thp_enabled() incorrectly allows THP for files on anonymous inodes
(e.g. guest_memfd and secretmem). These files are created via
alloc_file_pseudo(), which does not call get_write_access() and leaves
inode->i_writecount at 0. Combined with S_ISREG(inode->i_mode) being
true, they appear as read-only regular files when
CONFIG_READ_ONLY_THP_FOR_FS is enabled, making them eligible for THP
collapse.

Anonymous inodes can never pass the inode_is_open_for_write() check
since their i_writecount is never incremented through the normal VFS
open path. The right thing to do is to exclude them from THP eligibility
altogether, since CONFIG_READ_ONLY_THP_FOR_FS was designed for real
filesystem files (e.g. shared libraries), not for pseudo-filesystem
inodes.

For guest_memfd, this allows khugepaged and MADV_COLLAPSE to create
large folios in the page cache via the collapse path, but the
guest_memfd fault handler does not support large folios. This triggers
WARN_ON_ONCE(folio_test_large(folio)) in kvm_gmem_fault_user_mapping().

For secretmem, collapse_file() tries to copy page contents through the
direct map, but secretmem pages are removed from the direct map. This
can result in a kernel crash:

    BUG: unable to handle page fault for address: ffff88810284d000
    RIP: 0010:memcpy_orig+0x16/0x130
    Call Trace:
     collapse_file
     hpage_collapse_scan_file
     madvise_collapse

Secretmem is not affected by the crash on upstream as the memory failure
recovery handles the failed copy gracefully, but it still triggers
confusing false memory failure reports:

    Memory failure: 0x106d96f: recovery action for clean unevictable
    LRU page: Recovered

Check IS_ANON_FILE(inode) in file_thp_enabled() to deny THP for all
anonymous inode files.

Link: https://syzkaller.appspot.com/bug?extid=33a04338019ac7e43a44
Link: https://lore.kernel.org/linux-mm/CAEvNRgHegcz3ro35ixkDw39ES8=U6rs6S7iP0gkR9enr7HoGtA@mail.gmail.com
Link: https://lkml.kernel.org/r/20260214001535.435626-1-kartikey406@gmail.com
Fixes: 7fbb5e1882 ("mm: remove VM_EXEC requirement for THP eligibility")
Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
Reported-by: syzbot+33a04338019ac7e43a44@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=33a04338019ac7e43a44
Tested-by: syzbot+33a04338019ac7e43a44@syzkaller.appspotmail.com
Tested-by: Lance Yang <lance.yang@linux.dev>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Ackerley Tng <ackerleytng@google.com>
Tested-by: Ackerley Tng <ackerleytng@google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Fangrui Song <i@maskray.me>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:26 -08:00
Arnd Bergmann
eb9549346f mm: change vma_alloc_folio_noprof() macro to inline function
In a few rare configurations with extra warnings eanbled, the new
drm_pagemap_migrate_populate_ram_pfn() calls vma_alloc_folio_noprof() but
that does not use all the arguments, leading to a harmless warning:

drivers/gpu/drm/drm_pagemap.c: In function 'drm_pagemap_migrate_populate_ram_pfn':
drivers/gpu/drm/drm_pagemap.c:701:63: error: parameter 'addr' set but not used [-Werror=unused-but-set-parameter=]
  701 |                                                 unsigned long addr)
      |                                                 ~~~~~~~~~~~~~~^~~~

Replace the macro with an inline function so the compiler can see how the
argument would be used, but is still able to optimize out the assignments.

Link: https://lkml.kernel.org/r/20260216121751.2378374-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:26 -08:00
Alexander Potapenko
09833d99db mm/kfence: disable KFENCE upon KASAN HW tags enablement
KFENCE does not currently support KASAN hardware tags.  As a result, the
two features are incompatible when enabled simultaneously.

Given that MTE provides deterministic protection and KFENCE is a
sampling-based debugging tool, prioritize the stronger hardware
protections.  Disable KFENCE initialization and free the pre-allocated
pool if KASAN hardware tags are detected to ensure the system maintains
the security guarantees provided by MTE.

Link: https://lkml.kernel.org/r/20260213095410.1862978-1-glider@google.com
Fixes: 0ce20dd840 ("mm: add Kernel Electric-Fence infrastructure")
Signed-off-by: Alexander Potapenko <glider@google.com>
Suggested-by: Marco Elver <elver@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ernesto Martinez Garcia <ernesto.martinezgarcia@tugraz.at>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Kees Cook <kees@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-02-24 11:13:25 -08:00
Jens Axboe
a46435537a io_uring/cmd_net: use READ_ONCE() for ->addr3 read
Any SQE read should use READ_ONCE(), to ensure the result is read once
and only once. Doesn't really matter for this case, but it's better to
keep these 100% consistent and always use READ_ONCE() for the prep side
of SQE handling.

Fixes: 5d24321e4c ("io_uring: Introduce getsockname io_uring cmd")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-24 11:38:34 -07:00
Luka Gejak
a75281626f staging: rtl8723bs: fix potential out-of-bounds read in rtw_restruct_wmm_ie
The current code checks 'i + 5 < in_len' at the end of the if statement.
However, it accesses 'in_ie[i + 5]' before that check, which can lead
to an out-of-bounds read. Move the length check to the beginning of the
conditional to ensure the index is within bounds before accessing the
array.

Fixes: 554c0a3abf ("staging: Add rtl8723bs sdio wifi driver")
Cc: stable <stable@kernel.org>
Signed-off-by: Luka Gejak <luka.gejak@linux.dev>
Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://patch.msgid.link/20260224132647.11642-2-luka.gejak@linux.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-24 10:01:20 -08:00
Cheng-Yang Chou
ee0ff6690f tools/sched_ext: Add Kconfig to sync with upstream
Add the missing Kconfig file to tools/sched_ext/ as referenced in
the README.

Ref: https://github.com/sched-ext/scx/blob/main/kernel.config

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-24 07:51:36 -10:00
Cheng-Yang Chou
095f569332 tools/sched_ext: Sync README.md Kconfig with upstream scx
Sync the documentation with the upstream scx repository to
reflect the current recommended configuration.

Ref: https://github.com/sched-ext/scx/blob/main/README.md#build--install

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-24 07:51:29 -10:00
Ioana Ciornei
fe5669e363 irqchip/ls-extirq: Fix devm_of_iomap() error check
The devm_of_iomap() function returns an ERR_PTR() encoded error code on
failure. Replace the incorrect check against NULL with IS_ERR().

Fixes: 05cd654829 ("irqchip/ls-extirq: Convert to a platform driver to make it work again")
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Herve Codina <herve.codina@bootlin.com>
Link: https://patch.msgid.link/20260224113610.1129022-3-ioana.ciornei@nxp.com
Closes: https://lore.kernel.org/all/aYXvfbfT6w0TMsXS@stanley.mountain/
2026-02-24 18:35:49 +01:00
Ioana Ciornei
e08f2adcf9 Revert "irqchip/ls-extirq: Use for_each_of_imap_item iterator"
This reverts commit 3ac6dfe3d7.

The ls-extirq uses interrupt-map but it's a non-standard use documented
in fsl,ls-extirq.yaml:

        # The driver(drivers/irqchip/irq-ls-extirq.c) have not use standard DT
        # function to parser interrupt-map. So it doesn't consider '#address-size'
        # in parent interrupt controller, such as GIC.
        #
        # When dt-binding verify interrupt-map, item data matrix is spitted at
        # incorrect position. Remove interrupt-map restriction because it always
        # wrong.

This means that by using for_each_of_imap_item and the underlying
of_irq_parse_imap_parent() on its interrupt-map property will effectively
break its functionality

Revert the patch making use of for_each_of_imap_item() in ls-extirq.

Fixes: 3ac6dfe3d7 ("irqchip/ls-extirq: Use for_each_of_imap_item iterator")
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Acked-by: Herve Codina <herve.codina@bootlin.com>
Link: https://patch.msgid.link/20260224113610.1129022-2-ioana.ciornei@nxp.com
2026-02-24 18:35:48 +01:00
Felix Gu
bfd7db781e regulator: Kconfig: fix a typo
Fixes a typo in Kconfig, HWWON -> HWMON

Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260224-kconfig-v1-1-b0c5459ed7a0@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-24 17:31:56 +00:00
Felix Gu
4baaddaa44 regulator: bq257xx: Fix device node reference leak in bq257xx_reg_dt_parse_gpio()
In bq257xx_reg_dt_parse_gpio(), if fails to get subchild, it returns
without calling of_node_put(child), causing the device node reference
leak.

Fixes: 981dd162b6 ("regulator: bq257xx: Add bq257xx boost regulator driver")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Link: https://patch.msgid.link/20260224-bq257-v1-1-8ebbc731c1c3@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-24 17:31:55 +00:00
Felix Gu
0902010c8d regulator: fp9931: Fix PM runtime reference leak in fp9931_hwmon_read()
In fp9931_hwmon_read(), if regmap_read() failed, the function returned
the error code without calling pm_runtime_put_autosuspend(), causing
a PM reference leak.

Fixes: 12d821bd13 ("regulator: Add FP9931/JD9930 driver")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Andreas Kemnade <andreas@kemnade.info>
Link: https://patch.msgid.link/20260224-fp9931-v1-1-1cf05cabef4a@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-24 17:31:54 +00:00
Matthew Brost
74b6e83942 drm/gpusvm: Fix drm_gpusvm_pages_valid_unlocked() kernel-doc
The kernel-doc for drm_gpusvm_pages_valid_unlocked() was stale and still
referenced old range-based arguments and naming. Update the documentation
to match the current function arguments and signature.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/20260219205029.1011336-1-matthew.brost@intel.com
2026-02-24 09:25:36 -08:00
Peter Wang
62c015373e scsi: ufs: core: Move link recovery for hibern8 exit failure to wl_resume
Move the link recovery trigger from ufshcd_uic_pwr_ctrl() to
__ufshcd_wl_resume(). Ensure link recovery is only attempted when hibern8
exit fails during resume, not during hibern8 enter in suspend. Improve
error handling and prevent unnecessary link recovery attempts.

Fixes: 35dabf4503 ("scsi: ufs: core: Use link recovery when h8 exit fails during runtime resume")
Signed-off-by: Peter Wang <peter.wang@mediatek.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260223103906.2533654-1-peter.wang@mediatek.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-24 12:14:22 -05:00
Peter Wang
30df81f222 scsi: ufs: core: Fix possible NULL pointer dereference in ufshcd_add_command_trace()
The kernel log indicates a crash in ufshcd_add_command_trace, due to a NULL
pointer dereference when accessing hwq->id.  This can happen if
ufshcd_mcq_req_to_hwq() returns NULL.

This patch adds a NULL check for hwq before accessing its id field to
prevent a kernel crash.

Kernel log excerpt:
[<ffffffd5d192dc4c>] notify_die+0x4c/0x8c
[<ffffffd5d1814e58>] __die+0x60/0xb0
[<ffffffd5d1814d64>] die+0x4c/0xe0
[<ffffffd5d181575c>] die_kernel_fault+0x74/0x88
[<ffffffd5d1864db4>] __do_kernel_fault+0x314/0x318
[<ffffffd5d2a3cdf8>] do_page_fault+0xa4/0x5f8
[<ffffffd5d2a3cd34>] do_translation_fault+0x34/0x54
[<ffffffd5d1864524>] do_mem_abort+0x50/0xa8
[<ffffffd5d2a297dc>] el1_abort+0x3c/0x64
[<ffffffd5d2a29718>] el1h_64_sync_handler+0x44/0xcc
[<ffffffd5d181133c>] el1h_64_sync+0x80/0x88
[<ffffffd5d255c1dc>] ufshcd_add_command_trace+0x23c/0x320
[<ffffffd5d255bad8>] ufshcd_compl_one_cqe+0xa4/0x404
[<ffffffd5d2572968>] ufshcd_mcq_poll_cqe_lock+0xac/0x104
[<ffffffd5d11c7460>] ufs_mtk_mcq_intr+0x54/0x74 [ufs_mediatek_mod]
[<ffffffd5d19ab92c>] __handle_irq_event_percpu+0xc8/0x348
[<ffffffd5d19abca8>] handle_irq_event+0x3c/0xa8
[<ffffffd5d19b1f0c>] handle_fasteoi_irq+0xf8/0x294
[<ffffffd5d19aa778>] generic_handle_domain_irq+0x54/0x80
[<ffffffd5d18102bc>] gic_handle_irq+0x1d4/0x330
[<ffffffd5d1838210>] call_on_irq_stack+0x44/0x68
[<ffffffd5d183af30>] do_interrupt_handler+0x78/0xd8
[<ffffffd5d2a29c00>] el1_interrupt+0x48/0xa8
[<ffffffd5d2a29ba8>] el1h_64_irq_handler+0x14/0x24
[<ffffffd5d18113c4>] el1h_64_irq+0x80/0x88
[<ffffffd5d2527fb4>] arch_local_irq_enable+0x4/0x1c
[<ffffffd5d25282e4>] cpuidle_enter+0x34/0x54
[<ffffffd5d195a678>] do_idle+0x1dc/0x2f8
[<ffffffd5d195a7c4>] cpu_startup_entry+0x30/0x3c
[<ffffffd5d18155c4>] secondary_start_kernel+0x134/0x1ac
[<ffffffd5d18640bc>] __secondary_switched+0xc4/0xcc

Signed-off-by: Peter Wang <peter.wang@mediatek.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260223065657.2432447-1-peter.wang@mediatek.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-24 12:13:15 -05:00
Wei Liu
f69cfd8e8f x86/hyperv: print out reserved vectors in hexadecimal
Signed-off-by: Wei Liu <wei.liu@kernel.org>
2026-02-24 17:10:44 +00:00
Shawn Guo
56e0a83827 MAINTAINERS: Update Shawn Guo's address for HiSilicon PCIe controller driver
Shawn is no longer with Linaro.  Use his korg email address instead.

Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260224075753.122091-1-shawnguo@kernel.org
2026-02-24 10:38:59 -06:00
Alexei Starovoitov
8feedae96f Merge branch 'selftests-bpf-fixes-for-userspace-asan'
Ihor Solodrai says:

====================
selftests/bpf: Fixes for userspace ASAN

This series includes various fixes aiming to enable test_progs run
with userspace address sanitizer on BPF CI.

The first five patches add a simplified implementation of strscpy() to
selftests/bpf and then replace strcpy/strncpy usages across the tests
with it. See relevant discussions [1][2].

Patch #6 fixes the selftests/bpf/test_progs build with:

    SAN_CFLAGS="-fsanitize=address -fno-omit-frame-pointer"

The subsequent patches fix bugs reported by the address sanitizer on
attempt to run the tests.

[1] https://lore.kernel.org/bpf/CAADnVQ+9uw2_o388j43EWiAPdMB=3FLx2jq-9zRSvqrv-wgRag@mail.gmail.com/
[2] https://lore.kernel.org/bpf/20260220182011.802116-1-ihor.solodrai@linux.dev/
---

v3->v4:
  - combine strscpy and ASAN series into one (Alexei)
  - make the count arg of strscpy() optional via macro and fixup
    relevant call sites (Alexei)
  - remove strscpy_cat() from this series (Alexei)

v3: https://lore.kernel.org/bpf/20260220222604.1155148-1-ihor.solodrai@linux.dev/

v2->v3:
  - rebase on top of "selftests/bpf: Add and use strscpy()"
    - https://lore.kernel.org/bpf/20260220182011.802116-1-ihor.solodrai@linux.dev/
  - uprobe_multi_test.c: memset static struct child at the beginning
    of a test *and* zero out child->thread in release_child (patch #9,
    Mykyta)
  - nits in test_sysctl.c (patch #11, Eduard)
  - bpftool_helpers.c: update to use strscpy (patch #14, Alexei)
  - add __asan_on_error handler to still dump test logs even with ASAN
    build (patch #15, Mykyta)

v2: https://lore.kernel.org/bpf/20260218003041.1156774-1-ihor.solodrai@linux.dev/

v1->v2:
  - rebase on bpf (v1 was targeting bpf-next)
  - add ASAN flag handling in selftests/bpf/Makefile (Eduard)
  - don't override SIGSEGV handler in test_progs with ASAN (Eduard)
  - add error messages in detect_bpftool_path (Mykyta)
  - various nits (Eduard, Jiri, Mykyta, Alexis)

v1: https://lore.kernel.org/bpf/20260212011356.3266753-1-ihor.solodrai@linux.dev/
====================

Link: https://patch.msgid.link/20260223190736.649171-1-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:50 -08:00
Ihor Solodrai
4c9d07865c selftests/bpf: Don't override SIGSEGV handler with ASAN
test_progs has custom SIGSEGV handler, which interferes with the
address sanitizer [1]. Add an #ifndef to avoid this.

Additionally, declare an __asan_on_error() to dump the test logs in
the same way it happens in the custom SIGSEGV handler.

[1] https://lore.kernel.org/bpf/73d832948b01dbc0ebc60d85574bdf8537f3a810.camel@gmail.com/

Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223191118.655185-3-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
a2714e7303 selftests/bpf: Check BPFTOOL env var in detect_bpftool_path()
The bpftool_maps_access and bpftool_metadata tests may fail on BPF CI
with "command not found", depending on a workflow.
This happens because detect_bpftool_path() only checks two hardcoded
relative paths:
  - ./tools/sbin/bpftool
  - ../tools/sbin/bpftool

Add support for a BPFTOOL environment variable that allows specifying
the exact path to the bpftool binary.

Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223191118.655185-2-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
ad90ecedad selftests/bpf: Fix out-of-bounds array access bugs reported by ASAN
- kmem_cache_iter: remove unnecessary debug output
- lwt_seg6local: change the type of foobar to char[]
  - the sizeof(foobar) returned the pointer size and not a string
    length as intended
- verifier_log: increase prog_name buffer size in verif_log_subtest()
  - compiler has a conservative estimate of fixed_log_sz value, making
    ASAN complain on snprint() call

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223191118.655185-1-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
3e711c8e47 selftests/bpf: Fix array bounds warning in jit_disasm_helpers
Compiler cannot infer upper bound for labels.cnt and warns about
potential buffer overflow in snprintf. Add an explicit bounds
check (... && i < MAX_LOCAL_LABELS) in the loop condition to fix the
warning.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-18-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
2bb270a0ac selftests/bpf: Free bpf_object in test_sysctl
ASAN reported a resource leak due to the bpf_object not being tracked
in test_sysctl. Add obj field to struct sysctl_test to properly clean
it up.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-17-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
71dca2950f selftests/bpf: Fix resource leaks caused by missing cleanups
ASAN reported a number of resource leaks:
  - Add missing  *__destroy(skel) calls
  - Replace bpf_link__detach() with bpf_link__destroy() where appropriate
  - cgrp_local_storage: Add bpf_link__destroy() when bpf_iter_create fails
  - dynptr: Add missing bpf_object__close()

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-16-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
68b8bea1ee selftests/bpf: Fix double thread join in uprobe_multi_test
ASAN reported a "joining already joined thread" error. The
release_child() may be called multiple times for the same struct
child.

Fix by resetting child->thread to 0 after pthread_join.

Also memset(0) static child variable in test_attach_api().

Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-15-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
f7ac552be7 selftests/bpf: Fix use-after-free in xdp_metadata test
ASAN reported a use-after-free in close_xsk().

The xsk->socket internally references xsk->umem via socket->ctx->umem,
so the socket must be deleted before the umem. Fix the order of
operations in close_xsk().

Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-14-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
ff9511410d veristat: Fix a memory leak for preset ENUMERATOR
ASAN detected a memory leak in veristat. The cleanup code handling
ENUMERATOR value missed freeing strdup-ed svalue. Fix it.

Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-13-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
3eb4a2e399 selftests/bpf: Fix cleanup in check_fd_array_cnt__fd_array_too_big()
The Close() macro uses the passed in expression three times, which
leads to repeated execution in case it has side effects. That is,
Close(i--) would decrement i three times.

ASAN caught a stack-buffer-undeflow error at a point where this was
overlooked. Fix it.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-12-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
9d0272c91f selftests/bpf: Fix memory leaks in tests
Fix trivial memory leaks detected by userspace ASAN:
  - htab_update: free value buffer in test_reenter_update cleanup
  - test_xsk: inline pkt_stream_replace() in testapp_stats_rx_full()
    and testapp_stats_fill_empty()
  - testing_helpers: free buffer allocated by getline() in
    parse_test_list_file

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-11-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
a1a771bd64 selftests/bpf: Refactor bpf_get_ksyms() trace helper
ASAN reported a memory leak in bpf_get_ksyms(): it allocates a struct
ksyms internally and never frees it.

Move struct ksyms to trace_helpers.h and return it from the
bpf_get_ksyms(), giving ownership to the caller. Add filtered_syms and
filtered_cnt fields to the ksyms to hold the filtered array of
symbols, previously returned by bpf_get_ksyms().

Fixup the call sites: kprobe_multi_test and bench_trigger.

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260223190736.649171-10-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
45897ced3c selftests/bpf: Add DENYLIST.asan
Add a denylist file for tests that should be skipped when built with
userspace ASAN:

    $ make ... SAN_CFLAGS="-fsanitize=address -fno-omit-frame-pointer"

Skip the following tests:

- *arena*: userspace ASAN does not understand BPF arena maps and gets
  confused particularly when map_extra is non-zero
  - non-zero map_extra leads to mmap with MAP_FIXED, and ASAN treats
    this as an unknown memory region
- task_local_data: ASAN complains about "incorrect" aligned_alloc()
  usage, but it's intentional in the test
- uprobe_multi_test: very slow with ASAN enabled

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-9-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
c5c1e31349 resolve_btfids: Fix memory leaks reported by ASAN
Running resolve_btfids with ASAN reveals memory leaks in btf_id
handling.

- Change get_id() to use a local buffer
- Make btf_id__add() strdup the name internally
- Add btf_id__free_all() that frees all nodese of a tree
- Call the cleanup function on exit for every tree

Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-8-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
4021848a90 selftests/bpf: Pass through build flags to bpftool and resolve_btfids
EXTRA_* and SAN_* build flags were not correctly propagated to bpftool
and resolve_btids when building selftests/bpf. This led to various
build errors on attempt to build with SAN_CFLAGS="-fsanitize=address",
for example.

Fix the makefiles to address this:
  - Pass SAN_CFLAGS/SAN_LDFLAGS to bpftool and resolve_btfids build
  - Propagate EXTRA_LDFLAGS to resolve_btfids link command
  - Use pkg-config to detect zlib and zstd for resolve_btfids, similar
    libelf handling

Also check for ASAN flag in selftests/bpf/Makefile for convenience.

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-7-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
9d8685239e selftests/bpf: Use memcpy() for bounded non-NULL-terminated copies
Replace strncpy() with memcpy() in cases where the source is
non-NULL-terminated and the copy length is known.

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-6-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:49 -08:00
Ihor Solodrai
3ed0bc2d49 selftests/bpf: Use strscpy in bpftool_helpers.c
Replace strncpy() calls in bpftool_helpers.c with strscpy().

Pass the destination buffer size to detect_bpftool_path() instead of
hardcoding BPFTOOL_PATH_MAX_LEN.

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-5-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:48 -08:00
Ihor Solodrai
43277d8f57 selftests/bpf: Replace strncpy() with strscpy()
strncpy() does not guarantee NULL-termination and is considered
deprecated [1]. Replace strncpy() calls with strscpy().

[1] https://docs.kernel.org/process/deprecated.html#strncpy-on-nul-terminated-strings

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-4-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-24 08:19:33 -08:00
Alison Schofield
e46f25f5a8 cxl/region: Test CXL_DECODER_F_NORMALIZED_ADDRESSING as a bitmask
The CXL decoder flags are defined as bitmasks, not bit indices.
Using test_bit() to check them interprets the mask value as a bit
index, which is the wrong test.

For CXL_DECODER_F_NORMALIZED_ADDRESSING the test reads beyond the
flags word, making the flag sometimes appear set and blocking creation
of CXL region debugfs attributes that support poison operations.

Replace test_bit() with a bitmask check.

Found with cxl-test.

Fixes: 208f432406 ("cxl: Disable HPA/SPA translation handlers for Normalized Addressing")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Tested-by: Gregory Price <gourry@gourry.net>
Link: https://patch.msgid.link/63fe4a6203e40e404347f1cdc7a1c55cb4959b86.1771873256.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-24 08:33:30 -07:00
Alison Schofield
0a70b7cd39 cxl: Test CXL_DECODER_F_LOCK as a bitmask
The CXL decoder flags are defined as bitmasks, not bit indices.
Using test_bit() to check them interprets the mask value as a bit
index, which is the wrong test.

For CXL_DECODER_F_LOCK the test reads beyond the defined bits, causing
the test to always return false and allowing resets that should have
been blocked.

Replace test_bit() with a bitmask check.

Fixes: 2230c4bdc4 ("cxl: Add handling of locked CXL decoder")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Tested-by: Gregory Price <gourry@gourry.net>
Link: https://patch.msgid.link/98851c4770e4631753cf9f75b58a3a6daeca2ea2.1771873256.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-24 08:33:30 -07:00
Davidlohr Bueso
60b5d1f683 cxl/mbox: validate payload size before accessing contents in cxl_payload_from_user_allowed()
cxl_payload_from_user_allowed() casts and dereferences the input
payload without first verifying its size. When a raw mailbox command
is sent with an undersized payload (ie: 1 byte for CXL_MBOX_OP_CLEAR_LOG,
which expects a 16-byte UUID), uuid_equal() reads past the allocated buffer,
triggering a KASAN splat:

BUG: KASAN: slab-out-of-bounds in memcmp+0x176/0x1d0 lib/string.c:683
Read of size 8 at addr ffff88810130f5c0 by task syz.1.62/2258

CPU: 2 UID: 0 PID: 2258 Comm: syz.1.62 Not tainted 6.19.0-dirty #3 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0xab/0xe0 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xce/0x650 mm/kasan/report.c:482
 kasan_report+0xce/0x100 mm/kasan/report.c:595
 memcmp+0x176/0x1d0 lib/string.c:683
 uuid_equal include/linux/uuid.h:73 [inline]
 cxl_payload_from_user_allowed drivers/cxl/core/mbox.c:345 [inline]
 cxl_mbox_cmd_ctor drivers/cxl/core/mbox.c:368 [inline]
 cxl_validate_cmd_from_user drivers/cxl/core/mbox.c:522 [inline]
 cxl_send_cmd+0x9c0/0xb50 drivers/cxl/core/mbox.c:643
 __cxl_memdev_ioctl drivers/cxl/core/memdev.c:698 [inline]
 cxl_memdev_ioctl+0x14f/0x190 drivers/cxl/core/memdev.c:713
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:597 [inline]
 __se_sys_ioctl fs/ioctl.c:583 [inline]
 __x64_sys_ioctl+0x18e/0x210 fs/ioctl.c:583
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xa8/0x330 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fdaf331ba79
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fdaf1d77038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00007fdaf3585fa0 RCX: 00007fdaf331ba79
RDX: 00002000000001c0 RSI: 00000000c030ce02 RDI: 0000000000000003
RBP: 00007fdaf33749df R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fdaf3586038 R14: 00007fdaf3585fa0 R15: 00007ffced2af768
 </TASK>

Add 'in_size' parameter to cxl_payload_from_user_allowed() and validate
the payload is large enough.

Fixes: 6179045ccc ("cxl/mbox: Block immediate mode in SET_PARTITION_INFO command")
Fixes: 206f9fa9d5 ("cxl/mbox: Add Clear Log mailbox command")
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260220001618.963490-2-dave@stgolabs.net
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-24 08:33:29 -07:00
Dave Jiang
96a1fd0d84 cxl: Fix race of nvdimm_bus object when creating nvdimm objects
Found issue during running of cxl-translate.sh unit test. Adding a 3s
sleep right before the test seems to make the issue reproduce fairly
consistently. The cxl_translate module has dependency on cxl_acpi and
causes orphaned nvdimm objects to reprobe after cxl_acpi is removed.
The nvdimm_bus object is registered by the cxl_nvb object when
cxl_acpi_probe() is called. With the nvdimm_bus object missing,
__nd_device_register() will trigger NULL pointer dereference when
accessing the dev->parent that points to &nvdimm_bus->dev.

[  192.884510] BUG: kernel NULL pointer dereference, address: 000000000000006c
[  192.895383] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20250812-19.fc42 08/12/2025
[  192.897721] Workqueue: cxl_port cxl_bus_rescan_queue [cxl_core]
[  192.899459] RIP: 0010:kobject_get+0xc/0x90
[  192.924871] Call Trace:
[  192.925959]  <TASK>
[  192.926976]  ? pm_runtime_init+0xb9/0xe0
[  192.929712]  __nd_device_register.part.0+0x4d/0xc0 [libnvdimm]
[  192.933314]  __nvdimm_create+0x206/0x290 [libnvdimm]
[  192.936662]  cxl_nvdimm_probe+0x119/0x1d0 [cxl_pmem]
[  192.940245]  cxl_bus_probe+0x1a/0x60 [cxl_core]
[  192.943349]  really_probe+0xde/0x380

This patch also relies on the previous change where
devm_cxl_add_nvdimm_bridge() is called from drivers/cxl/pmem.c instead
of drivers/cxl/core.c to ensure the dependency of cxl_acpi on cxl_pmem.

1. Set probe_type of cxl_nvb to PROBE_FORCE_SYNCHRONOUS to ensure the
   driver is probed synchronously when add_device() is called.
2. Add a check in __devm_cxl_add_nvdimm_bridge() to ensure that the
   cxl_nvb driver is attached during cxl_acpi_probe().
3. Take the cxl_root uport_dev lock and the cxl_nvb->dev lock in
   devm_cxl_add_nvdimm() before checking nvdimm_bus is valid.
4. Set cxl_nvdimm flag to CXL_NVD_F_INVALIDATED so cxl_nvdimm_probe()
   will exit with -EBUSY.

The removal of cxl_nvdimm devices should prevent any orphaned devices
from probing once the nvdimm_bus is gone.

[ dj: Fixed 0-day reported kdoc issue. ]
[ dj: Fix cxl_nvb reference leak on error. Gregory (kreview-0811365) ]

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Fixes: 8fdcb1704f ("cxl/pmem: Add initial infrastructure for pmem support")
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com?>
Link: https://patch.msgid.link/20260205001633.1813643-3-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-24 08:33:21 -07:00
Bart Van Assche
07ed4f05bb hwmon: (it87) Check the it87_lock() return value
Return early in it87_resume() if it87_lock() fails instead of ignoring the
return value of that function. This patch suppresses a Clang thread-safety
warning.

Cc: Frank Crawford <frank@crawford.emu.id.au>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: linux-hwmon@vger.kernel.org
Fixes: 376e1a937b ("hwmon: (it87) Add calls to smbus_enable/smbus_disable as required")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20260223220102.2158611-15-bart.vanassche@linux.dev
[groeck: Declare 'ret' at the beginning of it87_resume()]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-02-24 07:31:35 -08:00
Andy Shevchenko
297318a1c2 spi: dt-bindings: snps,dw-abp-ssi: Remove unused bindings
As stated in the da0a672268 ("spi: dw: Remove not-going-to-be-supported
code for Baikal SoC") the Baikal platforms are not supported and
the respective driver code was removed. Remove the currently unused bindings.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20260224115218.3499222-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-24 15:30:12 +00:00
Jeff Layton
364410170a nfsd: report the requested maximum number of threads instead of number running
The current netlink and /proc interfaces deviate from their traditional
values when dynamic threading is enabled, and there is currently no way
to know what the current setting is. This patch brings the reporting
back in line with traditional behavior.

Make these interfaces report the requested maximum number of threads
instead of the number currently running. Also, update documentation and
comments to reflect that this value represents a maximum and not the
number currently running.

Fixes: d8316b837c ("nfsd: add controls to set the minimum number of threads per pool")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-02-24 10:27:51 -05:00
Michael Walle
e710b22837 Revert "hwmon: add SMARC-sAM67 support"
This reverts commit 443b39c82c.

I was just informed that this product is discontinued (without being
ever released to the market). Pull the plug and let's not waste any more
maintainers time.

Signed-off-by: Michael Walle <mwalle@kernel.org>
Link: https://lore.kernel.org/r/20260223100459.844967-4-mwalle@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-02-24 07:25:26 -08:00
Sofia Schneider
5ede902062 ACPI: OSI: Add DMI quirk for Acer Aspire One D255
The screen backlight turns off during boot (specifically during udev device
initialization) when returning true for _OSI("Windows 2009").

Analyzing the device's DSDT reveals that the firmware takes a different
code path when Windows 7 is reported, which leads to the backlight shutoff.
Add a DMI quirk to invoke dmi_disable_osi_win7 for this model.

Signed-off-by: Sofia Schneider <sofia@schn.dev>
Link: https://patch.msgid.link/20260223025240.518509-1-sofia@schn.dev
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-02-24 15:43:51 +01:00
David Arcari
ab39cc4cb8 cpufreq: intel_pstate: Fix NULL pointer dereference in update_cpu_qos_request()
The update_cpu_qos_request() function attempts to initialize the 'freq'
variable by dereferencing 'cpudata' before verifying if the 'policy'
is valid.

This issue occurs on systems booted with the "nosmt" parameter, where
all_cpu_data[cpu] is NULL for the SMT sibling threads. As a result,
any call to update_qos_requests() will result in a NULL pointer
dereference as the code will attempt to access pstate.turbo_freq using
the NULL cpudata pointer.

Also, pstate.turbo_freq may be updated by intel_pstate_get_hwp_cap()
after initializing the 'freq' variable, so it is better to defer the
'freq' until intel_pstate_get_hwp_cap() has been called.

Fix this by deferring the 'freq' assignment until after the policy and
driver_data have been validated.

Fixes: ae1bdd23b9 ("cpufreq: intel_pstate: Adjust frequency percentage computations")
Reported-by: Jirka Hladky <jhladky@redhat.com>
Closes: https://lore.kernel.org/all/CAE4VaGDfiPvz3AzrwrwM4kWB3SCkMci25nPO8W1JmTBd=xHzZg@mail.gmail.com/
Signed-off-by: David Arcari <darcari@redhat.com>
Cc: 6.18+ <stable@vger.kernel.org> # 6.18+
[ rjw: Added one paragraph to the changelog ]
Link: https://patch.msgid.link/20260224122106.228116-1-darcari@redhat.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-02-24 15:38:16 +01:00
Paolo Abeni
1348659dc9 Merge tag 'for-net-2026-02-23' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
Luiz Augusto von Dentz says:

====================
bluetooth pull request for net:

 - purge error queues in socket destructors
 - hci_sync: Fix CIS host feature condition
 - L2CAP: Fix invalid response to L2CAP_ECRED_RECONF_REQ
 - L2CAP: Fix result of L2CAP_ECRED_CONN_RSP when MTU is too short
 - L2CAP: Fix response to L2CAP_ECRED_CONN_REQ
 - L2CAP: Fix not checking output MTU is acceptable on L2CAP_ECRED_CONN_REQ
 - L2CAP: Fix missing key size check for L2CAP_LE_CONN_REQ
 - hci_qca: Cleanup on all setup failures

* tag 'for-net-2026-02-23' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth:
  Bluetooth: L2CAP: Fix missing key size check for L2CAP_LE_CONN_REQ
  Bluetooth: L2CAP: Fix not checking output MTU is acceptable on L2CAP_ECRED_CONN_REQ
  Bluetooth: Fix CIS host feature condition
  Bluetooth: L2CAP: Fix response to L2CAP_ECRED_CONN_REQ
  Bluetooth: hci_qca: Cleanup on all setup failures
  Bluetooth: purge error queues in socket destructors
  Bluetooth: L2CAP: Fix result of L2CAP_ECRED_CONN_RSP when MTU is too short
  Bluetooth: L2CAP: Fix invalid response to L2CAP_ECRED_RECONF_REQ
====================

Link: https://patch.msgid.link/20260223211634.3800315-1-luiz.dentz@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 15:03:08 +01:00
Shyam Sundar S K
fb73d0e19f MAINTAINERS: Update AMD XGBE driver maintainers
Due to additional responsibilities, Shyam Sundar S K will no longer be
supporting the AMD XGBE driver. Maintenance will be handled by
Raju Rangoju going forward.

Cc: Raju Rangoju <Raju.Rangoju@amd.com>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Link: https://patch.msgid.link/20260223074020.1987884-1-Shyam-sundar.S-k@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 13:31:49 +01:00
Danilo Krummrich
78437ab3b7 clk: scu/imx8qxp: do not register driver in probe()
imx_clk_scu_init() registers the imx_clk_scu_driver while commonly being
called from IMX driver's probe() callbacks.

However, it neither makes sense to register drivers from probe()
callbacks of other drivers, nor does the driver core allow registering
drivers with a device lock already being held.

The latter was revealed by commit dc23806a7c ("driver core: enforce
device_lock for driver_match_device()") leading to a deadlock condition
described in [1].

Besides that, nothing seems to unregister the imx_clk_scu_driver once
the corresponding driver module is unloaded, which leaves the
driver-core with a dangling pointer.

Also, if there are multiple matching devices for the imx8qxp_clk_driver,
imx8qxp_clk_probe() calls imx_clk_scu_init() multiple times.  However,
any subsequent call after the first one will fail, since the driver-core
does not allow to register the same struct platform_driver multiple
times.

Hence, register the imx_clk_scu_driver from module_init() and unregister
it in module_exit().

Note that we first register the imx8qxp_clk_driver and then call
imx_clk_scu_module_init() to avoid having to call
imx_clk_scu_module_exit() in the unwind path of imx8qxp_clk_init().

Fixes: dc23806a7c ("driver core: enforce device_lock for driver_match_device()")
Fixes: 220175cd39 ("clk: imx: scu: fix build break when compiled as modules")
Reported-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Closes: https://lore.kernel.org/lkml/13955113.uLZWGnKmhe@steina-w/
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com> # TQMa8x/MBa8x
Link: https://lore.kernel.org/lkml/DFU7CEPUSG9A.1KKGVW4HIPMSH@kernel.org/ [1]
Acked-by: Abel Vesa <abelvesa@kernel.org>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://patch.msgid.link/20260212235842.85934-1-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-02-24 12:54:17 +01:00
Andrew Lunn
c8dbdc6e38 net: phy: register phy led_triggers during probe to avoid AB-BA deadlock
There is an AB-BA deadlock when both LEDS_TRIGGER_NETDEV and
LED_TRIGGER_PHY are enabled:

[ 1362.049207] [<8054e4b8>] led_trigger_register+0x5c/0x1fc             <-- Trying to get lock "triggers_list_lock" via down_write(&triggers_list_lock);
[ 1362.054536] [<80662830>] phy_led_triggers_register+0xd0/0x234
[ 1362.060329] [<8065e200>] phy_attach_direct+0x33c/0x40c
[ 1362.065489] [<80651fc4>] phylink_fwnode_phy_connect+0x15c/0x23c
[ 1362.071480] [<8066ee18>] mtk_open+0x7c/0xba0
[ 1362.075849] [<806d714c>] __dev_open+0x280/0x2b0
[ 1362.080384] [<806d7668>] __dev_change_flags+0x244/0x24c
[ 1362.085598] [<806d7698>] dev_change_flags+0x28/0x78
[ 1362.090528] [<807150e4>] dev_ioctl+0x4c0/0x654                       <-- Hold lock "rtnl_mutex" by calling rtnl_lock();
[ 1362.094985] [<80694360>] sock_ioctl+0x2f4/0x4e0
[ 1362.099567] [<802e9c4c>] sys_ioctl+0x32c/0xd8c
[ 1362.104022] [<80014504>] syscall_common+0x34/0x58

Here LED_TRIGGER_PHY is registering LED triggers during phy_attach
while holding RTNL and then taking triggers_list_lock.

[ 1362.191101] [<806c2640>] register_netdevice_notifier+0x60/0x168      <-- Trying to get lock "rtnl_mutex" via rtnl_lock();
[ 1362.197073] [<805504ac>] netdev_trig_activate+0x194/0x1e4
[ 1362.202490] [<8054e28c>] led_trigger_set+0x1d4/0x360                 <-- Hold lock "triggers_list_lock" by down_read(&triggers_list_lock);
[ 1362.207511] [<8054eb38>] led_trigger_write+0xd8/0x14c
[ 1362.212566] [<80381d98>] sysfs_kf_bin_write+0x80/0xbc
[ 1362.217688] [<8037fcd8>] kernfs_fop_write_iter+0x17c/0x28c
[ 1362.223174] [<802cbd70>] vfs_write+0x21c/0x3c4
[ 1362.227712] [<802cc0c4>] ksys_write+0x78/0x12c
[ 1362.232164] [<80014504>] syscall_common+0x34/0x58

Here LEDS_TRIGGER_NETDEV is being enabled on an LED. It first takes
triggers_list_lock and then RTNL. A classical AB-BA deadlock.

phy_led_triggers_registers() does not require the RTNL, it does not
make any calls into the network stack which require protection. There
is also no requirement the PHY has been attached to a MAC, the
triggers only make use of phydev state. This allows the call to
phy_led_triggers_registers() to be placed elsewhere. PHY probe() and
release() don't hold RTNL, so solving the AB-BA deadlock.

Reported-by: Shiji Yang <yangshiji66@outlook.com>
Closes: https://lore.kernel.org/all/OS7PR01MB13602B128BA1AD3FA38B6D1FFBC69A@OS7PR01MB13602.jpnprd01.prod.outlook.com/
Fixes: 06f502f57d ("leds: trigger: Introduce a NETDEV trigger")
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Shiji Yang <yangshiji66@outlook.com>
Link: https://patch.msgid.link/20260222152601.1978655-1-andrew@lunn.ch
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 12:39:54 +01:00
Christian Brauner
4a1ddb0f1c pidfs: avoid misleading break
The break would only break out of the scoped_guard() loop, not the
switch statement. It still works correct as is ofc but let's avoid the
confusion.

Reported-by: David Lechner <dlechner@baylibre.com>
Link:: https://lore.kernel.org/cd2153f1-098b-463c-bbc1-5c6ca9ef1f12@baylibre.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-24 12:09:00 +01:00
Ziyi Guo
3d7e6ce34f net: usb: pegasus: enable basic endpoint checking
pegasus_probe() fills URBs with hardcoded endpoint pipes without
verifying the endpoint descriptors:

  - usb_rcvbulkpipe(dev, 1) for RX data
  - usb_sndbulkpipe(dev, 2) for TX data
  - usb_rcvintpipe(dev, 3)  for status interrupts

A malformed USB device can present these endpoints with transfer types
that differ from what the driver assumes.

Add a pegasus_usb_ep enum for endpoint numbers, replacing magic
constants throughout. Add usb_check_bulk_endpoints() and
usb_check_int_endpoints() calls before any resource allocation to
verify endpoint types before use, rejecting devices with mismatched
descriptors at probe time, and avoid triggering assertion.

Similar fix to
- commit 90b7f29617 ("net: usb: rtl8150: enable basic endpoint checking")
- commit 9e7021d2ae ("net: usb: catc: enable basic endpoint checking")

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Ziyi Guo <n7l8m4@u.northwestern.edu>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260222050633.410165-1-n7l8m4@u.northwestern.edu
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 11:51:51 +01:00
Ferry Meng
bf4fde7db4 erofs: remove more unnecessary #ifdefs
Many #ifdefs can be replaced with IS_ENABLED() to improve code
readability.  No functional changes.

Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2026-02-24 18:36:52 +08:00
Sebastian Andrzej Siewior
983512f3a8 net: Drop the lock in skb_may_tx_timestamp()
skb_may_tx_timestamp() may acquire sock::sk_callback_lock. The lock must
not be taken in IRQ context, only softirq is okay. A few drivers receive
the timestamp via a dedicated interrupt and complete the TX timestamp
from that handler. This will lead to a deadlock if the lock is already
write-locked on the same CPU.

Taking the lock can be avoided. The socket (pointed by the skb) will
remain valid until the skb is released. The ->sk_socket and ->file
member will be set to NULL once the user closes the socket which may
happen before the timestamp arrives.
If we happen to observe the pointer while the socket is closing but
before the pointer is set to NULL then we may use it because both
pointer (and the file's cred member) are RCU freed.

Drop the lock. Use READ_ONCE() to obtain the individual pointer. Add a
matching WRITE_ONCE() where the pointer are cleared.

Link: https://lore.kernel.org/all/20260205145104.iWinkXHv@linutronix.de
Fixes: b245be1f4d ("net-timestamp: no-payload only sysctl")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260220183858.N4ERjFW6@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 11:27:29 +01:00
Jason Gunthorpe
faa72102b1 RDMA/ionic: Fix kernel stack leak in ionic_create_cq()
struct ionic_cq_resp resp {
    __u32 cqid[2];         // offset 0 - PARTIALLY SET (see below)
    __u8  udma_mask;       // offset 8 - SET (resp.udma_mask = vcq->udma_mask)
    __u8  rsvd[7];         // offset 9 - NEVER SET <- LEAK
};

rsvd[7]: 7 bytes of stack memory leaked unconditionally.

cqid[2]: The loop at line 1256 iterates over udma_idx but skips indices
where !(vcq->udma_mask & BIT(udma_idx)). The array has 2 entries but
udma_count could be 1, meaning cqid[1] might never be written via
ionic_create_cq_common(). If udma_mask only has bit 0 set, cqid[1] (4
bytes) is also leaked. So potentially 11 bytes leaked.

Cc: stable@vger.kernel.org
Fixes: e8521822c7 ("RDMA/ionic: Register device ops for control path")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://patch.msgid.link/4-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com
Acked-by: Abhijit Gangurde <abhijit.gangurde@amd.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-24 05:03:15 -05:00
Jason Gunthorpe
74586c6da9 RDMA/irdma: Fix kernel stack leak in irdma_create_user_ah()
struct irdma_create_ah_resp {  // 8 bytes, no padding
    __u32 ah_id;               // offset 0 - SET (uresp.ah_id = ah->sc_ah.ah_info.ah_idx)
    __u8  rsvd[4];             // offset 4 - NEVER SET <- LEAK
};

rsvd[4]: 4 bytes of stack memory leaked unconditionally. Only ah_id is assigned before ib_respond_udata().

The reserved members of the structure were not zeroed.

Cc: stable@vger.kernel.org
Fixes: b48c24c2d7 ("RDMA/irdma: Implement device supported verb APIs")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://patch.msgid.link/3-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-24 05:03:15 -05:00
Jason Gunthorpe
117942ca43 IB/mthca: Add missed mthca_unmap_user_db() for mthca_create_srq()
Fix a user triggerable leak on the system call failure path.

Cc: stable@vger.kernel.org
Fixes: ec34a922d2 ("[PATCH] IB/mthca: Add SRQ implementation")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://patch.msgid.link/2-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-24 05:03:15 -05:00
Jason Gunthorpe
f22c77ce49 RDMA/efa: Fix typo in efa_alloc_mr()
The pattern is to check the entire driver request space, not just
sizeof something unrelated.

Fixes: 40909f664d ("RDMA/efa: Add EFA verbs implementation")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://patch.msgid.link/1-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com
Acked-by: Michael Margolin <mrgolin@amazon.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-24 05:01:32 -05:00
Kamal Heib
fd80bd7105 RDMA/ionic: Fix potential NULL pointer dereference in ionic_query_port
The function ionic_query_port() calls ib_device_get_netdev() without
checking the return value which could lead to NULL pointer dereference,
Fix it by checking the return value and return -ENODEV if the 'ndev' is
NULL.

Fixes: 2075bbe8ef ("RDMA/ionic: Register device ops for miscellaneous functionality")
Signed-off-by: Kamal Heib <kheib@redhat.com>
Link: https://patch.msgid.link/20260220222125.16973-2-kheib@redhat.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-24 04:54:10 -05:00
Chen-Yu Tsai
01e10d0272 pinctrl: sunxi: Implement gpiochip::get_direction()
After commit 471e998c0e ("gpiolib: remove redundant callback check"),
a warning will be printed if the gpio driver does not implement this
callback. The warning was added in commit e623c4303e ("gpiolib:
sanitize the return value of gpio_chip::get_direction()"), but was
masked by the "redundant" check.

The warning can be triggered by any action that calls the callback,
such as dumping the GPIO state from /sys/kernel/debug/gpio.

Implement it for the sunxi driver. This is simply a matter of reading
out the mux value from the registers, then checking if it is one of
the GPIO functions and which direction it is.

Signed-off-by: Chen-Yu Tsai <wens@kernel.org>
Reviewed-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:53 +01:00
Krzysztof Kozlowski
45fe459245 pinctrl: rockchip: Fix configuring a deferred pin
Commit e2c58cbe3a ("pinctrl: rockchip: Simplify locking with
scoped_guard()") added a scoped_guard() over existing code containing a
"break" instruction.  That "break" was for the outer (existing)
for-loop, which now exits inner, scoped_guard() loop.  If GPIO driver
did not probe, then driver will not bail out, but instead continue to
configure the pin.

Fix the issue by simplifying the code - the break in original code was
leading directly to end of the function returning 0, thus we can simply
return here rockchip_pinconf_defer_pin status.

Reported-by: David Lechner <dlechner@baylibre.com>
Closes: https://lore.kernel.org/r/f5b38942-a584-4e78-a893-de4a219070b2@baylibre.com/
Fixes: e2c58cbe3a ("pinctrl: rockchip: Simplify locking with scoped_guard()")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:30 +01:00
Felix Gu
fd5bed798f pinctrl: cirrus: cs42l43: Fix double-put in cs42l43_pin_probe()
devm_add_action_or_reset() already invokes the action on failure,
so the explicit put causes a double-put.

Fixes: 9b07cdf86a ("pinctrl: cirrus: Fix fwnode leak in cs42l43_pin_probe()")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:30 +01:00
Felix Gu
a2539b92e4 pinctrl: meson: amlogic-a4: Fix device node reference leak in aml_dt_node_to_map_pinmux()
The of_get_parent() function returns a device_node with an incremented
reference count.

Use the __free(device_node) cleanup attribute to ensure of_node_put()
is automatically called when pnode goes out of scope, fixing a
reference leak.

Fixes: 6e9be3abb7 ("pinctrl: Add driver support for Amlogic SoCs")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:30 +01:00
Krzysztof Kozlowski
e9e268ea9d pinctrl: qcom: sdm660-lpass-lpi: Make groups and functions variables static
File-scope 'sdm660_lpi_pinctrl_groups' and
'sdm660_lpi_pinctrl_functions' are not used outside of this unit, so
make them static to silence sparse warnings:

  pinctrl-sdm660-lpass-lpi.c:79:27: warning: symbol 'sdm660_lpi_pinctrl_groups' was not declared. Should it be static?
  pinctrl-sdm660-lpass-lpi.c:116:27: warning: symbol 'sdm660_lpi_pinctrl_functions' was not declared. Should it be static?

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:30 +01:00
Krzysztof Kozlowski
c2e174994c pinctrl: cix: sky1: Unexport sky1_pinctrl_pm_ops
File-scope 'sky1_pinctrl_pm_ops' is not used outside of this unit (and
it should not be!), so unexport it and make it static to silence sparse
warning:

  pinctrl-sky1.c:525:25: warning: symbol 'sky1_pinctrl_pm_ops' was not declared. Should it be static?

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:29 +01:00
Krzysztof Kozlowski
a48150d051 pinctrl: amdisp: Make amdisp_pinctrl_ops variable static
File-scope 'amdisp_pinctrl_ops' is not used outside of this unit, so
make it static to silence sparse warning:

  pinctrl-amdisp.c:83:26: warning: symbol 'amdisp_pinctrl_ops' was not declared. Should it be static?

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:29 +01:00
Felix Gu
7a648d598c pinctrl: pinconf-generic: Fix memory leak in pinconf_generic_parse_dt_config()
In pinconf_generic_parse_dt_config(), if parse_dt_cfg() fails, it returns
directly. This bypasses the cleanup logic and results in a memory leak of
the cfg buffer.

Fix this by jumping to the out label on failure, ensuring kfree(cfg) is
called before returning.

Fixes: 90a18c5128 ("pinctrl: pinconf-generic: Handle string values for generic properties")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Antonio Borneo <antonio.borneo@foss.st.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-24 10:51:29 +01:00
Jakub Kicinski
82aec772fc netconsole: avoid OOB reads, msg is not nul-terminated
msg passed to netconsole from the console subsystem is not guaranteed
to be nul-terminated. Before recent
commit 7eab73b186 ("netconsole: convert to NBCON console infrastructure")
the message would be placed in printk_shared_pbufs, a static global
buffer, so KASAN had harder time catching OOB accesses. Now we see:

    printk: console [netcon_ext0] enabled
    BUG: KASAN: slab-out-of-bounds in string+0x1f7/0x240
    Read of size 1 at addr ffff88813b6d4c00 by task pr/netcon_ext0/594

    CPU: 65 UID: 0 PID: 594 Comm: pr/netcon_ext0 Not tainted 6.19.0-11754-g4246fd6547c9
    Call Trace:
     kasan_report+0xe4/0x120
     string+0x1f7/0x240
     vsnprintf+0x655/0xba0
     scnprintf+0xba/0x120
     netconsole_write+0x3fe/0xa10
     nbcon_emit_next_record+0x46e/0x860
     nbcon_kthread_func+0x623/0x750

    Allocated by task 1:
     nbcon_alloc+0x1ea/0x450
     register_console+0x26b/0xe10
     init_netconsole+0xbb0/0xda0

    The buggy address belongs to the object at ffff88813b6d4000
                which belongs to the cache kmalloc-4k of size 4096
    The buggy address is located 0 bytes to the right of
                allocated 3072-byte region [ffff88813b6d4000, ffff88813b6d4c00)

Fixes: c62c0a17f9 ("netconsole: Append kernel version to message")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260219195021.2099699-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 10:46:29 +01:00
Duoming Zhou
bae8a5d2e7 net: wan: farsync: Fix use-after-free bugs caused by unfinished tasklets
When the FarSync T-series card is being detached, the fst_card_info is
deallocated in fst_remove_one(). However, the fst_tx_task or fst_int_task
may still be running or pending, leading to use-after-free bugs when the
already freed fst_card_info is accessed in fst_process_tx_work_q() or
fst_process_int_work_q().

A typical race condition is depicted below:

CPU 0 (cleanup)           | CPU 1 (tasklet)
                          | fst_start_xmit()
fst_remove_one()          |   tasklet_schedule()
  unregister_hdlc_device()|
                          | fst_process_tx_work_q() //handler
  kfree(card) //free      |   do_bottom_half_tx()
                          |     card-> //use

The following KASAN trace was captured:

==================================================================
 BUG: KASAN: slab-use-after-free in do_bottom_half_tx+0xb88/0xd00
 Read of size 4 at addr ffff88800aad101c by task ksoftirqd/3/32
 ...
 Call Trace:
  <IRQ>
  dump_stack_lvl+0x55/0x70
  print_report+0xcb/0x5d0
  ? do_bottom_half_tx+0xb88/0xd00
  kasan_report+0xb8/0xf0
  ? do_bottom_half_tx+0xb88/0xd00
  do_bottom_half_tx+0xb88/0xd00
  ? _raw_spin_lock_irqsave+0x85/0xe0
  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
  ? __pfx___hrtimer_run_queues+0x10/0x10
  fst_process_tx_work_q+0x67/0x90
  tasklet_action_common+0x1fa/0x720
  ? hrtimer_interrupt+0x31f/0x780
  handle_softirqs+0x176/0x530
  __irq_exit_rcu+0xab/0xe0
  sysvec_apic_timer_interrupt+0x70/0x80
 ...

 Allocated by task 41 on cpu 3 at 72.330843s:
  kasan_save_stack+0x24/0x50
  kasan_save_track+0x17/0x60
  __kasan_kmalloc+0x7f/0x90
  fst_add_one+0x1a5/0x1cd0
  local_pci_probe+0xdd/0x190
  pci_device_probe+0x341/0x480
  really_probe+0x1c6/0x6a0
  __driver_probe_device+0x248/0x310
  driver_probe_device+0x48/0x210
  __device_attach_driver+0x160/0x320
  bus_for_each_drv+0x101/0x190
  __device_attach+0x198/0x3a0
  device_initial_probe+0x78/0xa0
  pci_bus_add_device+0x81/0xc0
  pci_bus_add_devices+0x7e/0x190
  enable_slot+0x9b9/0x1130
  acpiphp_check_bridge.part.0+0x2e1/0x460
  acpiphp_hotplug_notify+0x36c/0x3c0
  acpi_device_hotplug+0x203/0xb10
  acpi_hotplug_work_fn+0x59/0x80
 ...

 Freed by task 41 on cpu 1 at 75.138639s:
  kasan_save_stack+0x24/0x50
  kasan_save_track+0x17/0x60
  kasan_save_free_info+0x3b/0x60
  __kasan_slab_free+0x43/0x70
  kfree+0x135/0x410
  fst_remove_one+0x2ca/0x540
  pci_device_remove+0xa6/0x1d0
  device_release_driver_internal+0x364/0x530
  pci_stop_bus_device+0x105/0x150
  pci_stop_and_remove_bus_device+0xd/0x20
  disable_slot+0x116/0x260
  acpiphp_disable_and_eject_slot+0x4b/0x190
  acpiphp_hotplug_notify+0x230/0x3c0
  acpi_device_hotplug+0x203/0xb10
  acpi_hotplug_work_fn+0x59/0x80
 ...

 The buggy address belongs to the object at ffff88800aad1000
  which belongs to the cache kmalloc-1k of size 1024
 The buggy address is located 28 bytes inside of
  freed 1024-byte region
 The buggy address belongs to the physical page:
 page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xaad0
 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
 flags: 0x100000000000040(head|node=0|zone=1)
 page_type: f5(slab)
 raw: 0100000000000040 ffff888007042dc0 dead000000000122 0000000000000000
 raw: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000
 head: 0100000000000040 ffff888007042dc0 dead000000000122 0000000000000000
 head: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000
 head: 0100000000000003 ffffea00002ab401 00000000ffffffff 00000000ffffffff
 head: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
 page dumped because: kasan: bad access detected

 Memory state around the buggy address:
  ffff88800aad0f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffff88800aad0f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 >ffff88800aad1000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                             ^
  ffff88800aad1080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  ffff88800aad1100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ==================================================================

Fix this by ensuring that both fst_tx_task and fst_int_task are properly
canceled before the fst_card_info is released. Add tasklet_kill() in
fst_remove_one() to synchronize with any pending or running tasklets.
Since unregister_hdlc_device() stops data transmission and reception,
and fst_disable_intr() prevents further interrupts, it is appropriate
to place tasklet_kill() after these calls.

The bugs were identified through static analysis. To reproduce the issue
and validate the fix, a FarSync T-series card was simulated in QEMU and
delays(e.g., mdelay()) were introduced within the tasklet handler to
increase the likelihood of triggering the race condition.

Fixes: 2f623aaf9f ("net: farsync: Fix kmemleak when rmmods farsync")
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Reviewed-by: Jijie Shao <shaojijie@huawei.com>
Link: https://patch.msgid.link/20260219124637.72578-1-duoming@zju.edu.cn
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 10:31:52 +01:00
Jann Horn
fdcfce9307 eventpoll: Fix integer overflow in ep_loop_check_proc()
If a recursive call to ep_loop_check_proc() hits the `result = INT_MAX`,
an integer overflow will occur in the calling ep_loop_check_proc() at
`result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1)`,
breaking the recursion depth check.

Fix it by using a different placeholder value that can't lead to an
overflow.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Fixes: f2e467a482 ("eventpoll: Fix semi-unbounded recursion")
Cc: stable@vger.kernel.org
Signed-off-by: Jann Horn <jannh@google.com>
Link: https://patch.msgid.link/20260223-epoll-int-overflow-v1-1-452f35132224@google.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-24 10:21:30 +01:00
Fernando Fernandez Mancera
021fd0f870 net/rds: fix recursive lock in rds_tcp_conn_slots_available
syzbot reported a recursive lock warning in rds_tcp_get_peer_sport() as
it calls inet6_getname() which acquires the socket lock that was already
held by __release_sock().

 kworker/u8:6/2985 is trying to acquire lock:
 ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1709 [inline]
 ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: inet6_getname+0x15d/0x650 net/ipv6/af_inet6.c:533

 but task is already holding lock:
 ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1709 [inline]
 ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_sock_set_cork+0x2c/0x2e0 net/ipv4/tcp.c:3694
   lock_sock_nested+0x48/0x100 net/core/sock.c:3780
   lock_sock include/net/sock.h:1709 [inline]
   inet6_getname+0x15d/0x650 net/ipv6/af_inet6.c:533
   rds_tcp_get_peer_sport net/rds/tcp_listen.c:70 [inline]
   rds_tcp_conn_slots_available+0x288/0x470 net/rds/tcp_listen.c:149
   rds_recv_hs_exthdrs+0x60f/0x7c0 net/rds/recv.c:265
   rds_recv_incoming+0x9f6/0x12d0 net/rds/recv.c:389
   rds_tcp_data_recv+0x7f1/0xa40 net/rds/tcp_recv.c:243
   __tcp_read_sock+0x196/0x970 net/ipv4/tcp.c:1702
   rds_tcp_read_sock net/rds/tcp_recv.c:277 [inline]
   rds_tcp_data_ready+0x369/0x950 net/rds/tcp_recv.c:331
   tcp_rcv_established+0x19e9/0x2670 net/ipv4/tcp_input.c:6675
   tcp_v6_do_rcv+0x8eb/0x1ba0 net/ipv6/tcp_ipv6.c:1609
   sk_backlog_rcv include/net/sock.h:1185 [inline]
   __release_sock+0x1b8/0x3a0 net/core/sock.c:3213

Reading from the socket struct directly is safe from possible paths. For
rds_tcp_accept_one(), the socket has just been accepted and is not yet
exposed to concurrent access. For rds_tcp_conn_slots_available(), direct
access avoids the recursive deadlock seen during backlog processing
where the socket lock is already held from the __release_sock().

However, rds_tcp_conn_slots_available() is also called from the normal
softirq path via tcp_data_ready() where the lock is not held. This is
also safe because inet_dport is a stable 16 bits field. A READ_ONCE()
annotation as the value might be accessed lockless in a concurrent
access context.

Note that it is also safe to call rds_tcp_conn_slots_available() from
rds_conn_shutdown() because the fan-out is disabled.

Fixes: 9d27a0fb12 ("net/rds: Trigger rds_send_ping() more than once")
Reported-by: syzbot+5efae91f60932839f0a5@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=5efae91f60932839f0a5
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Reviewed-by: Allison Henderson <achender@kernel.org>
Link: https://patch.msgid.link/20260219075738.4403-1-fmancera@suse.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-24 10:11:04 +01:00
Vahagn Vardanian
017c179252 wifi: mac80211: fix NULL pointer dereference in mesh_rx_csa_frame()
In mesh_rx_csa_frame(), elems->mesh_chansw_params_ie is dereferenced
at lines 1638 and 1642 without a prior NULL check:

    ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl;
    ...
    pre_value = le16_to_cpu(elems->mesh_chansw_params_ie->mesh_pre_value);

The mesh_matches_local() check above only validates the Mesh ID,
Mesh Configuration, and Supported Rates IEs.  It does not verify the
presence of the Mesh Channel Switch Parameters IE (element ID 118).
When a received CSA action frame omits that IE, ieee802_11_parse_elems()
leaves elems->mesh_chansw_params_ie as NULL, and the unconditional
dereference causes a kernel NULL pointer dereference.

A remote mesh peer with an established peer link (PLINK_ESTAB) can
trigger this by sending a crafted SPECTRUM_MGMT/CHL_SWITCH action frame
that includes a matching Mesh ID and Mesh Configuration IE but omits the
Mesh Channel Switch Parameters IE.  No authentication beyond the default
open mesh peering is required.

Crash confirmed on kernel 6.17.0-5-generic via mac80211_hwsim:

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  Oops: Oops: 0000 [#1] SMP NOPTI
  RIP: 0010:ieee80211_mesh_rx_queued_mgmt+0x143/0x2a0 [mac80211]
  CR2: 0000000000000000

Fix by adding a NULL check for mesh_chansw_params_ie after
mesh_matches_local() returns, consistent with how other optional IEs
are guarded throughout the mesh code.

The bug has been present since v3.13 (released 2014-01-19).

Fixes: 8f2535b92d ("mac80211: process the CSA frame for mesh accordingly")
Cc: stable@vger.kernel.org
Signed-off-by: Vahagn Vardanian <vahagn@redrays.io>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-24 10:03:10 +01:00
Siva Reddy Kallam
3d2e5d12a2 RDMA/bng_re: Unwind bng_re_dev_init properly
Fix below smatch warning:
drivers/infiniband/hw/bng_re/bng_dev.c:270
bng_re_dev_init() warn: missing unwind goto?

Current bng_re_dev_init function is not having clear unwinding code.
So, added proper unwinding with ladder.

Fixes: 4f830cd8d7 ("RDMA/bng_re: Add infrastructure for enabling Firmware channel")
Reported-by: Simon Horman <horms@kernel.org>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Closes: https://lore.kernel.org/r/202601010413.sWadrQel-lkp@intel.com/
Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Link: https://patch.msgid.link/20260218091246.1764808-3-siva.kallam@broadcom.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
2026-02-24 03:56:28 -05:00
Siva Reddy Kallam
7a23af417d RDMA/bng_re: Remove unnessary validity checks
Fix below smatch warning:
drivers/infiniband/hw/bng_re/bng_dev.c:113
bng_re_net_ring_free() warn: variable dereferenced before check 'rdev'
(see line 107)

current driver has unnessary validity checks. So, removing these
unnessary validity checks.

Fixes: 4f830cd8d7 ("RDMA/bng_re: Add infrastructure for enabling Firmware channel")
Fixes: 745065770c ("RDMA/bng_re: Register and get the resources from bnge driver")
Fixes: 04e031ff6e ("RDMA/bng_re: Initialize the Firmware and Hardware")
Fixes: d0da769c19 ("RDMA/bng_re: Add Auxiliary interface")
Reported-by: Simon Horman <horms@kernel.org>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Closes: https://lore.kernel.org/r/202601010413.sWadrQel-lkp@intel.com/
Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Link: https://patch.msgid.link/20260218091246.1764808-2-siva.kallam@broadcom.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
2026-02-24 03:53:24 -05:00
Jiri Pirko
9af0feae80 RDMA/core: Fix stale RoCE GIDs during netdev events at registration
RoCE GID entries become stale when netdev properties change during the
IB device registration window. This is reproducible with a udev rule
that sets a MAC address when a VF netdev appears:

  ACTION=="add", SUBSYSTEM=="net", KERNEL=="eth4", \
    RUN+="/sbin/ip link set eth4 address 88:22:33:44:55:66"

After VF creation, show_gids displays GIDs derived from the original
random MAC rather than the configured one.

The root cause is a race between netdev event processing and device
registration:

  CPU 0 (driver)                    CPU 1 (udev/workqueue)
  ──────────────                    ──────────────────────
  ib_register_device()
    ib_cache_setup_one()
      gid_table_setup_one()
        _gid_table_setup_one()
          ← GID table allocated
        rdma_roce_rescan_device()
          ← GIDs populated with
            OLD MAC
                                    ip link set eth4 addr NEW_MAC
                                    NETDEV_CHANGEADDR queued
                                    netdevice_event_work_handler()
                                      ib_enum_all_roce_netdevs()
                                        ← Iterates DEVICE_REGISTERED
                                        ← Device NOT marked yet, SKIP!
    enable_device_and_get()
      xa_set_mark(DEVICE_REGISTERED)
          ← Too late, event was lost

The netdev event handler uses ib_enum_all_roce_netdevs() which only
iterates devices marked DEVICE_REGISTERED. However, this mark is set
late in the registration process, after the GID cache is already
populated. Events arriving in this window are silently dropped.

Fix this by introducing a new xarray mark DEVICE_GID_UPDATES that is
set immediately after the GID table is allocated and initialized. Use
the new mark in ib_enum_all_roce_netdevs() function to iterate devices
instead of DEVICE_REGISTERED.

This is safe because:
- After _gid_table_setup_one(), all required structures exist (port_data,
  immutable, cache.gid)
- The GID table mutex serializes concurrent access between the initial
  rescan and event handlers
- Event handlers correctly update stale GIDs even when racing with rescan
- The mark is cleared in ib_cache_cleanup_one() before teardown

This also fixes similar races for IP address events (inetaddr_event,
inet6addr_event) which use the same enumeration path.

Fixes: 0df91bb673 ("RDMA/devices: Use xarray to store the client_data")
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://patch.msgid.link/20260127093839.126291-1-jiri@resnulli.us
Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-24 03:43:16 -05:00
Alex Tran
b4f4fd947a selftests: hid: tests: test_wacom_generic: add tests for display devices and opaque devices
Verify Wacom devices set INPUT_PROP_DIRECT on display devices and
INPUT_PROP_POINTER on opaque devices. Verify INPUT_PROP_POINTER
is not set on display devices and INPUT_PROP_DIRECT is not set
on opaque devices.

Moved test_prop_pointer into TestOpaqueTablet. Created a
DirectTabletTest mixin class for test_prop_direct that can be
inherited by display tablet test classes.Used DirectTabletTest
for TestDTH2452Tablet case.

Signed-off-by: Alex Tran <alex.t.tran@gmail.com>
Tested-by: Erin Skomra <erin.skomra@wacom.com>
Reviewed-by: Erin Skomra <erin.skomra@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-24 09:41:21 +01:00
Ian Ray
a2e70a89fa HID: multitouch: new class MT_CLS_EGALAX_P80H84
Fixes: f9e82295ee ("HID: multitouch: add eGalaxTouch P80H84 support")
Signed-off-by: Ian Ray <ian.ray@gehealthcare.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-24 09:41:21 +01:00
Julius Lehmann
5f3518d774 HID: magicmouse: fix battery reporting for Apple Magic Trackpad 2
Battery reporting does not work for the Apple Magic Trackpad 2 if it is
connected via USB. The current hid descriptor fixup code checks for a
hid descriptor length of exactly 83 bytes. If the hid descriptor is
larger, which is the case for newer apple mice, the fixup is not
applied.

This fix checks for hid descriptor sizes greater/equal 83 bytes which
applies the fixup for newer devices as well.

Signed-off-by: Julius Lehmann <lehmanju@devpi.de>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-24 09:41:21 +01:00
Pengyu Luo
ac47870fd7 drm/msm/dsi: fix hdisplay calculation when programming dsi registers
Recently, the hdisplay calculation is working for 3:1 compressed ratio
only. If we have a video panel with DSC BPP = 8, and BPC = 10, we still
use the default bits_per_pclk = 24, then we get the wrong hdisplay. We
can draw the conclusion by cross-comparing the calculation with the
calculation in dsi_adjust_pclk_for_compression().

Since CMD mode does not use this, we can remove
!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO) safely.

Fixes: efcbd6f9cd ("drm/msm/dsi: Enable widebus for DSI")
Signed-off-by: Pengyu Luo <mitltlatltl@gmail.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/704822/
Link: https://lore.kernel.org/r/20260214105145.105308-1-mitltlatltl@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-02-24 09:22:40 +02:00
Ihor Solodrai
6a087ac23f selftests/bpf: Replace strcpy() calls with strscpy()
strcpy() does not perform bounds checking and is considered deprecated
[1]. Replace strcpy() calls with strscpy() defined in bpf_util.h.

[1] https://docs.kernel.org/process/deprecated.html#strcpy

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-3-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-23 18:40:07 -08:00
Ihor Solodrai
63c49efc98 selftests/bpf: Add simple strscpy() implementation
Replace bpf_strlcpy() in bpf_util.h with a sized_strscpy(), which is a
simplified sized_strscpy() from the kernel (lib/string.c [1]). It:
  * takes a count (destination size) parameter
  * guarantees NULL-termination
  * returns the number of characters copied or -E2BIG

Re-define strscpy macro similar to in-kernel implementation [2]: allow
the count parameter to be optional.

Add #ifdef-s to tools/include/linux/args.h, as they may be defined in
other system headers (for example, __CONCAT in sys/cdefs.h).

Fixup the single existing bpf_strlcpy() call in cgroup_helpers.c

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/string.c?h=v6.19#n113
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/string.h?h=v6.19#n91

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260223190736.649171-2-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-02-23 18:40:07 -08:00
Tung Nguyen
3aa677625c tipc: fix duplicate publication key in tipc_service_insert_publ()
TIPC uses named table to store TIPC services represented by type and
instance. Each time an application calls TIPC API bind() to bind a
type/instance to a socket, an entry is created and inserted into the
named table. It looks like this:

named table:
key1, entry1 (type, instance ...)
key2, entry2 (type, instance ...)

In the above table, each entry represents a route for sending data
from one socket to the other. For all publications originated from
the same node, the key is UNIQUE to identify each entry.
It is calculated by this formula:
key = socket portid + number of bindings + 1 (1)

where:
 - socket portid: unique and calculated by using linux kernel function
                  get_random_u32_below(). So, the value is randomized.
 - number of bindings: the number of times a type/instance pair is bound
                       to a socket. This number is linearly increased,
                       starting from 0.

While the socket portid is unique and randomized by linux kernel, the
linear increment of "number of bindings" in formula (1) makes "key" not
unique anymore. For example:
- Socket 1 is created with its associated port number 20062001. Type 1000,
instance 1 is bound to socket 1:
key1: 20062001 + 0 + 1 = 20062002

Then, bind() is called a second time on Socket 1 to by the same type 1000,
instance 1:
key2: 20062001 + 1 + 1 = 20062003

Named table:
key1 (20062002), entry1 (1000, 1 ...)
key2 (20062003), entry2 (1000, 1 ...)

- Socket 2 is created with its associated port number 20062002. Type 1000,
instance 1 is bound to socket 2:
key3: 20062002 + 0 + 1 = 20062003

TIPC looks up the named table and finds out that key2 with the same value
already exists and rejects the insertion into the named table.
This leads to failure of bind() call from application on Socket 2 with error
message EINVAL "Invalid argument".

This commit fixes this issue by adding more port id checking to make sure
that the key is unique to publications originated from the same port id
and node.

Fixes: 218527fe27 ("tipc: replace name table service range array with rb tree")
Signed-off-by: Tung Nguyen <tung.quang.nguyen@est.tech>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260220050541.237962-1-tung.quang.nguyen@est.tech
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-23 17:40:52 -08:00
Ivan Vecera
4cfe066a82 dpll: zl3073x: fix REF_PHASE_OFFSET_COMP register width for some chip IDs
The REF_PHASE_OFFSET_COMP register is 48-bit wide on most zl3073x chip
variants, but only 32-bit wide on chip IDs 0x0E30, 0x0E93..0x0E97 and
0x1F60. The driver unconditionally uses 48-bit read/write operations,
which on 32-bit variants causes reading 2 bytes past the register
boundary (corrupting the value) and writing 2 bytes into the adjacent
register.

Fix this by storing the chip ID in the device structure during probe
and adding a helper to detect the affected variants. Use the correct
register width for read/write operations and the matching sign extension
bit (31 vs 47) when interpreting the phase compensation value.

Fixes: 6287262f76 ("dpll: zl3073x: Add support to adjust phase")
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260220155755.448185-1-ivecera@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-23 17:35:39 -08:00
Jiayuan Chen
ca220141fa kcm: fix zero-frag skb in frag_list on partial sendmsg error
Syzkaller reported a warning in kcm_write_msgs() when processing a
message with a zero-fragment skb in the frag_list.

When kcm_sendmsg() fills MAX_SKB_FRAGS fragments in the current skb,
it allocates a new skb (tskb) and links it into the frag_list before
copying data. If the copy subsequently fails (e.g. -EFAULT from
user memory), tskb remains in the frag_list with zero fragments:

  head skb (msg being assembled, NOT yet in sk_write_queue)
  +-----------+
  | frags[17] |  (MAX_SKB_FRAGS, all filled with data)
  | frag_list-+--> tskb
  +-----------+    +----------+
                   | frags[0] |  (empty! copy failed before filling)
                   +----------+

For SOCK_SEQPACKET with partial data already copied, the error path
saves this message via partial_message for later completion. For
SOCK_SEQPACKET, sock_write_iter() automatically sets MSG_EOR, so a
subsequent zero-length write(fd, NULL, 0) completes the message and
queues it to sk_write_queue. kcm_write_msgs() then walks the
frag_list and hits:

  WARN_ON(!skb_shinfo(skb)->nr_frags)

TCP has a similar pattern where skbs are enqueued before data copy
and cleaned up on failure via tcp_remove_empty_skb(). KCM was
missing the equivalent cleanup.

Fix this by tracking the predecessor skb (frag_prev) when allocating
a new frag_list entry. On error, if the tail skb has zero frags,
use frag_prev to unlink and free it in O(1) without walking the
singly-linked frag_list. frag_prev is safe to dereference because
the entire message chain is only held locally (or in kcm->seq_skb)
and is not added to sk_write_queue until MSG_EOR, so the send path
cannot free it underneath us.

Also change the WARN_ON to WARN_ON_ONCE to avoid flooding the log
if the condition is somehow hit repeatedly.

There are currently no KCM selftests in the kernel tree; a simple
reproducer is available at [1].

[1] https://gist.github.com/mrpre/a94d431c757e8d6f168f4dd1a3749daa

Reported-by: syzbot+52624bdfbf2746d37d70@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/000000000000269a1405a12fdc77@google.com/T/
Fixes: ab7ac4eb98 ("kcm: Kernel Connection Multiplexor module")
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260219014256.370092-1-jiayuan.chen@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-23 17:26:55 -08:00
Ankit Garg
fb868db5f4 gve: fix incorrect buffer cleanup in gve_tx_clean_pending_packets for QPL
In DQ-QPL mode, gve_tx_clean_pending_packets() incorrectly uses the RDA
buffer cleanup path. It iterates num_bufs times and attempts to unmap
entries in the dma array.

This leads to two issues:
1. The dma array shares storage with tx_qpl_buf_ids (union).
 Interpreting buffer IDs as DMA addresses results in attempting to
 unmap incorrect memory locations.
2. num_bufs in QPL mode (counting 2K chunks) can significantly exceed
 the size of the dma array, causing out-of-bounds access warnings
(trace below is how we noticed this issue).

UBSAN: array-index-out-of-bounds in
drivers/net/ethernet/drivers/net/ethernet/google/gve/gve_tx_dqo.c:178:5 index 18 is out of
range for type 'dma_addr_t[18]' (aka 'unsigned long long[18]')
Workqueue: gve gve_service_task [gve]
Call Trace:
<TASK>
dump_stack_lvl+0x33/0xa0
__ubsan_handle_out_of_bounds+0xdc/0x110
gve_tx_stop_ring_dqo+0x182/0x200 [gve]
gve_close+0x1be/0x450 [gve]
gve_reset+0x99/0x120 [gve]
gve_service_task+0x61/0x100 [gve]
process_scheduled_works+0x1e9/0x380

Fix this by properly checking for QPL mode and delegating to
gve_free_tx_qpl_bufs() to reclaim the buffers.

Cc: stable@vger.kernel.org
Fixes: a6fb8d5a8b ("gve: Tx path for DQO-QPL")
Signed-off-by: Ankit Garg <nktgrg@google.com>
Reviewed-by: Jordan Rhee <jordanrhee@google.com>
Reviewed-by: Harshitha Ramamurthy <hramamurthy@google.com>
Signed-off-by: Joshua Washington <joshwash@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260220215324.1631350-1-joshwash@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-23 17:22:48 -08:00
Hyunwoo Kim
7bb09315f9 tls: Fix race condition in tls_sw_cancel_work_tx()
This issue was discovered during a code audit.

After cancel_delayed_work_sync() is called from tls_sk_proto_close(),
tx_work_handler() can still be scheduled from paths such as the
Delayed ACK handler or ksoftirqd.
As a result, the tx_work_handler() worker may dereference a freed
TLS object.

The following is a simple race scenario:

          cpu0                         cpu1

tls_sk_proto_close()
  tls_sw_cancel_work_tx()
                                 tls_write_space()
                                   tls_sw_write_space()
                                     if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
    set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask);
    cancel_delayed_work_sync(&ctx->tx_work.work);
                                     schedule_delayed_work(&tx_ctx->tx_work.work, 0);

To prevent this race condition, cancel_delayed_work_sync() is
replaced with disable_delayed_work_sync().

Fixes: f87e62d45e ("net/tls: remove close callback sock unlock/lock around TX work flush")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/aZgsFO6nfylfvLE7@v4bel
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-23 17:08:14 -08:00
Eric Dumazet
8a8a9fac9e net: do not pass flow_id to set_rps_cpu()
Blamed commit made the assumption that the RPS table for each receive
queue would have the same size, and that it would not change.

Compute flow_id in set_rps_cpu(), do not assume we can use the value
computed by get_rps_cpu(). Otherwise we risk out-of-bound access
and/or crashes.

Fixes: 48aa30443e ("net: Cache hash and flow_id to avoid recalculation")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Krishna Kumar <krikku@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260220222605.3468081-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-23 17:07:34 -08:00
Jakub Kicinski
41e09ec73d MAINTAINERS: include all of framer under pef2256
The "framer" infrastructure only has one driver - pef2256
and is not covered by any MAINTAINERS entry of its own.
This leads to author not being CCed on patches.
Let's include all of framer/ under the pef2256 entry.
We can split it in the very unlikely event of another
driver appearing.

Link: https://lore.kernel.org/aZefB5f3EAkQQM1m@google.com
Acked-by: Herve Codina <herve.codina@bootlin.com>
Link: https://patch.msgid.link/20260221011858.3403605-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-23 17:05:25 -08:00
Damien Le Moal
55db009926 ata: libata-core: fix cancellation of a port deferred qc work
cancel_work_sync() is a sleeping function so it cannot be called with
the spin lock of a port being held. Move the call to this function in
ata_port_detach() after EH completes, with the port lock released,
together with other work cancellation calls.

Fixes: 0ea84089db ("ata: libata-scsi: avoid Non-NCQ command starvation")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Igor Pylypiv <ipylypiv@google.com>
2026-02-24 09:38:55 +09:00
Damien Le Moal
eddb98ad93 ata: libata-eh: correctly handle deferred qc timeouts
A deferred qc may timeout while waiting for the device queue to drain
to be submitted. In such case, since the qc is not active,
ata_scsi_cmd_error_handler() ends up calling scsi_eh_finish_cmd(),
which frees the qc. But as the port deferred_qc field still references
this finished/freed qc, the deferred qc work may eventually attempt to
call ata_qc_issue() against this invalid qc, leading to errors such as
reported by UBSAN (syzbot run):

UBSAN: shift-out-of-bounds in drivers/ata/libata-core.c:5166:24
shift exponent 4210818301 is too large for 64-bit type 'long long unsigned int'
...
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120
 ubsan_epilogue+0xa/0x30 lib/ubsan.c:233
 __ubsan_handle_shift_out_of_bounds+0x279/0x2a0 lib/ubsan.c:494
 ata_qc_issue.cold+0x38/0x9f drivers/ata/libata-core.c:5166
 ata_scsi_deferred_qc_work+0x154/0x1f0 drivers/ata/libata-scsi.c:1679
 process_one_work+0x9d7/0x1920 kernel/workqueue.c:3275
 process_scheduled_works kernel/workqueue.c:3358 [inline]
 worker_thread+0x5da/0xe40 kernel/workqueue.c:3439
 kthread+0x370/0x450 kernel/kthread.c:467
 ret_from_fork+0x754/0xd80 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
 </TASK>

Fix this by checking if the qc of a timed out SCSI command is a deferred
one, and in such case, clear the port deferred_qc field and finish the
SCSI command with DID_TIME_OUT.

Reported-by: syzbot+1f77b8ca15336fff21ff@syzkaller.appspotmail.com
Fixes: 0ea84089db ("ata: libata-scsi: avoid Non-NCQ command starvation")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Igor Pylypiv <ipylypiv@google.com>
2026-02-24 09:34:05 +09:00
Andrei Vagin
4ced4cf5c9 binfmt_elf_fdpic: fix AUXV size calculation for ELF_HWCAP3 and ELF_HWCAP4
Commit 4e6e8c2b75 ("binfmt_elf: Wire up AT_HWCAP3 at AT_HWCAP4") added
support for AT_HWCAP3 and AT_HWCAP4, but it missed updating the AUX
vector size calculation in create_elf_fdpic_tables() and
AT_VECTOR_SIZE_BASE in include/linux/auxvec.h.

Similar to the fix for AT_HWCAP2 in commit c6a09e342f ("binfmt_elf_fdpic:
fix AUXV size calculation when ELF_HWCAP2 is defined"), this omission
leads to a mismatch between the reserved space and the actual number of
AUX entries, eventually triggering a kernel BUG_ON(csp != sp).

Fix this by incrementing nitems when ELF_HWCAP3 or ELF_HWCAP4 are
defined and updating AT_VECTOR_SIZE_BASE.

Cc: Mark Brown <broonie@kernel.org>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Fixes: 4e6e8c2b75 ("binfmt_elf: Wire up AT_HWCAP3 at AT_HWCAP4")
Signed-off-by: Andrei Vagin <avagin@google.com>
Link: https://patch.msgid.link/20260217180108.1420024-2-avagin@google.com
Signed-off-by: Kees Cook <kees@kernel.org>
2026-02-23 16:09:00 -08:00
Thomas Weißschuh
5886cc8f89 drm/msm/dpu: Don't use %pK through printk (again)
In the past %pK was preferable to %p as it would not leak raw pointer
values into the kernel log.
Since commit ad67b74d24 ("printk: hash addresses printed with %p")
the regular %p has been improved to avoid this issue.
Furthermore, restricted pointers ("%pK") were never meant to be used
through printk(). They can still unintentionally leak raw pointers or
acquire sleeping locks in atomic contexts.

Switch to the regular pointer formatting which is safer and
easier to reason about.

This was previously fixed in this driver in commit 1ba9fbe403
("drm/msm: Don't use %pK through printk") but an additional usage
was reintroduced in commit 39a750ff5f ("drm/msm/dpu: Add DSPP GC
driver to provide GAMMA_LUT DRM property")

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Fixes: 39a750ff5f ("drm/msm/dpu: Add DSPP GC driver to provide GAMMA_LUT DRM property")
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/706229/
Link: https://lore.kernel.org/r/20260223-restricted-pointers-msm-v1-1-14c0b451e372@linutronix.de
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-02-24 01:32:20 +02:00
Dmitry Baryshkov
9f593419eb Revert "drm/msm/dpu: try reserving the DSPP-less LM first"
This reverts commit 42f62cd795 ("drm/msm/dpu: try reserving the
DSPP-less LM first"). It seems on later DPUs using higher LMs require
some additional setup or conflicts with the hardware defaults. Val (and
other developers) reported blue screen on Hamoa (X1E80100) laptops.
Revert the offending commit until we understand, what is the issue.

Fixes: 42f62cd795 ("drm/msm/dpu: try reserving the DSPP-less LM first")
Reported-by: Val Packett <val@packett.cool>
Closes: https://lore.kernel.org/r/33424a9d-10a6-4479-bba6-12f8ce60da1a@packett.cool
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Tested-by: Manivannan Sadhasivam <mani@kernel.org> # T14s
Patchwork: https://patchwork.freedesktop.org/patch/704814/
Link: https://lore.kernel.org/r/20260214-revert-dspp-less-v1-1-be0d636a2a6e@oss.qualcomm.com
2026-02-24 01:30:00 +02:00
sunliming
b1dcc804f4 drm/msm/dpu: Fix smatch warnings about variable dereferenced before check
Fix below smatch warnings:
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c:161 dpu_hw_sspp_setup_pe_config_v13()
warn: variable dereferenced before check 'ctx' (see line 159)

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Closes: https://lore.kernel.org/r/202601252214.oEaY3UZM-lkp@intel.com/
Signed-off-by: sunliming <sunliming@kylinos.cn>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/701853/
Link: https://lore.kernel.org/r/20260130053615.24886-1-sunliming@linux.dev
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-02-24 01:28:49 +02:00
Kees Cook
6f6f353519 drm/msm: Adjust msm_iommu_pagetable_prealloc_allocate() allocation type
In preparation for making the kmalloc family of allocators type aware,
we need to make sure that the returned type from the allocation matches
the type of the variable being assigned. (Before, the allocator would
always return "void *", which can be implicitly cast to any pointer type.)

The assigned type is "void **" but the returned type will be "void ***".
These are the same allocation size (pointer size), but the types do not
match. Adjust the allocation type to match the assignment.

Signed-off-by: Kees Cook <kees@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/703588/
Link: https://lore.kernel.org/r/20260206222151.work.016-kees@kernel.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-02-24 01:28:18 +02:00
Konrad Dybcio
f7bf131973 drm/msm/dpu: Fix LM size on a number of platforms
The register space has grown with what seems to be DPU8.
Bump up the .len to match.

Fixes: e3b1f369db ("drm/msm/dpu: Add X1E80100 support")
Fixes: 4a352c2fc1 ("drm/msm/dpu: Introduce SC8280XP")
Fixes: efcd010772 ("drm/msm/dpu: add support for SM8550")
Fixes: 100d7ef699 ("drm/msm/dpu: add support for SM8450")
Fixes: 1785751734 ("drm/msm/dpu: add catalog entry for SAR2130P")
Signed-off-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Reviewed-by: Abel Vesa <abel.vesa@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/701063/
Link: https://lore.kernel.org/r/20260127-topic-lm_size_fix-v1-1-25f88d014dfd@oss.qualcomm.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-02-24 01:24:46 +02:00
Akhil P Oommen
a9dece60cc drm/msm/adreno: Add GPU to MODULE_DEVICE_TABLE
Since it is possible to independently probe Adreno GPU, add GPU match
table to MODULE_DEVICE_TABLE to allow auto-loading of msm module.

Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/700656/
Link: https://lore.kernel.org/r/20260124-adreno-module-table-v1-1-9c2dbb2638b4@oss.qualcomm.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
2026-02-24 01:22:18 +02:00
Nathan Chancellor
f709859f33 init/Kconfig: Adjust fixed clang version for __builtin_counted_by_ref
Commit d39a1d7486 ("compiler_types: Disable __builtin_counted_by_ref
for Clang") used 22.0.0 as the fixed version for a compiler crash but
the fix was only merged in main (23.0.0) and release/22.x (22.1.0). With
the current fixed version number, prerelease or Android LLVM 22 builds
will still be able to hit the compiler crash when building the kernel.
This can be particularly disruptive when bisecting LLVM.

Use 21.1.0 as the fixed version number to ensure the fix for this crash
is always present.

Fixes: d39a1d7486 ("compiler_types: Disable __builtin_counted_by_ref for Clang")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://patch.msgid.link/20260223-fix-clang-version-builtin-counted-by-ref-v1-1-3ea478a24f0a@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
2026-02-23 14:35:16 -08:00
Kees Cook
96a7b71c44 ubd: Use pointer-to-pointers for io_thread_req arrays
Having an unbounded array for irq_req_buffer and io_req_buffer doesn't
provide any bounds safety, and confuses the needed allocation type,
which is returning a pointer to pointers. Instead of the implicit cast,
switch the variable types.

Reported-by: Nathan Chancellor <nathan@kernel.org>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/all/b04b6c13-7d0e-4a89-9e68-b572b6c686ac@roeck-us.net
Fixes: 69050f8d6d ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types")
Acked-by: Richard Weinberger <richard@nod.at>
Link: https://patch.msgid.link/20260223214341.work.846-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
2026-02-23 14:35:16 -08:00
Shuicheng Lin
0879c3f04f drm/xe/sync: Fix user fence leak on alloc failure
When dma_fence_chain_alloc() fails, properly release the user fence
reference to prevent a memory leak.

Fixes: 0995c2fc39 ("drm/xe: Enforce correct user fence signaling order using")
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260219233516.2938172-6-shuicheng.lin@intel.com
(cherry picked from commit a5d5634cde48a9fcd68c8504aa07f89f175074a0)
Cc: stable@vger.kernel.org
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-02-23 16:48:43 -05:00
Shuicheng Lin
1bfd757509 drm/xe/sync: Cleanup partially initialized sync on parse failure
xe_sync_entry_parse() can allocate references (syncobj, fence, chain fence,
or user fence) before hitting a later failure path. Several of those paths
returned directly, leaving partially initialized state and leaking refs.

Route these error paths through a common free_sync label and call
xe_sync_entry_cleanup(sync) before returning the error.

Fixes: dd08ebf6c3 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260219233516.2938172-5-shuicheng.lin@intel.com
(cherry picked from commit f939bdd9207a5d1fc55cced5459858480686ce22)
Cc: stable@vger.kernel.org
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-02-23 16:48:29 -05:00
Luiz Augusto von Dentz
138d7eca44 Bluetooth: L2CAP: Fix missing key size check for L2CAP_LE_CONN_REQ
This adds a check for encryption key size upon receiving
L2CAP_LE_CONN_REQ which is required by L2CAP/LE/CFC/BV-15-C which
expects L2CAP_CR_LE_BAD_KEY_SIZE.

Link: https://lore.kernel.org/linux-bluetooth/5782243.rdbgypaU67@n9w6sw14/
Fixes: 27e2d4c8d2 ("Bluetooth: Add basic LE L2CAP connect request receiving support")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Christian Eggers <ceggers@arri.de>
2026-02-23 16:08:15 -05:00
Luiz Augusto von Dentz
a8d1d73c81 Bluetooth: L2CAP: Fix not checking output MTU is acceptable on L2CAP_ECRED_CONN_REQ
Upon receiving L2CAP_ECRED_CONN_REQ the given MTU shall be checked
against the suggested MTU of the listening socket as that is required
by the likes of PTS L2CAP/ECFC/BV-27-C test which expects
L2CAP_CR_LE_UNACCEPT_PARAMS if the MTU is lowers than socket omtu.

In order to be able to set chan->omtu the code now allows setting
setsockopt(BT_SNDMTU), but it is only allowed when connection has not
been stablished since there is no procedure to reconfigure the output
MTU.

Link: https://github.com/bluez/bluez/issues/1895
Fixes: 15f02b9105 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-02-23 16:07:55 -05:00
Mariusz Skamra
7cff9a40c6 Bluetooth: Fix CIS host feature condition
This fixes the condition for sending the LE Set Host Feature command.
The command is sent to indicate host support for Connected Isochronous
Streams in this case. It has been observed that the system could not
initialize BIS-only capable controllers because the controllers do not
support the command.

As per Core v6.2 | Vol 4, Part E, Table 3.1 the command shall be
supported if CIS Central or CIS Peripheral is supported; otherwise,
the command is optional.

Fixes: 709788b154 ("Bluetooth: hci_core: Fix using {cis,bis}_capable for current settings")
Cc: stable@vger.kernel.org
Signed-off-by: Mariusz Skamra <mariusz.skamra@codecoup.pl>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-02-23 16:07:37 -05:00
Waiman Long
a84097e625 cgroup/cpuset: Call housekeeping_update() without holding cpus_read_lock
The current cpuset partition code is able to dynamically update
the sched domains of a running system and the corresponding
HK_TYPE_DOMAIN housekeeping cpumask to perform what is essentially the
"isolcpus=domain,..." boot command line feature at run time.

The housekeeping cpumask update requires flushing a number of different
workqueues which may not be safe with cpus_read_lock() held as the
workqueue flushing code may acquire cpus_read_lock() or acquiring locks
which have locking dependency with cpus_read_lock() down the chain. Below
is an example of such circular locking problem.

  ======================================================
  WARNING: possible circular locking dependency detected
  6.18.0-test+ #2 Tainted: G S
  ------------------------------------------------------
  test_cpuset_prs/10971 is trying to acquire lock:
  ffff888112ba4958 ((wq_completion)sync_wq){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x7a/0x180

  but task is already holding lock:
  ffffffffae47f450 (cpuset_mutex){+.+.}-{4:4}, at: cpuset_partition_write+0x85/0x130

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:
  -> #4 (cpuset_mutex){+.+.}-{4:4}:
  -> #3 (cpu_hotplug_lock){++++}-{0:0}:
  -> #2 (rtnl_mutex){+.+.}-{4:4}:
  -> #1 ((work_completion)(&arg.work)){+.+.}-{0:0}:
  -> #0 ((wq_completion)sync_wq){+.+.}-{0:0}:

  Chain exists of:
    (wq_completion)sync_wq --> cpu_hotplug_lock --> cpuset_mutex

  5 locks held by test_cpuset_prs/10971:
   #0: ffff88816810e440 (sb_writers#7){.+.+}-{0:0}, at: ksys_write+0xf9/0x1d0
   #1: ffff8891ab620890 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0x260/0x5f0
   #2: ffff8890a78b83e8 (kn->active#187){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x2b6/0x5f0
   #3: ffffffffadf32900 (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x77/0x130
   #4: ffffffffae47f450 (cpuset_mutex){+.+.}-{4:4}, at: cpuset_partition_write+0x85/0x130

  Call Trace:
   <TASK>
     :
   touch_wq_lockdep_map+0x93/0x180
   __flush_workqueue+0x111/0x10b0
   housekeeping_update+0x12d/0x2d0
   update_parent_effective_cpumask+0x595/0x2440
   update_prstate+0x89d/0xce0
   cpuset_partition_write+0xc5/0x130
   cgroup_file_write+0x1a5/0x680
   kernfs_fop_write_iter+0x3df/0x5f0
   vfs_write+0x525/0xfd0
   ksys_write+0xf9/0x1d0
   do_syscall_64+0x95/0x520
   entry_SYSCALL_64_after_hwframe+0x76/0x7e

To avoid such a circular locking dependency problem, we have to
call housekeeping_update() without holding the cpus_read_lock() and
cpuset_mutex. The current set of wq's flushed by housekeeping_update()
may not have work functions that call cpus_read_lock() directly,
but we are likely to extend the list of wq's that are flushed in the
future. Moreover, the current set of work functions may hold locks that
may have cpu_hotplug_lock down the dependency chain.

So housekeeping_update() is now called after releasing cpus_read_lock
and cpuset_mutex at the end of a cpuset operation. These two locks are
then re-acquired later before calling rebuild_sched_domains_locked().

To enable mutual exclusion between the housekeeping_update() call and
other cpuset control file write actions, a new top level cpuset_top_mutex
is introduced. This new mutex will be acquired first to allow sharing
variables used by both code paths. However, cpuset update from CPU
hotplug can still happen in parallel with the housekeeping_update()
call, though that should be rare in production environment.

As cpus_read_lock() is now no longer held when
tmigr_isolated_exclude_cpumask() is called, it needs to acquire it
directly.

The lockdep_is_cpuset_held() is also updated to return true if either
cpuset_top_mutex or cpuset_mutex is held.

Fixes: 03ff735101 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:46:49 -10:00
Waiman Long
6df415aa46 cgroup/cpuset: Defer housekeeping_update() calls from CPU hotplug to workqueue
The cpuset_handle_hotplug() may need to invoke housekeeping_update(),
for instance, when an isolated partition is invalidated because its
last active CPU has been put offline.

As we are going to enable dynamic update to the nozh_full housekeeping
cpumask (HK_TYPE_KERNEL_NOISE) soon with the help of CPU hotplug,
allowing the CPU hotplug path to call into housekeeping_update() directly
from update_isolation_cpumasks() will likely cause deadlock. So we
have to defer any call to housekeeping_update() after the CPU hotplug
operation has finished. This is now done via the workqueue where
the update_hk_sched_domains() function will be invoked via the
hk_sd_workfn().

An concurrent cpuset control file write may have executed the required
update_hk_sched_domains() function before the work function is called. So
the work function call may become a no-op when it is invoked.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:42:11 -10:00
Waiman Long
3bfe479671 cgroup/cpuset: Move housekeeping_update()/rebuild_sched_domains() together
With the latest changes in sched/isolation.c, rebuild_sched_domains*()
requires the HK_TYPE_DOMAIN housekeeping cpumask to be properly
updated first, if needed, before the sched domains can be
rebuilt. So the two naturally fit together. Do that by creating a new
update_hk_sched_domains() helper to house both actions.

The name of the isolated_cpus_updating flag to control the
call to housekeeping_update() is now outdated. So change it to
update_housekeeping to better reflect its purpose. Also move the call
to update_hk_sched_domains() to the end of cpuset and hotplug operations
before releasing the cpuset_mutex.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:42:08 -10:00
Waiman Long
5e6aac573c kselftest/cgroup: Simplify test_cpuset_prs.sh by removing "S+" command
The "S+" command is used in the test matrix to enable the cpuset
controller. However this can be done automatically and we never use the
"S-" command to disable cpuset controller. Simplify the test matrix and
reduce clutter by removing the command and doing that automatically.
There is no functional change to the test cases.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:42:05 -10:00
Waiman Long
14713ed9e9 cgroup/cpuset: Set isolated_cpus_updating only if isolated_cpus is changed
As cpuset is updating HK_TYPE_DOMAIN housekeeping mask when there is
a change in the set of isolated CPUs, making this change is now more
costly than before.  Right now, the isolated_cpus_updating flag can be
set even if there is no real change in isolated_cpus. Put in additional
checks to make sure that isolated_cpus_updating is set only if there
is a real change in isolated_cpus.

Reviewed-by: Chen Ridong <chenridong@huaweicloud.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:41:09 -10:00
Waiman Long
17b1860034 cgroup/cpuset: Clarify exclusion rules for cpuset internal variables
Clarify the locking rules associated with file level internal variables
inside the cpuset code. There is no functional change.

Reviewed-by: Chen Ridong <chenridong@huaweicloud.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:41:04 -10:00
Waiman Long
68230aac8b cgroup/cpuset: Fix incorrect use of cpuset_update_tasks_cpumask() in update_cpumasks_hier()
Commit e2ffe502ba ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2")
incorrectly changed the 2nd parameter of cpuset_update_tasks_cpumask()
from tmp->new_cpus to cp->effective_cpus. This second parameter is just
a temporary cpumask for internal use. The cpuset_update_tasks_cpumask()
function was originally called update_tasks_cpumask() before commit
381b53c3b5 ("cgroup/cpuset: rename functions shared between v1
and v2").

This mistake can incorrectly change the effective_cpus of the
cpuset when it is the top_cpuset or in arm64 architecture where
task_cpu_possible_mask() may differ from cpu_possible_mask.  So far
top_cpuset hasn't been passed to update_cpumasks_hier() yet, but arm64
arch can still be impacted. Fix it by reverting the incorrect change.

Fixes: e2ffe502ba ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:41:01 -10:00
Waiman Long
f9a1767ce3 cgroup/cpuset: Fix incorrect change to effective_xcpus in partition_xcpus_del()
The effective_xcpus of a cpuset can contain offline CPUs. In
partition_xcpus_del(), the xcpus parameter is incorrectly used as
a temporary cpumask to mask out offline CPUs. As xcpus can be the
effective_xcpus of a cpuset, this can result in unexpected changes
in that cpumask. Fix this problem by not making any changes to the
xcpus parameter.

Fixes: 11e5f407b6 ("cgroup/cpuset: Keep track of CPUs in isolated partitions")
Reviewed-by: Chen Ridong <chenridong@huaweicloud.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 10:40:58 -10:00
Luiz Augusto von Dentz
05761c2c2b Bluetooth: L2CAP: Fix response to L2CAP_ECRED_CONN_REQ
Similar to 03dba9cea7 ("Bluetooth: L2CAP: Fix not responding with
L2CAP_CR_LE_ENCRYPTION") the result code L2CAP_CR_LE_ENCRYPTION shall
be used when BT_SECURITY_MEDIUM is set since that means security mode 2
which mean it doesn't require authentication which results in
qualification test L2CAP/ECFC/BV-32-C failing.

Link: https://github.com/bluez/bluez/issues/1871
Fixes: 15f02b9105 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-02-23 15:31:10 -05:00
Jinwang Li
5c4e9a8b18 Bluetooth: hci_qca: Cleanup on all setup failures
The setup process previously combined error handling and retry gating
under one condition. As a result, the final failed attempt exited
without performing cleanup.

Update the failure path to always perform power and port cleanup on
setup failure, and reopen the port only when retrying.

Fixes: 9e80587aba ("Bluetooth: hci_qca: Enhance retry logic in qca_setup")
Signed-off-by: Jinwang Li <jinwang.li@oss.qualcomm.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-02-23 15:30:41 -05:00
Heitor Alves de Siqueira
21e4271e65 Bluetooth: purge error queues in socket destructors
When TX timestamping is enabled via SO_TIMESTAMPING, SKBs may be queued
into sk_error_queue and will stay there until consumed. If userspace never
gets to read the timestamps, or if the controller is removed unexpectedly,
these SKBs will leak.

Fix by adding skb_queue_purge() calls for sk_error_queue in affected
bluetooth destructors. RFCOMM does not currently use sk_error_queue.

Fixes: 134f4b39df ("Bluetooth: add support for skb TX SND/COMPLETION timestamping")
Reported-by: syzbot+7ff4013eabad1407b70a@syzkaller.appspotmail.com
Closes: https://syzbot.org/bug?extid=7ff4013eabad1407b70a
Cc: stable@vger.kernel.org
Signed-off-by: Heitor Alves de Siqueira <halves@igalia.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-02-23 15:30:16 -05:00
Luiz Augusto von Dentz
c28d2bff70 Bluetooth: L2CAP: Fix result of L2CAP_ECRED_CONN_RSP when MTU is too short
Test L2CAP/ECFC/BV-26-C expect the response to L2CAP_ECRED_CONN_REQ with
and MTU value < L2CAP_ECRED_MIN_MTU (64) to be L2CAP_CR_LE_INVALID_PARAMS
rather than L2CAP_CR_LE_UNACCEPT_PARAMS.

Also fix not including the correct number of CIDs in the response since
the spec requires all CIDs being rejected to be included in the
response.

Link: https://github.com/bluez/bluez/issues/1868
Fixes: 15f02b9105 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-02-23 15:28:56 -05:00
Luiz Augusto von Dentz
7accb1c432 Bluetooth: L2CAP: Fix invalid response to L2CAP_ECRED_RECONF_REQ
This fixes responding with an invalid result caused by checking the
wrong size of CID which should have been (cmd_len - sizeof(*req)) and
on top of it the wrong result was use L2CAP_CR_LE_INVALID_PARAMS which
is invalid/reserved for reconf when running test like L2CAP/ECFC/BI-03-C:

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 64
        MPS: 64
        Source CID: 64
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reserved (0x000c)
         Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003)

Fiix L2CAP/ECFC/BI-04-C which expects L2CAP_RECONF_INVALID_MPS (0x0002)
when more than one channel gets its MPS reduced:

> ACL Data RX: Handle 64 flags 0x02 dlen 16
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 8
        MTU: 264
        MPS: 99
        Source CID: 64
!       Source CID: 65
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reconfiguration successful (0x0000)
         Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)

Fix L2CAP/ECFC/BI-05-C when SCID is invalid (85 unconnected):

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 65
        MPS: 64
!        Source CID: 85
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reconfiguration successful (0x0000)
         Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003)

Fix L2CAP/ECFC/BI-06-C when MPS < L2CAP_ECRED_MIN_MPS (64):

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 672
!       MPS: 63
        Source CID: 64
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!       Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)
        Result: Reconfiguration failed - other unacceptable parameters (0x0004)

Fix L2CAP/ECFC/BI-07-C when MPS reduced for more than one channel:

> ACL Data RX: Handle 64 flags 0x02 dlen 16
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 3 len 8
        MTU: 84
!       MPS: 71
        Source CID: 64
!        Source CID: 65
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!       Result: Reconfiguration successful (0x0000)
        Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)

Link: https://github.com/bluez/bluez/issues/1865
Fixes: 15f02b9105 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2026-02-23 15:23:37 -05:00
Rob Herring (Arm)
f9b8885994 remoteproc: qcom_wcnss: Fix reserved region mapping failure
Commit c70b9d5fdc ("remoteproc: qcom: Use of_reserved_mem_region_*
functions for "memory-region"") switched from devm_ioremap_wc() to
devm_ioremap_resource_wc(). The difference is devm_ioremap_resource_wc()
also requests the resource which fails. Testing of both fixed and
dynamic reserved regions indicates that requesting the resource should
work, so I'm not sure why it doesn't work in this case. Fix the issue by
reverting back to devm_ioremap_wc().

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reported-by: André Apitzsch <git@apitzsch.eu>
Fixes: c70b9d5fdc ("remoteproc: qcom: Use of_reserved_mem_region_* functions for "memory-region"")
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: André Apitzsch <git@apitzsch.eu> # on BQ Aquaris M5
Link: https://lore.kernel.org/r/20260128220243.3018526-1-robh@kernel.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
2026-02-23 13:57:37 -06:00
Arnd Bergmann
16cb1a64dc RDMA/uverbs: select CONFIG_DMA_SHARED_BUFFER
The addition of dmabuf support in uverbs means that it is no
longer possible to build infiniband support if that is disabled:

arm-linux-gnueabi-ld: drivers/infiniband/core/ib_core_uverbs.o: in function `rdma_user_mmap_entry_remove.part.0':
ib_core_uverbs.c:(.text+0x508): undefined reference to `dma_buf_move_notify'
(dma_buf_move_notify): Unknown destination type (ARM/Thumb) in drivers/infiniband/core/ib_core_uverbs.o
ib_core_uverbs.c:(.text+0x518): undefined reference to `dma_resv_wait_timeout'
(dma_resv_wait_timeout): Unknown destination type (ARM/Thumb) in drivers/infiniband/core/ib_core_uverbs.o

Select this from Kconfig, as we do for the other users.

Fixes: 0ac6f4056c ("RDMA/uverbs: Add DMABUF object type and operations")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://patch.msgid.link/20260216121213.2088910-1-arnd@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-02-23 14:49:21 -05:00
Ihor Solodrai
f9d69d5e7b module: Fix kernel panic when a symbol st_shndx is out of bounds
The module loader doesn't check for bounds of the ELF section index in
simplify_symbols():

       for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
		const char *name = info->strtab + sym[i].st_name;

		switch (sym[i].st_shndx) {
		case SHN_COMMON:

		[...]

		default:
			/* Divert to percpu allocation if a percpu var. */
			if (sym[i].st_shndx == info->index.pcpu)
				secbase = (unsigned long)mod_percpu(mod);
			else
  /** HERE --> **/		secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
			sym[i].st_value += secbase;
			break;
		}
	}

A symbol with an out-of-bounds st_shndx value, for example 0xffff
(known as SHN_XINDEX or SHN_HIRESERVE), may cause a kernel panic:

  BUG: unable to handle page fault for address: ...
  RIP: 0010:simplify_symbols+0x2b2/0x480
  ...
  Kernel panic - not syncing: Fatal exception

This can happen when module ELF is legitimately using SHN_XINDEX or
when it is corrupted.

Add a bounds check in simplify_symbols() to validate that st_shndx is
within the valid range before using it.

This issue was discovered due to a bug in llvm-objcopy, see relevant
discussion for details [1].

[1] https://lore.kernel.org/linux-modules/20251224005752.201911-1-ihor.solodrai@linux.dev/

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Reviewed-by: Daniel Gomez <da.gomez@samsung.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
2026-02-23 19:37:28 +00:00
Oliver Neukum
7a875c0989 usb: yurex: fix race in probe
The bbu member of the descriptor must be set to the value
standing for uninitialized values before the URB whose
completion handler sets bbu is submitted. Otherwise there is
a window during which probing can overwrite already retrieved
data.

Cc: stable <stable@kernel.org>
Signed-off-by: Oliver Neukum <oneukum@suse.com>
Link: https://patch.msgid.link/20260209143720.1507500-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-23 20:25:07 +01:00
Kuen-Han Tsai
0d6c8144ca usb: gadget: f_ncm: Fix atomic context locking issue
The ncm_set_alt function was holding a mutex to protect against races
with configfs, which invokes the might-sleep function inside an atomic
context.

Remove the struct net_device pointer from the f_ncm_opts structure to
eliminate the contention. The connection state is now managed by a new
boolean flag to preserve the use-after-free fix from
commit 6334b8e455 ("usb: gadget: f_ncm: Fix UAF ncm object at re-bind
after usb ep transport error").

BUG: sleeping function called from invalid context
Call Trace:
 dump_stack_lvl+0x83/0xc0
 dump_stack+0x14/0x16
 __might_resched+0x389/0x4c0
 __might_sleep+0x8e/0x100
 ...
 __mutex_lock+0x6f/0x1740
 ...
 ncm_set_alt+0x209/0xa40
 set_config+0x6b6/0xb40
 composite_setup+0x734/0x2b40
 ...

Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable@kernel.org
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260221-legacy-ncm-v2-2-dfb891d76507@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-23 20:22:29 +01:00
Kuen-Han Tsai
fde0634ad9 usb: legacy: ncm: Fix NPE in gncm_bind
Commit 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle
with bind/unbind") deferred the allocation of the net_device. This
change leads to a NULL pointer dereference in the legacy NCM driver as
it attempts to access the net_device before it's fully instantiated.

Store the provided qmult, host_addr, and dev_addr into the struct
ncm_opts->net_opts during gncm_bind(). These values will be properly
applied to the net_device when it is allocated and configured later in
the binding process by the NCM function driver.

Fixes: 56a512a9b4 ("usb: gadget: f_ncm: align net_device lifecycle with bind/unbind")
Cc: stable@kernel.org
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202602181727.fd76c561-lkp@intel.com
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
Link: https://patch.msgid.link/20260221-legacy-ncm-v2-1-dfb891d76507@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-23 20:22:29 +01:00
Jiasheng Jiang
b9fde50735 usb: gadget: f_tcm: Fix NULL pointer dereferences in nexus handling
The `tpg->tpg_nexus` pointer in the USB Target driver is dynamically
managed and tied to userspace configuration via ConfigFS. It can be
NULL if the USB host sends requests before the nexus is fully
established or immediately after it is dropped.

Currently, functions like `bot_submit_command()` and the data
transfer paths retrieve `tv_nexus = tpg->tpg_nexus` and immediately
dereference `tv_nexus->tvn_se_sess` without any validation. If a
malicious or misconfigured USB host sends a BOT (Bulk-Only Transport)
command during this race window, it triggers a NULL pointer
dereference, leading to a kernel panic (local DoS).

This exposes an inconsistent API usage within the module, as peer
functions like `usbg_submit_command()` and `bot_send_bad_response()`
correctly implement a NULL check for `tv_nexus` before proceeding.

Fix this by bringing consistency to the nexus handling. Add the
missing `if (!tv_nexus)` checks to the vulnerable BOT command and
request processing paths, aborting the command gracefully with an
error instead of crashing the system.

Fixes: c52661d60f ("usb-gadget: Initial merge of target module for UASP + BOT")
Cc: stable <stable@kernel.org>
Signed-off-by: Jiasheng Jiang <jiashengjiangcool@gmail.com>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://patch.msgid.link/20260219023834.17976-1-jiashengjiangcool@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-23 20:22:00 +01:00
Linus Torvalds
7dff99b354 Remove WARN_ALL_UNSEEDED_RANDOM kernel config option
This config option goes way back - it used to be an internal debug
option to random.c (at that point called DEBUG_RANDOM_BOOT), then was
renamed and exposed as a config option as CONFIG_WARN_UNSEEDED_RANDOM,
and then further renamed to the current CONFIG_WARN_ALL_UNSEEDED_RANDOM.

It was all done with the best of intentions: the more limited
rate-limited reports were reporting some cases, but if you wanted to see
all the gory details, you'd enable this "ALL" option.

However, it turns out - perhaps not surprisingly - that when people
don't care about and fix the first rate-limited cases, they most
certainly don't care about any others either, and so warning about all
of them isn't actually helping anything.

And the non-ratelimited reporting causes problems, where well-meaning
people enable debug options, but the excessive flood of messages that
nobody cares about will hide actual real information when things go
wrong.

I just got a kernel bug report (which had nothing to do with randomness)
where two thirds of the the truncated dmesg was just variations of

   random: get_random_u32 called from __get_random_u32_below+0x10/0x70 with crng_init=0

and in the process early boot messages had been lost (in addition to
making the messages that _hadn't_ been lost harder to read).

The proper way to find these things for the hypothetical developer that
cares - if such a person exists - is almost certainly with boot time
tracing.  That gives you the option to get call graphs etc too, which is
likely a requirement for fixing any problems anyway.

See Documentation/trace/boottime-trace.rst for that option.

And if we for some reason do want to re-introduce actual printing of
these things, it will need to have some uniqueness filtering rather than
this "just print it all" model.

Fixes: cc1e127bfa ("random: remove ratelimiting for in-kernel unseeded randomness")
Acked-by: Jason Donenfeld <Jason@zx2c4.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-02-23 11:18:48 -08:00
A1RM4X
93cd0d6646 USB: add QUIRK_NO_BOS for video capture several devices
Several USB capture devices also need the USB_QUIRK_NO_BOS set for them
to work properly, odds are they are all the same chip inside, just
different vendor/product ids.

This fixes up:
  - ASUS TUF 4K PRO
  - Avermedia Live Gamer Ultra 2.1 (GC553G2)
  - UGREEN 35871
to now run at full speed (10 Gbps/4K 60 fps mode.)

Link: https://lore.kernel.org/r/CACy+XB-f-51xGpNQFCSm5pE_momTQLu=BaZggHYU1DiDmFX=ug@mail.gmail.com
Cc: stable <stable@kernel.org>
Signed-off-by: A1RM4X <dev@a1rm4x.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-23 20:08:06 +01:00
Matt Roper
43d37df67f drm/xe/wa: Steer RMW of MCR registers while building default LRC
When generating the default LRC, if a register is not masked, we apply
any save-restore programming necessary via a read-modify-write sequence
that will ensure we only update the relevant bits/fields without
clobbering the rest of the register.  However some of the registers that
need to be updated might be MCR registers which require steering to a
non-terminated instance to ensure we can read back a valid, non-zero
value. The steering of reads originating from a command streamer is
controlled by register CS_MMIO_GROUP_INSTANCE_SELECT.  Emit additional
MI_LRI commands to update the steering before any RMW of an MCR register
to ensure the reads are performed properly.

Note that needing to perform a RMW of an MCR register while building the
default LRC is pretty rare.  Most of the MCR registers that are part of
an engine's LRCs are also masked registers, so no MCR is necessary.

Fixes: f2f90989cc ("drm/xe: Avoid reading RMW registers in emit_wa_job")
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://patch.msgid.link/20260206223058.387014-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 6c2e331c915ba9e774aa847921262805feb00863)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2026-02-23 13:54:48 -05:00
Martin K. Petersen
be237352f8 Merge branch '7.0/scsi-queue' into 7.0/scsi-fixes
Pull in remaining fixes from 7.0/scsi-queue.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-23 13:39:58 -05:00
Dave Jiang
e7e222ad73 cxl: Move devm_cxl_add_nvdimm_bridge() to cxl_pmem.ko
Moving the symbol devm_cxl_add_nvdimm_bridge() to
drivers/cxl/cxl_pmem.c, so that cxl_pmem can export a symbol that gives
cxl_acpi a depedency on cxl_pmem kernel module. This is a prepatory patch
to resolve the issue of a race for nvdimm_bus object that is created
during cxl_acpi_probe().

No functional changes besides moving code.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Ira Weiny <ira.weiny@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com?>
Link: https://patch.msgid.link/20260205001633.1813643-2-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-23 11:29:05 -07:00
Cheng-Yang Chou
075e70d13e selftests/sched_ext: Remove duplicated unistd.h include in rt_stall.c
The header <unistd.h> is included twice in rt_stall.c. Remove the
redundant inclusion to clean up the code.

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 07:45:39 -10:00
Cheng-Yang Chou
cbb297323d tools/sched_ext: scx_sdt: Remove unused '-f' option
The '-f' option is defined in getopt() but not handled in the switch
statement or documented in the help text. Providing '-f' currently
triggers the default error path.

Remove it to sync the optstring with the actual implementation.

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 07:45:34 -10:00
Cheng-Yang Chou
0c36a6f6f0 tools/sched_ext: scx_central: Remove unused '-p' option
The '-p' option is defined in getopt() but not handled in the switch
statement or documented in the help text. Providing '-p' currently
triggers the default error path.

Remove it to sync the optstring with the actual implementation.

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 07:45:30 -10:00
Cheng-Yang Chou
1f0638604f selftests/sched_ext: Fix unused-result warning for read()
The read() call in run_test() triggers a warn_unused_result compiler
warning, which breaks the build under -Werror.

Check the return value of read() and exit the child process on failure to
satisfy the compiler and handle pipe read errors.

Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 07:45:10 -10:00
Petr Pavlu
8d597ba6ec module: Fix the modversions and signing submenus
The module Kconfig file contains a set of options related to "Module
versioning support" (depends on MODVERSIONS) and "Module signature
verification" (depends on MODULE_SIG). The Kconfig tool automatically
creates submenus when an entry for a symbol is followed by consecutive
items that all depend on the symbol. However, this functionality doesn't
work for the mentioned module options. The MODVERSIONS options are
interleaved with ASM_MODVERSIONS, which has no 'depends on MODVERSIONS' but
instead uses 'default HAVE_ASM_MODVERSIONS && MODVERSIONS'. Similarly, the
MODULE_SIG options are interleaved by a comment warning not to forget
signing modules with scripts/sign-file, which uses the condition 'depends
on MODULE_SIG_FORCE && !MODULE_SIG_ALL'.

The result is that the options are confusingly shown when using
a menuconfig tool, as follows:

 [*]   Module versioning support
         Module versioning implementation (genksyms (from source code))  --->
 [ ]   Extended Module Versioning Support
 [*]   Basic Module Versioning Support
 [*]   Source checksum for all modules
 [*]   Module signature verification
 [ ]     Require modules to be validly signed
 [ ]     Automatically sign all modules
       Hash algorithm to sign modules (SHA-256)  --->

Fix the issue by using if/endif to group related options together in
kernel/module/Kconfig, similarly to how the MODULE_DEBUG options are
already grouped. Note that the signing-related options depend on
'MODULE_SIG || IMA_APPRAISE_MODSIG', with the exception of
MODULE_SIG_FORCE, which is valid only for MODULE_SIG and is therefore kept
separately. For consistency, do the same for the MODULE_COMPRESS entries.
The options are then properly placed into submenus, as follows:

 [*]   Module versioning support
         Module versioning implementation (genksyms (from source code))  --->
 [ ]     Extended Module Versioning Support
 [*]     Basic Module Versioning Support
 [*]   Source checksum for all modules
 [*]   Module signature verification
 [ ]     Require modules to be validly signed
 [ ]     Automatically sign all modules
         Hash algorithm to sign modules (SHA-256)  --->

Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Reviewed-by: Daniel Gomez <da.gomez@samsung.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
2026-02-23 17:45:03 +00:00
Petr Pavlu
a7b4bc094f module: Remove duplicate freeing of lockdep classes
In the error path of load_module(), under the free_module label, the
code calls lockdep_free_key_range() to release lock classes associated
with the MOD_DATA, MOD_RODATA and MOD_RO_AFTER_INIT module regions, and
subsequently invokes module_deallocate().

Since commit ac3b432839 ("module: replace module_layout with
module_memory"), the module_deallocate() function calls free_mod_mem(),
which releases the lock classes as well and considers all module
regions.

Attempting to free these classes twice is unnecessary. Remove the
redundant code in load_module().

Fixes: ac3b432839 ("module: replace module_layout with module_memory")
Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Reviewed-by: Daniel Gomez <da.gomez@samsung.com>
Reviewed-by: Aaron Tomlin <atomlin@atomlin.com>
Acked-by: Song Liu <song@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
2026-02-23 17:44:54 +00:00
Linus Torvalds
551d442001 default_gfp(): avoid using the "newfangled" __VA_OPT__ trick
The default_gfp() helper that I added is not wrong, but it turns out
that it causes unnecessary headaches for 'sparse' which doesn't support
the use of __VA_OPT__ (introduced in C++20 and C23, and supported by gcc
and clang for a long time).

We do already use __VA_OPT__ in some other cases in the kernel (drm/xe
and btrfs), but it has been fairly limited.  Now it triggers for pretty
much everything, and sparse ends up not working at all.

We can use the traditional gcc ',##__VA_ARGS__' syntax instead: it may
not be the "C standard" way and is slightly less natural in this
context, but it is the traditional model for this and avoids the sparse
problem.

Reported-and-tested-by: Ricardo Ribalda <ribalda@chromium.org>
Reported-and-tested-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reported-by: Ben Dooks <ben.dooks@codethink.co.uk>
Fixes: e19e1b480a ("add default_gfp() helper macro and use it in the new *alloc_obj() helpers")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-02-23 09:33:08 -08:00
Lizhi Hou
901ec34709 accel/amdxdna: Validate command buffer payload count
The count field in the command header is used to determine the valid
payload size. Verify that the valid payload does not exceed the remaining
buffer space.

Fixes: aac243092b ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260219211946.1920485-1-lizhi.hou@amd.com
2026-02-23 09:24:21 -08:00
Lizhi Hou
03808abb1d accel/amdxdna: Prevent ubuf size overflow
The ubuf size calculation may overflow, resulting in an undersized
allocation and possible memory corruption.

Use check_add_overflow() helpers to validate the size calculation before
allocation.

Fixes: bd72d4acda ("accel/amdxdna: Support user space allocated buffer")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260217192815.1784689-1-lizhi.hou@amd.com
2026-02-23 09:24:21 -08:00
Lizhi Hou
1110a94967 accel/amdxdna: Fix out-of-bounds memset in command slot handling
The remaining space in a command slot may be smaller than the size of
the command header. Clearing the command header with memset() before
verifying the available slot space can result in an out-of-bounds write
and memory corruption.

Fix this by moving the memset() call after the size validation.

Fixes: 3d32eb7a5e ("accel/amdxdna: Fix cu_idx being cleared by memset() during command setup")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260217185415.1781908-1-lizhi.hou@amd.com
2026-02-23 09:24:20 -08:00
Lizhi Hou
07efce5a66 accel/amdxdna: Fix command hang on suspended hardware context
When a hardware context is suspended, the job scheduler is stopped. If a
command is submitted while the context is suspended, the job is queued in
the scheduler but aie2_sched_job_run() is never invoked to restart the
hardware context. As a result, the command hangs.

Fix this by modifying the hardware context suspend routine to keep the job
scheduler running so that queued jobs can trigger context restart properly.

Fixes: aac243092b ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260211205341.722982-1-lizhi.hou@amd.com
2026-02-23 09:24:19 -08:00
Lizhi Hou
fdb65acfe6 accel/amdxdna: Fix suspend failure after enabling turbo mode
Enabling turbo mode disables hardware clock gating. Suspend requires
hardware clock gating to be re-enabled, otherwise suspend will fail.
Fix this by calling aie2_runtime_cfg() from aie2_hw_stop() to
re-enable clock gating during suspend. Also ensure that firmware is
initialized in aie2_hw_start() before modifying clock-gating
settings during resume.

Fixes: f4d7b8a6bc ("accel/amdxdna: Enhance power management settings")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260211204716.722788-1-lizhi.hou@amd.com
2026-02-23 09:24:19 -08:00
Lizhi Hou
1aa82181a3 accel/amdxdna: Fix dead lock for suspend and resume
When an application issues a query IOCTL while auto suspend is running,
a deadlock can occur. The query path holds dev_lock and then calls
pm_runtime_resume_and_get(), which waits for the ongoing suspend to
complete. Meanwhile, the suspend callback attempts to acquire dev_lock
and blocks, resulting in a deadlock.

Fix this by releasing dev_lock before calling pm_runtime_resume_and_get()
and reacquiring it after the call completes. Also acquire dev_lock in the
resume callback to keep the locking consistent.

Fixes: 063db45183 ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260211204644.722758-1-lizhi.hou@amd.com
2026-02-23 09:24:17 -08:00
Mario Limonciello
57aa3917a3 accel/amdxdna: Reduce log noise during process termination
During process termination, several error messages are logged that are
not actual errors but expected conditions when a process is killed or
interrupted. This creates unnecessary noise in the kernel log.

The specific scenarios are:

1. HMM invalidation returns -ERESTARTSYS when the wait is interrupted by
   a signal during process cleanup. This is expected when a process is
   being terminated and should not be logged as an error.

2. Context destruction returns -ENODEV when the firmware or device has
   already stopped, which commonly occurs during cleanup if the device
   was already torn down. This is also an expected condition during
   orderly shutdown.

Downgrade these expected error conditions from error level to debug level
to reduce log noise while still keeping genuine errors visible.

Fixes: 97f2757383 ("accel/amdxdna: Fix potential NULL pointer dereference in context cleanup")
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260210164521.1094274-3-mario.limonciello@amd.com
2026-02-23 09:24:16 -08:00
Lizhi Hou
8363c02863 accel/amdxdna: Fix crash when destroying a suspended hardware context
If userspace issues an ioctl to destroy a hardware context that has
already been automatically suspended, the driver may crash because the
mailbox channel pointer is NULL for the suspended context.

Fix this by checking the mailbox channel pointer in aie2_destroy_context()
before accessing it.

Fixes: 97f2757383 ("accel/amdxdna: Fix potential NULL pointer dereference in context cleanup")
Reviewed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260206060306.4050531-1-lizhi.hou@amd.com
2026-02-23 09:24:15 -08:00
Lizhi Hou
c68a6af400 accel/amdxdna: Switch to always use chained command
Preempt commands are only supported when submitted as chained commands.
To ensure preempt support works consistently, always submit commands in
chained command format.

Set force_cmdlist to true so that single commands are filled using the
chained command layout, enabling correct handling of preempt commands.

Fixes: 3a0ff7b98a ("accel/amdxdna: Support preemption requests")
Reviewed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260206060251.4050512-1-lizhi.hou@amd.com
2026-02-23 09:24:15 -08:00
Lizhi Hou
08fe1b5166 accel/amdxdna: Remove buffer size check when creating command BO
Large command buffers may be used, and they do not always need to be
mapped or accessed by the driver. Performing a size check at command BO
creation time unnecessarily rejects valid use cases.

Remove the buffer size check from command BO creation, and defer vmap
and size validation to the paths where the driver actually needs to map
and access the command buffer.

Fixes: ac49797c18 ("accel/amdxdna: Add GEM buffer object management")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260206060237.4050492-1-lizhi.hou@amd.com
2026-02-23 09:23:41 -08:00
Cheng-Yang Chou
9d851afa48 selftests/sched_ext: Abort test loop on signal
The runner sets exit_req on SIGINT/SIGTERM but ignores it during the
main loop. This prevents users from cleanly interrupting a test run.

Check exit_req each iteration to safely break out on exit signals.

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-23 07:19:13 -10:00
Roberto Bergantinos Corpas
410666a298 nfs: return EISDIR on nfs3_proc_create if d_alias is a dir
If we found an alias through nfs3_do_create/nfs_add_or_obtain
/d_splice_alias which happens to be a dir dentry, we don't return
any error, and simply forget about this alias, but the original
dentry we were adding and passed as parameter remains negative.

This later causes an oops on nfs_atomic_open_v23/finish_open since we
supply a negative dentry to do_dentry_open.

This has been observed running lustre-racer, where dirs and files are
created/removed concurrently with the same name and O_EXCL is not
used to open files (frequent file redirection).

While d_splice_alias typically returns a directory alias or NULL, we
explicitly check d_is_dir() to ensure that we don't attempt to perform
file operations (like finish_open) on a directory inode, which triggers
the observed oops.

Fixes: 7c6c5249f0 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
Reviewed-by: Olga Kornievskaia <okorniev@redhat.com>
Reviewed-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: Roberto Bergantinos Corpas <rbergant@redhat.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2026-02-23 11:41:38 -05:00
Li Ming
0066688dbc cxl/port: Hold port host lock during dport adding.
CXL testing environment can trigger following trace
 Oops: general protection fault, probably for non-canonical address 0xdffffc0000000092: 0000 [#1] SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000490-0x0000000000000497]
 RIP: 0010:cxl_dpa_to_region+0x105/0x1f0 [cxl_core]
 Call Trace:
  <TASK>
  cxl_event_trace_record+0xd1/0xa70 [cxl_core]
  __cxl_event_trace_record+0x12f/0x1e0 [cxl_core]
  cxl_mem_get_records_log+0x261/0x500 [cxl_core]
  cxl_mem_get_event_records+0x7c/0xc0 [cxl_core]
  cxl_mock_mem_probe+0xd38/0x1c60 [cxl_mock_mem]
  platform_probe+0x9d/0x130
  really_probe+0x1c8/0x960
  __driver_probe_device+0x187/0x3e0
  driver_probe_device+0x45/0x120
  __device_attach_driver+0x15d/0x280

When CXL subsystem adds a cxl port to a hierarchy, there is a small
window where the new port becomes visible before it is bound to a
driver. This happens because device_add() adds a device to bus device
list before bus_probe_device() binds it to a driver.
So if two cxl memdevs are trying to add a dport to a same port via
devm_cxl_enumerate_ports(), the second cxl memdev may observe the port
and attempt to add a dport, but fails because the port has not yet been
attached to cxl port driver. That causes the memdev->endpoint can not be
updated.

The sequence is like:

CPU 0					CPU 1
devm_cxl_enumerate_ports()
  # port not found, add it
  add_port_attach_ep()
    # hold the parent port lock
    # to add the new port
    devm_cxl_create_port()
      device_add()
	# Add dev to bus devs list
	bus_add_device()
					devm_cxl_enumerate_ports()
					  # found the port
					  find_cxl_port_by_uport()
					  # hold port lock to add a dport
					  device_lock(the port)
					  find_or_add_dport()
					    cxl_port_add_dport()
					      return -ENXIO because port->dev.driver is NULL
					  device_unlock(the port)
	bus_probe_device()
	  # hold the port lock
	  # for attaching
	  device_lock(the port)
	    attaching the new port
	  device_unlock(the port)

To fix this race, require that dport addition holds the host lock
of the target port(the host of CXL root and all cxl host bridge ports is
the platform firmware device, the host of all other ports is their
parent port). The CXL subsystem already requires holding the host lock
while attaching a new port. Therefore, successfully acquiring the host
lock guarantees that port attaching has completed.

Fixes: 4f06d81e7c ("cxl: Defer dport allocation for switch ports")
Signed-off-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20260210-fix-port-enumeration-failure-v3-2-06acce0b9ead@zohomail.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-23 09:31:22 -07:00
Li Ming
822655e675 cxl/port: Introduce port_to_host() helper
In CXL subsystem, a port has its own host device for the port creation
and removal. The host of CXL root and all the first level ports is the
platform firmware device, the host of other ports is their parent port's
device. Create this new helper to much easier to get the host of a cxl
port.

Introduce port_to_host() and use it to replace all places where using
open coded to get the host of a port.

Remove endpoint_host() as its functionality can be replaced by
port_to_host().

[dj: Squashed commit 1 and 3 in the series to commit 1. ]

Signed-off-by: Li Ming <ming.li@zohomail.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260210-fix-port-enumeration-failure-v3-1-06acce0b9ead@zohomail.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-23 09:31:07 -07:00
Fuad Tabba
ec197dca87 KVM: arm64: Revert accidental drop of kvm_uninit_stage2_mmu() for non-NV VMs
Commit 0c4762e268 ("KVM: arm64: nv: Avoid NV stage-2 code when NV is
not supported") added an early return to several functions in
arch/arm64/kvm/nested.c to prevent a UBSAN shift-out-of-bounds error
when accessing the pgt union for non-nested VMs.

However, this early return was inadvertently applied to
kvm_arch_flush_shadow_all() as well, causing it to skip the call to
kvm_uninit_stage2_mmu(kvm) for all non-nested VMs.

For pKVM, skipping this teardown means the host never unshares the
guest's memory with the EL2 hypervisor. When the host kernel later
recycles these leaked pages for a new VM, it attempts to re-share them.
The hypervisor correctly rejects this with -EPERM, triggering a host
WARN_ON and hanging the guest.

Fix this by dropping the early return from kvm_arch_flush_shadow_all().
The for-loop guarding the nested MMU cleanup already bounds itself when
nested_mmus_size == 0, allowing execution to proceed to
kvm_uninit_stage2_mmu() as intended.

Reported-by: Mark Brown <broonie@kernel.org>
Closes: https://lore.kernel.org/all/60916cb6-f460-4751-b910-f63c58700ad0@sirena.org.uk/
Fixes: 0c4762e268 ("KVM: arm64: nv: Avoid NV stage-2 code when NV is not supported")
Signed-off-by: Fuad Tabba <tabba@google.com>
Tested-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20260222083352.89503-1-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-23 16:26:08 +00:00
Armin Wolf
9836feedcf platform/x86: uniwill-laptop: Handle FN lock event
On many devices, the user can toggle the Fn lock state by
pressing Fn + Esc. Forward the associated event to the fn_lock
sysfs attribute as a poll notification.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260218005101.73680-5-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 18:06:44 +02:00
Armin Wolf
2be519d945 platform/x86: uniwill-laptop: Mark FN lock status as being volatile
It turns out that the FN lock status can be changed by the underlying
hardware when the user presses a special key combination. Mark the
associated register as volatile to prevent regmap from caching said
value. Also add the necessary suspend/resume handling.

Fixes: d050479693 ("platform/x86: Add Uniwill laptop driver")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260218005101.73680-4-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 18:06:42 +02:00
Armin Wolf
67e7eb4c13 platform/x86: uniwill-laptop: Fix crash on unexpected battery event
On devices that have not UNIWILL_FEATURE_BATTERY set, the underlying
hardware might still send the UNIWILL_OSD_BATTERY_ALERT event. In such
a situation, the driver will access uninitialized data structures when
handling said event.

Prevent this by only handling the UNIWILL_OSD_BATTERY_ALERT event when
UNIWILL_FEATURE_BATTERY is set.

Fixes: d050479693 ("platform/x86: Add Uniwill laptop driver")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260218005101.73680-3-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 18:06:41 +02:00
Armin Wolf
2d53dfacf0 platform/x86: uniwill-laptop: Rename FN lock and super key lock attrs
It turns out that both sysfs attributes actually directly control
the FN lock status/super key enable status, rather than the
triggering of the associated events. This behavior was first observed
on a Tuxedo notebook and was belived to be a hardware quirk.
However, it seems that i simply misunderstood the manual of the
OEM software for Intel NUC devices. The correct behavior is:

- fn_lock_toggle_enable enables/disables FN lock mode
- super_key_toggle_enable enables/disables the super key

Rename both sysfs attributes to avoid confusing users.

Fixes: d050479693 ("platform/x86: Add Uniwill laptop driver")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260218005101.73680-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 18:06:38 +02:00
Gregory Price
318c58852e cxl/memdev: fix deadlock in cxl_memdev_autoremove() on attach failure
cxl_memdev_autoremove() takes device_lock(&cxlmd->dev) via guard(device)
and then calls cxl_memdev_unregister() when the attach callback was
provided but cxl_mem_probe() failed to bind.

cxl_memdev_unregister() calls
  cdev_device_del()
    device_del()
      bus_remove_device()
        device_release_driver()

This path is reached when a driver uses the @attach parameter to
devm_cxl_add_memdev() and the CXL topology fails to enumerate (e.g.
DVSEC range registers decode outside platform-defined CXL ranges,
causing the endpoint port probe to fail).

Add cxl_memdev_attach_failed() to set the scope of the check correctly.

Reported-by: kreview-c94b85d6d2
Fixes: 29317f8dc6 ("cxl/mem: Introduce cxl_memdev_attach for CXL-dependent operation")
Signed-off-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Link: https://patch.msgid.link/20260211192228.2148713-1-gourry@gourry.net
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-02-23 09:03:44 -07:00
Claudiu Beznea
e0cf84109b reset: rzg2l-usbphy-ctrl: Check pwrrdy is valid before using it
The pwrrdy regmap_filed is allocated in rzg2l_usbphy_ctrl_pwrrdy_init()
only if the driver data is set to RZG2L_USBPHY_CTRL_PWRRDY. Check that
pwrrdy is valid before using it to avoid "Unable to handle kernel NULL
pointer dereference at virtual address" errors.

Fixes: c5b7cd9ade ("reset: rzg2l-usbphy-ctrl: Add suspend/resume support")
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
2026-02-23 17:03:28 +01:00
Jesse Guo
e3aa6feaf0 platform/x86: redmi-wmi: Add more hotkey mappings
This patch adds more Fn hotkeys (like Refresh rate toggle).
Additionally, remap the setup key from KEY_SETUP to KEY_CONFIG.
As KEY_CONFIG is supported by Desktop Environments for launching
system settings, whereas KEY_SETUP is often ignored by userspace.

Signed-off-by: Jesse Guo <JesseGuoTech@outlook.com>
Reviewed-by: Gladyshev Ilya <foxido@foxido.dev>
Link: https://patch.msgid.link/TYCPR01MB6851636256C39B170F2312E5D192A@TYCPR01MB6851.jpnprd01.prod.outlook.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 18:02:59 +02:00
Kurt Borja
bd5914caeb platform/x86: alienware-wmi-wmax: Add G-Mode support to m18 laptops
Alienware m18 laptops support G-Mode. Therefore, match them with
G-Series quirks.

Cc: stable@vger.kernel.org
Tested-by: Olexa Bilaniuk <obilaniu@gmail.com>
Signed-off-by: Kurt Borja <kuurtb@gmail.com>
Link: https://patch.msgid.link/20260129-m18-gmode-v1-1-48be521487b9@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 18:02:15 +02:00
Anton Plotnikov
729ffcffa7 platform/x86: hp-wmi: add Omen 14-fb1xxx (board 8E41) support
Reverse engineering of the HP Omen Windows utility shows that for performance
mode it uses the same codes listed in hp_thermal_profile_omen_v1. Therefore it
seems sufficient to add the board model name to omen_thermal_profile_boards.

Tested on Omen 14-fb1xxx: CPU power in performance profile reaches the Windows
limit (65W), instead of 45W in automatic BIOS mode. Max fan speed was reached
as well.

Link: https://patch.msgid.link/20260203164832.40514-1-plotnikovanton@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 18:01:22 +02:00
Kurt Borja
26a7601471 platform/x86: dell-wmi: Add audio/mic mute key codes
Add audio/mic mute key codes found in Alienware m18 r1 AMD.

Cc: stable@vger.kernel.org
Tested-by: Olexa Bilaniuk <obilaniu@gmail.com>
Suggested-by: Olexa Bilaniuk <obilaniu@gmail.com>
Signed-off-by: Kurt Borja <kuurtb@gmail.com>
Acked-by: Pali Rohár <pali@kernel.org>
Link: https://patch.msgid.link/20260207-mute-keys-v2-1-c55e5471c9c1@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 17:56:12 +02:00
Victor Lattaro Volpini
249f05e625 platform/x86: hp-wmi: Add Victus 16-d0xxx support
This patch enables Victus thermal profile support for the HP
Victus 16-d0xxx. It does so by adding model's DMI board name 88F8 to
victus_thermal_profile_boards.

Tested on a Victus 16-d0xxx:
  - Victus thermal profile choices available (quiet, balanced, performance)
    instead of the default ones (cool, quiet, balanced, performance);
  - Profile switching works correctly;
  - About 4% increase in FPS using benchmark Cyberpunk 2077 on
  performance profile;
  - No noticeable regressions.

Signed-off-by: Victor Lattaro Volpini <victorlattaro@proton.me>
Link: https://patch.msgid.link/20260210000048.250280-1-victorlattaro@proton.me
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 17:55:11 +02:00
Leif Skunberg
b38d478dad platform/x86: intel-hid: Enable 5-button array on ThinkPad X1 Fold 16 Gen 1
The Lenovo ThinkPad X1 Fold 16 Gen 1 has physical volume up/down
buttons that are handled through the intel-hid 5-button array
interface. The firmware does not advertise 5-button array support via
HEBC, so the driver relies on a DMI allowlist to enable it.

Add the ThinkPad X1 Fold 16 Gen 1 to the button_array_table so the
volume buttons work out of the box.

Signed-off-by: Leif Skunberg <diamondback@cohunt.app>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260210085625.34380-1-diamondback@cohunt.app
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 17:50:55 +02:00
Leif Skunberg
2a7b7652b1 platform/x86: int3472: Handle GPIO type 0x10 (DOVDD)
The Lenovo ThinkPad X1 Fold 16 Gen 1 has an OV5675 sensor (ACPI HID
OVTI5675) behind an INT3472 discrete PMIC controller. The INT3472
_DSM returns GPIO type 0x10 for one of the pins, which controls the
DOVDD (digital I/O power) regulator enable.

Type 0x10 is not currently handled by the driver, causing the GPIO to
be ignored with a warning. Add INT3472_GPIO_TYPE_DOVDD (0x10) and
handle it as a regulator with con_id "dovdd" to match the supply name
used by sensor drivers (e.g. ov5675).

Also increase GPIO_SUPPLY_NAME_LENGTH from 5 to 6 to accommodate
the "dovdd" name (5 chars + null terminator).

Signed-off-by: Leif Skunberg <diamondback@cohunt.app>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260210132129.17943-1-diamondback@cohunt.app
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 17:49:36 +02:00
Greg Kroah-Hartman
f0109b9d3e staging: rtl8723bs: properly validate the data in rtw_get_ie_ex()
Just like in commit 154828bf95 ("staging: rtl8723bs: fix out-of-bounds
read in rtw_get_ie() parser"), we don't trust the data in the frame so
we should check the length better before acting on it

Cc: stable <stable@kernel.org>
Assisted-by: gkh_clanker_2000
Tested-by: Navaneeth K <knavaneeth786@gmail.com>
Reviewed-by: Navaneeth K <knavaneeth786@gmail.com>
Link: https://patch.msgid.link/2026022336-arrange-footwork-6e54@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-23 16:34:44 +01:00
Artem Lytkin
8225489ddb staging: sm750fb: add missing pci_release_region on error and removal
hw_sm750_map() calls pci_request_region() but never releases the
region on error paths or in lynxfb_pci_remove(). This causes a
resource leak that prevents the PCI region from being mapped again
after driver removal or a failed probe. A TODO comment in the code
acknowledges this missing cleanup.

Restructure the error handling in hw_sm750_map() to properly release
the PCI region on ioremap failures, and add pci_release_region() to
lynxfb_pci_remove().

Signed-off-by: Artem Lytkin <iprintercanon@gmail.com>
Cc: stable <stable@kernel.org>
Link: https://patch.msgid.link/20260216202038.1828-1-iprintercanon@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2026-02-23 16:32:56 +01:00
Corey Minyard
cae66f1a1d ipmi:si: Fix check for a misbehaving BMC
There is a race on checking the state in the sender, it needs to be
checked under a lock.  But you also need a check to avoid issues with
a misbehaving BMC for run to completion mode.  So leave the check at
the beginning for run to completion, and add a check under the lock
to avoid the race.

Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Fixes: bc3a9d2177 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Cc: stable@vger.kernel.org # 4.18
Signed-off-by: Corey Minyard <corey@minyard.net>
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
2026-02-23 09:00:48 -06:00
Corey Minyard
62cd145453 ipmi:msghandler: Handle error returns from the SMI sender
It used to be, until recently, that the sender operation on the low
level interfaces would not fail.  That's not the case any more with
recent changes.

So check the return value from the sender operation, and propagate it
back up from there and handle the errors in all places.

Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Fixes: bc3a9d2177 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Cc: stable@vger.kernel.org # 4.18
Signed-off-by: Corey Minyard <corey@minyard.net>
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
2026-02-23 09:00:48 -06:00
Corey Minyard
f895e5df80 ipmi:si: Don't block module unload if the BMC is messed up
If the BMC is in a bad state, don't bother waiting for queues messages
since there can't be any.  Otherwise the unload is blocked until the
BMC is back in a good state.

Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Fixes: bc3a9d2177 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Cc: stable@vger.kernel.org # 4.18
Signed-off-by: Corey Minyard <corey@minyard.net>
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
2026-02-23 08:58:31 -06:00
Yufan Chen
4b73231b2a regulator: tps65185: check devm_kzalloc() result in probe
tps65185_probe() dereferences the allocation result immediately by using data->regmap. If devm_kzalloc() returns NULL under memory pressure, this leads to a NULL pointer dereference.

Add the missing allocation check and return -ENOMEM on failure.

Signed-off-by: Yufan Chen <ericterminal@gmail.com>
Link: https://patch.msgid.link/20260222104035.90790-1-ericterminal@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-23 14:52:16 +00:00
Azamat Almazbek uulu
32fc4168fa ASoC: amd: yc: Add ASUS EXPERTBOOK BM1503CDA to quirk table
The ASUS ExpertBook BM1503CDA (Ryzen 5 7535U, Barcelo-R) has an
internal DMIC connected through the AMD ACP (Audio CoProcessor)
but is missing from the DMI quirk table, so the acp6x machine
driver probe returns -ENODEV and no DMIC capture device is created.

Add the DMI entry so the internal microphone works out of the box.

Signed-off-by: Azamat Almazbek uulu <almazbek1608@gmail.com>
Reviewed-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Link: https://patch.msgid.link/20260221114813.5610-1-almazbek1608@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-23 14:52:11 +00:00
Denis Benato
37da69dbaa platform/x86: asus-armoury: add support for G733QS
Add TDP data for laptop model G733QS.

Signed-off-by: Denis Benato <denis.benato@linux.dev>
Link: https://patch.msgid.link/20260211212659.16542-1-denis.benato@linux.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 16:42:37 +02:00
Peter Metz
857f5036f8 platform/x86: intel-hid: Add Dell 16 Plus 2-in-1 to dmi_vgbs_allow_list
The Dell 16 Plus 2-in-1 (model DB06250) requires the VGBS allow list
entry to correctly enable the tablet mode switch. Without this, the
chassis state is not reported, and the hinge rotation only emits
unknown scancodes.

Link: https://lore.kernel.org/platform-driver-x86/CAP3yi-BWm0LqkhfzTrGy5n-KQ=3+T8eRMoR+Z+7Ke2VJB43kTA@mail.gmail.com/
Signed-off-by: Peter Metz <peter.metz@unarin.com>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260213230310.299974-1-peter.metz@unarin.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 16:31:03 +02:00
Peter Metz
6b3fa0615c platform/x86: intel-hid: Add Dell 14 Plus 2-in-1 to dmi_vgbs_allow_list
The Dell 14 Plus 2-in-1 (model DB04250) requires the VGBS allow list
entry to correctly enable the tablet mode switch. Without this, the
chassis state is not reported, and the hinge rotation only emits
unknown scancodes.

Verified on Dell 14 Plus 2-in-1 DB04250.

Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221090
Signed-off-by: Peter Metz <peter.metz@unarin.com>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260213044627.203638-1-peter.metz@unarin.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 16:31:01 +02:00
Jonathan Teh
53e977b1d5 platform/x86: thinkpad_acpi: Fix errors reading battery thresholds
Check whether the battery supports the relevant charge threshold before
reading the value to silence these errors:

thinkpad_acpi: acpi_evalf(BCTG, dd, ...) failed: AE_NOT_FOUND
ACPI: \_SB_.PCI0.LPC_.EC__.HKEY: BCTG: evaluate failed
thinkpad_acpi: acpi_evalf(BCSG, dd, ...) failed: AE_NOT_FOUND
ACPI: \_SB_.PCI0.LPC_.EC__.HKEY: BCSG: evaluate failed

when reading the charge thresholds via sysfs on platforms that do not
support them such as the ThinkPad T400.

Fixes: 2801b9683f ("thinkpad_acpi: Add support for battery thresholds")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=202619
Signed-off-by: Jonathan Teh <jonathan.teh@outlook.com>
Reviewed-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Link: https://patch.msgid.link/MI0P293MB01967B206E1CA6F337EBFB12926CA@MI0P293MB0196.ITAP293.PROD.OUTLOOK.COM
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 16:29:00 +02:00
Krishna Chomal
13fa3aaf02 platform/x86: hp-wmi: Add Omen 16-wf0xxx fan and thermal support
The HP Omen 16-wf0xxx (board ID: 8BAB) has the same WMI interface as
other Victus S boards, but requires quirks for correctly switching
thermal profile (similar to HP Omen 16-wf1xxx, board ID: 8C78).

Add the DMI board name to victus_s_thermal_profile_boards[] table and
map it to omen_v1_thermal_params.

Testing on HP Omen 16-wf0xxx confirmed that platform profile is
registered successfully and fan RPMs are readable and controllable.

Suggested-by: Noah Provenzano <noahpro@gmail.com>
Tested-by: Juan Martin Morales <juanm4morales@gmail.com>
Reported-by: Juan Martin Morales <juanm4morales@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220639
Signed-off-by: Krishna Chomal <krishna.chomal108@gmail.com>
Link: https://patch.msgid.link/20260216072003.90151-1-krishna.chomal108@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 16:28:06 +02:00
Hans de Goede
7d87ed70fc platform/x86: touchscreen_dmi: Add quirk for y-inverted Goodix touchscreen on SUPI S10
The touchscreen on the SUPI S10 tablet reports inverted Y coordinates,
causing touch input to be mirrored vertically relative to the display.

Add a quirk to set the "touchscreen-inverted-y" boolean device-property
on the touchscreen device, so that the goodix_ts driver will fixup
the coordinates.

Reported-by: Yajat Kumar <yajatapps3@gmail.com>
Closes: https://lore.kernel.org/linux-input/20251230221639.582406-1-yajatapps3@gmail.com/
Tested-by: Yajat Kumar <yajatapps3@gmail.com>
Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260217132346.34535-1-johannes.goede@oss.qualcomm.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 16:27:22 +02:00
Krishna Chomal
3c99a545b3 platform/x86: hp-wmi: Add Omen 16-xd0xxx fan and thermal support
The HP Omen 16-xd0xxx (board ID: 8BCD) has the same WMI interface as
other Victus S boards, but requires quirks for correctly switching
thermal profile (similar to HP Omen 16-wf1xxx, board ID: 8C78).

Add the DMI board name to victus_s_thermal_profile_boards[] table and
map it to omen_v1_thermal_params.

Testing on HP Omen 16-xd0xxx confirmed that platform profile is
registered successfully and fan RPMs are readable and controllable.

Tested-by: Varad Amol Pisale <varadpisale.work@gmail.com>
Signed-off-by: Krishna Chomal <krishna.chomal108@gmail.com>
Link: https://patch.msgid.link/20260218050235.94687-1-krishna.chomal108@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2026-02-23 16:17:47 +02:00
Chen Ni
3f4e403304 soc: fsl: cpm1: qmc: Fix error check for devm_ioremap_resource() in qmc_qe_init_resources()
Fix wrong variable used for error checking after devm_ioremap_resource()
call. The function checks qmc->scc_pram instead of qmc->dpram, which
could lead to incorrect error handling.

Fixes: eb680d5630 ("soc: fsl: cpm1: qmc: Add support for QUICC Engine (QE) implementation")
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Acked-by: Herve Codina <herve.codina@bootlin.com>
Link: https://lore.kernel.org/r/20260209015904.871269-1-nichen@iscas.ac.cn
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
2026-02-23 14:49:27 +01:00
Richard Genoud
014077044e soc: fsl: qbman: fix race condition in qman_destroy_fq
When QMAN_FQ_FLAG_DYNAMIC_FQID is set, there's a race condition between
fq_table[fq->idx] state and freeing/allocating from the pool and
WARN_ON(fq_table[fq->idx]) in qman_create_fq() gets triggered.

Indeed, we can have:
         Thread A                             Thread B
    qman_destroy_fq()                    qman_create_fq()
      qman_release_fqid()
        qman_shutdown_fq()
        gen_pool_free()
           -- At this point, the fqid is available again --
                                           qman_alloc_fqid()
           -- so, we can get the just-freed fqid in thread B --
                                           fq->fqid = fqid;
                                           fq->idx = fqid * 2;
                                           WARN_ON(fq_table[fq->idx]);
                                           fq_table[fq->idx] = fq;
     fq_table[fq->idx] = NULL;

And adding some logs between qman_release_fqid() and
fq_table[fq->idx] = NULL makes the WARN_ON() trigger a lot more.

To prevent that, ensure that fq_table[fq->idx] is set to NULL before
gen_pool_free() is called by using smp_wmb().

Fixes: c535e923bb ("soc/fsl: Introduce DPAA 1.x QMan device driver")
Signed-off-by: Richard Genoud <richard.genoud@bootlin.com>
Tested-by: CHAMPSEIX Thomas <thomas.champseix@alstomgroup.com>
Link: https://lore.kernel.org/r/20251223072549.397625-1-richard.genoud@bootlin.com
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
2026-02-23 14:49:27 +01:00
Rafael J. Wysocki
3afd8df024 PM: runtime: Change pm_runtime_put() return type to void
The primary role of pm_runtime_put() is to decrement the runtime PM
usage counter of the given device.  It always does that regardless of
the value returned by it later.

In addition, if the runtime PM usage counter after decrementation turns
out to be zero, a work item is queued up to check whether or not the
device can be suspended.  This is not guaranteed to succeed though and
even if it is successful, the device may still not be suspended going
forward.

There are multiple valid reasons why pm_runtime_put() may not decide to
queue up the work item mentioned above, including, but not limited to,
the case when user space has written "on" to the device's runtime PM
"control" file in sysfs.  In all of those cases, pm_runtime_put()
returns a negative error code (even though the device's runtime PM
usage counter has been successfully decremented by it) which is very
confusing.  In fact, its return value should only be used for debug
purposes and care should be taken when doing it even in that case.

Accordingly, to avoid the confusion mentioned above, change the return
type of pm_runtime_put() to void.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Link: https://patch.msgid.link/14387202.RDIVbhacDa@rafael.j.wysocki
2026-02-23 14:08:17 +01:00
Rafael J. Wysocki
7a73801fda pmdomain: imx: gpcv2: Discard pm_runtime_put() return value
Passing pm_runtime_put() return value to the callers is not particularly
useful.

Returning an error code from pm_runtime_put() merely means that it has
not queued up a work item to check whether or not the device can be
suspended and there are many perfectly valid situations in which that
can happen, like after writing "on" to the devices' runtime PM "control"
attribute in sysfs for one example.

Accordingly, update imx_pgc_domain_suspend() to simply discard the
return value of pm_runtime_put() and always return success to the
caller.

This will facilitate a planned change of the pm_runtime_put() return
type to void in the future.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peng Fan <peng.fan@nxp.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://patch.msgid.link/15658107.tv2OnDr8pf@rafael.j.wysocki
2026-02-23 14:05:38 +01:00
Penghe Geng
901084c51a mmc: core: Avoid bitfield RMW for claim/retune flags
Move claimed and retune control flags out of the bitfield word to
avoid unrelated RMW side effects in asynchronous contexts.

The host->claimed bit shared a word with retune flags. Writes to claimed
in __mmc_claim_host() or retune_now in mmc_mq_queue_rq() can overwrite
other bits when concurrent updates happen in other contexts, triggering
spurious WARN_ON(!host->claimed). Convert claimed, can_retune,
retune_now and retune_paused to bool to remove shared-word coupling.

Fixes: 6c0cedd1ef ("mmc: core: Introduce host claiming by context")
Fixes: 1e8e55b670 ("mmc: block: Add CQE support")
Cc: stable@vger.kernel.org
Suggested-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Penghe Geng <pgeng@nvidia.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-02-23 13:45:50 +01:00
Charles Keepax
6510e1324b ASoC: cs42l43: Report insert for exotic peripherals
For some exotic peripherals the type detect can return a reserved value
of 0x4. This will currently return an error and not report anything to
user-space, update this to report the insert normally.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260223093616.3800350-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-23 12:17:43 +00:00
Ariel Silver
162d331d83 wifi: mac80211: bounds-check link_id in ieee80211_ml_reconfiguration
link_id is taken from the ML Reconfiguration element (control & 0x000f),
so it can be 0..15. link_removal_timeout[] has IEEE80211_MLD_MAX_NUM_LINKS
(15) elements, so index 15 is out-of-bounds. Skip subelements with
link_id >= IEEE80211_MLD_MAX_NUM_LINKS to avoid a stack out-of-bounds
write.

Fixes: 8eb8dd2ffb ("wifi: mac80211: Support link removal using Reconfiguration ML element")
Reported-by: Ariel Silver <arielsilver77@gmail.com>
Signed-off-by: Ariel Silver <arielsilver77@gmail.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260220101129.1202657-1-Ariel.Silver@cybereason.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 12:35:34 +01:00
Kamal Dasu
79ad471530 mmc: sdhci-brcmstb: use correct register offset for V1 pin_sel restore
The restore path for SDIO_CFG_CORE_V1 was incorrectly using
SDIO_CFG_SD_PIN_SEL (offset 0x44) instead of SDIO_CFG_V1_SD_PIN_SEL
(offset 0x54), causing the wrong register to be written on resume.
The save path already uses the correct V1-specific offset. This
affects BCM7445 and BCM72116 platforms which use the V1 config core.

Fixes: b7e614802e ("mmc: sdhci-brcmstb: save and restore registers during PM")
Signed-off-by: Kamal Dasu <kamal.dasu@broadcom.com>
Cc: stable@vger.kernel.org
Tested-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-02-23 12:05:20 +01:00
Shawn Lin
6465a8bbb0 mmc: dw_mmc-rockchip: Fix runtime PM support for internal phase support
RK3576 is the first platform to introduce internal phase support, and
subsequent platforms are expected to adopt a similar design. In this
architecture, runtime suspend powers off the attached power domain, which
resets registers, including vendor-specific ones such as SDMMC_TIMING_CON0,
SDMMC_TIMING_CON1, and SDMMC_MISC_CON. These registers must be saved and
restored, a requirement that falls outside the scope of the dw_mmc core.

Fixes: 59903441f5 ("mmc: dw_mmc-rockchip: Add internal phase support")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Tested-by: Marco Schirrmeister <mschirrmeister@gmail.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-02-23 12:05:20 +01:00
Felix Gu
af12e64ae0 mmc: mmci: Fix device_node reference leak in of_get_dml_pipe_index()
When calling of_parse_phandle_with_args(), the caller is responsible
to call of_node_put() to release the reference of device node.
In of_get_dml_pipe_index(), it does not release the reference.

Fixes: 9cb15142d0 ("mmc: mmci: Add qcom dml support to the driver.")
Signed-off-by: Felix Gu <gu_0233@qq.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2026-02-23 12:05:20 +01:00
Bartosz Golaszewski
ec2cceadfa gpiolib: normalize the return value of gc->get() on behalf of buggy drivers
Commit 86ef402d80 ("gpiolib: sanitize the return value of
gpio_chip::get()") started checking the return value of the .get()
callback in struct gpio_chip. Now - almost a year later - it turns out
that there are quite a few drivers in tree that can break with this
change. Partially revert it: normalize the return value in GPIO core but
also emit a warning.

Cc: stable@vger.kernel.org
Fixes: 86ef402d80 ("gpiolib: sanitize the return value of gpio_chip::get()")
Reported-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Closes: https://lore.kernel.org/all/aZSkqGTqMp_57qC7@google.com/
Reviewed-by: Linus Walleij <linusw@kernel.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://patch.msgid.link/20260219-gpiolib-set-normalize-v2-1-f84630e45796@oss.qualcomm.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
2026-02-23 11:49:23 +01:00
Jouni Högander
eb4a7139e9 drm/i915/alpm: ALPM disable fixes
PORT_ALPM_CTL is supposed to be written only before link training. Remove
writing it from ALPM disable.

Also clearing ALPM_CTL_ALPM_AUX_LESS_ENABLE and is not about disabling ALPM
but switching to AUX-Wake ALPM. Stop touching this bit on ALPM disable.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7153
Fixes: 1ccbf13586 ("drm/i915/psr: Enable ALPM on source side for eDP Panel replay")
Cc: Animesh Manna <animesh.manna@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.10+
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Michał Grzelak <michal.grzelak@intel.com>
Link: https://patch.msgid.link/20260212062731.397801-1-jouni.hogander@intel.com
(cherry picked from commit 008304c9ae75c772d3460040de56e12112cdf5e6)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2026-02-23 12:44:06 +02:00
Haocheng Yu
77de62ad3d perf/core: Fix refcount bug and potential UAF in perf_mmap
Syzkaller reported a refcount_t: addition on 0; use-after-free warning
in perf_mmap.

The issue is caused by a race condition between a failing mmap() setup
and a concurrent mmap() on a dependent event (e.g., using output
redirection).

In perf_mmap(), the ring_buffer (rb) is allocated and assigned to
event->rb with the mmap_mutex held. The mutex is then released to
perform map_range().

If map_range() fails, perf_mmap_close() is called to clean up.
However, since the mutex was dropped, another thread attaching to
this event (via inherited events or output redirection) can acquire
the mutex, observe the valid event->rb pointer, and attempt to
increment its reference count. If the cleanup path has already
dropped the reference count to zero, this results in a
use-after-free or refcount saturation warning.

Fix this by extending the scope of mmap_mutex to cover the
map_range() call. This ensures that the ring buffer initialization
and mapping (or cleanup on failure) happens atomically effectively,
preventing other threads from accessing a half-initialized or
dying ring buffer.

Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Haocheng Yu <yuhaocheng035@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260202162057.7237-1-yuhaocheng035@gmail.com
2026-02-23 11:19:25 +01:00
Zide Chen
6a8a48644c perf/x86/intel/uncore: Add per-scheduler IMC CAS count events
IMC on SPR and EMR does not support sub-channels.  In contrast, CPUs
that use gnr_uncores[] (e.g. Granite Rapids and Sierra Forest)
implement two command schedulers (SCH0/SCH1) per memory channel,
providing logically independent command and data paths.

Do not reuse the spr_uncore_imc[] configuration for these CPUs.
Instead, introduce a dedicated gnr_uncore_imc[] with per-scheduler
events, so userspace can monitor SCH0 and SCH1 independently.

On these CPUs, replace cas_count_{read,write} with
cas_count_{read,write}_sch{0,1}.  This may break existing userspace
that relies on cas_count_{read,write}, prompting it to switch to the
per-scheduler events, as the legacy event reports only partial
traffic (SCH0).

Fixes: 632c4bf6d0 ("perf/x86/intel/uncore: Support Granite Rapids")
Fixes: cb4a6ccf35 ("perf/x86/intel/uncore: Support Sierra Forest and Grand Ridge")
Reported-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260210005225.20311-1-zide.chen@intel.com
2026-02-23 11:19:25 +01:00
Namhyung Kim
486ff5ad49 perf/core: Fix invalid wait context in ctx_sched_in()
Lockdep found a bug in the event scheduling when a pinned event was
failed and wakes up the threads in the ring buffer like below.

It seems it should not grab a wait-queue lock under perf-context lock.
Let's do it with irq_work.

  [   39.913691] =============================
  [   39.914157] [ BUG: Invalid wait context ]
  [   39.914623] 6.15.0-next-20250530-next-2025053 #1 Not tainted
  [   39.915271] -----------------------------
  [   39.915731] repro/837 is trying to lock:
  [   39.916191] ffff88801acfabd8 (&event->waitq){....}-{3:3}, at: __wake_up+0x26/0x60
  [   39.917182] other info that might help us debug this:
  [   39.917761] context-{5:5}
  [   39.918079] 4 locks held by repro/837:
  [   39.918530]  #0: ffffffff8725cd00 (rcu_read_lock){....}-{1:3}, at: __perf_event_task_sched_in+0xd1/0xbc0
  [   39.919612]  #1: ffff88806ca3c6f8 (&cpuctx_lock){....}-{2:2}, at: __perf_event_task_sched_in+0x1a7/0xbc0
  [   39.920748]  #2: ffff88800d91fc18 (&ctx->lock){....}-{2:2}, at: __perf_event_task_sched_in+0x1f9/0xbc0
  [   39.921819]  #3: ffffffff8725cd00 (rcu_read_lock){....}-{1:3}, at: perf_event_wakeup+0x6c/0x470

Fixes: f4b07fd62d ("perf/core: Use POLLHUP for a pinned event in error")
Closes: https://lore.kernel.org/lkml/aD2w50VDvGIH95Pf@ly-workstation
Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: "Lai, Yi" <yi1.lai@linux.intel.com>
Link: https://patch.msgid.link/20250603045105.1731451-1-namhyung@kernel.org
2026-02-23 11:19:25 +01:00
Mathieu Desnoyers
3b68df9781 rseq: slice ext: Ensure rseq feature size differs from original rseq size
Before rseq became extensible, its original size was 32 bytes even
though the active rseq area was only 20 bytes. This had the following
impact in terms of userspace ecosystem evolution:

* The GNU libc between 2.35 and 2.39 expose a __rseq_size symbol set
  to 32, even though the size of the active rseq area is really 20.
* The GNU libc 2.40 changes this __rseq_size to 20, thus making it
  express the active rseq area.
* Starting from glibc 2.41, __rseq_size corresponds to the
  AT_RSEQ_FEATURE_SIZE from getauxval(3).

This means that users of __rseq_size can always expect it to
correspond to the active rseq area, except for the value 32, for
which the active rseq area is 20 bytes.

Exposing a 32 bytes feature size would make life needlessly painful
for userspace. Therefore, add a reserved field at the end of the
rseq area to bump the feature size to 33 bytes. This reserved field
is expected to be replaced with whatever field will come next,
expecting that this field will be larger than 1 byte.

The effect of this change is to increase the size from 32 to 64 bytes
before we actually have fields using that memory.

Clarify the allocation size and alignment requirements in the struct
rseq uapi comment.

Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the
value of the active rseq area size rounded up to next power of 2, which
guarantees that the rseq structure will always be aligned on the nearest
power of two large enough to contain it, even as it grows. Change the
alignment check in the rseq registration accordingly.

This will minimize the amount of ABI corner-cases we need to document
and require userspace to play games with. The rule stays simple when
__rseq_size != 32:

  #define rseq_field_available(field)	(__rseq_size >= offsetofend(struct rseq_abi, field))

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com
2026-02-23 11:19:19 +01:00
Mathieu Desnoyers
26d43a90be rseq: Clarify rseq registration rseq_size bound check comment
The rseq registration validates that the rseq_size argument is greater
or equal to 32 (the original rseq size), but the comment associated with
this check does not clearly state this.

Clarify the comment to that effect.

Fixes: ee3e3ac05c ("rseq: Introduce extensible rseq ABI")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260220200642.1317826-2-mathieu.desnoyers@efficios.com
2026-02-23 11:19:19 +01:00
Peter Zijlstra
5324953c06 sched/core: Fix wakeup_preempt's next_class tracking
Kernel test robot reported that
tools/testing/selftests/kvm/hardware_disable_test was failing due to
commit 704069649b ("sched/core: Rework sched_class::wakeup_preempt()
and rq_modified_*()")

It turns out there were two related problems that could lead to a
missed preemption:

 - when hitting newidle balance from the idle thread, it would elevate
   rb->next_class from &idle_sched_class to &fair_sched_class, causing
   later wakeup_preempt() calls to not hit the sched_class_above()
   case, and not issue resched_curr().

   Notably, this modification pattern should only lower the
   next_class, and never raise it. Create two new helper functions to
   wrap this.

 - when doing schedule_idle(), it was possible to miss (re)setting
   rq->next_class to &idle_sched_class, leading to the very same
   problem.

Cc: Sean Christopherson <seanjc@google.com>
Fixes: 704069649b ("sched/core: Rework sched_class::wakeup_preempt() and rq_modified_*()")
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202602122157.4e861298-lkp@intel.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260218163329.GQ1395416@noisy.programming.kicks-ass.net
2026-02-23 11:19:19 +01:00
Arnd Bergmann
4c652a4772 rseq: Mark rseq_arm_slice_extension_timer() __always_inline
objtool warns about this function being called inside of a uaccess
section:

kernel/entry/common.o: warning: objtool: irqentry_exit+0x1dc: call to rseq_arm_slice_extension_timer() with UACCESS enabled

Interestingly, this happens with CONFIG_RSEQ_SLICE_EXTENSION disabled,
so this is an empty function, as the normal implementation is
already marked __always_inline.

I could reproduce this multiple times with gcc-11 but not with gcc-15,
so the compiler probably got better at identifying the trivial function.

Mark all the empty helpers for !RSEQ_SLICE_EXTENSION as __always_inline
for consistency, avoiding this warning.

Fixes: 0ac3b5c3dc ("rseq: Implement time slice extension enforcement timer")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260206074122.709580-1-arnd@kernel.org
2026-02-23 11:19:19 +01:00
Peter Zijlstra
6e3c0a4e1a sched/fair: Fix lag clamp
Vincent reported that he was seeing undue lag clamping in a mixed
slice workload. Implement the max_slice tracking as per the todo
comment.

Fixes: 147f3efaa2 ("sched/fair: Implement an EEVDF-like scheduling policy")
Reported-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20250422101628.GA33555@noisy.programming.kicks-ass.net
2026-02-23 11:19:18 +01:00
Wang Tao
ff38424030 sched/eevdf: Update se->vprot in reweight_entity()
In the EEVDF framework with Run-to-Parity protection, `se->vprot` is an
independent variable defining the virtual protection timestamp.

When `reweight_entity()` is called (e.g., via nice/renice), it performs
the following actions to preserve Lag consistency:
 1. Scales `se->vlag` based on the new weight.
 2. Calls `place_entity()`, which recalculates `se->vruntime` based on
    the new weight and scaled lag.

However, the current implementation fails to update `se->vprot`, leading
to mismatches between the task's actual runtime and its expected duration.

Fixes: 63304558ba ("sched/eevdf: Curb wakeup-preemption")
Suggested-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Wang Tao <wangtao554@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20260120123113.3518950-1-wangtao554@huawei.com
2026-02-23 11:19:18 +01:00
Peter Zijlstra
bcd74b2ffd sched/fair: Only set slice protection at pick time
We should not (re)set slice protection in the sched_change pattern
which calls put_prev_task() / set_next_task().

Fixes: 63304558ba ("sched/eevdf: Curb wakeup-preemption")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20260219080624.561421378%40infradead.org
2026-02-23 11:19:18 +01:00
Peter Zijlstra
b3d99f43c7 sched/fair: Fix zero_vruntime tracking
It turns out that zero_vruntime tracking is broken when there is but a single
task running. Current update paths are through __{en,de}queue_entity(), and
when there is but a single task, pick_next_task() will always return that one
task, and put_prev_set_next_task() will end up in neither function.

This can cause entity_key() to grow indefinitely large and cause overflows,
leading to much pain and suffering.

Furtermore, doing update_zero_vruntime() from __{de,en}queue_entity(), which
are called from {set_next,put_prev}_entity() has problems because:

 - set_next_entity() calls __dequeue_entity() before it does cfs_rq->curr = se.
   This means the avg_vruntime() will see the removal but not current, missing
   the entity for accounting.

 - put_prev_entity() calls __enqueue_entity() before it does cfs_rq->curr =
   NULL. This means the avg_vruntime() will see the addition *and* current,
   leading to double accounting.

Both cases are incorrect/inconsistent.

Noting that avg_vruntime is already called on each {en,de}queue, remove the
explicit avg_vruntime() calls (which removes an extra 64bit division for each
{en,de}queue) and have avg_vruntime() update zero_vruntime itself.

Additionally, have the tick call avg_vruntime() -- discarding the result, but
for the side-effect of updating zero_vruntime.

While there, optimize avg_vruntime() by noting that the average of one value is
rather trivial to compute.

Test case:
  # taskset -c -p 1 $$
  # taskset -c 2 bash -c 'while :; do :; done&'
  # cat /sys/kernel/debug/sched/debug | awk '/^cpu#/ {P=0} /^cpu#2,/ {P=1} {if (P) print $0}' | grep -e zero_vruntime -e "^>"

PRE:
    .zero_vruntime                 : 31316.407903
  >R            bash   487     50787.345112   E       50789.145972           2.800000     50780.298364        16     120         0.000000         0.000000         0.000000        /
    .zero_vruntime                 : 382548.253179
  >R            bash   487    427275.204288   E      427276.003584           2.800000    427268.157540        23     120         0.000000         0.000000         0.000000        /

POST:
    .zero_vruntime                 : 17259.709467
  >R            bash   526     17259.709467   E       17262.509467           2.800000     16915.031624         9     120         0.000000         0.000000         0.000000        /
    .zero_vruntime                 : 18702.723356
  >R            bash   526     18702.723356   E       18705.523356           2.800000     18358.045513         9     120         0.000000         0.000000         0.000000        /

Fixes: 79f3f9bedd ("sched/eevdf: Fix min_vruntime vs avg_vruntime")
Reported-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20260219080624.438854780%40infradead.org
2026-02-23 11:19:17 +01:00
Thomas Huth
237dc6a054 x86/headers: Replace __ASSEMBLY__ stragglers with __ASSEMBLER__
After converting the __ASSEMBLY__ statements to __ASSEMBLER__ in
commit 24a295e4ef ("x86/headers: Replace __ASSEMBLY__ with
__ASSEMBLER__ in non-UAPI headers"), some new code has been
added that uses __ASSEMBLY__ again. Convert these stragglers, too.

This is a mechanical patch, done with a simple "sed -i" command.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251218182029.166993-1-thuth@redhat.com
2026-02-23 11:19:12 +01:00
Peter Zijlstra
24c8147abb x86/cfi: Fix CFI rewrite for odd alignments
Rustam reported his clang builds did not boot properly; turns out his
.config has: CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B=y set.

Fix up the FineIBT code to deal with this unusual alignment.

Fixes: 931ab63664 ("x86/ibt: Implement FineIBT")
Reported-by: Rustam Kovhaev <rkovhaev@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Rustam Kovhaev <rkovhaev@gmail.com>
2026-02-23 11:19:11 +01:00
Hou Wenlong
a0cb371b52 x86/bug: Handle __WARN_printf() trap in early_fixup_exception()
The commit 5b472b6e5b ("x86_64/bug: Implement __WARN_printf()")
implemented __WARN_printf(), which changed the mechanism to use UD1
instead of UD2. However, it only handles the trap in the runtime IDT
handler, while the early booting IDT handler lacks this handling. As a
result, the usage of WARN() before the runtime IDT setup can lead to
kernel crashes. Since KMSAN is enabled after the runtime IDT setup, it
is safe to use handle_bug() directly in early_fixup_exception() to
address this issue.

Fixes: 5b472b6e5b ("x86_64/bug: Implement __WARN_printf()")
Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/c4fb3645f60d3a78629d9870e8fcc8535281c24f.1768016713.git.houwenlong.hwl@antgroup.com
2026-02-23 11:19:11 +01:00
Andrew Cooper
aa280a08e7 x86/fred: Correct speculative safety in fred_extint()
array_index_nospec() is no use if the result gets spilled to the stack, as
it makes the believed safe-under-speculation value subject to memory
predictions.

For all practical purposes, this means array_index_nospec() must be used in
the expression that accesses the array.

As the code currently stands, it's the wrong side of irqentry_enter(), and
'index' is put into %ebp across the function call.

Remove the index variable and reposition array_index_nospec(), so it's
calculated immediately before the array access.

Fixes: 14619d912b ("x86/fred: FRED entry/exit and dispatch code")
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260106131504.679932-1-andrew.cooper3@citrix.com
2026-02-23 11:19:11 +01:00
Hongbo Li
03c0d030f5 erofs: allow sharing page cache with the same aops only
Inode with identical data but different @aops cannot be mixed
because the page cache is managed by different subsystems (e.g.,
@aops for compressed on-disk inodes cannot handle plain on-disk
inodes).

In this patch, we never allow inodes to share the page cache
among plain, compressed, and fileio cases. When a shared inode
is created, we initialize @aops that is the same as the initial
real inode, and subsequent inodes cannot share the page cache
if the inferred @aops differ from the corresponding shared inode.

This is reasonable as a first step because, in typical use cases,
if an inode is compressible, it will fall into compressed
inodes across different filesystem images unless users use plain
filesystems. However, in that cases, users will use plain
filesystems all the time.

Fixes: 5ef3208e3b ("erofs: introduce the page cache share feature")
Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2026-02-23 18:04:18 +08:00
Daniel J Blueman
32e0a7ad9c gpio: shared: fix memory leaks
On a Snapdragon X1 Elite laptop (Lenovo Yoga Slim 7x), kmemleak reports
three sets of:

unreferenced object 0xffff00080187f400 (size 1024):
  comm "swapper/0", pid 1, jiffies 4294667327
  hex dump (first 32 bytes):
    58 bd 70 01 08 00 ff ff 58 bd 70 01 08 00 ff ff  X.p.....X.p.....
    00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00  ................
  backtrace (crc 1665d1f8):
    kmemleak_alloc+0xf4/0x12c
    __kmalloc_cache_noprof+0x370/0x49c
    gpio_shared_make_ref+0x70/0x16c
    gpio_shared_of_traverse+0x4e8/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_init+0x34/0x1c4
    do_one_initcall+0x50/0x280
    kernel_init_freeable+0x290/0x33c
    kernel_init+0x28/0x14c
    ret_from_fork+0x10/0x20

unreferenced object 0xffff00080170c140 (size 8):
  comm "swapper/0", pid 1, jiffies 4294667327
  hex dump (first 8 bytes):
    72 65 73 65 74 00 00 00                          reset...
  backtrace (crc fc24536):
    kmemleak_alloc+0xf4/0x12c
    __kmalloc_node_track_caller_noprof+0x3c4/0x584
    kstrdup+0x4c/0xcc
    gpio_shared_make_ref+0x8c/0x16c
    gpio_shared_of_traverse+0x4e8/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_of_traverse+0x200/0x5f4
    gpio_shared_init+0x34/0x1c4
    do_one_initcall+0x50/0x280
    kernel_init_freeable+0x290/0x33c
    kernel_init+0x28/0x14c
    ret_from_fork+0x10/0x20

Fix this by decrementing the reference count of each list entry rather than
only the first.

Fix verified on the same laptop.

Fixes: a060b8c511 gpiolib: implement low-level, shared GPIO support
Signed-off-by: Daniel J Blueman <daniel@quora.org>
Link: https://patch.msgid.link/20260220093452.101655-1-daniel@quora.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
2026-02-23 11:01:13 +01:00
Maulik Shah
09a30b7a03 pinctrl: qcom: qcs615: Add missing dual edge GPIO IRQ errata flag
Wakeup capable GPIOs uses PDC as parent IRQ chip and PDC on qcs615 do not
support dual edge IRQs. Add missing wakeirq_dual_edge_errata configuration
to enable workaround for dual edge GPIO IRQs.

Fixes: b698f36a9d ("pinctrl: qcom: add the tlmm driver for QCS615 platform")
Signed-off-by: Maulik Shah <maulik.shah@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-23 10:34:04 +01:00
Florian Eckert
3e00b1b332 pinctrl: equilibrium: fix warning trace on load
The callback functions 'eqbr_irq_mask()' and 'eqbr_irq_ack()' are also
called in the callback function 'eqbr_irq_mask_ack()'. This is done to
avoid source code duplication. The problem, is that in the function
'eqbr_irq_mask()' also calles the gpiolib function 'gpiochip_disable_irq()'

This generates the following warning trace in the log for every gpio on
load.

[    6.088111] ------------[ cut here ]------------
[    6.092440] WARNING: CPU: 3 PID: 1 at drivers/gpio/gpiolib.c:3810 gpiochip_disable_irq+0x39/0x50
[    6.097847] Modules linked in:
[    6.097847] CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Tainted: G        W          6.12.59+ #0
[    6.097847] Tainted: [W]=WARN
[    6.097847] RIP: 0010:gpiochip_disable_irq+0x39/0x50
[    6.097847] Code: 39 c6 48 19 c0 21 c6 48 c1 e6 05 48 03 b2 38 03 00 00 48 81 fe 00 f0 ff ff 77 11 48 8b 46 08 f6 c4 02 74 06 f0 80 66 09 fb c3 <0f> 0b 90 0f 1f 40 00 c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40
[    6.097847] RSP: 0000:ffffc9000000b830 EFLAGS: 00010046
[    6.097847] RAX: 0000000000000045 RBX: ffff888001be02a0 RCX: 0000000000000008
[    6.097847] RDX: ffff888001be9000 RSI: ffff888001b2dd00 RDI: ffff888001be02a0
[    6.097847] RBP: ffffc9000000b860 R08: 0000000000000000 R09: 0000000000000000
[    6.097847] R10: 0000000000000001 R11: ffff888001b2a154 R12: ffff888001be0514
[    6.097847] R13: ffff888001be02a0 R14: 0000000000000008 R15: 0000000000000000
[    6.097847] FS:  0000000000000000(0000) GS:ffff888041d80000(0000) knlGS:0000000000000000
[    6.097847] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    6.097847] CR2: 0000000000000000 CR3: 0000000003030000 CR4: 00000000001026b0
[    6.097847] Call Trace:
[    6.097847]  <TASK>
[    6.097847]  ? eqbr_irq_mask+0x63/0x70
[    6.097847]  ? no_action+0x10/0x10
[    6.097847]  eqbr_irq_mask_ack+0x11/0x60

In an other driver (drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c) the
interrupt is not disabled here.

To fix this, do not call the 'eqbr_irq_mask()' and 'eqbr_irq_ack()'
function. Implement instead this directly without disabling the interrupts.

Fixes: 52066a53bd ("pinctrl: equilibrium: Convert to immutable irq_chip")
Signed-off-by: Florian Eckert <fe@dev.tdt.de>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-23 10:28:04 +01:00
Florian Eckert
1f96b84835 pinctrl: equilibrium: rename irq_chip function callbacks
Renaming of the irq_chip callback functions to improve clarity.

Signed-off-by: Florian Eckert <fe@dev.tdt.de>
Signed-off-by: Linus Walleij <linusw@kernel.org>
2026-02-23 10:28:04 +01:00
Maxime Ripard
c17ee635fd Merge drm/drm-fixes into drm-misc-fixes
7.0-rc1 was just released, let's merge it to kick the new release cycle.

Signed-off-by: Maxime Ripard <mripard@kernel.org>
2026-02-23 10:09:45 +01:00
Geoffrey D. Bennett
0d58273be0 ALSA: usb-audio: Skip clock selector for Focusrite devices
Add QUIRK_FLAG_SKIP_CLOCK_SELECTOR for Focusrite devices.

During interface parsing, snd_usb_clock_find_source() reads the clock
selector value then writes it back unchanged. On Focusrite devices
this redundant write results in a ~300ms delay per altsetting, adding
~1.8s to probe time on a typical device with 6 altsettings.

Enabling SKIP_CLOCK_SELECTOR skips the redundant write-back.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/00e53ae0a508b41516b41833daa17823381a649c.1771594828.git.g@b4.vu
2026-02-23 09:57:10 +01:00
Geoffrey D. Bennett
38c322068a ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP
Add a quirk flag to skip the usb_set_interface(),
snd_usb_init_pitch(), and snd_usb_init_sample_rate() calls in
__snd_usb_parse_audio_interface(). These are redundant with
snd_usb_endpoint_prepare() at stream-open time.

Enable the quirk for Focusrite devices, as init_sample_rate(rate_max)
sets 192kHz during probing, which disables the internal mixer and Air
and Safe modes.

Fixes: 16f1f83844 ("Revert "ALSA: usb-audio: Drop superfluous interface setup at parsing"")
Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/65a7909b15f9feb76c2a6f4f8814c240ddc50737.1771594828.git.g@b4.vu
2026-02-23 09:57:10 +01:00
Geoffrey D. Bennett
a8cc55bf81 ALSA: usb-audio: Remove VALIDATE_RATES quirk for Focusrite devices
Remove QUIRK_FLAG_VALIDATE_RATES for Focusrite. With the previous
commit, focusrite_valid_sample_rate() produces correct rate tables
without USB probing.

QUIRK_FLAG_VALIDATE_RATES sends SET_CUR requests for each rate (~25ms
each) and leaves the device at 192kHz. This is a problem because that
rate: 1) disables the internal mixer, so outputs are silent until an
application opens the PCM and sets a lower rate, and 2) the Air and
Safe modes get disabled.

Fixes: 5963e52621 ("ALSA: usb-audio: Enable rate validation for Scarlett devices")
Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/09b9c012024c998c4ca14bd876ef0dce0d0b6101.1771594828.git.g@b4.vu
2026-02-23 09:57:09 +01:00
Geoffrey D. Bennett
24d2d3c5f9 ALSA: usb-audio: Improve Focusrite sample rate filtering
Replace the bLength == 10 max_rate check in
focusrite_valid_sample_rate() with filtering that also examines the
bmControls VAL_ALT_SETTINGS bit.

When VAL_ALT_SETTINGS is readable, the device uses strict
per-altsetting rate filtering (only the highest rate pair for that
altsetting is valid). When it is not readable, all rates up to
max_rate are valid.

For devices without the bLength == 10 Format Type descriptor extension
but with VAL_ALT_SETTINGS readable and multiple altsettings (only seen
in Scarlett 18i8 3rd Gen playback), fall back to the Focusrite
convention: alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz.

This produces correct rate tables for all tested Focusrite devices
(all Scarlett 2nd, 3rd, and 4th Gen, Clarett+, and Vocaster) using
only USB descriptors, allowing QUIRK_FLAG_VALIDATE_RATES to be removed
for Focusrite in the next commit.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/7e18c1f393a6ecb6fc75dd867a2c4dbe135e3e22.1771594828.git.g@b4.vu
2026-02-23 09:57:09 +01:00
Juhyung Park
9fb16a5c5f ALSA: hda/realtek: add quirk for Samsung Galaxy Book Flex (NT950QCT-A38A)
Similar to other Samsung laptops, NT950QCT also requires the
ALC298_FIXUP_SAMSUNG_AMP quirk applied.

Cc: <stable@vger.kernel.org>
Signed-off-by: Juhyung Park <qkrwngud825@gmail.com>
Link: https://patch.msgid.link/20260222122609.281191-2-qkrwngud825@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-23 09:54:43 +01:00
Juhyung Park
43a44fb7f2 ALSA: hda/realtek: fix model name typo for Samsung Galaxy Book Flex (NT950QCG-X716)
There's no product named "Samsung Galaxy Flex Book".
Use the correct "Samsung Galaxy Book Flex" name.

Link: https://www.samsung.com/sec/support/model/NT950QCG-X716
Link: https://www.samsung.com/us/computing/galaxy-books/galaxy-book-flex/galaxy-book-flex-15-6-qled-512gb-storage-s-pen-included-np950qcg-k01us
Cc: <stable@vger.kernel.org>
Signed-off-by: Juhyung Park <qkrwngud825@gmail.com>
Link: https://patch.msgid.link/20260222122609.281191-1-qkrwngud825@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-23 09:54:32 +01:00
Panagiotis Foliadis
cbddd30341 ALSA: hda/realtek: Add quirk for Acer Aspire V3-572G
The Acer Aspire V3-572G has a combo jack (ALC283) but the BIOS
sets pin 0x19 to 0x411111f0 (not connected), so the headset mic
is not detected.

Add a quirk to override pin 0x19 as a headset mic and enable
headset mode.

Cc: stable@vger.kernel.org
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221075
Suggested-by: Charalampos Mitrodimas <charmitro@posteo.net>
Signed-off-by: Panagiotis Foliadis <pfoliadis@posteo.net>
Reviewed-by: Charalampos Mitrodimas <charmitro@posteo.net>
Link: https://patch.msgid.link/20260221-fix-detect-mic-v1-1-b6e427b5275d@posteo.net
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-23 09:53:59 +01:00
Geoffrey D. Bennett
1d24148336 ALSA: scarlett2: Fix DSP filter control array handling
scarlett2_add_dsp_ctls() was incorrectly storing the precomp and PEQ
filter coefficient control pointers into the precomp_flt_switch_ctls
and peq_flt_switch_ctls arrays instead of the intended targets
precomp_flt_ctls and peq_flt_ctls. Pass NULL instead, as the filter
coefficient control pointers are not used, and remove the unused
precomp_flt_ctls and peq_flt_ctls arrays from struct scarlett2_data.

Additionally, scarlett2_update_filter_values() was reading
dsp_input_count * peq_flt_count values for
SCARLETT2_CONFIG_PEQ_FLT_SWITCH, but the peq_flt_switch array is
indexed only by dsp_input_count (one switch per DSP input, not per
filter). Fix the read count.

Fixes: b64678eb4e ("ALSA: scarlett2: Add DSP controls")
Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://patch.msgid.link/86497b71db060677d97c38a6ce5f89bb3b25361b.1771581197.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-23 09:51:55 +01:00
Sean Rhodes
1cb3c20688 ALSA: hda/realtek: Fix speaker pop on Star Labs StarFighter
On Star Labs StarFighter (Realtek ALC233/235), the internal speakers can
emit an audible pop when entering or leaving runtime suspend.

Mute the speaker output paths via snd_hda_gen_shutup_speakers() in the
Realtek shutup callback before the codec is powered down.

This is enough to avoid the pop without special EAPD handling.

Test results:
- runtime PM pop fixed
- still reaches D3 (PCI 0000:00:1f.3 power_state=D3hot)
- does not address pops on cold boot (G3 exit) or around display manager
  start/shutdown

journalctl -k (boot):
- snd_hda_codec_alc269 hdaudioC0D0: ALC233: picked fixup for PCI SSID
  7017:2014
- snd_hda_codec_alc269 hdaudioC0D0: autoconfig for ALC233: line_outs=1
  (0x1b/0x0/0x0/0x0/0x0) type:speaker

Suggested-by: Takashi Iwai <tiwai@suse.com>
Tested-by: Sean Rhodes <sean@starlabs.systems>
Signed-off-by: Sean Rhodes <sean@starlabs.systems>
Link: https://patch.msgid.link/4d5fb71b132bb283fd41c622b8413770b2065242.1771532060.git.sean@starlabs.systems
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2026-02-23 09:50:51 +01:00
Ramanathan Choodamani
2259d14499 wifi: mac80211: set default WMM parameters on all links
Currently, mac80211 only initializes default WMM parameters
on the deflink during do_open(). For MLO cases, this
leaves the additional links without proper WMM defaults
if hostapd does not supply per-link WMM parameters, leading
to inconsistent QoS behavior across links.

Set default WMM parameters for each link during
ieee80211_vif_update_links(), because this ensures all
individual links in an MLD have valid WMM settings during
bring-up and behave consistently across different BSS.

Signed-off-by: Ramanathan Choodamani <quic_rchoodam@quicinc.com>
Signed-off-by: Aishwarya R <aishwarya.r@oss.qualcomm.com>
Link: https://patch.msgid.link/20260205094216.3093542-1-aishwarya.r@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 09:30:20 +01:00
Daniel Hodges
03cc8f90d0 wifi: libertas: fix use-after-free in lbs_free_adapter()
The lbs_free_adapter() function uses timer_delete() (non-synchronous)
for both command_timer and tx_lockup_timer before the structure is
freed. This is incorrect because timer_delete() does not wait for
any running timer callback to complete.

If a timer callback is executing when lbs_free_adapter() is called,
the callback will access freed memory since lbs_cfg_free() frees the
containing structure immediately after lbs_free_adapter() returns.

Both timer callbacks (lbs_cmd_timeout_handler and lbs_tx_lockup_handler)
access priv->driver_lock, priv->cur_cmd, priv->dev, and other fields,
which would all be use-after-free violations.

Use timer_delete_sync() instead to ensure any running timer callback
has completed before returning.

This bug was introduced in commit 8f641d93c3 ("libertas: detect TX
lockups and reset hardware") where del_timer() was used instead of
del_timer_sync() in the cleanup path. The command_timer has had the
same issue since the driver was first written.

Fixes: 8f641d93c3 ("libertas: detect TX lockups and reset hardware")
Fixes: 954ee164f4 ("[PATCH] libertas: reorganize and simplify init sequence")
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Hodges <git@danielhodges.dev>
Link: https://patch.msgid.link/20260206195356.15647-1-git@danielhodges.dev
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 09:28:14 +01:00
Chen-Yu Tsai
25723454f6 wifi: mwifiex: Fix dev_alloc_name() return value check
dev_alloc_name() returns the allocated ID on success, which could be
over 0.

Fix the return value check to check for negative error codes.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/all/aYmsQfujoAe5qO02@stanley.mountain/
Fixes: 7bab5bdb81 ("wifi: mwifiex: Allocate dev name earlier for interface workqueue name")
Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Link: https://patch.msgid.link/20260210100337.1131279-1-wenst@chromium.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 09:25:48 +01:00
Radu Sabau
9990cd4f88 iio: imu: adis: Fix NULL pointer dereference in adis_init
The adis_init() function dereferences adis->ops to check if the
individual function pointers (write, read, reset) are NULL, but does
not first check if adis->ops itself is NULL.

Drivers like adis16480, adis16490, adis16545 and others do not set
custom ops and rely on adis_init() assigning the defaults. Since struct
adis is zero-initialized by devm_iio_device_alloc(), adis->ops is NULL
when adis_init() is called, causing a NULL pointer dereference:

    Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
    pc : adis_init+0xc0/0x118
    Call trace:
     adis_init+0xc0/0x118
     adis16480_probe+0xe0/0x670

Fix this by checking if adis->ops is NULL before dereferencing it,
falling through to assign the default ops in that case.

Fixes: 3b29bcee8f ("iio: imu: adis: Add custom ops struct")
Signed-off-by: Radu Sabau <radu.sabau@analog.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Reviewed-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:39 +00:00
Jean-Baptiste Maneyrol
2617595538 iio: imu: inv_icm45600: fix regulator put warning when probe fails
When the driver probe fails we encounter a regulator put warning
because vddio regulator is not stopped before release. The issue
comes from pm_runtime not already setup when core probe fails and
the vddio regulator disable callback is called.

Fix the issue by setting pm_runtime active early before vddio
regulator resource cleanup. This requires to cut pm_runtime
set_active and enable in 2 function calls.

Fixes: 7ff021a3fa ("iio: imu: inv_icm45600: add new inv_icm45600 driver")
Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:39 +00:00
Nuno Sá
0642340440 iio: buffer: Fix wait_queue not being removed
In the edge case where the IIO device is unregistered while we're
buffering, we were directly returning an error without removing the wait
queue. Instead, set 'ret' and break out of the loop.

Fixes: 9eeee3b0bf ("iio: Add output buffer support")
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:39 +00:00
Antoniu Miclaus
acc3949aab iio: gyro: mpu3050-core: fix pm_runtime error handling
The return value of pm_runtime_get_sync() is not checked, allowing
the driver to access hardware that may fail to resume. The device
usage count is also unconditionally incremented. Use
pm_runtime_resume_and_get() which propagates errors and avoids
incrementing the usage count on failure.

In preenable, add pm_runtime_put_autosuspend() on set_8khz_samplerate()
failure since postdisable does not run when preenable fails.

Fixes: 3904b28efb ("iio: gyro: Add driver for the MPU-3050 gyroscope")
Reviewed-by: Linus Walleij <linusw@kernel.org>
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:39 +00:00
Antoniu Miclaus
91f950b4cb iio: gyro: mpu3050-i2c: fix pm_runtime error handling
The return value of pm_runtime_get_sync() is not checked, and the
function always returns success. This allows I2C mux operations to
proceed even when the device fails to resume.

Use pm_runtime_resume_and_get() and propagate its return value to
properly handle resume failures.

Fixes: 3904b28efb ("iio: gyro: Add driver for the MPU-3050 gyroscope")
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Ethan Tidmore
dd6183e427 iio: adc: ad7768-1: Fix ERR_PTR dereference in ad7768_fill_scale_tbl
The function iio_get_current_scan_type() can return an error pointer,
the return value scan_type is not checked for this and immediately
dereferenced which can cause a kernel panic.

Add check for IS_ERR() and propagate the error back.

Fixes: ff085189cb ("iio: adc: ad7768-1: add support for ADAQ776x-1 ADC Family")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Closes: https://lore.kernel.org/r/202602051234.5gArzLyZ-lkp@intel.com/
Signed-off-by: Ethan Tidmore <ethantidmore06@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Antoniu Miclaus
c3914ce196 iio: chemical: sps30_serial: fix buffer size in sps30_serial_read_meas()
sizeof(num) evaluates to sizeof(size_t) which is 8 bytes on 64-bit,
but the buffer elements are only 4 bytes. The same function already
uses sizeof(*meas) on line 312, making the mismatch evident. Use
sizeof(*meas) consistently.

Fixes: b2e171f5a5 ("iio: sps30: add support for serial interface")
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Acked-by: Tomasz Duszynski <tduszyns@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Antoniu Miclaus
216345f98c iio: chemical: sps30_i2c: fix buffer size in sps30_i2c_read_meas()
sizeof(num) evaluates to sizeof(size_t) (8 bytes on 64-bit) instead
of the intended __be32 element size (4 bytes). Use sizeof(*meas) to
correctly match the buffer element type.

Fixes: 8f3f130852 ("iio: sps30: separate core and interface specific code")
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Acked-by: Tomasz Duszynski <tduszyns@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Antoniu Miclaus
82ee91d6b1 iio: magnetometer: tlv493d: remove erroneous shift in X-axis data
TLV493D_BX2_MAG_X_AXIS_LSB is defined as GENMASK(7, 4). FIELD_GET()
already right-shifts bits [7:4] to [3:0], so the additional >> 4
discards most of the X-axis low nibble. The Y and Z axes correctly
omit this extra shift. Remove it.

Fixes: 106511d280 ("iio: magnetometer: add support for Infineon TLV493D 3D Magentic sensor")
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Yasin Lee
a318cfc085 iio: proximity: hx9023s: Protect against division by zero in set_samp_freq
Avoid division by zero when sampling frequency is unspecified.

Fixes: 60df548277 ("iio: proximity: Add driver support for TYHX's HX9023S capacitive proximity sensor")
Signed-off-by: Yasin Lee <yasin.lee.x@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Yasin Lee
585b90c016 iio: proximity: hx9023s: fix assignment order for __counted_by
Initialize fw_size before copying firmware data into the flexible
array member to match the __counted_by() annotation. This fixes the
incorrect assignment order that triggers runtime safety checks.

Fixes: e9ed97be4f ("iio: proximity: hx9023s: Added firmware file parsing functionality")
Signed-off-by: Yasin Lee <yasin.lee.x@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Chris Spencer
f55b9510cd iio: chemical: bme680: Fix measurement wait duration calculation
This function refers to the Bosch BME680 API as the source of the
calculation, but one of the constants does not match the Bosch
implementation. This appears to be a simple transposition of two digits,
resulting in a wait time that is too short. This can cause the following
'device measurement cycle incomplete' check to occasionally fail, returning
EBUSY to user space.

Adjust the constant to match the Bosch implementation and resolve the EBUSY
errors.

Fixes: 4241665e6e ("iio: chemical: bme680: Fix sensor data read operation")
Link: https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x.c#L521
Signed-off-by: Chris Spencer <spencercw@gmail.com>
Acked-by: Vasileios Amoiridis <vassilisamir@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:38 +00:00
Oleksij Rempel
5187e03b81 iio: dac: ds4424: reject -128 RAW value
The DS442x DAC uses sign-magnitude encoding, so -128 cannot be represented
in hardware (7-bit magnitude).

Previously, passing -128 resulted in a truncated value that programmed
0mA (magnitude 0) instead of the expected maximum negative current,
effectively failing silently.

Reject -128 to avoid producing the wrong current.

Fixes: d632a2bd8f ("iio: dac: ds4422/ds4424 dac driver")
Cc: stable@vger.kernel.org
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
Jean-Baptiste Maneyrol
7ef74d961d iio: imu: inv_icm45600: fix INT1 drive bit inverted
Drive bit must be set for open-drain mode and be cleared for push-pull
mode.

Referring to datasheet DS-000576_ICM-45605.pdf section 17.23
INT1_CONFIG2.

Fixes: 06674a72cf ("iio: imu: inv_icm45600: add buffer support in iio devices")
Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
Lukas Schmid
85e4614524 iio: potentiometer: mcp4131: fix double application of wiper shift
The MCP4131 wiper address is shifted twice when preparing the SPI
command in mcp4131_write_raw().

The address is already shifted when assigned to the local variable
"address", but is then shifted again when written to data->buf[0].
This results in an incorrect command being sent to the device and
breaks wiper writes to the second channel.

Remove the second shift and use the pre-shifted address directly
when composing the SPI transfer.

Fixes: 22d199a539 ("iio: potentiometer: add driver for Microchip MCP413X/414X/415X/416X/423X/424X/425X/426X")
Signed-off-by: Lukas Schmid <lukas.schmid@netcube.li>#
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
Andreas Kemnade
d23d763e00 iio: imu: inv-mpu9150: fix irq ack preventing irq storms
IRQ needs to be acked. for some odd reasons, reading from irq status does
not reliable help, enable acking from any register to be on the safe side
and read the irq status register. Comments in the code indicate a known
unreliability with that register.
The blamed commit was tested with mpu6050 in lg,p895 and lg,p880 according
to Tested-bys. But with the MPU9150 in the Epson Moverio BT-200 this leads
to irq storms without properly acking the irq.

Fixes: 0a3b517c80 ("iio: imu: inv_mpu6050: fix interrupt status read for old buggy chips")
Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
Acked-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
SeungJu Cheon
6c8bf4b604 iio: frequency: adf4377: Fix duplicated soft reset mask
The regmap_read_poll_timeout() uses ADF4377_0000_SOFT_RESET_R_MSK
twice instead of checking both SOFT_RESET_MSK (bit 0) and
SOFT_RESET_R_MSK (bit 7). This causes an incomplete reset status check.

The code first sets both SOFT_RESET and SOFT_RESET_R bits to 1 via
regmap_update_bits(), then polls for them to be cleared. Since we set
both bits before polling, we should be waiting for both to clear.

Fix by using both masks as done in regmap_update_bits() above.

Fixes: eda549e2e5 ("iio: frequency: adf4377: add support for ADF4377")
Signed-off-by: SeungJu Cheon <suunj1331@gmail.com>
Cc: Stable@vger.kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
Antoniu Miclaus
dd72e6c3cd iio: light: bh1780: fix PM runtime leak on error path
Move pm_runtime_put_autosuspend() before the error check to ensure
the PM runtime reference count is always decremented after
pm_runtime_get_sync(), regardless of whether the read operation
succeeds or fails.

Fixes: 1f0477f183 ("iio: light: new driver for the ROHM BH1780")
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
Jean-Baptiste Maneyrol
ffd32db826 iio: imu: inv_icm42600: fix odr switch when turning buffer off
ODR switch is done in 2 steps when FIFO is on : change the ODR register
value and acknowledge change when reading the FIFO ODR change flag.
When we are switching odr and turning buffer off just afterward, we are
losing the FIFO ODR change flag and ODR switch is blocked.

Fix the issue by force applying any waiting ODR change when turning
buffer off.

Fixes: ec74ae9fd3 ("iio: imu: inv_icm42600: add accurate timestamping")
Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
Jean-Baptiste Maneyrol
c9f3a59313 iio: imu: inv_icm42600: fix odr switch to the same value
ODR switch is done in 2 steps when FIFO is on : change the ODR register
value and acknowledge change when reading the FIFO ODR change flag.
When we are switching to the same odr value, we end up waiting for a
FIFO ODR flag that is never happening.

Fix the issue by doing nothing and exiting properly when we are
switching to the same ODR value.

Fixes: ec74ae9fd3 ("iio: imu: inv_icm42600: add accurate timestamping")
Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
2026-02-23 08:24:37 +00:00
Marek Szyprowski
243307a0d1 wifi: brcmfmac: Fix potential kernel oops when probe fails
When probe of the sdio brcmfmac device fails for some reasons (i.e.
missing firmware), the sdiodev->bus is set to error instead of NULL, thus
the cleanup later in brcmf_sdio_remove() tries to free resources via
invalid bus pointer. This happens because sdiodev->bus is set 2 times:
first in brcmf_sdio_probe() and second time in brcmf_sdiod_probe(). Fix
this by chaning the brcmf_sdio_probe() function to return the error code
and set sdio->bus only there.

Fixes: 0ff0843310 ("wifi: brcmfmac: Add optional lpo clock enable support")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Arend van Spriel<arend.vanspriel@broadcom.com>
Link: https://patch.msgid.link/20260203102133.1478331-1-m.szyprowski@samsung.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 09:24:30 +01:00
Johannes Berg
c854758abe wifi: radiotap: reject radiotap with unknown bits
The radiotap parser is currently only used with the radiotap
namespace (not with vendor namespaces), but if the undefined
field 18 is used, the alignment/size is unknown as well. In
this case, iterator->_next_ns_data isn't initialized (it's
only set for skipping vendor namespaces), and syzbot points
out that we later compare against this uninitialized value.

Fix this by moving the rejection of unknown radiotap fields
down to after the in-namespace lookup, so it will really use
iterator->_next_ns_data only for vendor namespaces, even in
case undefined fields are present.

Cc: stable@vger.kernel.org
Fixes: 33e5a2f776 ("wireless: update radiotap parser")
Reported-by: syzbot+b09c1af8764c0097bb19@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/69944a91.a70a0220.2c38d7.00fc.GAE@google.com
Link: https://patch.msgid.link/20260217120526.162647-2-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 09:23:44 +01:00
Daniil Dulov
767d23ade7 wifi: cfg80211: cancel rfkill_block work in wiphy_unregister()
There is a use-after-free error in cfg80211_shutdown_all_interfaces found
by syzkaller:

BUG: KASAN: use-after-free in cfg80211_shutdown_all_interfaces+0x213/0x220
Read of size 8 at addr ffff888112a78d98 by task kworker/0:5/5326
CPU: 0 UID: 0 PID: 5326 Comm: kworker/0:5 Not tainted 6.19.0-rc2 #2 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Workqueue: events cfg80211_rfkill_block_work
Call Trace:
 <TASK>
 dump_stack_lvl+0x116/0x1f0
 print_report+0xcd/0x630
 kasan_report+0xe0/0x110
 cfg80211_shutdown_all_interfaces+0x213/0x220
 cfg80211_rfkill_block_work+0x1e/0x30
 process_one_work+0x9cf/0x1b70
 worker_thread+0x6c8/0xf10
 kthread+0x3c5/0x780
 ret_from_fork+0x56d/0x700
 ret_from_fork_asm+0x1a/0x30
 </TASK>

The problem arises due to the rfkill_block work is not cancelled when wiphy
is being unregistered. In order to fix the issue cancel the corresponding
work in wiphy_unregister().

Found by Linux Verification Center (linuxtesting.org) with Syzkaller.

Fixes: 1f87f7d3a3 ("cfg80211: add rfkill support")
Cc: stable@vger.kernel.org
Signed-off-by: Daniil Dulov <d.dulov@aladdin.ru>
Link: https://patch.msgid.link/20260211082024.1967588-1-d.dulov@aladdin.ru
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 09:21:55 +01:00
Johannes Berg
c8d7f21ead wifi: cfg80211: wext: fix IGTK key ID off-by-one
The IGTK key ID must be 4 or 5, but the code checks against
key ID + 1, so must check against 5/6 rather than 4/5. Fix
that.

Reported-by: Jouni Malinen <j@w1.fi>
Fixes: 08645126dd ("cfg80211: implement wext key handling")
Link: https://patch.msgid.link/20260209181220.362205-2-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2026-02-23 09:18:59 +01:00
Stian Halseth
d5b5e8149a sparc: Fix page alignment in dma mapping
'phys' may include an offset within the page, while previously used
'base_paddr' was already page-aligned. This caused incorrect DMA mapping
in dma_4u_map_phys and dma_4v_map_phys.

Fix both functions by masking 'phys' with IO_PAGE_MASK, covering both
generic SPARC code and sun4v.

Fixes: 38c0d0ebf5 ("sparc: Use physical address DMA mapping")
Reported-by: Stian Halseth <stian@itx.no>
Closes: https://github.com/sparclinux/issues/issues/75
Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Stian Halseth <stian@itx.no>
Tested-by: Nathaniel Roach <nroach44@nroach44.id.au>
Tested-by: Han Gao <gaohan@iscas.ac.cn> # on SPARC Enterprise T5220
[mszyprow: adjusted commit description a bit]
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260218120056.3366-2-stian@itx.no
2026-02-23 08:33:51 +01:00
Jiri Pirko
47322c469d dma-mapping: avoid random addr value print out on error path
dma_addr is unitialized in dma_direct_map_phys() when swiotlb is forced
and DMA_ATTR_MMIO is set which leads to random value print out in
warning. Fix that by just returning DMA_MAPPING_ERROR.

Fixes: e53d29f957 ("dma-mapping: convert dma_direct_*map_page to be phys_addr_t based")
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260209153809.250835-2-jiri@resnulli.us
2026-02-23 08:26:54 +01:00
Nicholas Carlini
6b4f875aac ksmbd: fix signededness bug in smb_direct_prepare_negotiation()
smb_direct_prepare_negotiation() casts an unsigned __u32 value
from sp->max_recv_size and req->preferred_send_size to a signed
int before computing min_t(int, ...). A maliciously provided
preferred_send_size of 0x80000000 will return as smaller than
max_recv_size, and then be used to set the maximum allowed
alowed receive size for the next message.

By sending a second message with a large value (>1420 bytes)
the attacker can then achieve a heap buffer overflow.

This fix replaces min_t(int, ...) with min_t(u32)

Fixes: 0626e6641f ("cifsd: add server handler for central processing and tranport layers")
Signed-off-by: Nicholas Carlini <nicholas@carlini.com>
Reviewed-by: Stefan Metzmacher <metze@samba.org>
Acked-by: Stefan Metzmacher <metze@samba.org>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-02-22 21:27:33 -06:00
Eric Biggers
c5794709bc ksmbd: Compare MACs in constant time
To prevent timing attacks, MAC comparisons need to be constant-time.
Replace the memcmp() with the correct function, crypto_memneq().

Fixes: e2f34481b2 ("cifsd: add server-side procedures for SMB3")
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-02-22 21:27:28 -06:00
Danilo Krummrich
bfcaf4e8ae rust: io: macro_export io_define_read!() and io_define_write!()
Currently, the define_read!() and define_write!() I/O macros are crate
public. The only user outside of the I/O module is PCI (for the
configurations space I/O backend). Consequently, when CONFIG_PCI=n this
causes a compile time warning [1].

In order to fix this, rename the macros to io_define_read!() and
io_define_write!() and use #[macro_export] to export them.

This is better than making the crate public visibility conditional, as
eventually subsystems will have their own crate.

Also, I/O backends are valid to be implemented by drivers as well. For
instance, there are devices (such as GPUs) that run firmware which
allows to program other devices only accessible through the primary
device through indirect I/O.

Since the macros are now public, also add the corresponding
documentation.

Fixes: 121d87b28e ("rust: io: separate generic I/O helpers from MMIO implementation")
Reported-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Closes: https://lore.kernel.org/driver-core/CANiq72khOYkt6t5zwMvSiyZvWWHMZuNCMERXu=7K=_5tT-8Pgg@mail.gmail.com/ [1]
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Daniel Almeida <daniel.almeida@collabora.com>
Link: https://patch.msgid.link/20260216131534.65008-1-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-02-23 00:54:02 +01:00
David Lechner
be704107e7 regulator: dt-bindings: mt6359: make regulator names unique
Update the example devicetree with unique regulator names for all
regulators. This reflects the same change made to the actual .dtsi file.

Signed-off-by: David Lechner <dlechner@baylibre.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260219-mtk-mt6359-fix-regulator-names-v1-2-ee0fcebfe1d9@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2026-02-22 23:51:39 +00:00
Hao Yu
b7497b5a99 hwmon: (aht10) Fix initialization commands for AHT20
According to the AHT20 datasheet (updated to V1.0 after the 2023.09
version), the initialization command for AHT20 is 0b10111110 (0xBE).
The previous sequence (0xE1) used in earlier versions is no longer
compatible with newer AHT20 sensors. Update the initialization
command to ensure the sensor is properly initialized.

While at it, use binary notation for DHT20_CMD_INIT to match the notation
used in the datasheet.

Fixes: d2abcb5cc8 ("hwmon: (aht10) Add support for compatible aht20")
Signed-off-by: Hao Yu <haoyufine@gmail.com>
Link: https://lore.kernel.org/r/20260222170332.1616-3-haoyufine@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-02-22 15:48:30 -08:00
Randy Dunlap
ac209a7d81 hwmon: (emc1403) correct a malformed email address
Add a closing '>' to Kalhan's emaill address.

line 60:  Kalhan Trisal <kalhan.trisal@intel.com

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20260215010327.1687304-1-rdunlap@infradead.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-02-22 15:48:30 -08:00
Guenter Roeck
579b86f3c2 hwmon: (macsmc) Fix overflows, underflows, and sign extension
The macsmc-hwmon driver experienced several issues related to value
scaling and type conversion:

1. macsmc_hwmon_read_f32_scaled() clipped values to INT_MAX/INT_MIN.
   On 64-bit systems, hwmon supports long values, so clipping to
   32-bit range was premature and caused loss of range for high-power
   sensors. Changed it to use long and clip to LONG_MAX/LONG_MIN.
2. The overflow check in macsmc_hwmon_read_f32_scaled() used 1UL,
   which is 32-bit on some platforms. Switched to 1ULL.
3. macsmc_hwmon_read_key() used a u32 temporary variable for f32
   values. When assigned to a 64-bit long, negative values were
   zero-extended instead of sign-extended, resulting in large
   positive numbers.
4. macsmc_hwmon_read_ioft_scaled() used mult_frac() which could
   overflow during intermediate multiplication. Switched to
   mul_u64_u32_div() to handle the 64-bit multiplication safely.
5. ioft values (unsigned 48.16) could overflow long when scaled
   by 1,000,000. Added explicit clipping to LONG_MAX in the caller.
6. macsmc_hwmon_write_f32() truncated its long argument to int,
   potentially causing issues for large values.

Fix these issues by using appropriate types and helper functions.

Fixes: 785205fd81 ("hwmon: Add Apple Silicon SMC hwmon driver")
Cc: James Calligeros <jcalligeros99@gmail.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Neal Gompa <neal@gompa.dev>
Cc: Janne Grunau <j@jannau.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20260129175112.3751907-3-linux@roeck-us.net
Reviewed-by: James Calligeros <jcalligeros99@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-02-22 15:48:30 -08:00
Guenter Roeck
5dd69b8649 hwmon: (macsmc) Fix regressions in Apple Silicon SMC hwmon driver
The recently added macsmc-hwmon driver contained several critical
bugs in its sensor population logic and float conversion routines.

Specifically:
- The voltage sensor population loop used the wrong prefix ("volt-"
  instead of "voltage-") and incorrectly assigned sensors to the
  temperature sensor array (hwmon->temp.sensors) instead of the
  voltage sensor array (hwmon->volt.sensors). This would lead to
  out-of-bounds memory access or data corruption when both temperature
  and voltage sensors were present.
- The float conversion in macsmc_hwmon_write_f32() had flawed exponent
  logic for values >= 2^24 and lacked masking for the mantissa, which
  could lead to incorrect values being written to the SMC.

Fix these issues to ensure correct sensor registration and reliable
manual fan control.

Confirm that the reported overflow in FIELD_PREP is fixed by declaring
macsmc_hwmon_write_f32() as __always_inline for a compile test.

Fixes: 785205fd81 ("hwmon: Add Apple Silicon SMC hwmon driver")
Reported-by: Nathan Chancellor <nathan@kernel.org>
Closes: https://lore.kernel.org/linux-hwmon/20260119195817.GA1035354@ax162/
Cc: James Calligeros <jcalligeros99@gmail.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Neal Gompa <neal@gompa.dev>
Cc: Janne Grunau <j@jannau.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Link: https://lore.kernel.org/r/20260129175112.3751907-2-linux@roeck-us.net
Reviewed-by: James Calligeros <jcalligeros99@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2026-02-22 15:48:30 -08:00
Andy Shevchenko
2692c614f8 device property: Allow secondary lookup in fwnode_get_next_child_node()
When device_get_child_node_count() got split to the fwnode and device
respective APIs, the fwnode didn't inherit the ability to traverse over
the secondary fwnode. Hence any user, that switches from device to fwnode
API misses this feature. In particular, this was revealed by the commit
1490cbb9db ("device property: Split fwnode_get_child_node_count()")
that effectively broke the GPIO enumeration on Intel Galileo boards.
Fix this by moving the secondary lookup from device to fwnode API.

Note, in general no device_*() API should go into the depth of the fwnode
implementation.

Fixes: 114dbb4fa7 ("drivers property: When no children in primary, try secondary")
Cc: stable@vger.kernel.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://patch.msgid.link/20260210135822.47335-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-02-23 00:10:31 +01:00
Henrique Carvalho
663c28469d smb: client: fix cifs_pick_channel when channels are equally loaded
cifs_pick_channel uses (start % chan_count) when channels are equally
loaded, but that can return a channel that failed the eligibility
checks.

Drop the fallback and return the scan-selected channel instead. If none
is eligible, keep the existing behavior of using the primary channel.

Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Acked-by: Meetakshi Setiya <msetiya@microsoft.com>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2026-02-22 16:52:50 -06:00
Shawn Lin
24ed11ee5b soc: rockchip: grf: Add missing of_node_put() when returning
Fix the smatch checking:
drivers/soc/rockchip/grf.c:249 rockchip_grf_init()
warn: inconsistent refcounting 'np->kobj.kref.refcount.refs.counter':

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Fixes: 75fb63ae03 ("soc: rockchip: grf: Support multiple grf to be handled")
Closes: https://lore.kernel.org/all/aYXvgTcUJWQL2can@stanley.mountain/
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Link: https://patch.msgid.link/1770814957-17762-1-git-send-email-shawn.lin@rock-chips.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2026-02-22 23:27:20 +01:00
Marc Zyngier
08f97454b7 KVM: arm64: Fix protected mode handling of pages larger than 4kB
Since 3669ddd8fa ("KVM: arm64: Add a range to pkvm_mappings"),
pKVM tracks the memory that has been mapped into a guest in a
side data structure. Crucially, it uses it to find out whether
a page has already been mapped, and therefore refuses to map it
twice. So far, so good.

However, this very patch completely breaks non-4kB page support,
with guests being unable to boot. The most obvious symptom is that
we take the same fault repeatedly, and not making forward progress.
A quick investigation shows that this is because of the above
rejection code.

As it turns out, there are multiple issues at play:

- while the HPFAR_EL2 register gives you the faulting IPA minus
  the bottom 12 bits, it will still give you the extra bits that
  are part of the page offset for anything larger than 4kB,
  even for a level-3 mapping

- pkvm_pgtable_stage2_map() assumes that the address passed as
  a parameter is aligned to the size of the intended mapping

- the faulting address is only aligned for a non-page mapping

When the planets are suitably aligned (pun intended), the guest
faults on a page by accessing it past the bottom 4kB, and extra bits
get set in the HPFAR_EL2 register. If this results in a page mapping
(which is likely with large granule sizes), nothing aligns it further
down, and pkvm_mapping_iter_first() finds an intersection that
doesn't really exist. We assume this is a spurious fault and return
-EAGAIN. And again...

This doesn't hit outside of the protected code, as the page table
code always aligns the IPA down to a page boundary, hiding the issue
for everyone else.

Fix it by always forcing the alignment on vma_pagesize, irrespective
of the value of vma_pagesize.

Fixes: 3669ddd8fa ("KVM: arm64: Add a range to pkvm_mappings")
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://https://patch.msgid.link/20260222141000.3084258-1-maz@kernel.org
Cc: stable@vger.kernel.org
2026-02-22 18:56:17 +00:00
Sven Eckelmann
cfc83a3c71 batman-adv: Avoid double-rtnl_lock ELP metric worker
batadv_v_elp_get_throughput() might be called when the RTNL lock is already
held. This could be problematic when the work queue item is cancelled via
cancel_delayed_work_sync() in batadv_v_elp_iface_disable(). In this case,
an rtnl_lock() would cause a deadlock.

To avoid this, rtnl_trylock() was used in this function to skip the
retrieval of the ethtool information in case the RTNL lock was already
held.

But for cfg80211 interfaces, batadv_get_real_netdev() was called - which
also uses rtnl_lock(). The approach for __ethtool_get_link_ksettings() must
also be used instead and the lockless version __batadv_get_real_netdev()
has to be called.

Cc: stable@vger.kernel.org
Fixes: 8c8ecc98f5 ("batman-adv: Drop unmanaged ELP metric worker")
Reported-by: Christian Schmidbauer <github@grische.xyz>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Tested-by: Sören Skaarup <freifunk_nordm4nn@gmx.de>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
2026-02-21 13:01:55 +01:00
Tomasz Pakuła
97d5c8f5c0 HID: pidff: Fix condition effect bit clearing
As reported by MPDarkGuy on discord, NULL pointer dereferences were
happening because not all the conditional effects bits were cleared.

Properly clear all conditional effect bits from ffbit

Fixes: 7f3d7bc0df ("HID: pidff: Better quirk assigment when searching for fields")
Cc: stable@vger.kernel.org # 6.18.x
Signed-off-by: Tomasz Pakuła <tomasz.pakula.oficjalny@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2026-02-21 10:47:12 +01:00
Ralf Lici
d4f687fbbc ovpn: tcp - fix packet extraction from stream
When processing TCP stream data in ovpn_tcp_recv, we receive large
cloned skbs from __strp_rcv that may contain multiple coalesced packets.
The current implementation has two bugs:

1. Header offset overflow: Using pskb_pull with large offsets on
   coalesced skbs causes skb->data - skb->head to exceed the u16 storage
   of skb->network_header. This causes skb_reset_network_header to fail
   on the inner decapsulated packet, resulting in packet drops.

2. Unaligned protocol headers: Extracting packets from arbitrary
   positions within the coalesced TCP stream provides no alignment
   guarantees for the packet data causing performance penalties on
   architectures without efficient unaligned access. Additionally,
   openvpn's 2-byte length prefix on TCP packets causes the subsequent
   4-byte opcode and packet ID fields to be inherently misaligned.

Fix both issues by allocating a new skb for each openvpn packet and
using skb_copy_bits to extract only the packet content into the new
buffer, skipping the 2-byte length prefix. Also, check the length before
invoking the function that performs the allocation to avoid creating an
invalid skb.

If the packet has to be forwarded to userspace the 2-byte prefix can be
pushed to the head safely, without misalignment.

As a side effect, this approach also avoids the expensive linearization
that pskb_pull triggers on cloned skbs with page fragments. In testing,
this resulted in TCP throughput improvements of up to 74%.

Fixes: 11851cbd60 ("ovpn: implement TCP transport")
Signed-off-by: Ralf Lici <ralf@mandelbit.com>
Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2026-02-21 07:04:08 +00:00
Jakub Kicinski
3f1af48530 Merge branch 'bnxt_en-fix-rss-context-and-ntuple-filter-issues'
Michael Chan says:

====================
bnxt_en: Fix RSS context and ntuple filter issues

The first patch fixes the problem of ifup failing if one or more RSS
contexts were previously created.  The 2nd patch fixes ntuple filter
deletion errors in ifdown state.  The last patch adds self tests to
cover these failure cases.
====================

Link: https://patch.msgid.link/20260219185313.2682148-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 16:34:53 -08:00
Pavan Chebbi
ce5a0f4612 selftests: drv-net: rss_ctx: test RSS contexts persist after ifdown/up
Add a test to verify that RSS contexts persist across interface
down/up along with their associated Ntuple filters. Another test
that creates contexts/rules keeping interface down and test their
persistence is also added.

Tested on bnxt_en:

 TAP version 13
 1..1
 # timeout set to 0
 # selftests: drivers/net/hw: rss_ctx.py
 # TAP version 13
 # 1..2
 # ok 1 rss_ctx.test_rss_context_persist_create_and_ifdown
 # ok 2 rss_ctx.test_rss_context_persist_ifdown_and_create # SKIP Create context not supported with interface down
 # # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:1 error:0

Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260219185313.2682148-4-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 16:34:47 -08:00
Pavan Chebbi
c1bbd9900d bnxt_en: Fix deleting of Ntuple filters
Ntuple filters can be deleted when the interface
is down. The current code blindly sends the filter
delete command to FW. When the interface is down, all
the VNICs are deleted in the FW. When the VNIC is
freed in the FW, all the associated filters are also
freed. We need not send the free command explicitly.
Sending such command will generate FW error in the
dmesg.

In order to fix this, we can safely return from
bnxt_hwrm_cfa_ntuple_filter_free() when BNXT_STATE_OPEN
is not true which confirms the VNICs have been deleted.

Fixes: 8336a974f3 ("bnxt_en: Save user configured filters in a lookup list")
Suggested-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260219185313.2682148-3-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 16:34:47 -08:00
Pavan Chebbi
e123d9302d bnxt_en: Fix RSS context delete logic
We need to free the corresponding RSS context VNIC
in FW everytime an RSS context is deleted in driver.
Commit 667ac333db added a check to delete the VNIC
in FW only when netif_running() is true to help delete
RSS contexts with interface down.

Having that condition will make the driver leak VNICs
in FW whenever close() happens with active RSS contexts.
On the subsequent open(), as part of RSS context restoration,
we will end up trying to create extra VNICs for which we
did not make any reservation. FW can fail this request,
thereby making us lose active RSS contexts.

Suppose an RSS context is deleted already and we try to
process a delete request again, then the HWRM functions
will check for validity of the request and they simply
return if the resource is already freed. So, even for
delete-when-down cases, netif_running() check is not
necessary.

Remove the netif_running() condition check when deleting
an RSS context.

Reported-by: Jakub Kicinski <kicinski@meta.com>
Fixes: 667ac333db ("eth: bnxt: allow deleting RSS contexts when the device is down")
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20260219185313.2682148-2-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 16:34:47 -08:00
Kuniyuki Iwashima
470c7ca2b4 udplite: Fix null-ptr-deref in __udp_enqueue_schedule_skb().
syzbot reported null-ptr-deref of udp_sk(sk)->udp_prod_queue. [0]

Since the cited commit, udp_lib_init_sock() can fail, as can
udp_init_sock() and udpv6_init_sock().

Let's handle the error in udplite_sk_init() and udplitev6_sk_init().

[0]:
BUG: KASAN: null-ptr-deref in instrument_atomic_read include/linux/instrumented.h:82 [inline]
BUG: KASAN: null-ptr-deref in atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline]
BUG: KASAN: null-ptr-deref in __udp_enqueue_schedule_skb+0x151/0x1480 net/ipv4/udp.c:1719
Read of size 4 at addr 0000000000000008 by task syz.2.18/2944

CPU: 1 UID: 0 PID: 2944 Comm: syz.2.18 Not tainted syzkaller #0 PREEMPTLAZY
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025
Call Trace:
 <IRQ>
 dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120
 kasan_report+0xa2/0xe0 mm/kasan/report.c:595
 check_region_inline mm/kasan/generic.c:-1 [inline]
 kasan_check_range+0x264/0x2c0 mm/kasan/generic.c:200
 instrument_atomic_read include/linux/instrumented.h:82 [inline]
 atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline]
 __udp_enqueue_schedule_skb+0x151/0x1480 net/ipv4/udp.c:1719
 __udpv6_queue_rcv_skb net/ipv6/udp.c:795 [inline]
 udpv6_queue_rcv_one_skb+0xa2e/0x1ad0 net/ipv6/udp.c:906
 udp6_unicast_rcv_skb+0x227/0x380 net/ipv6/udp.c:1064
 ip6_protocol_deliver_rcu+0xe17/0x1540 net/ipv6/ip6_input.c:438
 ip6_input_finish+0x191/0x350 net/ipv6/ip6_input.c:489
 NF_HOOK+0x354/0x3f0 include/linux/netfilter.h:318
 ip6_input+0x16c/0x2b0 net/ipv6/ip6_input.c:500
 NF_HOOK+0x354/0x3f0 include/linux/netfilter.h:318
 __netif_receive_skb_one_core net/core/dev.c:6149 [inline]
 __netif_receive_skb+0xd3/0x370 net/core/dev.c:6262
 process_backlog+0x4d6/0x1160 net/core/dev.c:6614
 __napi_poll+0xae/0x320 net/core/dev.c:7678
 napi_poll net/core/dev.c:7741 [inline]
 net_rx_action+0x60d/0xdc0 net/core/dev.c:7893
 handle_softirqs+0x209/0x8d0 kernel/softirq.c:622
 do_softirq+0x52/0x90 kernel/softirq.c:523
 </IRQ>
 <TASK>
 __local_bh_enable_ip+0xe7/0x120 kernel/softirq.c:450
 local_bh_enable include/linux/bottom_half.h:33 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:924 [inline]
 __dev_queue_xmit+0x109c/0x2dc0 net/core/dev.c:4856
 __ip6_finish_output net/ipv6/ip6_output.c:-1 [inline]
 ip6_finish_output+0x158/0x4e0 net/ipv6/ip6_output.c:219
 NF_HOOK_COND include/linux/netfilter.h:307 [inline]
 ip6_output+0x342/0x580 net/ipv6/ip6_output.c:246
 ip6_send_skb+0x1d7/0x3c0 net/ipv6/ip6_output.c:1984
 udp_v6_send_skb+0x9a5/0x1770 net/ipv6/udp.c:1442
 udp_v6_push_pending_frames+0xa2/0x140 net/ipv6/udp.c:1469
 udpv6_sendmsg+0xfe0/0x2830 net/ipv6/udp.c:1759
 sock_sendmsg_nosec net/socket.c:727 [inline]
 __sock_sendmsg+0xe5/0x270 net/socket.c:742
 __sys_sendto+0x3eb/0x580 net/socket.c:2206
 __do_sys_sendto net/socket.c:2213 [inline]
 __se_sys_sendto net/socket.c:2209 [inline]
 __x64_sys_sendto+0xde/0x100 net/socket.c:2209
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xd2/0xf20 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f67b4d9c629
Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f67b5c98028 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00007f67b5015fa0 RCX: 00007f67b4d9c629
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003
RBP: 00007f67b4e32b39 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000040000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f67b5016038 R14: 00007f67b5015fa0 R15: 00007ffe3cb66dd8
 </TASK>

Fixes: b650bf0977 ("udp: remove busylock and add per NUMA queues")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260219173142.310741-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 16:14:10 -08:00
Linus Walleij
594163ea88 net: ethernet: xscale: Check for PTP support properly
In ixp4xx_get_ts_info() ixp46x_ptp_find() is called
unconditionally despite this feature only existing on
ixp46x, leading to the following splat from tcpdump:

root@OpenWrt:~# tcpdump -vv -X -i eth0
(...)
Unable to handle kernel NULL pointer dereference at virtual address
  00000238 when read
(...)
Call trace:
 ptp_clock_index from ixp46x_ptp_find+0x1c/0x38
 ixp46x_ptp_find from ixp4xx_get_ts_info+0x4c/0x64
 ixp4xx_get_ts_info from __ethtool_get_ts_info+0x90/0x108
 __ethtool_get_ts_info from __dev_ethtool+0xa00/0x2648
 __dev_ethtool from dev_ethtool+0x160/0x234
 dev_ethtool from dev_ioctl+0x2cc/0x460
 dev_ioctl from sock_ioctl+0x1ec/0x524
 sock_ioctl from sys_ioctl+0x51c/0xa94
 sys_ioctl from ret_fast_syscall+0x0/0x44
 (...)
Segmentation fault

Check for ixp46x in ixp46x_ptp_find() before trying to set up
PTP to avoid this.

To avoid altering the returned error code from ixp4xx_hwtstamp_set()
which before this patch was -EOPNOTSUPP, we return -EOPNOTSUPP
from ixp4xx_hwtstamp_set() if ixp46x_ptp_find() fails no matter
the error code. The helper function ixp46x_ptp_find() helper
returns -ENODEV.

Fixes: 9055a2f591 ("ixp4xx_eth: make ptp support a platform driver")
Signed-off-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/20260219-ixp4xx-fix-ethernet-v3-1-f235ccc3cd46@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 16:10:24 -08:00
Dmitry Torokhov
2bb995e615 net: phy: qcom: qca807x: normalize return value of gpio_get
The GPIO get callback is expected to return 0 or 1 (or a negative error
code). Ensure that the value returned by qca807x_gpio_get() is
normalized to the [0, 1] range.

Fixes: 86ef402d80 ("gpiolib: sanitize the return value of gpio_chip::get()")
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/aZZeyr2ysqqk2GqA@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 16:09:07 -08:00
Jakub Kicinski
0aebd81fa8 Merge tag 'ipsec-2026-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
Steffen Klassert says:

====================
pull request (net): ipsec 2026-02-20

1) Check the value of ipv6_dev_get_saddr() to fix an
   uninitialized saddr in xfrm6_get_saddr().
   From Jiayuan Chen.

2) Skip the templates check for packet offload in tunnel
   mode. Is was already done by the hardware and causes
   an unexpected XfrmInTmplMismatch increase.
   From Leon Romanovsky.

3) Fix a unregister_netdevice stall due to not dropped
   refcounts by always flushing xfrm state and policy
   on a NETDEV_UNREGISTER event.
   From Tetsuo Handa.

* tag 'ipsec-2026-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec:
  xfrm: always flush state and policy upon NETDEV_UNREGISTER event
  xfrm: skip templates check for packet offload tunnel mode
  xfrm6: fix uninitialized saddr in xfrm6_get_saddr()
====================

Link: https://patch.msgid.link/20260220094133.14219-1-steffen.klassert@secunet.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 15:57:55 -08:00
Satish Kharat
9b8eeccd71 MAINTAINERS: update enic and usnic maintainers
Remove Christian Benvenuti from the enic/usnic maintainer lists because he
is no longer working on the drivers and the email address is no longer valid.
Keep Satish Kharat as the enic maintainer and the usnic co-maintainer.

Signed-off-by: Satish Kharat <satishkh@cisco.com>
Acked-by: Leon Romanovsky <leon@kernel.org>
Link: https://patch.msgid.link/20260219-enic-maintainers-update-v2-v1-1-c58aa11c2ea8@cisco.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-20 15:56:26 -08:00
Christoph Böhmwalder
0d195d3b20 drbd: fix null-pointer dereference on local read error
In drbd_request_endio(), READ_COMPLETED_WITH_ERROR is passed to
__req_mod() with a NULL peer_device:

  __req_mod(req, what, NULL, &m);

The READ_COMPLETED_WITH_ERROR handler then unconditionally passes this
NULL peer_device to drbd_set_out_of_sync(), which dereferences it,
causing a null-pointer dereference.

Fix this by obtaining the peer_device via first_peer_device(device),
matching how drbd_req_destroy() handles the same situation.

Cc: stable@vger.kernel.org
Reported-by: Tuo Li <islituo@gmail.com>
Link: https://lore.kernel.org/linux-block/20260104165355.151864-1-islituo@gmail.com
Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-20 07:09:06 -07:00
Osama Abdelkader
803ec1faf7 drm/bridge: samsung-dsim: Fix memory leak in error path
In samsung_dsim_host_attach(), drm_bridge_add() is called to add the
bridge. However, if samsung_dsim_register_te_irq() or
pdata->host_ops->attach() fails afterwards, the function returns
without removing the bridge, causing a memory leak.

Fix this by adding proper error handling with goto labels to ensure
drm_bridge_remove() is called in all error paths. Also ensure that
samsung_dsim_unregister_te_irq() is called if the attach operation
fails after the TE IRQ has been registered.

samsung_dsim_unregister_te_irq() function is moved without changes
to be before samsung_dsim_host_attach() to avoid forward declaration.

Fixes: e7447128ca ("drm: bridge: Generalize Exynos-DSI driver into a Samsung DSIM bridge")
Cc: stable@vger.kernel.org
Signed-off-by: Osama Abdelkader <osama.abdelkader@gmail.com>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Link: https://patch.msgid.link/20260209184115.10937-1-osama.abdelkader@gmail.com
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
2026-02-20 13:56:14 +01:00
Chen Ni
496daa2759 drm/bridge: synopsys: dw-dp: Check return value of devm_drm_bridge_add() in dw_dp_bind()
Return the value of devm_drm_bridge_add() in order to propagate the
error properly, if it fails due to resource allocation failure or bridge
registration failure.

This ensures that the bind function fails safely rather than proceeding
with a potentially incomplete bridge setup.

Fixes: b726970486 ("drm/bridge: synopsys: dw-dp: add bridge before attaching")
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Reviewed-by: Andy Yan <andyshrk@163.com>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Link: https://patch.msgid.link/20260206040621.4095517-1-nichen@iscas.ac.cn
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
2026-02-20 13:47:09 +01:00
Chen Ni
c5f8658f97 drm/imx: parallel-display: check return value of devm_drm_bridge_add() in imx_pd_probe()
Return the value of devm_drm_bridge_add() in order to propagate the
error properly, if it fails due to resource allocation failure or bridge
registration failure.

This ensures that the probe function fails safely rather than proceeding
with a potentially incomplete bridge setup.

Fixes: bf7e97910b ("drm/imx: parallel-display: add the bridge before attaching it")
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Link: https://patch.msgid.link/20260204090629.2209542-1-nichen@iscas.ac.cn
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
2026-02-20 13:37:03 +01:00
Ethan Tidmore
f6a495484a proc: Fix pointer error dereference
The function try_lookup_noperm() can return an error pointer. Add check
for error pointer.

Detected by Smatch:
fs/proc/base.c:2148 proc_fill_cache() error:
'child' dereferencing possible ERR_PTR()

Fixes: 1df98b8bbc ("proc_fill_cache(): clean up, get rid of pointless find_inode_number() use")
Signed-off-by: Ethan Tidmore <ethantidmore06@gmail.com>
Link: https://patch.msgid.link/20260219221001.1117135-1-ethantidmore06@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-20 11:21:16 +01:00
Vladimir Oltean
bfd264fbbb net: dsa: sja1105: protect link replay helpers against NULL phylink instance
There is a crash when unbinding the sja1105 driver under special
circumstances:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030
Call trace:
phylink_run_resolve_and_disable+0x10/0x90
sja1105_static_config_reload+0xc0/0x410
sja1105_vlan_filtering+0x100/0x140
dsa_port_vlan_filtering+0x13c/0x368
dsa_port_reset_vlan_filtering.isra.0+0xe8/0x198
dsa_port_bridge_leave+0x130/0x248
dsa_user_changeupper.part.0+0x74/0x158
dsa_user_netdevice_event+0x50c/0xa50
notifier_call_chain+0x78/0x148
raw_notifier_call_chain+0x20/0x38
call_netdevice_notifiers_info+0x58/0xa8
__netdev_upper_dev_unlink+0xac/0x220
netdev_upper_dev_unlink+0x38/0x70
del_nbp+0x1a4/0x320
br_del_if+0x3c/0xd8
br_device_event+0xf8/0x2d8
notifier_call_chain+0x78/0x148
raw_notifier_call_chain+0x20/0x38
call_netdevice_notifiers_info+0x58/0xa8
unregister_netdevice_many_notify+0x314/0x848
unregister_netdevice_queue+0xe8/0xf8
dsa_user_destroy+0x50/0xa8
dsa_port_teardown+0x80/0x98
dsa_switch_teardown_ports+0x4c/0xb8
dsa_switch_deinit+0x94/0xb8
dsa_switch_put_tree+0x2c/0xc0
dsa_unregister_switch+0x38/0x60
sja1105_remove+0x24/0x40
spi_remove+0x38/0x60
device_remove+0x54/0x90
device_release_driver_internal+0x1d4/0x230
device_driver_detach+0x20/0x38
unbind_store+0xbc/0xc8
---[ end trace 0000000000000000 ]---

which requires an explanation.

When a port offloads a bridge, the switch must be reset to change
the VLAN awareness state (the SJA1105_VLAN_FILTERING reason for
sja1105_static_config_reload()). When the port leaves a VLAN-aware
bridge, it must also be reset for the same reason: it is returning
to operation as a VLAN-unaware standalone port.

sja1105_static_config_reload() triggers the phylink link replay helpers.

Because sja1105 is a switch, it has multiple user ports. During unbind,
ports are torn down one by one in dsa_switch_teardown_ports() ->
dsa_port_teardown() -> dsa_user_destroy().

The crash happens when the first user port is not part of the VLAN-aware
bridge, but any other user port is.

Tearing down the first user port causes phylink_destroy() to be called
on dp->pl, and this pointer to be set to NULL. Then, when the second
user port is torn down, this was offloading a VLAN-aware bridge port, so
indirectly it will trigger sja1105_static_config_reload().

The latter function iterates using dsa_switch_for_each_available_port(),
and unconditionally dereferences dp->pl, including for the
aforementioned torn down previous port, and passes that to phylink.
This is where the NULL pointer is coming from.

There are multiple levels at which this could be avoided:
- add an "if (dp->pl)" in sja1105_static_config_reload()
- make the phylink replay helpers NULL-tolerant
- mark ports as DSA_PORT_TYPE_UNUSED after dsa_port_phylink_destroy()
  has run, such that subsequent dsa_switch_for_each_available_port()
  iterations skip them
- disconnect the entire switch at once from switchdev and
  NETDEV_CHANGEUPPER events while unbinding, not just port by port,
  likely using a "ds->unbinding = true" mechanism or similar

however options 3 and 4 are quite heavy and might have side effects.
Although 2 allows to keep the driver simpler, the phylink API it not
NULL-tolerant in general and is not responsible for the NULL pointer
(this is something done by dsa_port_phylink_destroy()). So I went
with 1.

Functionally speaking, skipping the replay helpers for ports without
a phylink instance is fine, because that only happens during driver
removal (an operation which cannot be cancelled). The ports are not
required to work (although they probably still will - untested
assumption - as long as we don't overwrite the last port speed with
SJA1105_SPEED_AUTO).

Fixes: 0b2edc531e ("net: dsa: sja1105: let phylink help with the replay of link callbacks")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20260218160551.194782-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-19 14:55:57 -08:00
Martin Pålsson
f1e2f0ce70 net: usb: lan78xx: scan all MDIO addresses on LAN7801
The LAN7801 is designed exclusively for external PHYs (unlike the
LAN7800/LAN7850 which have internal PHYs), but lan78xx_mdio_init()
restricts PHY scanning to MDIO addresses 0-7 by setting phy_mask to
~(0xFF). This prevents discovery of external PHYs wired to addresses
outside that range.

One such case is the DP83TC814 100BASE-T1 PHY, which is typically
configured at MDIO address 10 via PHYAD bootstrap pins and goes
undetected with the current mask.

Remove the restrictive phy_mask assignment for the LAN7801 so that the
default mask of 0 applies, allowing all 32 MDIO addresses to be
scanned during bus registration.

Fixes: 02dc1f3d61 ("lan78xx: add LAN7801 MAC only support")
Signed-off-by: Martin Pålsson <martin@poleshift.se>
Link: https://patch.msgid.link/0110019c6f388aff-98d99cf0-4425-4fff-b16b-dea5ad8fafe0-000000@eu-north-1.amazonses.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-19 14:30:50 -08:00
Ziyi Guo
64868f5eca net: usb: kaweth: remove TX queue manipulation in kaweth_set_rx_mode
kaweth_set_rx_mode(), the ndo_set_rx_mode callback, calls
netif_stop_queue() and netif_wake_queue(). These are TX queue flow
control functions unrelated to RX multicast configuration.

The premature netif_wake_queue() can re-enable TX while tx_urb is still
in-flight, leading to a double usb_submit_urb() on the same URB:

kaweth_start_xmit() {
    netif_stop_queue();
    usb_submit_urb(kaweth->tx_urb);
}

kaweth_set_rx_mode() {
    netif_stop_queue();
    netif_wake_queue();             // wakes TX queue before URB is done
}

kaweth_start_xmit() {
    netif_stop_queue();
    usb_submit_urb(kaweth->tx_urb); // URB submitted while active
}

This triggers the WARN in usb_submit_urb():

  "URB submitted while active"

This is a similar class of bug fixed in rtl8150 by

- commit 958baf5eae ("net: usb: Remove disruptive netif_wake_queue in rtl8150_set_multicast").

Also kaweth_set_rx_mode() is already functionally broken, the
real set_rx_mode action is performed by kaweth_async_set_rx_mode(),
which in turn is not a no-op only at ndo_open() time.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Ziyi Guo <n7l8m4@u.northwestern.edu>
Link: https://patch.msgid.link/20260217175012.1234494-1-n7l8m4@u.northwestern.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-19 14:28:59 -08:00
Hyunwoo Kim
e1512c1db9 espintcp: Fix race condition in espintcp_close()
This issue was discovered during a code audit.

After cancel_work_sync() is called from espintcp_close(),
espintcp_tx_work() can still be scheduled from paths such as
the Delayed ACK handler or ksoftirqd.
As a result, the espintcp_tx_work() worker may dereference a
freed espintcp ctx or sk.

The following is a simple race scenario:

           cpu0                             cpu1

  espintcp_close()
    cancel_work_sync(&ctx->work);
                                     espintcp_write_space()
                                       schedule_work(&ctx->work);

To prevent this race condition, cancel_work_sync() is
replaced with disable_work_sync().

Fixes: e27cca96cd ("xfrm: add espintcp (RFC 8229)")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/aZSie7rEdh9Nu0eM@v4bel
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-19 14:27:40 -08:00
Eric Dumazet
f891007ab1 psp: use sk->sk_hash in psp_write_headers()
udp_flow_src_port() is indirectly using sk->sk_txhash as a base,
because __tcp_transmit_skb() uses skb_set_hash_from_sk().

This is problematic because this field can change over the
lifetime of a TCP flow, thanks to calls to sk_rethink_txhash().

Problem is that some NIC might (ab)use the PSP UDP source port in their
RSS computation, and PSP packets for a given flow could jump
from one queue to another.

In order to avoid surprises, it is safer to let Protective Load
Balancing (PLB) get its entropy from the IPv6 flowlabel,
and change psp_write_headers() to use sk->sk_hash which
does not change for the duration of the flow.

We might add a sysctl to select the behavior, if there
is a need for it.

Fixes: fc72451574 ("psp: provide encapsulation helper for drivers")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-By: Daniel Zahka <daniel.zahka@gmail.com>
Link: https://patch.msgid.link/20260218141337.999945-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-19 14:04:23 -08:00
Eric Dumazet
858d2a4f67 tcp: fix potential race in tcp_v6_syn_recv_sock()
Code in tcp_v6_syn_recv_sock() after the call to tcp_v4_syn_recv_sock()
is done too late.

After tcp_v4_syn_recv_sock(), the child socket is already visible
from TCP ehash table and other cpus might use it.

Since newinet->pinet6 is still pointing to the listener ipv6_pinfo
bad things can happen as syzbot found.

Move the problematic code in tcp_v6_mapped_child_init()
and call this new helper from tcp_v4_syn_recv_sock() before
the ehash insertion.

This allows the removal of one tcp_sync_mss(), since
tcp_v4_syn_recv_sock() will call it with the correct
context.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Reported-by: syzbot+937b5bbb6a815b3e5d0b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/69949275.050a0220.2eeac1.0145.GAE@google.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260217161205.2079883-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-19 14:02:19 -08:00
Greg Kroah-Hartman
ecfa6f3449 HID: Add HID_CLAIMED_INPUT guards in raw_event callbacks missing them
In commit 2ff5baa9b5 ("HID: appleir: Fix potential NULL dereference at
raw event handle"), we handle the fact that raw event callbacks
can happen even for a HID device that has not been "claimed" causing a
crash if a broken device were attempted to be connected to the system.

Fix up the remaining in-tree HID drivers that forgot to add this same
check to resolve the same issue.

Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <bentiss@kernel.org>
Cc: Bastien Nocera <hadess@hadess.net>
Cc: linux-input@vger.kernel.org
Cc: stable <stable@kernel.org>
Assisted-by: gkh_clanker_2000
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-02-19 18:58:39 +01:00
Günther Noack
2bad24c177 HID: asus: avoid memory leak in asus_report_fixup()
The asus_report_fixup() function was returning a newly allocated
kmemdup()-allocated buffer, but never freeing it.  Switch to
devm_kzalloc() to ensure the memory is managed and freed automatically
when the device is removed.

The caller of report_fixup() does not take ownership of the returned
pointer, but it is permitted to return a pointer whose lifetime is at
least that of the input buffer.

Also fix a harmless out-of-bounds read by copying only the original
descriptor size.

Assisted-by: Gemini-CLI:Google Gemini 3
Signed-off-by: Günther Noack <gnoack@google.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-02-19 18:57:38 +01:00
Günther Noack
91e8c6e601 HID: magicmouse: avoid memory leak in magicmouse_report_fixup()
The magicmouse_report_fixup() function was returning a
newly kmemdup()-allocated buffer, but never freeing it.

The caller of report_fixup() does not take ownership of the returned
pointer, but it *is* permitted to return a sub-portion of the input
rdesc, whose lifetime is managed by the caller.

Assisted-by: Gemini-CLI:Google Gemini 3
Signed-off-by: Günther Noack <gnoack@google.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-02-19 18:57:38 +01:00
Günther Noack
239c15116d HID: apple: avoid memory leak in apple_report_fixup()
The apple_report_fixup() function was returning a
newly kmemdup()-allocated buffer, but never freeing it.

The caller of report_fixup() does not take ownership of the returned
pointer, but it *is* permitted to return a sub-portion of the input
rdesc, whose lifetime is managed by the caller.

Assisted-by: Gemini-CLI:Google Gemini 3
Signed-off-by: Günther Noack <gnoack@google.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-02-19 18:57:38 +01:00
Günther Noack
6b3e458806 HID: Document memory allocation properties of report_fixup()
The memory pointer returned by the report_fixup() hook does not get
freed by the caller.  Instead, report_fixup() must return (in return
value and *rsize) a memory buffer with at least the same lifetime as
the input buffer (defined by rdesc and original *rsize).

This is usually achieved using one of the following techniques:

* Returning a pointer and size to a sub-portion of the input buffer
* Returning a pointer to a static buffer
* Allocating a buffer with a devm_*() function,
  which will automatically get freed when the device is removed.

Signed-off-by: Günther Noack <gnoack@google.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
2026-02-19 18:57:38 +01:00
Thorsten Blum
81b1f046ff drbd: Replace deprecated strcpy with strscpy
strcpy() has been deprecated [1] because it performs no bounds checking
on the destination buffer, which can lead to buffer overflows. Replace
it with the safer strscpy().  No functional changes.

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy [1]
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-19 08:22:02 -07:00
Lars Ellenberg
ab140365fb drbd: fix "LOGIC BUG" in drbd_al_begin_io_nonblock()
Even though we check that we "should" be able to do lc_get_cumulative()
while holding the device->al_lock spinlock, it may still fail,
if some other code path decided to do lc_try_lock() with bad timing.

If that happened, we logged "LOGIC BUG for enr=...",
but still did not return an error.

The rest of the code now assumed that this request has references
for the relevant activity log extents.

The implcations are that during an active resync, mutual exclusivity of
resync versus application IO is not guaranteed. And a potential crash
at this point may not realizs that these extents could have been target
of in-flight IO and would need to be resynced just in case.

Also, once the request completes, it will give up activity log references it
does not even hold, which will trigger a BUG_ON(refcnt == 0) in lc_put().

Fix:

Do not crash the kernel for a condition that is harmless during normal
operation: also catch "e->refcnt == 0", not only "e == NULL"
when being noisy about "al_complete_io() called on inactive extent %u\n".

And do not try to be smart and "guess" whether something will work, then
be surprised when it does not.
Deal with the fact that it may or may not work.  If it does not, remember a
possible "partially in activity log" state (only possible for requests that
cross extent boundaries), and return an error code from
drbd_al_begin_io_nonblock().

A latter call for the same request will then resume from where we left off.

Cc: stable@vger.kernel.org
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-02-19 08:21:53 -07:00
Piotr Mazek
023cd6d90f ACPI: PM: Save NVS memory on Lenovo G70-35
[821d6f0359] prevented machines
produced later than 2012 from saving NVS region to accelerate S3.

Despite being made after 2012, Lenovo G70-35 still needs NVS memory
saving during S3. A quirk is introduced for this platform.

Signed-off-by: Piotr Mazek <pmazek@outlook.com>
[ rjw: Subject adjustment ]
Link: https://patch.msgid.link/GV2PPF3CD5B63CC2442EE3F76F8443EAD90D499A@GV2PPF3CD5B63CC.EURP251.PROD.OUTLOOK.COM
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2026-02-19 11:54:52 +01:00
Christian Brauner
768f4dc4cc Merge patch series "fserror: bug fixes"
This contains two fixes for the new fserror infrastructure.

* patches from https://patch.msgid.link/177148129514.716249.10889194125495783768.stgit@frogsfrogsfrogs:
  fserror: fix lockdep complaint when igrabbing inode
  fsnotify: drop unused helper

Link: https://patch.msgid.link/177148129514.716249.10889194125495783768.stgit@frogsfrogsfrogs
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-19 09:12:13 +01:00
Darrick J. Wong
294f54f849 fserror: fix lockdep complaint when igrabbing inode
Christoph Hellwig reported a lockdep splat in generic/108:

 ================================
 WARNING: inconsistent lock state
 6.19.0+ #4827 Tainted: G                 N
 --------------------------------
 inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
 swapper/1/0 [HC1[1]:SC0[0]:HE0:SE1] takes:
 ffff88811ed1b140 (&sb->s_type->i_lock_key#33){?.+.}-{3:3}, at: igrab+0x1a/0xb0
 {HARDIRQ-ON-W} state was registered at:
   lock_acquire+0xca/0x2c0
   _raw_spin_lock+0x2e/0x40
   unlock_new_inode+0x2c/0xc0
   xfs_iget+0xcf4/0x1080
   xfs_trans_metafile_iget+0x3d/0x100
   xfs_metafile_iget+0x2b/0x50
   xfs_mount_setup_metadir+0x20/0x60
   xfs_mountfs+0x457/0xa60
   xfs_fs_fill_super+0x6b3/0xa90
   get_tree_bdev_flags+0x13c/0x1e0
   vfs_get_tree+0x27/0xe0
   vfs_cmd_create+0x54/0xe0
   __do_sys_fsconfig+0x309/0x620
   do_syscall_64+0x8b/0xf80
   entry_SYSCALL_64_after_hwframe+0x76/0x7e
 irq event stamp: 139080
 hardirqs last  enabled at (139079): [<ffffffff813a923c>] do_idle+0x1ec/0x270
 hardirqs last disabled at (139080): [<ffffffff828a8d09>] common_interrupt+0x19/0xe0
 softirqs last  enabled at (139032): [<ffffffff8134a853>] __irq_exit_rcu+0xc3/0x120
 softirqs last disabled at (139025): [<ffffffff8134a853>] __irq_exit_rcu+0xc3/0x120

 other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock(&sb->s_type->i_lock_key#33);
   <Interrupt>
     lock(&sb->s_type->i_lock_key#33);

  *** DEADLOCK ***

 1 lock held by swapper/1/0:
  #0: ffff8881052c81a0 (&vblk->vqs[i].lock){-.-.}-{3:3}, at: virtblk_done+0x4b/0x110

 stack backtrace:
 CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G                 N  6.19.0+ #4827 PREEMPT(full)
 Tainted: [N]=TEST
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
 Call Trace:
  <IRQ>
  dump_stack_lvl+0x5b/0x80
  print_usage_bug.part.0+0x22c/0x2c0
  mark_lock+0xa6f/0xe90
  __lock_acquire+0x10b6/0x25e0
  lock_acquire+0xca/0x2c0
  _raw_spin_lock+0x2e/0x40
  igrab+0x1a/0xb0
  fserror_report+0x135/0x260
  iomap_finish_ioend_buffered+0x170/0x210
  clone_endio+0x8f/0x1c0
  blk_update_request+0x1e4/0x4d0
  blk_mq_end_request+0x1b/0x100
  virtblk_done+0x6f/0x110
  vring_interrupt+0x59/0x80
  __handle_irq_event_percpu+0x8a/0x2e0
  handle_irq_event+0x33/0x70
  handle_edge_irq+0xdd/0x1e0
  __common_interrupt+0x6f/0x180
  common_interrupt+0xb7/0xe0
  </IRQ>

It looks like the concern here is that inode::i_lock is sometimes taken
in IRQ context, and sometimes it is held when going to IRQ context,
though it's a little difficult to tell since I think this is a kernel
from after the actual 6.19 release but before 7.0-rc1.

Either way, we don't need to take i_lock, because filesystems should
not report files to fserror if they're about to be freed or have not
yet been exposed to other threads, because the resulting fsnotify report
will be meaningless.

Therefore, bump inode::i_count directly and clarify the preconditions on
the inode being passed in.

Link: https://lore.kernel.org/linux-fsdevel/aY7BndIgQg3ci_6s@infradead.org/
Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://patch.msgid.link/177148129564.716249.3069780698231701540.stgit@frogsfrogsfrogs
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-19 09:12:08 +01:00
Darrick J. Wong
249013e673 fsnotify: drop unused helper
Remove this helper now that all users have been converted to
fserror_report_metadata as of 7.0-rc1.

Cc: jack@suse.cz
Cc: amir73il@gmail.com
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://patch.msgid.link/177148129543.716249.980530449513340111.stgit@frogsfrogsfrogs
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-19 09:12:05 +01:00
Dan Carpenter
7be41fb00e accel: ethosu: Fix shift overflow in cmd_to_addr()
The "((cmd[0] & 0xff0000) << 16)" shift is zero.  This was intended
to be (((u64)cmd[0] & 0xff0000) << 16).  Move the cast to the correct
location.

Fixes: 5a5e9c0228 ("accel: Add Arm Ethos-U NPU driver")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://patch.msgid.link/aQGmY64tWcwOGFP4@stanley.mountain
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
2026-02-18 16:28:08 -06:00
Al Viro
6c4b2243cb unshare: fix unshare_fs() handling
There's an unpleasant corner case in unshare(2), when we have a
CLONE_NEWNS in flags and current->fs hadn't been shared at all; in that
case copy_mnt_ns() gets passed current->fs instead of a private copy,
which causes interesting warts in proof of correctness]

> I guess if private means fs->users == 1, the condition could still be true.

Unfortunately, it's worse than just a convoluted proof of correctness.
Consider the case when we have CLONE_NEWCGROUP in addition to CLONE_NEWNS
(and current->fs->users == 1).

We pass current->fs to copy_mnt_ns(), all right.  Suppose it succeeds and
flips current->fs->{pwd,root} to corresponding locations in the new namespace.
Now we proceed to copy_cgroup_ns(), which fails (e.g. with -ENOMEM).
We call put_mnt_ns() on the namespace created by copy_mnt_ns(), it's
destroyed and its mount tree is dissolved, but...  current->fs->root and
current->fs->pwd are both left pointing to now detached mounts.

They are pinning those, so it's not a UAF, but it leaves the calling
process with unshare(2) failing with -ENOMEM _and_ leaving it with
pwd and root on detached isolated mounts.  The last part is clearly a bug.

There is other fun related to that mess (races with pivot_root(), including
the one between pivot_root() and fork(), of all things), but this one
is easy to isolate and fix - treat CLONE_NEWNS as "allocate a new
fs_struct even if it hadn't been shared in the first place".  Sure, we could
go for something like "if both CLONE_NEWNS *and* one of the things that might
end up failing after copy_mnt_ns() call in create_new_namespaces() are set,
force allocation of new fs_struct", but let's keep it simple - the cost
of copy_fs_struct() is trivial.

Another benefit is that copy_mnt_ns() with CLONE_NEWNS *always* gets
a freshly allocated fs_struct, yet to be attached to anything.  That
seriously simplifies the analysis...

FWIW, that bug had been there since the introduction of unshare(2) ;-/

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://patch.msgid.link/20260207082524.GE3183987@ZenIV
Tested-by: Waiman Long <longman@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-18 14:04:51 +01:00
Jori Koolstra
ef0b64741a minix: Correct errno in minix_new_inode
The cases (!j || j > sbi->s_ninodes) can never occur unless the
filesystem is broken, so this should not return ENOSPC, but
EFSCORRUPTED.

Signed-off-by: Jori Koolstra <jkoolstra@xs4all.nl>
Link: https://patch.msgid.link/20251201122338.90568-1-jkoolstra@xs4all.nl
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-18 14:04:42 +01:00
Christian Brauner
4a403d7aa9 namespace: fix proc mount iteration
The m->index isn't updated when m->show() overflows and retains its
value before the current mount causing a restart to start at the same
value. If that happens in short order to due a quickly expanding mount
table this would cause the same mount to be shown again and again.

Ensure that *pos always equals the mount id of the mount that was
returned by start/next. On restart after overflow mnt_find_id_at(*pos)
finds the exact mount. This should avoid duplicates, avoid skips and
should handle concurrent modification just fine.

Cc: <stable@vger.kernel.org>
Fixed: 2eea9ce431 ("mounts: keep list of mounts in an rbtree")
Link: https://patch.msgid.link/20260129-geleckt-treuhand-4bb940acacd9@brauner
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-18 14:04:42 +01:00
Christian Brauner
a41dbf5e00 mount: hold namespace_sem across copy in create_new_namespace()
Fix an oversight when creating a new mount namespace. If someone had the
bright idea to make the real rootfs a shared or dependent mount and it
is later copied the copy will become a peer of the old real rootfs mount
or a dependent mount of it. The namespace semaphore is dropped and we
use mount lock exact to lock the new real root mount. If that fails or
the subsequent do_loopback() fails we rely on the copy of the real root
mount to be cleaned up by path_put(). The problem is that this doesn't
deal with mount propagation and will leave the mounts linked in the
propagation lists.

When creating a new mount namespace create_new_namespace() first
acquires namespace_sem to clone the nullfs root, drops it, then
reacquires it via LOCK_MOUNT_EXACT which takes inode_lock first to
respect the inode_lock -> namespace_sem lock ordering. This
drop-and-reacquire pattern is fragile and was the source of the
propagation cleanup bug fixed in the preceding commit.

Extend lock_mount_exact() with a copy_mount mode that clones the mount
under the locks atomically. When copy_mount is true, path_overmounted()
is skipped since we're copying the mount, not mounting on top of it -
the nullfs root always has rootfs mounted on top so the check would
always fail. If clone_mnt() fails after get_mountpoint() has pinned the
mountpoint, __unlock_mount() is used to properly unpin the mountpoint
and release both locks.

This allows create_new_namespace() to use LOCK_MOUNT_EXACT_COPY which
takes inode_lock and namespace_sem once and holds them throughout the
clone and subsequent mount operations, eliminating the
drop-and-reacquire pattern entirely.

Reported-by: syzbot+a89f9434fb5a001ccd58@syzkaller.appspotmail.com
Fixes: 9b8a0ba682 ("mount: add OPEN_TREE_NAMESPACE") # mainline only
Link: https://lore.kernel.org/699047f6.050a0220.2757fb.0024.GAE@google.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-18 14:04:42 +01:00
Karan Tilak Kumar
97af85787c scsi: snic: MAINTAINERS: Update snic maintainers
Update snic maintainers.

Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
Link: https://patch.msgid.link/20260217204658.5465-1-kartilak@cisco.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:41:28 -05:00
Thomas Fourier
af3973e7b4 scsi: snic: Remove unused linkstatus
The (struct vnic_dev).linkstatus buffer is freed in svnic_dev_unregister()
and referenced in svnic_dev_link_status() but never alloc'd. This means
(struct vnic_dev).linkstatus is always null and the dealloc the reference
in svnic_dev_link_status() is dead code.

Signed-off-by: Thomas Fourier <fourier.thomas@gmail.com>
Acked-by: Karan Tilak Kumar <kartilak@cisco.com>
Link: https://patch.msgid.link/20260216141056.59429-2-fourier.thomas@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:37:24 -05:00
Salomon Dushimirimana
38353c26db scsi: pm8001: Fix use-after-free in pm8001_queue_command()
Commit e29c47fe89 ("scsi: pm8001: Simplify pm8001_task_exec()") refactors
pm8001_queue_command(), however it introduces a potential cause of a double
free scenario when it changes the function to return -ENODEV in case of phy
down/device gone state.

In this path, pm8001_queue_command() updates task status and calls
task_done to indicate to upper layer that the task has been handled.
However, this also frees the underlying SAS task. A -ENODEV is then
returned to the caller. When libsas sas_ata_qc_issue() receives this error
value, it assumes the task wasn't handled/queued by LLDD and proceeds to
clean up and free the task again, resulting in a double free.

Since pm8001_queue_command() handles the SAS task in this case, it should
return 0 to the caller indicating that the task has been handled.

Fixes: e29c47fe89 ("scsi: pm8001: Simplify pm8001_task_exec()")
Signed-off-by: Salomon Dushimirimana <salomondush@google.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://patch.msgid.link/20260213192806.439432-1-salomondush@google.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:35:43 -05:00
Ranjan Kumar
fa96392ebe scsi: mpi3mr: Add NULL checks when resetting request and reply queues
The driver encountered a crash during resource cleanup when the reply and
request queues were NULL due to freed memory.  This issue occurred when the
creation of reply or request queues failed, and the driver freed the memory
first, but attempted to mem set the content of the freed memory, leading to
a system crash.

Add NULL pointer checks for reply and request queues before accessing the
reply/request memory during cleanup

Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://patch.msgid.link/20260212070026.30263-1-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:31:47 -05:00
Won Jung
5b31376005 scsi: ufs: core: Reset urgent_bkops_lvl to allow runtime PM power mode
Ensures that UFS Runtime PM can achieve power saving after System PM
suspend by resetting hba->urgent_bkops_lvl. Also modify the
ufshcd_bkops_exception_event_handler to avoid setting urgent_bkops_lvl when
status is 0, which helps maintain optimal power management.

On UFS devices supporting UFSHCD_CAP_AUTO_BKOPS_SUSPEND, a BKOPS exception
event can lead to a situation where UFS Runtime PM can't enter low-power
mode states even after the BKOPS exception has been resolved.

BKOPS exception with bkops status 0 occurs, the driver logs:

 "ufshcd_bkops_exception_event_handler: device raised urgent BKOPS exception for bkops status 0"

When a BKOPS exception occurs, ufshcd_bkops_exception_event_handler() reads
the BKOPS status and sets hba->urgent_bkops_lvl to BKOPS_STATUS_NO_OP(0).
This allows the device to perform Runtime PM without changing the UFS power
mode.  (__ufshcd_wl_suspend(hba, UFS_RUNTIME_PM))

During system PM suspend, ufshcd_disable_auto_bkops() is called, disabling
auto bkops. After UFS System PM Resume, when runtime PM attempts to suspend
again, ufshcd_urgent_bkops() is invoked. Since hba->urgent_bkops_lvl
remains at BKOPS_STATUS_NO_OP(0), ufshcd_enable_auto_bkops() is triggered.

However, in ufshcd_bkops_ctrl(), the driver compares the current BKOPS
status with hba->urgent_bkops_lvl, and only enables auto bkops if
curr_status >= hba->urgent_bkops_lvl.  Since both values are 0, the
condition is met

As a result, __ufshcd_wl_suspend(hba, UFS_RUNTIME_PM) skips power mode
transitions and remains in an active state, preventing power saving even
though no urgent BKOPS condition exists.

Signed-off-by: Won Jung <wone.jung@samsung.com>
Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Link: https://patch.msgid.link/1891546521.01770806581968.JavaMail.epsvc@epcpadp2new
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:30:13 -05:00
Tomas Henzl
70ca8caa96 scsi: ses: Fix devices attaching to different hosts
On a multipath SAS system some devices don't end up with correct symlinks
from the SCSI device to its enclosure. Some devices even have enclosure
links pointing to enclosures attached to different SCSI hosts.

ses_match_to_enclosure() calls enclosure_for_each_device() which iterates
over all enclosures on the system, not just enclosures attached to the
current SCSI host.

Replace the iteration with a direct call to ses_enclosure_find_by_addr().

Reviewed-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://patch.msgid.link/20260210191850.36784-1-thenzl@redhat.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:26:46 -05:00
Alexey Charkov
2e6b5cd6a4 scsi: ufs: core: Fix RPMB region size detection for UFS 2.2
Older UFS spec devices (2.2 and earlier) do not expose per-region RPMB
sizes, as only one RPMB region is supported. In such cases, the size of the
single RPMB region can be deduced from the Logical Block Count and Logical
Block Size fields in the RPMB Unit Descriptor.

Add a fallback mechanism to calculate the RPMB region size from these
fields if the device implements an older spec, so that the RPMB driver can
work with such devices - otherwise it silently skips the whole RPMB.

        Section 14.1.4.6 (RPMB Unit Descriptor)

Link: https://www.jedec.org/system/files/docs/JESD220C-2_2.pdf
Cc: stable@vger.kernel.org
Fixes: b06b8c4214 ("scsi: ufs: core: Add OP-TEE based RPMB driver for UFS devices")
Reviewed-by: Bean Huo <beanhuo@micron.com>
Signed-off-by: Alexey Charkov <alchark@flipper.net>
Link: https://patch.msgid.link/20260209-ufs-rpmb-v3-1-b1804e71bd38@flipper.net
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:16:04 -05:00
Jan Kiszka
57297736c0 scsi: storvsc: Fix scheduling while atomic on PREEMPT_RT
This resolves the follow splat and lock-up when running with PREEMPT_RT
enabled on Hyper-V:

[  415.140818] BUG: scheduling while atomic: stress-ng-iomix/1048/0x00000002
[  415.140822] INFO: lockdep is turned off.
[  415.140823] Modules linked in: intel_rapl_msr intel_rapl_common intel_uncore_frequency_common intel_pmc_core pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry intel_vsec ghash_clmulni_intel aesni_intel rapl binfmt_misc nls_ascii nls_cp437 vfat fat snd_pcm hyperv_drm snd_timer drm_client_lib drm_shmem_helper snd sg soundcore drm_kms_helper pcspkr hv_balloon hv_utils evdev joydev drm configfs efi_pstore nfnetlink vsock_loopback vmw_vsock_virtio_transport_common hv_sock vmw_vsock_vmci_transport vsock vmw_vmci efivarfs autofs4 ext4 crc16 mbcache jbd2 sr_mod sd_mod cdrom hv_storvsc serio_raw hid_generic scsi_transport_fc hid_hyperv scsi_mod hid hv_netvsc hyperv_keyboard scsi_common
[  415.140846] Preemption disabled at:
[  415.140847] [<ffffffffc0656171>] storvsc_queuecommand+0x2e1/0xbe0 [hv_storvsc]
[  415.140854] CPU: 8 UID: 0 PID: 1048 Comm: stress-ng-iomix Not tainted 6.19.0-rc7 #30 PREEMPT_{RT,(full)}
[  415.140856] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/04/2024
[  415.140857] Call Trace:
[  415.140861]  <TASK>
[  415.140861]  ? storvsc_queuecommand+0x2e1/0xbe0 [hv_storvsc]
[  415.140863]  dump_stack_lvl+0x91/0xb0
[  415.140870]  __schedule_bug+0x9c/0xc0
[  415.140875]  __schedule+0xdf6/0x1300
[  415.140877]  ? rtlock_slowlock_locked+0x56c/0x1980
[  415.140879]  ? rcu_is_watching+0x12/0x60
[  415.140883]  schedule_rtlock+0x21/0x40
[  415.140885]  rtlock_slowlock_locked+0x502/0x1980
[  415.140891]  rt_spin_lock+0x89/0x1e0
[  415.140893]  hv_ringbuffer_write+0x87/0x2a0
[  415.140899]  vmbus_sendpacket_mpb_desc+0xb6/0xe0
[  415.140900]  ? rcu_is_watching+0x12/0x60
[  415.140902]  storvsc_queuecommand+0x669/0xbe0 [hv_storvsc]
[  415.140904]  ? HARDIRQ_verbose+0x10/0x10
[  415.140908]  ? __rq_qos_issue+0x28/0x40
[  415.140911]  scsi_queue_rq+0x760/0xd80 [scsi_mod]
[  415.140926]  __blk_mq_issue_directly+0x4a/0xc0
[  415.140928]  blk_mq_issue_direct+0x87/0x2b0
[  415.140931]  blk_mq_dispatch_queue_requests+0x120/0x440
[  415.140933]  blk_mq_flush_plug_list+0x7a/0x1a0
[  415.140935]  __blk_flush_plug+0xf4/0x150
[  415.140940]  __submit_bio+0x2b2/0x5c0
[  415.140944]  ? submit_bio_noacct_nocheck+0x272/0x360
[  415.140946]  submit_bio_noacct_nocheck+0x272/0x360
[  415.140951]  ext4_read_bh_lock+0x3e/0x60 [ext4]
[  415.140995]  ext4_block_write_begin+0x396/0x650 [ext4]
[  415.141018]  ? __pfx_ext4_da_get_block_prep+0x10/0x10 [ext4]
[  415.141038]  ext4_da_write_begin+0x1c4/0x350 [ext4]
[  415.141060]  generic_perform_write+0x14e/0x2c0
[  415.141065]  ext4_buffered_write_iter+0x6b/0x120 [ext4]
[  415.141083]  vfs_write+0x2ca/0x570
[  415.141087]  ksys_write+0x76/0xf0
[  415.141089]  do_syscall_64+0x99/0x1490
[  415.141093]  ? rcu_is_watching+0x12/0x60
[  415.141095]  ? finish_task_switch.isra.0+0xdf/0x3d0
[  415.141097]  ? rcu_is_watching+0x12/0x60
[  415.141098]  ? lock_release+0x1f0/0x2a0
[  415.141100]  ? rcu_is_watching+0x12/0x60
[  415.141101]  ? finish_task_switch.isra.0+0xe4/0x3d0
[  415.141103]  ? rcu_is_watching+0x12/0x60
[  415.141104]  ? __schedule+0xb34/0x1300
[  415.141106]  ? hrtimer_try_to_cancel+0x1d/0x170
[  415.141109]  ? do_nanosleep+0x8b/0x160
[  415.141111]  ? hrtimer_nanosleep+0x89/0x100
[  415.141114]  ? __pfx_hrtimer_wakeup+0x10/0x10
[  415.141116]  ? xfd_validate_state+0x26/0x90
[  415.141118]  ? rcu_is_watching+0x12/0x60
[  415.141120]  ? do_syscall_64+0x1e0/0x1490
[  415.141121]  ? do_syscall_64+0x1e0/0x1490
[  415.141123]  ? rcu_is_watching+0x12/0x60
[  415.141124]  ? do_syscall_64+0x1e0/0x1490
[  415.141125]  ? do_syscall_64+0x1e0/0x1490
[  415.141127]  ? irqentry_exit+0x140/0x7e0
[  415.141129]  entry_SYSCALL_64_after_hwframe+0x76/0x7e

get_cpu() disables preemption while the spinlock hv_ringbuffer_write is
using is converted to an rt-mutex under PREEMPT_RT.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Tested-by: Florian Bezdeka <florian.bezdeka@siemens.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Link: https://patch.msgid.link/0c7fb5cd-fb21-4760-8593-e04bade84744@siemens.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:13:26 -05:00
Mathias Krause
bffda93a51 scsi: lpfc: Properly set WC for DPP mapping
Using set_memory_wc() to enable write-combining for the DPP portion of
the MMIO mapping is wrong as set_memory_*() is meant to operate on RAM
only, not MMIO mappings. In fact, as used currently triggers a BUG_ON()
with enabled CONFIG_DEBUG_VIRTUAL.

Simply map the DPP region separately and in addition to the already
existing mappings, avoiding any possible negative side effects for
these.

Fixes: 1351e69fc6 ("scsi: lpfc: Add push-to-adapter support to sli4")
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Reviewed-by: Mathias Krause <minipli@grsecurity.net>
Link: https://patch.msgid.link/20260212192327.141104-1-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2026-02-17 21:10:29 -05:00
Krzysztof Kozlowski
f0617176be irqchip/mmp: Make icu_irq_chip variable static const
File-scope 'icu_irq_chip' is not used outside of this unit and is not
modified anywhere, so make it static const to silence sparse warning:

  irq-mmp.c:139:17: warning: symbol 'icu_irq_chip' was not declared. Should it be static?

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260216110449.160277-2-krzysztof.kozlowski@oss.qualcomm.com
2026-02-17 11:03:56 +01:00
Marc Zyngier
ce9e40a9a5 irqchip/gic-v3-its: Limit number of per-device MSIs to the range the ITS supports
The ITS driver blindly assumes that EventIDs are in abundant supply, to the
point where it never checks how many the hardware actually supports.

It turns out that some pretty esoteric integrations make it so that only a
few bits are available, all the way down to a single bit.

Enforce the advertised limitation at the point of allocating the device
structure, and hope that the endpoint driver can deal with such limitation.

Fixes: 84a6a2e7fc ("irqchip: GICv3: ITS: device allocation and configuration")
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Zenghui Yu <zenghui.yu@linux.dev>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260206154816.3582887-1-maz@kernel.org
2026-02-17 11:00:43 +01:00
Nam Cao
1072020685 irqchip/sifive-plic: Fix frozen interrupt due to affinity setting
PLIC ignores interrupt completion message for disabled interrupt, explained
by the specification:

    The PLIC signals it has completed executing an interrupt handler by
    writing the interrupt ID it received from the claim to the
    claim/complete register. The PLIC does not check whether the completion
    ID is the same as the last claim ID for that target. If the completion
    ID does not match an interrupt source that is currently enabled for
    the target, the completion is silently ignored.

This caused problems in the past, because an interrupt can be disabled
while still being handled and plic_irq_eoi() had no effect. That was fixed
by checking if the interrupt is disabled, and if so enable it, before
sending the completion message. That check is done with irqd_irq_disabled().

However, that is not sufficient because the enable bit for the handling
hart can be zero despite irqd_irq_disabled(d) being false. This can happen
when affinity setting is changed while a hart is still handling the
interrupt.

This problem is easily reproducible by dumping a large file to uart (which
generates lots of interrupts) and at the same time keep changing the uart
interrupt's affinity setting. The uart port becomes frozen almost
instantaneously.

Fix this by checking PLIC's enable bit instead of irqd_irq_disabled().

Fixes: cc9f04f9a8 ("irqchip/sifive-plic: Implement irq_set_affinity() for SMP host")
Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260212114125.3148067-1-namcao@linutronix.de
2026-02-17 11:00:43 +01:00
Ethan Tidmore
46120745bb drm/tiny: sharp-memory: fix pointer error dereference
The function devm_drm_dev_alloc() returns a pointer error upon failure
not NULL. Change null check to pointer error check.

Detected by Smatch:
drivers/gpu/drm/tiny/sharp-memory.c:549 sharp_memory_probe() error:
'smd' dereferencing possible ERR_PTR()

Fixes: b8f9f21716 ("drm/tiny: Add driver for Sharp Memory LCD")
Signed-off-by: Ethan Tidmore <ethantidmore06@gmail.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patch.msgid.link/20260216040438.43702-1-ethantidmore06@gmail.com
2026-02-17 10:04:28 +01:00
Dave Airlie
9478c166c4 nouveau/gsp: drop WARN_ON in ACPI probes
These WARN_ONs seem to trigger a lot, and we don't seem to have a
plan to fix them, so just drop them, as they are most likely
harmless.

Cc: stable@vger.kernel.org
Fixes: 176fdcbddf ("drm/nouveau/gsp/r535: add support for booting GSP-RM")
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patch.msgid.link/20241121014601.229391-1-airlied@gmail.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
2026-02-16 22:51:24 +01:00
Kuniyuki Iwashima
92978c83bb nfsd: Fix cred ref leak in nfsd_nl_listener_set_doit().
nfsd_nl_listener_set_doit() uses get_current_cred() without
put_cred().

As we can see from other callers, svc_xprt_create_from_sa()
does not require the extra refcount.

nfsd_nl_listener_set_doit() is always in the process context,
sendmsg(), and current->cred does not go away.

Let's use current_cred() in nfsd_nl_listener_set_doit().

Fixes: 16a4711774 ("NFSD: add listener-{set,get} netlink command")
Cc: stable@vger.kernel.org
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-02-14 12:50:24 -05:00
Kuniyuki Iwashima
1cb968a201 nfsd: Fix cred ref leak in nfsd_nl_threads_set_doit().
syzbot reported memory leak of struct cred. [0]

nfsd_nl_threads_set_doit() passes get_current_cred() to
nfsd_svc(), but put_cred() is not called after that.

The cred is finally passed down to _svc_xprt_create(),
which calls get_cred() with the cred for struct svc_xprt.

The ownership of the refcount by get_current_cred() is not
transferred to anywhere and is just leaked.

nfsd_svc() is also called from write_threads(), but it does
not bump file->f_cred there.

nfsd_nl_threads_set_doit() is called from sendmsg() and
current->cred does not go away.

Let's use current_cred() in nfsd_nl_threads_set_doit().

[0]:
BUG: memory leak
unreferenced object 0xffff888108b89480 (size 184):
  comm "syz-executor", pid 5994, jiffies 4294943386
  hex dump (first 32 bytes):
    01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace (crc 369454a7):
    kmemleak_alloc_recursive include/linux/kmemleak.h:44 [inline]
    slab_post_alloc_hook mm/slub.c:4958 [inline]
    slab_alloc_node mm/slub.c:5263 [inline]
    kmem_cache_alloc_noprof+0x412/0x580 mm/slub.c:5270
    prepare_creds+0x22/0x600 kernel/cred.c:185
    copy_creds+0x44/0x290 kernel/cred.c:286
    copy_process+0x7a7/0x2870 kernel/fork.c:2086
    kernel_clone+0xac/0x6e0 kernel/fork.c:2651
    __do_sys_clone+0x7f/0xb0 kernel/fork.c:2792
    do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
    do_syscall_64+0xa4/0xf80 arch/x86/entry/syscall_64.c:94
    entry_SYSCALL_64_after_hwframe+0x77/0x7f

Fixes: 924f4fb003 ("NFSD: convert write_threads to netlink command")
Cc: stable@vger.kernel.org
Reported-by: syzbot+dd3b43aa0204089217ee@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69744674.a00a0220.33ccc7.0000.GAE@google.com/
Tested-by: syzbot+dd3b43aa0204089217ee@syzkaller.appspotmail.com
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-02-14 12:48:51 -05:00
Hongbo Li
ac83896172 iomap: Describe @private in iomap_readahead()
The kernel test rebot reports the kernel-doc warning:

```
Warning: fs/iomap/buffered-io.c:624 function parameter 'private'
 not described in 'iomap_readahead'
```

The former commit in "iomap: stash iomap read ctx in the private
field of iomap_iter" has added a new parameter @private to
iomap_readahead(), so let's describe the parameter.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202601261111.vIL9rhgD-lkp@intel.com/
Fixes: 8806f27924 ("iomap: stash iomap read ctx in the private field of iomap_iter")
Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
Link: https://patch.msgid.link/20260213022812.766187-1-lihongbo22@huawei.com
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-14 13:24:44 +01:00
Qing Wang
81f16c9778 statmount: Fix the null-ptr-deref in do_statmount()
If the mount is internal, it's mnt_ns will be MNT_NS_INTERNAL, which is
defined as ERR_PTR(-EINVAL). So, in the do_statmount(), need to check ns
of mount by IS_ERR() and return.

Fixes: 0e5032237e ("statmount: accept fd as a parameter")
Reported-by: syzbot+9e03a9535ea65f687a44@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/698e287a.a70a0220.2c38d7.009e.GAE@google.com/
Signed-off-by: Qing Wang <wangqing7171@gmail.com>
Link: https://patch.msgid.link/20260213103006.2472569-1-wangqing7171@gmail.com
Reviewed-by: Bhavik Sachdev <b.sachdev1904@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-14 13:17:50 +01:00
Huacai Chen
9eed043d10 writeback: Fix wakeup and logging timeouts for !DETECT_HUNG_TASK
Recent changes of fs-writeback cause such warnings if DETECT_HUNG_TASK
is not enabled:

INFO: The task sync:1342 has been waiting for writeback completion for more than 1 seconds.

The reason is sysctl_hung_task_timeout_secs is 0 when DETECT_HUNG_TASK
is not enabled, then it causes the warning message even if the writeback
lasts for only one second.

Guard the wakeup and logging with "#ifdef CONFIG_DETECT_HUNG_TASK" can
eliminate the warning messages. But on the other hand, it is possible
that sysctl_hung_task_timeout_secs be also 0 when DETECT_HUNG_TASK is
enabled. So let's just check the value of sysctl_hung_task_timeout_secs
to decide whether do wakeup and logging.

Fixes: 1888635532 ("writeback: Wake up waiting tasks when finishing the writeback of a chunk.")
Fixes: d6e6215907 ("writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs)")
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Link: https://patch.msgid.link/20260203094014.2273240-1-chenhuacai@loongson.cn
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-14 13:17:46 +01:00
Edward Adam Davis
cb184dd191 fs: init flags_valid before calling vfs_fileattr_get
syzbot reported a uninit-value bug in [1].

Similar to the "*get" context where the kernel's internal file_kattr
structure is initialized before calling vfs_fileattr_get(), we should
use the same mechanism when using fa.

[1]
BUG: KMSAN: uninit-value in fuse_fileattr_get+0xeb4/0x1450 fs/fuse/ioctl.c:517
 fuse_fileattr_get+0xeb4/0x1450 fs/fuse/ioctl.c:517
 vfs_fileattr_get fs/file_attr.c:94 [inline]
 __do_sys_file_getattr fs/file_attr.c:416 [inline]

Local variable fa.i created at:
 __do_sys_file_getattr fs/file_attr.c:380 [inline]
 __se_sys_file_getattr+0x8c/0xbd0 fs/file_attr.c:372

Reported-by: syzbot+7c31755f2cea07838b0c@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=7c31755f2cea07838b0c
Tested-by: syzbot+7c31755f2cea07838b0c@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
Link: https://patch.msgid.link/tencent_B6C4583771D76766D71362A368696EC3B605@qq.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-02-14 13:17:43 +01:00
Simon Ser
e9e0b48cd1 drm/fourcc: fix plane order for 10/12/16-bit YCbCr formats
The short comments had the correct order, but the long comments
had the planes reversed.

Fixes: 2271e0a20e ("drm: drm_fourcc: add 10/12/16bit software decoder YCbCr formats")
Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Reviewed-by: Robert Mader <robert.mader@collabora.com>
Link: https://patch.msgid.link/20260208224718.57199-1-contact@emersion.fr
2026-02-14 11:11:30 +01:00
Franz Schnyder
0b87d51690 drm/bridge: ti-sn65dsi86: Enable HPD polling if IRQ is not used
Fallback to polling to detect hotplug events on systems without
interrupts.

On systems where the interrupt line of the bridge is not connected,
the bridge cannot notify hotplug events. Only add the
DRM_BRIDGE_OP_HPD flag if an interrupt has been registered
otherwise remain in polling mode.

Fixes: 55e8ff8420 ("drm/bridge: ti-sn65dsi86: Add HPD for DisplayPort connector type")
Cc: stable@vger.kernel.org # 6.16: 9133bc3f05: drm/bridge: ti-sn65dsi86: Add
Signed-off-by: Franz Schnyder <franz.schnyder@toradex.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
[dianders: Adjusted Fixes/stable line based on discussion]
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patch.msgid.link/20260206123758.374555-1-fra.schnyder@gmail.com
2026-02-13 12:42:34 -08:00
Kees Cook
ee5c38a8d3 KVM: arm64: vgic: Handle const qualifier from gic_kvm_info allocation type
In preparation for making the kmalloc family of allocators type aware,
we need to make sure that the returned type from the allocation matches
the type of the variable being assigned. (Before, the allocator would
always return "void *", which can be implicitly cast to any pointer type.)

The assigned type is "struct gic_kvm_info", but the returned type,
while matching, is const qualified. To get them exactly matching, just
use the dereferenced pointer for the sizeof().

Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20260206223022.it.052-kees@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-13 14:59:04 +00:00
Fuad Tabba
02471a78a0 KVM: arm64: Remove redundant kern_hyp_va() in unpin_host_sve_state()
The `sve_state` pointer in `hyp_vcpu->vcpu.arch` is initialized as a
hypervisor virtual address during vCPU initialization in
`pkvm_vcpu_init_sve()`.

`unpin_host_sve_state()` calls `kern_hyp_va()` on this address. Since
`kern_hyp_va()` is idempotent, it's not a bug. However, it is
unnecessary and potentially confusing. Remove the redundant conversion.

Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://patch.msgid.link/20260213143815.1732675-5-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-13 14:54:48 +00:00
Fuad Tabba
7e7c2cf002 KVM: arm64: Fix ID register initialization for non-protected pKVM guests
In protected mode, the hypervisor maintains a separate instance of
the `kvm` structure for each VM. For non-protected VMs, this structure is
initialized from the host's `kvm` state.

Currently, `pkvm_init_features_from_host()` copies the
`KVM_ARCH_FLAG_ID_REGS_INITIALIZED` flag from the host without the
underlying `id_regs` data being initialized. This results in the
hypervisor seeing the flag as set while the ID registers remain zeroed.

Consequently, `kvm_has_feat()` checks at EL2 fail (return 0) for
non-protected VMs. This breaks logic that relies on feature detection,
such as `ctxt_has_tcrx()` for TCR2_EL1 support. As a result, certain
system registers (e.g., TCR2_EL1, PIR_EL1, POR_EL1) are not
saved/restored during the world switch, which could lead to state
corruption.

Fix this by explicitly copying the ID registers from the host `kvm` to
the hypervisor `kvm` for non-protected VMs during initialization, since
we trust the host with its non-protected guests' features. Also ensure
`KVM_ARCH_FLAG_ID_REGS_INITIALIZED` is cleared initially in
`pkvm_init_features_from_host` so that `vm_copy_id_regs` can properly
initialize them and set the flag once done.

Fixes: 41d6028e28 ("KVM: arm64: Convert the SVE guest vcpu flag to a vm flag")
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://patch.msgid.link/20260213143815.1732675-4-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-13 14:54:48 +00:00
Fuad Tabba
9cb0468d0b KVM: arm64: Optimise away S1POE handling when not supported by host
Although ID register sanitisation prevents guests from seeing the
feature, adding this check to the helper allows the compiler to entirely
eliminate S1POE-specific code paths (such as context switching POR_EL1)
when the host kernel is compiled without support (CONFIG_ARM64_POE is
disabled).

This aligns with the pattern used for other optional features like SVE
(kvm_has_sve()) and FPMR (kvm_has_fpmr()), ensuring no POE logic if the
host lacks support, regardless of the guest configuration state.

Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://patch.msgid.link/20260213143815.1732675-3-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-13 14:54:48 +00:00
Fuad Tabba
f66857bafd KVM: arm64: Hide S1POE from guests when not supported by the host
When CONFIG_ARM64_POE is disabled, KVM does not save/restore POR_EL1.
However, ID_AA64MMFR3_EL1 sanitisation currently exposes the feature to
guests whenever the hardware supports it, ignoring the host kernel
configuration.

If a guest detects this feature and attempts to use it, the host will
fail to context-switch POR_EL1, potentially leading to state corruption.

Fix this by masking ID_AA64MMFR3_EL1.S1POE in the sanitised system
registers, preventing KVM from advertising the feature when the host
does not support it (i.e. system_supports_poe() is false).

Fixes: 70ed723829 ("KVM: arm64: Sanitise ID_AA64MMFR3_EL1")
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://patch.msgid.link/20260213143815.1732675-2-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-02-13 14:54:48 +00:00
Keith Busch
baa47c4f89 nvme-pci: do not try to add queue maps at runtime
The block layer allocates the set's maps once. We can't add special
purpose queues at runtime if they weren't allocated at initialization
time.

Tested-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-13 06:47:59 -08:00
Keith Busch
4735b510a0 nvme-pci: cap queue creation to used queues
If the user reduces the special queue count at runtime and resets the
controller, we need to reduce the number of queues and interrupts
requested accordingly rather than start with the pre-allocated queue
count.

Tested-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-13 06:47:41 -08:00
Keith Busch
166e31d7db nvme-pci: ensure we're polling a polled queue
A user can change the polled queue count at run time. There's a brief
window during a reset where a hipri task may try to poll that queue
before the block layer has updated the queue maps, which would race with
the now interrupt driven queue and may cause double completions.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-13 06:47:24 -08:00
Qingye Zhao
5ee01f1a73 cgroup: fix race between task migration and iteration
When a task is migrated out of a css_set, cgroup_migrate_add_task()
first moves it from cset->tasks to cset->mg_tasks via:

    list_move_tail(&task->cg_list, &cset->mg_tasks);

If a css_task_iter currently has it->task_pos pointing to this task,
css_set_move_task() calls css_task_iter_skip() to keep the iterator
valid. However, since the task has already been moved to ->mg_tasks,
the iterator is advanced relative to the mg_tasks list instead of the
original tasks list. As a result, remaining tasks on cset->tasks, as
well as tasks queued on cset->mg_tasks, can be skipped by iteration.

Fix this by calling css_set_skip_task_iters() before unlinking
task->cg_list from cset->tasks. This advances all active iterators to
the next task on cset->tasks, so iteration continues correctly even
when a task is concurrently being migrated.

This race is hard to hit in practice without instrumentation, but it
can be reproduced by artificially slowing down cgroup_procs_show().
For example, on an Android device a temporary
/sys/kernel/cgroup/cgroup_test knob can be added to inject a delay
into cgroup_procs_show(), and then:

  1) Spawn three long-running tasks (PIDs 101, 102, 103).
  2) Create a test cgroup and move the tasks into it.
  3) Enable a large delay via /sys/kernel/cgroup/cgroup_test.
  4) In one shell, read cgroup.procs from the test cgroup.
  5) Within the delay window, in another shell migrate PID 102 by
     writing it to a different cgroup.procs file.

Under this setup, cgroup.procs can intermittently show only PID 101
while skipping PID 103. Once the migration completes, reading the
file again shows all tasks as expected.

Note that this change does not allow removing the existing
css_set_skip_task_iters() call in css_set_move_task(). The new call
in cgroup_migrate_add_task() only handles iterators that are racing
with migration while the task is still on cset->tasks. Iterators may
also start after the task has been moved to cset->mg_tasks. If we
dropped css_set_skip_task_iters() from css_set_move_task(), such
iterators could keep task_pos pointing to a migrating task, causing
css_task_iter_advance() to malfunction on the destination css_set,
up to and including crashes or infinite loops.

The race window between migration and iteration is very small, and
css_task_iter is not on a hot path. In the worst case, when an
iterator is positioned on the first thread of the migrating process,
cgroup_migrate_add_task() may have to skip multiple tasks via
css_set_skip_task_iters(). However, this only happens when migration
and iteration actually race, so the performance impact is negligible
compared to the correctness fix provided here.

Fixes: b636fd38dc ("cgroup: Implement css_task_iter_skip()")
Cc: stable@vger.kernel.org # v5.2+
Signed-off-by: Qingye Zhao <zhaoqingye@honor.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-02-12 07:25:09 -10:00
Maurizio Lombardi
bbdaa8c17c nvme: fix memory leak in quirks_param_set()
When loading the nvme module, if the 'quirks' parameter is specified
via both the kernel command line (e.g., nvme.quirks=...) and the
modprobe command line (e.g., modprobe nvme quirks=...), the
quirks_param_set() callback is invoked twice.

Currently, in the double-invocation scenario, the second call
overwrites the nvme_pci_quirk_list pointer, causing the memory
allocated in the first call to leak.

Fix this by freeing the existing list before assigning the new one.

Fixes: b4247c8317c5 ("nvme: add support for dynamic quirk configuration via module parameter")
Reviewed-by: Daniel Wagner <dwagner@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-11 18:34:39 -08:00
Eric Dumazet
b777b5e09e time/jiffies: Inline jiffies_to_msecs() and jiffies_to_usecs()
For common cases (HZ=100, 250 or 1000), these helpers are at most one
multiply, so there is no point calling a tiny function.

Keep them out of line for HZ=300 and others.

This saves cycles in TCP fast path, among other things.

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/8 grow/shrink: 25/89 up/down: 530/-3474 (-2944)
...
nla_put_msecs                                193       -    -193
message_stats_print                         2131     920   -1211
Total: Before=25365208, After=25362264, chg -0.01%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260210170226.57209-1-edumazet@google.com
2026-02-11 08:55:52 +01:00
Felix Gu
fef0e649f8 drm/logicvc: Fix device node reference leak in logicvc_drm_config_parse()
The logicvc_drm_config_parse() function calls of_get_child_by_name() to
find the "layers" node but fails to release the reference, leading to a
device node reference leak.

Fix this by using the __free(device_node) cleanup attribute to automatic
release the reference when the variable goes out of scope.

Fixes: efeeaefe9b ("drm: Add support for the LogiCVC display controller")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Reviewed-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20260130-logicvc_drm-v1-1-04366463750c@gmail.com
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
2026-02-10 15:18:36 +01:00
Thomas Gleixner
fd3634312a debugobject: Make it work with deferred page initialization - again
debugobjects uses __GFP_HIGH for allocations as it might be invoked
within locked regions. That worked perfectly fine until v6.18. It still
works correctly when deferred page initialization is disabled and works
by chance when no page allocation is required before deferred page
initialization has completed.

Since v6.18 allocations w/o a reclaim flag cause new_slab() to end up in
alloc_frozen_pages_nolock_noprof(), which returns early when deferred
page initialization has not yet completed. As the deferred page
initialization takes quite a while the debugobject pool is depleted and
debugobjects are disabled.

This can be worked around when PREEMPT_COUNT is enabled as that allows
debugobjects to add __GFP_KSWAPD_RECLAIM to the GFP flags when the context
is preemtible. When PREEMPT_COUNT is disabled the context is unknown and
the reclaim bit can't be set because the caller might hold locks which
might deadlock in the allocator.

In preemptible context the reclaim bit is harmless and not a performance
issue as that's usually invoked from slow path initialization context.

That makes debugobjects depend on PREEMPT_COUNT || !DEFERRED_STRUCT_PAGE_INIT.

Fixes: af92793e52 ("slab: Introduce kmalloc_nolock() and kfree_nolock().")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Link: https://patch.msgid.link/87pl6gznti.ffs@tglx
2026-02-10 13:26:19 +01:00
Tetsuo Handa
4efa91a285 xfrm: always flush state and policy upon NETDEV_UNREGISTER event
syzbot is reporting that "struct xfrm_state" refcount is leaking.

  unregister_netdevice: waiting for netdevsim0 to become free. Usage count = 2
  ref_tracker: netdev@ffff888052f24618 has 1/1 users at
       __netdev_tracker_alloc include/linux/netdevice.h:4400 [inline]
       netdev_tracker_alloc include/linux/netdevice.h:4412 [inline]
       xfrm_dev_state_add+0x3a5/0x1080 net/xfrm/xfrm_device.c:316
       xfrm_state_construct net/xfrm/xfrm_user.c:986 [inline]
       xfrm_add_sa+0x34ff/0x5fa0 net/xfrm/xfrm_user.c:1022
       xfrm_user_rcv_msg+0x58e/0xc00 net/xfrm/xfrm_user.c:3507
       netlink_rcv_skb+0x158/0x420 net/netlink/af_netlink.c:2550
       xfrm_netlink_rcv+0x71/0x90 net/xfrm/xfrm_user.c:3529
       netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline]
       netlink_unicast+0x5aa/0x870 net/netlink/af_netlink.c:1344
       netlink_sendmsg+0x8c8/0xdd0 net/netlink/af_netlink.c:1894
       sock_sendmsg_nosec net/socket.c:727 [inline]
       __sock_sendmsg net/socket.c:742 [inline]
       ____sys_sendmsg+0xa5d/0xc30 net/socket.c:2592
       ___sys_sendmsg+0x134/0x1d0 net/socket.c:2646
       __sys_sendmsg+0x16d/0x220 net/socket.c:2678
       do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
       do_syscall_64+0xcd/0xf80 arch/x86/entry/syscall_64.c:94
       entry_SYSCALL_64_after_hwframe+0x77/0x7f

This is because commit d77e38e612 ("xfrm: Add an IPsec hardware
offloading API") implemented xfrm_dev_unregister() as no-op despite
xfrm_dev_state_add() from xfrm_state_construct() acquires a reference
to "struct net_device".
I guess that that commit expected that NETDEV_DOWN event is fired before
NETDEV_UNREGISTER event fires, and also assumed that xfrm_dev_state_add()
is called only if (dev->features & NETIF_F_HW_ESP) != 0.

Sabrina Dubroca identified steps to reproduce the same symptoms as below.

  echo 0 > /sys/bus/netdevsim/new_device
  dev=$(ls -1 /sys/bus/netdevsim/devices/netdevsim0/net/)
  ip xfrm state add src 192.168.13.1 dst 192.168.13.2 proto esp \
     spi 0x1000 mode tunnel aead 'rfc4106(gcm(aes))' $key 128   \
     offload crypto dev $dev dir out
  ethtool -K $dev esp-hw-offload off
  echo 0 > /sys/bus/netdevsim/del_device

Like these steps indicate, the NETIF_F_HW_ESP bit can be cleared after
xfrm_dev_state_add() acquired a reference to "struct net_device".
Also, xfrm_dev_state_add() does not check for the NETIF_F_HW_ESP bit
when acquiring a reference to "struct net_device".

Commit 03891f820c ("xfrm: handle NETDEV_UNREGISTER for xfrm device")
re-introduced the NETDEV_UNREGISTER event to xfrm_dev_event(), but that
commit for unknown reason chose to share xfrm_dev_down() between the
NETDEV_DOWN event and the NETDEV_UNREGISTER event.
I guess that that commit missed the behavior in the previous paragraph.

Therefore, we need to re-introduce xfrm_dev_unregister() in order to
release the reference to "struct net_device" by unconditionally flushing
state and policy.

Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84
Fixes: d77e38e612 ("xfrm: Add an IPsec hardware offloading API")
Cc: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-02-09 10:28:05 +01:00
Felix Gu
0528a348b0 cache: ax45mp: Fix device node reference leak in ax45mp_cache_init()
In ax45mp_cache_init(), of_find_matching_node() returns a device node
with an incremented reference count that must be released with
of_node_put(). The current code fails to call of_node_put() which
causes a reference leak.

Use the __free(device_node) attribute to ensure automatic cleanup when
the variable goes out of scope.

Fixes: d34599bcd2 ("cache: Add L2 cache management for Andes AX45MP RISC-V core")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
2026-02-06 19:54:40 +00:00
Felix Gu
3c85234b97 cache: starfive: fix device node leak in starlink_cache_init()
of_find_matching_node() returns a device_node with refcount incremented.

Use __free(device_node) attribute to automatically call of_node_put()
when the variable goes out of scope, preventing the refcount leak.

Fixes: cabff60ca7 ("cache: Add StarFive StarLink cache management")
Signed-off-by: Felix Gu <ustc.gu@gmail.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
2026-02-06 19:54:27 +00:00
Conor Dooley
ff4b6bf7ee riscv: dts: microchip: add can resets to mpfs
The can IP on PolarFire SoC requires the use of the blocks reset
during normal operation, and the property is therefore required by the
binding, causing a warning on the m100pfsevp board where it is default
enabled:
mpfs-m100pfsevp.dtb: can@2010c000 (microchip,mpfs-can): 'resets' is a required property
Add the reset to both can nodes.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
2026-02-06 19:53:29 +00:00
Corey Minyard
c3bb329563 ipmi:si: Use a long timeout when the BMC is misbehaving
If the driver goes into HOSED state, don't reset the timeout to the
short timeout in the timeout handler.

Reported-by: Igor Raits <igor@gooddata.com>
Closes: https://lore.kernel.org/linux-acpi/CAK8fFZ58fidGUCHi5WFX0uoTPzveUUDzT=k=AAm4yWo3bAuCFg@mail.gmail.com/
Fixes: bc3a9d2177 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Cc: stable@vger.kernel.org # 4.18
Signed-off-by: Corey Minyard <corey@minyard.net>
2026-02-06 11:06:26 -06:00
Corey Minyard
52c9ee202e ipmi:si: Handle waiting messages when BMC failure detected
If a BMC failure is detected, the current message is returned with an
error.  However, if there was a waiting message, it would not be
handled.

Add a check for the waiting message after handling the current message.

Suggested-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Closes: https://lore.kernel.org/linux-acpi/CAK8fFZ58fidGUCHi5WFX0uoTPzveUUDzT=k=AAm4yWo3bAuCFg@mail.gmail.com/
Fixes: bc3a9d2177 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Cc: stable@vger.kernel.org # 4.18
Signed-off-by: Corey Minyard <corey@minyard.net>
2026-02-06 10:50:59 -06:00
John Garry
3ddfbfbc78 nvme: correct comment about nvme_ns_remove()
The comment in nvme_mpath_remove_disk() references nvme_remove_ns(), which
should be nvme_ns_remove().

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-06 07:59:25 -08:00
John Garry
63059500b1 nvme: stop setting namespace gendisk device driver data
Since commit 1f4137e882 ("nvme: move passthrough logging attribute to
head"), we stopped using the namespace to hold the passthrough logging
enabled attribute. There is now nowhere now which looks up the gendisk dev
driver data, so stop setting it.

Incidentally, it would have been better to set this before adding the
disk.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-06 07:59:11 -08:00
Ian Forbes
5023ca80f9 drm/vmwgfx: Return the correct value in vmw_translate_ptr functions
Before the referenced fixes these functions used a lookup function that
returned a pointer. This was changed to another lookup function that
returned an error code with the pointer becoming an out parameter.

The error path when the lookup failed was not changed to reflect this
change and the code continued to return the PTR_ERR of the now
uninitialized pointer. This could cause the vmw_translate_ptr functions
to return success when they actually failed causing further uninitialized
and OOB accesses.

Reported-by: Kuzey Arda Bulut <kuzeyardabulut@gmail.com>
Fixes: a309c7194e ("drm/vmwgfx: Remove rcu locks from user resources")
Signed-off-by: Ian Forbes <ian.forbes@broadcom.com>
Reviewed-by: Zack Rusin <zack.rusin@broadcom.com>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Link: https://patch.msgid.link/20260113175357.129285-1-ian.forbes@broadcom.com
2026-02-05 12:44:23 -05:00
Ian Forbes
922f9dec5d drm/vmwgfx: Set a unique ID for each submitted command buffer
These IDs are logged by the Hypervisor when debug logging is enabled.
Having the IDs in the log makes it much easier to see when command
buffers start and finish. They can also be used by logging/tracing
in the Guest to help correlate between Guest and Hypervisor logs.

Signed-off-by: Ian Forbes <ian.forbes@broadcom.com>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Link: https://patch.msgid.link/20260109155139.3259493-1-ian.forbes@broadcom.com
2026-02-05 12:44:14 -05:00
Brad Spengler
211ecfaaef drm/vmwgfx: Fix invalid kref_put callback in vmw_bo_dirty_release
The kref_put() call uses (void *)kvfree as the release callback, which
is incorrect. kref_put() expects a function with signature
void (*release)(struct kref *), but kvfree has signature
void (*)(const void *). Calling through an incompatible function pointer
is undefined behavior.

The code only worked by accident because ref_count is the first member
of vmw_bo_dirty, making the kref pointer equal to the struct pointer.

Fix this by adding a proper release callback that uses container_of()
to retrieve the containing structure before freeing.

Fixes: c1962742ff ("drm/vmwgfx: Use kref in vmw_bo_dirty")
Signed-off-by: Brad Spengler <brad.spengler@opensrcsec.com>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Cc: Ian Forbes <ian.forbes@broadcom.com>
Link: https://patch.msgid.link/20260107171236.3573118-1-zack.rusin@broadcom.com
2026-02-05 12:43:51 -05:00
Maurizio Lombardi
7bb8c40f5a nvme: add support for dynamic quirk configuration via module parameter
Introduce support for enabling or disabling specific NVMe quirks at module
load time through the `quirks` module parameter.
This mechanism allows users to apply known quirks dynamically based on the
device's PCI vendor and device IDs, without requiring to add hardcoded
entries in the driver and recompiling the kernel.

While the generic PCI new_id sysfs interface exists for dynamic
configuration, it is insufficient for scenarios where the system fails
to boot (for example, this has been reported to happen because of the
bogus_nid quirk). The new_id attribute is writable only after the system
has booted and sysfs is mounted.

The `quirks` parameter accepts a list of quirk specifications separated by
a '-' character in the following format:

<VID>:<DID>:<quirk_names>[-<VID>:<DID>:<quirk_names>-..]

Each quirk is represented by its name and can be prefixed with `^` to
indicate that the quirk should be disabled; quirk names are separated by
a ',' character.

Example: enable BOGUS_NID and BROKEN_MSI, disable DEALLOCATE_ZEROES:

   $ modprobe nvme quirks=7170:2210:bogus_nid,broken_msi,^deallocate_zeroes

Tested-by: Daniel Wagner <dwagner@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-05 07:35:58 -08:00
Corey Minyard
6b157b408d ipmi:ls2k: Make ipmi_ls2k_platform_driver static
No need for it to be global.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202601170753.3zDBerGP-lkp@intel.com/
Signed-off-by: Corey Minyard <corey@minyard.net>
2026-02-03 21:06:19 -06:00
Matt Johnston
9f235ccecd ipmi: ipmb: initialise event handler read bytes
IPMB doesn't use i2c reads, but the handler needs to set a value.
Otherwise an i2c read will return an uninitialised value from the bus
driver.

Fixes: 63c4eb3471 ("ipmi:ipmb: Add initial support for IPMI over IPMB")
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
Message-ID: <20260113-ipmb-read-init-v1-1-a9cbce7b94e3@codeconstruct.com.au>
Signed-off-by: Corey Minyard <corey@minyard.net>
2026-02-03 21:06:18 -06:00
Corey Minyard
1d90e6c1a5 ipmi: Consolidate the run to completion checking for xmit msgs lock
It made things hard to read, move the check to a function.

Signed-off-by: Corey Minyard <corey@minyard.net>
Reviewed-by: Breno Leitao <leitao@debian.org>
2026-02-03 21:06:18 -06:00
Corey Minyard
594c11d0e1 ipmi: Fix use-after-free and list corruption on sender error
The analysis from Breno:

When the SMI sender returns an error, smi_work() delivers an error
response but then jumps back to restart without cleaning up properly:

1. intf->curr_msg is not cleared, so no new message is pulled
2. newmsg still points to the message, causing sender() to be called
   again with the same message
3. If sender() fails again, deliver_err_response() is called with
   the same recv_msg that was already queued for delivery

This causes list_add corruption ("list_add double add") because the
recv_msg is added to the user_msgs list twice. Subsequently, the
corrupted list leads to use-after-free when the memory is freed and
reused, and eventually a NULL pointer dereference when accessing
recv_msg->done.

The buggy sequence:

  sender() fails
    -> deliver_err_response(recv_msg)  // recv_msg queued for delivery
    -> goto restart                    // curr_msg not cleared!
  sender() fails again (same message!)
    -> deliver_err_response(recv_msg)  // tries to queue same recv_msg
    -> LIST CORRUPTION

Fix this by freeing the message and setting it to NULL on a send error.
Also, always free the newmsg on a send error, otherwise it will leak.

Reported-by: Breno Leitao <leitao@debian.org>
Closes: https://lore.kernel.org/lkml/20260127-ipmi-v1-0-ba5cc90f516f@debian.org/
Fixes: 9cf93a8fa9 ("ipmi: Allow an SMI sender to return an error")
Cc: stable@vger.kernel.org # 4.18
Reviewed-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Corey Minyard <corey@minyard.net>
2026-02-03 21:02:54 -06:00
Ming Lei
b84bb7bd91 nvme: fix admin queue leak on controller reset
When nvme_alloc_admin_tag_set() is called during a controller reset,
a previous admin queue may still exist. Release it properly before
allocating a new one to avoid orphaning the old queue.

This fixes a regression introduced by commit 03b3bcd319 ("nvme: fix
admin request_queue lifetime").

Cc: Keith Busch <kbusch@kernel.org>
Fixes: 03b3bcd319 ("nvme: fix admin request_queue lifetime").
Reported-and-tested-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/linux-block/CAHj4cs9wv3SdPo+N01Fw2SHBYDs9tj2M_e1-GdQOkRy=DsBB1w@mail.gmail.com/
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-02 08:09:29 -08:00
Daniel Hodges
0a1fc2f301 nvme-fabrics: use kfree_sensitive() for DHCHAP secrets
The DHCHAP secrets (dhchap_secret and dhchap_ctrl_secret) contain
authentication key material for NVMe-oF. Use kfree_sensitive() instead
of kfree() in nvmf_free_options() to ensure secrets are zeroed before
the memory is freed, preventing recovery from freed pages.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Daniel Hodges <hodgesd@meta.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-02-02 08:06:33 -08:00
Leon Romanovsky
0a4524bc69 xfrm: skip templates check for packet offload tunnel mode
In packet offload, hardware is responsible to check templates. The
result of its operation is forwarded through secpath by relevant
drivers. That secpath is actually removed in __xfrm_policy_check2().

In case packet is forwarded, this secpath is reset in RX, but pushed
again to TX where policy is rechecked again against dummy secpath
in xfrm_policy_ok().

Such situation causes to unexpected XfrmInTmplMismatch increase.

As a solution, simply skip template mismatch check.

Fixes: 600258d555 ("xfrm: delete intermediate secpath entry in packet offload mode")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-02-02 08:15:38 +01:00
Jiayuan Chen
1799d8abea xfrm6: fix uninitialized saddr in xfrm6_get_saddr()
xfrm6_get_saddr() does not check the return value of
ipv6_dev_get_saddr(). When ipv6_dev_get_saddr() fails to find a suitable
source address (returns -EADDRNOTAVAIL), saddr->in6 is left
uninitialized, but xfrm6_get_saddr() still returns 0 (success).

This causes the caller xfrm_tmpl_resolve_one() to use the uninitialized
address in xfrm_state_find(), triggering KMSAN warning:

=====================================================
BUG: KMSAN: uninit-value in xfrm_state_find+0x2424/0xa940
 xfrm_state_find+0x2424/0xa940
 xfrm_resolve_and_create_bundle+0x906/0x5a20
 xfrm_lookup_with_ifid+0xcc0/0x3770
 xfrm_lookup_route+0x63/0x2b0
 ip_route_output_flow+0x1ce/0x270
 udp_sendmsg+0x2ce1/0x3400
 inet_sendmsg+0x1ef/0x2a0
 __sock_sendmsg+0x278/0x3d0
 __sys_sendto+0x593/0x720
 __x64_sys_sendto+0x130/0x200
 x64_sys_call+0x332b/0x3e70
 do_syscall_64+0xd3/0xf80
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Local variable tmp.i.i created at:
 xfrm_resolve_and_create_bundle+0x3e3/0x5a20
 xfrm_lookup_with_ifid+0xcc0/0x3770
=====================================================

Fix by checking the return value of ipv6_dev_get_saddr() and propagating
the error.

Fixes: a1e59abf82 ("[XFRM]: Fix wildcard as tunnel source")
Reported-by: syzbot+e136d86d34b42399a8b1@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68bf1024.a70a0220.7a912.02c2.GAE@google.com/T/
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2026-02-02 08:03:47 +01:00
John Garry
ac30cd3043 nvme: stop using AWUPF
As described at [0], much of the atomic write parts of the specification
are lacking.

For now, there is nothing which we can do in software about the lack of
a dedicated NVMe write atomic command.

As for reading the atomic write limits, it is felt that the per-namespace
values are mostly properly specified and it is assumed that they are
properly implemented.

The specification of NAWUPF is quite clear. However the specification of
NABSPF is less clear. The lack of clarity in NABSPF comes from deciding
whether NABSPF applies when NSABP is 0 - it is assumed that NSABPF does
not apply when NSABP is 0.

As for the per-controller AWUPF, how this value applies to shared
namespaces is missing in the specification. Furthermore, the value is in
terms of logical blocks, which is an NS entity.

Since AWUPF is so poorly defined, stop using it already together.
Hopefully this will force vendors to implement NAWUPF support always.

Note that AWUPF not only effects atomic write support, but also the
physical block size reported for the device.

To help users know this restriction, log an info message per NS.

[0] https://lore.kernel.org/linux-nvme/20250707141834.GA30198@lst.de/

Tested-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-01-28 07:02:38 -08:00
Zilin Guan
5a741f8cc6 soc: microchip: mpfs: Fix memory leak in mpfs_sys_controller_probe()
In mpfs_sys_controller_probe(), if of_get_mtd_device_by_node() fails,
the function returns immediately without freeing the allocated memory
for sys_controller, leading to a memory leak.

Fix this by jumping to the out_free label to ensure the memory is
properly freed.

Also, consolidate the error handling for the mbox_request_channel()
failure case to use the same label.

Fixes: 742aa6c563 ("soc: microchip: mpfs: enable access to the system controller's flash")
Co-developed-by: Jianhao Xu <jianhao.xu@seu.edu.cn>
Signed-off-by: Jianhao Xu <jianhao.xu@seu.edu.cn>
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
2026-01-15 19:15:19 +00:00
Maurizio Lombardi
ddfb8b322b nvme: expose active quirks in sysfs
Currently, there is no straightforward way for a user to inspect
which quirks are active for a given device from userspace.

Add a new "quirks" sysfs attribute to the nvme controller device.

Reading this file will display a human-readable list
of all active quirks, with each quirk name on a new line.
If no quirks are active, it will display "none".

Tested-by: John Meneghini <jmeneghi@redhat.com>
Reviewed-by: John Meneghini <jmeneghi@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-01-13 13:53:03 -08:00
Wilfred Mallawa
f947d9e77b nvme/host: fixup some typos
Fix up some minor typos in the nvme host driver and a comment
style to conform to the standard kernel style.

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2026-01-13 13:44:52 -08:00
Sean Christopherson
da142f3d37 KVM: Remove subtle "struct kvm_stats_desc" pseudo-overlay
Remove KVM's internal pseudo-overlay of kvm_stats_desc, which subtly
aliases the flexible name[] in the uAPI definition with a fixed-size array
of the same name.  The unusual embedded structure results in compiler
warnings due to -Wflex-array-member-not-at-end, and also necessitates an
extra level of dereferencing in KVM.  To avoid the "overlay", define the
uAPI structure to have a fixed-size name when building for the kernel.

Opportunistically clean up the indentation for the stats macros, and
replace spaces with tabs.

No functional change intended.

Reported-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Closes: https://lore.kernel.org/all/aPfNKRpLfhmhYqfP@kspp
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
[..]
Acked-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20251205232655.445294-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2026-01-08 10:40:48 -08:00
2138 changed files with 29799 additions and 13816 deletions

View File

@@ -210,10 +210,16 @@ Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@linaro.org>
Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@free.fr>
Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@linexp.org>
Daniel Lezcano <daniel.lezcano@kernel.org> <dlezcano@fr.ibm.com>
Daniel Thompson <danielt@kernel.org> <daniel.thompson@linaro.org>
Daniele Alessandrelli <daniele.alessandrelli@gmail.com> <daniele.alessandrelli@intel.com>
Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
David Brownell <david-b@pacbell.net>
David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
David Gow <david@davidgow.net> <davidgow@google.com>
David Heidelberg <david@ixit.cz> <d.okias@gmail.com>
David Hildenbrand <david@kernel.org> <david@redhat.com>
David Rheinsberg <david@readahead.eu> <dh.herrmann@gmail.com>
@@ -310,6 +316,7 @@ Hans Verkuil <hverkuil@kernel.org> <hverkuil-cisco@xs4all.nl>
Hans Verkuil <hverkuil@kernel.org> <hansverk@cisco.com>
Hao Ge <hao.ge@linux.dev> <gehao@kylinos.cn>
Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com>
Harry Yoo <harry@kernel.org> <harry.yoo@oracle.com>
Heiko Carstens <hca@linux.ibm.com> <h.carstens@de.ibm.com>
Heiko Carstens <hca@linux.ibm.com> <heiko.carstens@de.ibm.com>
Heiko Stuebner <heiko@sntech.de> <heiko.stuebner@bqreaders.com>
@@ -321,6 +328,7 @@ Henrik Rydberg <rydberg@bitmath.org>
Herbert Xu <herbert@gondor.apana.org.au>
Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
Ignat Korchagin <ignat@linux.win> <ignat@cloudflare.com>
Ike Panhc <ikepanhc@gmail.com> <ike.pan@canonical.com>
J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
@@ -348,6 +356,7 @@ Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@opinsys.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
Jason Xing <kerneljasonxing@gmail.com> <kernelxing@tencent.com>
<javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
Jayachandran C <c.jayachandran@gmail.com> <jayachandranc@netlogicmicro.com>
@@ -396,6 +405,7 @@ Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
Joe Damato <joe@dama.to> <jdamato@fastly.com>
Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
@@ -490,7 +500,8 @@ Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>
Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@linaro.org>
Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@intel.com>
Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com>
Lorenzo Stoakes <ljs@kernel.org> <lstoakes@gmail.com>
Lorenzo Stoakes <ljs@kernel.org> <lorenzo.stoakes@oracle.com>
Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
Luca Weiss <luca@lucaweiss.eu> <luca@z3ntu.xyz>
Lucas De Marchi <demarchi@kernel.org> <lucas.demarchi@intel.com>
@@ -577,6 +588,7 @@ Morten Welinder <terra@gnome.org>
Morten Welinder <welinder@anemone.rentec.com>
Morten Welinder <welinder@darter.rentec.com>
Morten Welinder <welinder@troll.com>
Muhammad Usama Anjum <usama.anjum@arm.com> <usama.anjum@collabora.com>
Mukesh Ojha <quic_mojha@quicinc.com> <mojha@codeaurora.org>
Muna Sinada <quic_msinada@quicinc.com> <msinada@codeaurora.org>
Murali Nalajala <quic_mnalajal@quicinc.com> <mnalajal@codeaurora.org>
@@ -876,6 +888,7 @@ Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
Vlastimil Babka <vbabka@kernel.org> <vbabka@suse.cz>
WangYuli <wangyuli@aosc.io> <wangyl5933@chinaunicom.cn>
WangYuli <wangyuli@aosc.io> <wangyuli@deepin.org>
Weiwen Hu <huweiwen@linux.alibaba.com> <sehuww@mail.scut.edu.cn>
@@ -890,7 +903,8 @@ Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
Yixun Lan <dlan@kernel.org> <dlan@gentoo.org>
Yixun Lan <dlan@kernel.org> <yixun.lan@amlogic.com>
Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
Yosry Ahmed <yosry@kernel.org> <yosryahmed@google.com>
Yosry Ahmed <yosry@kernel.org> <yosry.ahmed@linux.dev>
Yu-Chun Lin <eleanor.lin@realtek.com> <eleanor15x@gmail.com>
Yusuke Goda <goda.yusuke@renesas.com>
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>

View File

@@ -1242,6 +1242,10 @@ N: Veaceslav Falico
E: vfalico@gmail.com
D: Co-maintainer and co-author of the network bonding driver.
N: Thomas Falcon
E: tlfalcon@linux.ibm.com
D: Initial author of the IBM ibmvnic network driver
N: János Farkas
E: chexum@shadow.banki.hu
D: romfs, various (mostly networking) fixes
@@ -2415,6 +2419,10 @@ S: Am Muehlenweg 38
S: D53424 Remagen
S: Germany
N: Jonathan Lemon
E: jonathan.lemon@gmail.com
D: OpenCompute PTP clock driver (ptp_ocp)
N: Colin Leroy
E: colin@colino.net
W: http://www.geekounet.org/

View File

@@ -151,11 +151,11 @@ Description:
The algorithm_params file is write-only and is used to setup
compression algorithm parameters.
What: /sys/block/zram<id>/writeback_compressed
What: /sys/block/zram<id>/compressed_writeback
Date: Decemeber 2025
Contact: Richard Chang <richardycc@google.com>
Description:
The writeback_compressed device atrribute toggles compressed
The compressed_writeback device atrribute toggles compressed
writeback feature.
What: /sys/block/zram<id>/writeback_batch_size

View File

@@ -1,4 +1,4 @@
What: /sys/bus/platform/devices/INOU0000:XX/fn_lock_toggle_enable
What: /sys/bus/platform/devices/INOU0000:XX/fn_lock
Date: November 2025
KernelVersion: 6.19
Contact: Armin Wolf <W_Armin@gmx.de>
@@ -8,15 +8,15 @@ Description:
Reading this file returns the current enable status of the FN lock functionality.
What: /sys/bus/platform/devices/INOU0000:XX/super_key_toggle_enable
What: /sys/bus/platform/devices/INOU0000:XX/super_key_enable
Date: November 2025
KernelVersion: 6.19
Contact: Armin Wolf <W_Armin@gmx.de>
Description:
Allows userspace applications to enable/disable the super key functionality
of the integrated keyboard by writing "1"/"0" into this file.
Allows userspace applications to enable/disable the super key of the integrated
keyboard by writing "1"/"0" into this file.
Reading this file returns the current enable status of the super key functionality.
Reading this file returns the current enable status of the super key.
What: /sys/bus/platform/devices/INOU0000:XX/touchpad_toggle_enable
Date: November 2025

View File

@@ -85,6 +85,16 @@ In the example, 'Requester ID' means the ID of the device that sent
the error message to the Root Port. Please refer to PCIe specs for other
fields.
The 'TLP Header' is the prefix/header of the TLP that caused the error
in raw hex format. To decode the TLP Header into human-readable form
one may use tlp-tool:
https://github.com/mmpg-x86/tlp-tool
Example usage::
curl -L https://git.kernel.org/linus/2ca1c94ce0b6 | rtlp-tool --aer
AER Ratelimits
--------------

View File

@@ -216,7 +216,7 @@ writeback_limit WO specifies the maximum amount of write IO zram
writeback_limit_enable RW show and set writeback_limit feature
writeback_batch_size RW show and set maximum number of in-flight
writeback operations
writeback_compressed RW show and set compressed writeback feature
compressed_writeback RW show and set compressed writeback feature
comp_algorithm RW show and change the compression algorithm
algorithm_params WO setup compression algorithm parameters
compact WO trigger memory compaction
@@ -439,11 +439,11 @@ budget in next setting is user's job.
By default zram stores written back pages in decompressed (raw) form, which
means that writeback operation involves decompression of the page before
writing it to the backing device. This behavior can be changed by enabling
`writeback_compressed` feature, which causes zram to write compressed pages
`compressed_writeback` feature, which causes zram to write compressed pages
to the backing device, thus avoiding decompression overhead. To enable
this feature, execute::
$ echo yes > /sys/block/zramX/writeback_compressed
$ echo yes > /sys/block/zramX/compressed_writeback
Note that this feature should be configured before the `zramX` device is
initialized.

View File

@@ -74,6 +74,7 @@
TPM TPM drivers are enabled.
UMS USB Mass Storage support is enabled.
USB USB support is enabled.
NVME NVMe support is enabled
USBHID USB Human Interface Device support is enabled.
V4L Video For Linux support is enabled.
VGA The VGA console has been enabled.
@@ -4787,6 +4788,18 @@ Kernel parameters
This can be set from sysctl after boot.
See Documentation/admin-guide/sysctl/vm.rst for details.
nvme.quirks= [NVME] A list of quirk entries to augment the built-in
nvme quirk list. List entries are separated by a
'-' character.
Each entry has the form VendorID:ProductID:quirk_names.
The IDs are 4-digits hex numbers and quirk_names is a
list of quirk names separated by commas. A quirk name
can be prefixed by '^', meaning that the specified
quirk must be disabled.
Example:
nvme.quirks=7710:2267:bogus_nid,^identify_cns-9900:7711:broken_msi
ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver.
See Documentation/core-api/debugging-via-ohci1394.rst for more
info.
@@ -8183,6 +8196,9 @@ Kernel parameters
p = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT
(Reduce timeout of the SET_ADDRESS
request from 5000 ms to 500 ms);
q = USB_QUIRK_FORCE_ONE_CONFIG (Device
claims zero configurations,
forcing to 1);
Example: quirks=0781:5580:bk,0a5c:5834:gij
usbhid.mousepoll=

View File

@@ -24,7 +24,7 @@ Keyboard settings
The ``uniwill-laptop`` driver allows the user to enable/disable:
- the FN and super key lock functionality of the integrated keyboard
- the FN lock and super key of the integrated keyboard
- the touchpad toggle functionality of the integrated touchpad
See Documentation/ABI/testing/sysfs-driver-uniwill-laptop for details.

View File

@@ -594,6 +594,9 @@ Values:
their sockets will only be able to connect within their own
namespace.
The first write to ``child_ns_mode`` locks its value. Subsequent writes of the
same value succeed, but writing a different value returns ``-EBUSY``.
Changing ``child_ns_mode`` only affects namespaces created after the change;
it does not modify the current namespace or any existing children.

View File

@@ -149,11 +149,33 @@ For architectures that require cache flushing for DMA coherence
DMA_ATTR_MMIO will not perform any cache flushing. The address
provided must never be mapped cacheable into the CPU.
DMA_ATTR_CPU_CACHE_CLEAN
------------------------
DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
------------------------------------
This attribute indicates the CPU will not dirty any cacheline overlapping this
DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows
multiple small buffers to safely share a cacheline without risk of data
corruption, suppressing DMA debug warnings about overlapping mappings.
All mappings sharing a cacheline should have this attribute.
This attribute indicates that CPU cache lines may overlap for buffers mapped
with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
Such overlap may occur when callers map multiple small buffers that reside
within the same cache line. In this case, callers must guarantee that the CPU
will not dirty these cache lines after the mappings are established. When this
condition is met, multiple buffers can safely share a cache line without risking
data corruption.
All mappings that share a cache line must set this attribute to suppress DMA
debug warnings about overlapping mappings.
DMA_ATTR_REQUIRE_COHERENT
-------------------------
DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT fail on any
system where SWIOTLB or cache management is required. This should only
be used to support uAPI designs that require continuous HW DMA
coherence with userspace processes, for example RDMA and DRM. At a
minimum the memory being mapped must be userspace memory from
pin_user_pages() or similar.
Drivers should consider using dma_mmap_pages() instead of this
interface when building their uAPIs, when possible.
It must never be used in an in-kernel driver that only works with
kernel memory.

View File

@@ -336,6 +336,8 @@ command line arguments:
- ``--list_tests_attr``: If set, lists all tests that will be run and all of their
attributes.
- ``--list_suites``: If set, lists all suites that will be run.
Command-line completion
==============================

View File

@@ -66,7 +66,7 @@ then:
required:
- refresh-rate-hz
additionalProperties: false
unevaluatedProperties: false
examples:
- |

View File

@@ -253,7 +253,6 @@ allOf:
enum:
# these platforms support 2 streams MST on some interfaces,
# others are SST only
- qcom,glymur-dp
- qcom,sc8280xp-dp
- qcom,x1e80100-dp
then:
@@ -310,6 +309,26 @@ allOf:
minItems: 6
maxItems: 8
- if:
properties:
compatible:
contains:
enum:
# these platforms support 2 streams MST on some interfaces,
# others are SST only, but all controllers have 4 ports
- qcom,glymur-dp
then:
properties:
reg:
minItems: 9
maxItems: 9
clocks:
minItems: 5
maxItems: 6
clocks-names:
minItems: 5
maxItems: 6
unevaluatedProperties: false
examples:

View File

@@ -176,13 +176,17 @@ examples:
};
};
displayport-controller@ae90000 {
displayport-controller@af54000 {
compatible = "qcom,glymur-dp";
reg = <0xae90000 0x200>,
<0xae90200 0x200>,
<0xae90400 0x600>,
<0xae91000 0x400>,
<0xae91400 0x400>;
reg = <0xaf54000 0x200>,
<0xaf54200 0x200>,
<0xaf55000 0xc00>,
<0xaf56000 0x400>,
<0xaf57000 0x400>,
<0xaf58000 0x400>,
<0xaf59000 0x400>,
<0xaf5a000 0x600>,
<0xaf5b000 0x600>;
interrupt-parent = <&mdss>;
interrupts = <12>;

View File

@@ -10,7 +10,7 @@ maintainers:
- Krzysztof Kozlowski <krzk@kernel.org>
description:
SM8650 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like
SM8750 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like
DPU display controller, DSI and DP interfaces etc.
$ref: /schemas/display/msm/mdss-common.yaml#

View File

@@ -16,7 +16,6 @@ description: |
properties:
compatible:
enum:
- kontron,sa67mcu-hwmon
- kontron,sl28cpld-fan
reg:

View File

@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Synopsys DesignWare APB I2C Controller
maintainers:
- Jarkko Nikula <jarkko.nikula@linux.intel.com>
- Mika Westerberg <mika.westerberg@linux.intel.com>
allOf:
- $ref: /schemas/i2c/i2c-controller.yaml#

View File

@@ -19,9 +19,6 @@ description:
Flash sub nodes describe the memory range and optional per-flash
properties.
allOf:
- $ref: mtd.yaml#
properties:
compatible:
const: st,spear600-smi
@@ -42,14 +39,29 @@ properties:
$ref: /schemas/types.yaml#/definitions/uint32
description: Functional clock rate of the SMI controller in Hz.
st,smi-fast-mode:
type: boolean
description: Indicates that the attached flash supports fast read mode.
patternProperties:
"^flash@.*$":
$ref: /schemas/mtd/mtd.yaml#
properties:
reg:
maxItems: 1
st,smi-fast-mode:
type: boolean
description: Indicates that the attached flash supports fast read mode.
unevaluatedProperties: false
required:
- reg
required:
- compatible
- reg
- clock-rate
- "#address-cells"
- "#size-cells"
unevaluatedProperties: false
@@ -64,7 +76,7 @@ examples:
interrupts = <12>;
clock-rate = <50000000>; /* 50 MHz */
flash@f8000000 {
flash@fc000000 {
reg = <0xfc000000 0x1000>;
st,smi-fast-mode;
};

View File

@@ -87,6 +87,7 @@ required:
allOf:
- $ref: can-controller.yaml#
- $ref: /schemas/memory-controllers/mc-peripheral-props.yaml
- if:
properties:
compatible:

View File

@@ -42,7 +42,7 @@ properties:
- const: mgbe
- const: mac
- const: mac-divider
- const: ptp-ref
- const: ptp_ref
- const: rx-input-m
- const: rx-input
- const: tx
@@ -133,7 +133,7 @@ examples:
<&bpmp TEGRA234_CLK_MGBE0_RX_PCS_M>,
<&bpmp TEGRA234_CLK_MGBE0_RX_PCS>,
<&bpmp TEGRA234_CLK_MGBE0_TX_PCS>;
clock-names = "mgbe", "mac", "mac-divider", "ptp-ref", "rx-input-m",
clock-names = "mgbe", "mac", "mac-divider", "ptp_ref", "rx-input-m",
"rx-input", "tx", "eee-pcs", "rx-pcs-input", "rx-pcs-m",
"rx-pcs", "tx-pcs";
resets = <&bpmp TEGRA234_RESET_MGBE0_MAC>,

View File

@@ -0,0 +1,93 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/powerpc/fsl/fsl,mpc83xx.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Freescale PowerQUICC II Pro (MPC83xx) platforms
maintainers:
- J. Neuschäfer <j.ne@posteo.net>
properties:
$nodename:
const: '/'
compatible:
oneOf:
- description: MPC83xx Reference Design Boards
items:
- enum:
- fsl,mpc8308rdb
- fsl,mpc8315erdb
- fsl,mpc8360rdk
- fsl,mpc8377rdb
- fsl,mpc8377wlan
- fsl,mpc8378rdb
- fsl,mpc8379rdb
- description: MPC8313E Reference Design Board
items:
- const: MPC8313ERDB
- const: MPC831xRDB
- const: MPC83xxRDB
- description: MPC8323E Reference Design Board
items:
- const: MPC8323ERDB
- const: MPC832xRDB
- const: MPC83xxRDB
- description: MPC8349E-mITX(-GP) Reference Design Platform
items:
- enum:
- MPC8349EMITX
- MPC8349EMITXGP
- const: MPC834xMITX
- const: MPC83xxMITX
- description: Keymile KMETER1 board
const: keymile,KMETER1
- description: MPC8308 P1M board
const: denx,mpc8308_p1m
patternProperties:
"^soc@.*$":
type: object
properties:
compatible:
oneOf:
- items:
- enum:
- fsl,mpc8315-immr
- fsl,mpc8308-immr
- const: simple-bus
- items:
- const: fsl,mpc8360-immr
- const: fsl,immr
- const: fsl,soc
- const: simple-bus
- const: simple-bus
additionalProperties: true
examples:
- |
/ {
compatible = "fsl,mpc8315erdb";
model = "MPC8315E-RDB";
#address-cells = <1>;
#size-cells = <1>;
soc@e0000000 {
compatible = "fsl,mpc8315-immr", "simple-bus";
reg = <0xe0000000 0x00000200>;
#address-cells = <1>;
#size-cells = <1>;
device_type = "soc";
ranges = <0 0xe0000000 0x00100000>;
bus-frequency = <0>;
};
};
...

View File

@@ -287,7 +287,7 @@ examples:
regulator-max-microvolt = <1700000>;
};
mt6359_vrfck_1_ldo_reg: ldo_vrfck_1 {
regulator-name = "vrfck";
regulator-name = "vrfck_1";
regulator-min-microvolt = <1240000>;
regulator-max-microvolt = <1600000>;
};
@@ -309,7 +309,7 @@ examples:
regulator-max-microvolt = <3300000>;
};
mt6359_vemc_1_ldo_reg: ldo_vemc_1 {
regulator-name = "vemc";
regulator-name = "vemc_1";
regulator-min-microvolt = <2500000>;
regulator-max-microvolt = <3300000>;
};

View File

@@ -168,7 +168,7 @@ properties:
offset from voltage set to regulator.
regulator-uv-protection-microvolt:
description: Set over under voltage protection limit. This is a limit where
description: Set under voltage protection limit. This is a limit where
hardware performs emergency shutdown. Zero can be passed to disable
protection and value '1' indicates that protection should be enabled but
limit setting can be omitted. Limit is given as microvolt offset from
@@ -182,7 +182,7 @@ properties:
is given as microvolt offset from voltage set to regulator.
regulator-uv-warn-microvolt:
description: Set over under voltage warning limit. This is a limit where
description: Set under voltage warning limit. This is a limit where
hardware is assumed still to be functional but approaching limit where
it gets damaged. Recovery actions should be initiated. Zero can be passed
to disable detection and value '1' indicates that detection should

View File

@@ -23,6 +23,7 @@ properties:
enum:
- nvidia,tegra210-audio-graph-card
- nvidia,tegra186-audio-graph-card
- nvidia,tegra238-audio-graph-card
- nvidia,tegra264-audio-graph-card
clocks:

View File

@@ -20,6 +20,7 @@ properties:
- renesas,r9a07g044-ssi # RZ/G2{L,LC}
- renesas,r9a07g054-ssi # RZ/V2L
- renesas,r9a08g045-ssi # RZ/G3S
- renesas,r9a08g046-ssi # RZ/G3L
- const: renesas,rz-ssi
reg:

View File

@@ -33,6 +33,7 @@ properties:
- const: rockchip,rk3066-spdif
- items:
- enum:
- rockchip,rk3576-spdif
- rockchip,rk3588-spdif
- const: rockchip,rk3568-spdif

View File

@@ -164,7 +164,7 @@ allOf:
properties:
compatible:
contains:
const: st,stm32mph7-sai
const: st,stm32h7-sai
then:
properties:
clocks:

View File

@@ -6,9 +6,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner A31 SPI Controller
allOf:
- $ref: spi-controller.yaml
maintainers:
- Chen-Yu Tsai <wens@csie.org>
- Maxime Ripard <mripard@kernel.org>
@@ -82,11 +79,11 @@ patternProperties:
spi-rx-bus-width:
items:
- const: 1
enum: [0, 1, 2, 4]
spi-tx-bus-width:
items:
- const: 1
enum: [0, 1, 2, 4]
required:
- compatible
@@ -95,6 +92,28 @@ required:
- clocks
- clock-names
allOf:
- $ref: spi-controller.yaml
- if:
not:
properties:
compatible:
contains:
enum:
- allwinner,sun50i-r329-spi
- allwinner,sun55i-a523-spi
then:
patternProperties:
"^.*@[0-9a-f]+":
properties:
spi-rx-bus-width:
items:
enum: [0, 1]
spi-tx-bus-width:
items:
enum: [0, 1]
unevaluatedProperties: false
examples:

View File

@@ -22,21 +22,6 @@ allOf:
properties:
reg:
minItems: 2
- if:
properties:
compatible:
contains:
enum:
- baikal,bt1-sys-ssi
then:
properties:
mux-controls:
maxItems: 1
required:
- mux-controls
else:
required:
- interrupts
- if:
properties:
compatible:
@@ -75,10 +60,6 @@ properties:
const: intel,mountevans-imc-ssi
- description: AMD Pensando Elba SoC SPI Controller
const: amd,pensando-elba-spi
- description: Baikal-T1 SPI Controller
const: baikal,bt1-ssi
- description: Baikal-T1 System Boot SPI Controller
const: baikal,bt1-sys-ssi
- description: Canaan Kendryte K210 SoS SPI Controller
const: canaan,k210-spi
- description: Renesas RZ/N1 SPI Controller
@@ -170,6 +151,7 @@ required:
- "#address-cells"
- "#size-cells"
- clocks
- interrupts
examples:
- |
@@ -190,15 +172,4 @@ examples:
rx-sample-delay-ns = <7>;
};
};
- |
spi@1f040100 {
compatible = "baikal,bt1-sys-ssi";
reg = <0x1f040100 0x900>,
<0x1c000000 0x1000000>;
#address-cells = <1>;
#size-cells = <0>;
mux-controls = <&boot_mux>;
clocks = <&ccu_sys>;
clock-names = "ssi_clk";
};
...

View File

@@ -99,3 +99,51 @@ of the driver is decremented. All symlinks between the two are removed.
When a driver is removed, the list of devices that it supports is
iterated over, and the driver's remove callback is called for each
one. The device is removed from that list and the symlinks removed.
Driver Override
~~~~~~~~~~~~~~~
Userspace may override the standard matching by writing a driver name to
a device's ``driver_override`` sysfs attribute. When set, only a driver
whose name matches the override will be considered during binding. This
bypasses all bus-specific matching (OF, ACPI, ID tables, etc.).
The override may be cleared by writing an empty string, which returns
the device to standard matching rules. Writing to ``driver_override``
does not automatically unbind the device from its current driver or
make any attempt to load the specified driver.
Buses opt into this mechanism by setting the ``driver_override`` flag in
their ``struct bus_type``::
const struct bus_type example_bus_type = {
...
.driver_override = true,
};
When the flag is set, the driver core automatically creates the
``driver_override`` sysfs attribute for every device on that bus.
The bus's ``match()`` callback should check the override before performing
its own matching, using ``device_match_driver_override()``::
static int example_match(struct device *dev, const struct device_driver *drv)
{
int ret;
ret = device_match_driver_override(dev, drv);
if (ret >= 0)
return ret;
/* Fall through to bus-specific matching... */
}
``device_match_driver_override()`` returns > 0 if the override matches
the given driver, 0 if the override is set but does not match, or < 0 if
no override is set at all.
Additional helpers are available:
- ``device_set_driver_override()`` - set or clear the override from kernel code.
- ``device_has_driver_override()`` - check whether an override is set.

View File

@@ -783,6 +783,56 @@ controlled by the "uuid" mount option, which supports these values:
mounted with "uuid=on".
Durability and copy up
----------------------
The fsync(2) system call ensures that the data and metadata of a file
are safely written to the backing storage, which is expected to
guarantee the existence of the information post system crash.
Without an fsync(2) call, there is no guarantee that the observed
data after a system crash will be either the old or the new data, but
in practice, the observed data after crash is often the old or new data
or a mix of both.
When an overlayfs file is modified for the first time, copy up will
create a copy of the lower file and its parent directories in the upper
layer. Since the Linux filesystem API does not enforce any particular
ordering on storing changes without explicit fsync(2) calls, in case
of a system crash, the upper file could end up with no data at all
(i.e. zeros), which would be an unusual outcome. To avoid this
experience, overlayfs calls fsync(2) on the upper file before completing
data copy up with rename(2) or link(2) to make the copy up "atomic".
By default, overlayfs does not explicitly call fsync(2) on copied up
directories or on metadata-only copy up, so it provides no guarantee to
persist the user's modification unless the user calls fsync(2).
The fsync during copy up only guarantees that if a copy up is observed
after a crash, the observed data is not zeroes or intermediate values
from the copy up staging area.
On traditional local filesystems with a single journal (e.g. ext4, xfs),
fsync on a file also persists the parent directory changes, because they
are usually modified in the same transaction, so metadata durability during
data copy up effectively comes for free. Overlayfs further limits risk by
disallowing network filesystems as upper layer.
Overlayfs can be tuned to prefer performance or durability when storing
to the underlying upper layer. This is controlled by the "fsync" mount
option, which supports these values:
- "auto": (default)
Call fsync(2) on upper file before completion of data copy up.
No explicit fsync(2) on directory or metadata-only copy up.
- "strict":
Call fsync(2) on upper file and directories before completion of any
copy up.
- "volatile": [*]
Prefer performance over durability (see `Volatile mount`_)
[*] The mount option "volatile" is an alias to "fsync=volatile".
Volatile mount
--------------

View File

@@ -27,10 +27,10 @@ for details.
Sysfs entries
-------------
The following attributes are supported. Current maxim attribute
The following attributes are supported. Current maximum attribute
is read-write, all other attributes are read-only.
in0_input Measured voltage in microvolts.
in0_input Measured voltage in millivolts.
curr1_input Measured current in microamperes.
curr1_max_alarm Overcurrent alarm in microamperes.
curr1_input Measured current in milliamperes.
curr1_max Overcurrent shutdown threshold in milliamperes.

View File

@@ -57,7 +57,7 @@ Supported chips:
- https://ww1.microchip.com/downloads/en/DeviceDoc/EMC1438%20DS%20Rev.%201.0%20(04-29-10).pdf
Author:
Kalhan Trisal <kalhan.trisal@intel.com
Kalhan Trisal <kalhan.trisal@intel.com>
Description

View File

@@ -220,7 +220,6 @@ Hardware Monitoring Kernel Drivers
q54sj108a2
qnap-mcu-hwmon
raspberrypi-hwmon
sa67
sbrmi
sbtsi_temp
sch5627

View File

@@ -51,8 +51,9 @@ temp1_max Provides thermal control temperature of the CPU package
temp1_crit Provides shutdown temperature of the CPU package which
is also known as the maximum processor junction
temperature, Tjmax or Tprochot.
temp1_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of
the CPU package.
temp1_crit_hyst Provides the hysteresis temperature of the CPU
package. Returns Tcontrol, the temperature at which
the critical condition clears.
temp2_label "DTS"
temp2_input Provides current temperature of the CPU package scaled
@@ -62,8 +63,9 @@ temp2_max Provides thermal control temperature of the CPU package
temp2_crit Provides shutdown temperature of the CPU package which
is also known as the maximum processor junction
temperature, Tjmax or Tprochot.
temp2_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of
the CPU package.
temp2_crit_hyst Provides the hysteresis temperature of the CPU
package. Returns Tcontrol, the temperature at which
the critical condition clears.
temp3_label "Tcontrol"
temp3_input Provides current Tcontrol temperature of the CPU

View File

@@ -1,41 +0,0 @@
.. SPDX-License-Identifier: GPL-2.0-only
Kernel driver sa67mcu
=====================
Supported chips:
* Kontron sa67mcu
Prefix: 'sa67mcu'
Datasheet: not available
Authors: Michael Walle <mwalle@kernel.org>
Description
-----------
The sa67mcu is a board management controller which also exposes a hardware
monitoring controller.
The controller has two voltage and one temperature sensor. The values are
hold in two 8 bit registers to form one 16 bit value. Reading the lower byte
will also capture the high byte to make the access atomic. The unit of the
volatge sensors are 1mV and the unit of the temperature sensor is 0.1degC.
Sysfs entries
-------------
The following attributes are supported.
======================= ========================================================
in0_label "VDDIN"
in0_input Measured VDDIN voltage.
in1_label "VDD_RTC"
in1_input Measured VDD_RTC voltage.
temp1_input MCU temperature. Roughly the board temperature.
======================= ========================================================

View File

@@ -247,8 +247,8 @@ operations:
flags: [admin-perm]
do:
pre: net-shaper-nl-pre-doit
post: net-shaper-nl-post-doit
pre: net-shaper-nl-pre-doit-write
post: net-shaper-nl-post-doit-write
request:
attributes:
- ifindex
@@ -278,8 +278,8 @@ operations:
flags: [admin-perm]
do:
pre: net-shaper-nl-pre-doit
post: net-shaper-nl-post-doit
pre: net-shaper-nl-pre-doit-write
post: net-shaper-nl-post-doit-write
request:
attributes: *ns-binding
@@ -309,8 +309,8 @@ operations:
flags: [admin-perm]
do:
pre: net-shaper-nl-pre-doit
post: net-shaper-nl-post-doit
pre: net-shaper-nl-pre-doit-write
post: net-shaper-nl-post-doit-write
request:
attributes:
- ifindex

View File

@@ -152,7 +152,7 @@ operations:
- compound-ops
-
name: threads-set
doc: set the number of running threads
doc: set the maximum number of running threads
attribute-set: server
flags: [admin-perm]
do:
@@ -165,7 +165,7 @@ operations:
- min-threads
-
name: threads-get
doc: get the number of running threads
doc: get the maximum number of running threads
attribute-set: server
do:
reply:

View File

@@ -43,7 +43,6 @@ options should be enabled to use sched_ext:
CONFIG_DEBUG_INFO_BTF=y
CONFIG_BPF_JIT_ALWAYS_ON=y
CONFIG_BPF_JIT_DEFAULT_ON=y
CONFIG_PAHOLE_HAS_BTF_TAG=y
sched_ext is used only when the BPF scheduler is loaded and running.
@@ -58,7 +57,8 @@ in ``ops->flags``, all ``SCHED_NORMAL``, ``SCHED_BATCH``, ``SCHED_IDLE``, and
However, when the BPF scheduler is loaded and ``SCX_OPS_SWITCH_PARTIAL`` is
set in ``ops->flags``, only tasks with the ``SCHED_EXT`` policy are scheduled
by sched_ext, while tasks with ``SCHED_NORMAL``, ``SCHED_BATCH`` and
``SCHED_IDLE`` policies are scheduled by the fair-class scheduler.
``SCHED_IDLE`` policies are scheduled by the fair-class scheduler which has
higher sched_class precedence than ``SCHED_EXT``.
Terminating the sched_ext scheduler program, triggering `SysRq-S`, or
detection of any internal error including stalled runnable tasks aborts the
@@ -345,6 +345,8 @@ Where to Look
The functions prefixed with ``scx_bpf_`` can be called from the BPF
scheduler.
* ``kernel/sched/ext_idle.c`` contains the built-in idle CPU selection policy.
* ``tools/sched_ext/`` hosts example BPF scheduler implementations.
* ``scx_simple[.bpf].c``: Minimal global FIFO scheduler example using a
@@ -353,13 +355,35 @@ Where to Look
* ``scx_qmap[.bpf].c``: A multi-level FIFO scheduler supporting five
levels of priority implemented with ``BPF_MAP_TYPE_QUEUE``.
* ``scx_central[.bpf].c``: A central FIFO scheduler where all scheduling
decisions are made on one CPU, demonstrating ``LOCAL_ON`` dispatching,
tickless operation, and kthread preemption.
* ``scx_cpu0[.bpf].c``: A scheduler that queues all tasks to a shared DSQ
and only dispatches them on CPU0 in FIFO order. Useful for testing bypass
behavior.
* ``scx_flatcg[.bpf].c``: A flattened cgroup hierarchy scheduler
implementing hierarchical weight-based cgroup CPU control by compounding
each cgroup's share at every level into a single flat scheduling layer.
* ``scx_pair[.bpf].c``: A core-scheduling example that always makes
sibling CPU pairs execute tasks from the same CPU cgroup.
* ``scx_sdt[.bpf].c``: A variation of ``scx_simple`` demonstrating BPF
arena memory management for per-task data.
* ``scx_userland[.bpf].c``: A minimal scheduler demonstrating user space
scheduling. Tasks with CPU affinity are direct-dispatched in FIFO order;
all others are scheduled in user space by a simple vruntime scheduler.
ABI Instability
===============
The APIs provided by sched_ext to BPF schedulers programs have no stability
guarantees. This includes the ops table callbacks and constants defined in
``include/linux/sched/ext.h``, as well as the ``scx_bpf_`` kfuncs defined in
``kernel/sched/ext.c``.
``kernel/sched/ext.c`` and ``kernel/sched/ext_idle.c``.
While we will attempt to provide a relatively stable API surface when
possible, they are subject to change without warning between kernel

View File

@@ -2372,6 +2372,10 @@ quirk_flags
audible volume
* bit 25: ``mixer_capture_min_mute``
Similar to bit 24 but for capture streams
* bit 26: ``skip_iface_setup``
Skip the probe-time interface setup (usb_set_interface,
init_pitch, init_sample_rate); redundant with
snd_usb_endpoint_prepare() at stream-open time
This module supports multiple devices, autoprobe and hotplugging.

View File

@@ -8,7 +8,7 @@ Landlock: unprivileged access control
=====================================
:Author: Mickaël Salaün
:Date: January 2026
:Date: March 2026
The goal of Landlock is to enable restriction of ambient rights (e.g. global
filesystem or network access) for a set of processes. Because Landlock
@@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags
.. code-block:: c
__u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
if (abi < 7) {
/* Clear logging flags unsupported before ABI 7. */
__u32 restrict_flags =
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
LANDLOCK_RESTRICT_SELF_TSYNC;
switch (abi) {
case 1 ... 6:
/* Removes logging flags for ABI < 7 */
restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
__attribute__((fallthrough));
case 7:
/*
* Removes multithreaded enforcement flag for ABI < 8
*
* WARNING: Without this flag, calling landlock_restrict_self(2) is
* only equivalent if the calling process is single-threaded. Below
* ABI v8 (and as of ABI v8, when not using this flag), a Landlock
* policy would only be enforced for the calling thread and its
* children (and not for all threads, including parents and siblings).
*/
restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC;
}
The next step is to restrict the current thread from gaining more privileges

View File

@@ -1396,7 +1396,10 @@ or its flags may be modified, but it may not be resized.
Memory for the region is taken starting at the address denoted by the
field userspace_addr, which must point at user addressable memory for
the entire memory slot size. Any object may back this memory, including
anonymous memory, ordinary files, and hugetlbfs.
anonymous memory, ordinary files, and hugetlbfs. Changes in the backing
of the memory region are automatically reflected into the guest.
For example, an mmap() that affects the region will be made visible
immediately. Another example is madvise(MADV_DROP).
On architectures that support a form of address tagging, userspace_addr must
be an untagged address.
@@ -1412,11 +1415,6 @@ use it. The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
to make a new slot read-only. In this case, writes to this memory will be
posted to userspace as KVM_EXIT_MMIO exits.
When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
the memory region are automatically reflected into the guest. For example, an
mmap() that affects the region will be made visible immediately. Another
example is madvise(MADV_DROP).
For TDX guest, deleting/moving memory region loses guest memory contents.
Read only region isn't supported. Only as-id 0 is supported.
@@ -8437,115 +8435,123 @@ KVM_CHECK_EXTENSION.
The valid bits in cap.args[0] are:
=================================== ============================================
KVM_X86_QUIRK_LINT0_REENABLED By default, the reset value for the LVT
LINT0 register is 0x700 (APIC_MODE_EXTINT).
When this quirk is disabled, the reset value
is 0x10000 (APIC_LVT_MASKED).
======================================== ================================================
KVM_X86_QUIRK_LINT0_REENABLED By default, the reset value for the LVT
LINT0 register is 0x700 (APIC_MODE_EXTINT).
When this quirk is disabled, the reset value
is 0x10000 (APIC_LVT_MASKED).
KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW on
AMD CPUs to workaround buggy guest firmware
that runs in perpetuity with CR0.CD, i.e.
with caches in "no fill" mode.
KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW on
AMD CPUs to workaround buggy guest firmware
that runs in perpetuity with CR0.CD, i.e.
with caches in "no fill" mode.
When this quirk is disabled, KVM does not
change the value of CR0.CD and CR0.NW.
When this quirk is disabled, KVM does not
change the value of CR0.CD and CR0.NW.
KVM_X86_QUIRK_LAPIC_MMIO_HOLE By default, the MMIO LAPIC interface is
available even when configured for x2APIC
mode. When this quirk is disabled, KVM
disables the MMIO LAPIC interface if the
LAPIC is in x2APIC mode.
KVM_X86_QUIRK_LAPIC_MMIO_HOLE By default, the MMIO LAPIC interface is
available even when configured for x2APIC
mode. When this quirk is disabled, KVM
disables the MMIO LAPIC interface if the
LAPIC is in x2APIC mode.
KVM_X86_QUIRK_OUT_7E_INC_RIP By default, KVM pre-increments %rip before
exiting to userspace for an OUT instruction
to port 0x7e. When this quirk is disabled,
KVM does not pre-increment %rip before
exiting to userspace.
KVM_X86_QUIRK_OUT_7E_INC_RIP By default, KVM pre-increments %rip before
exiting to userspace for an OUT instruction
to port 0x7e. When this quirk is disabled,
KVM does not pre-increment %rip before
exiting to userspace.
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT When this quirk is disabled, KVM sets
CPUID.01H:ECX[bit 3] (MONITOR/MWAIT) if
IA32_MISC_ENABLE[bit 18] (MWAIT) is set.
Additionally, when this quirk is disabled,
KVM clears CPUID.01H:ECX[bit 3] if
IA32_MISC_ENABLE[bit 18] is cleared.
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT When this quirk is disabled, KVM sets
CPUID.01H:ECX[bit 3] (MONITOR/MWAIT) if
IA32_MISC_ENABLE[bit 18] (MWAIT) is set.
Additionally, when this quirk is disabled,
KVM clears CPUID.01H:ECX[bit 3] if
IA32_MISC_ENABLE[bit 18] is cleared.
KVM_X86_QUIRK_FIX_HYPERCALL_INSN By default, KVM rewrites guest
VMMCALL/VMCALL instructions to match the
vendor's hypercall instruction for the
system. When this quirk is disabled, KVM
will no longer rewrite invalid guest
hypercall instructions. Executing the
incorrect hypercall instruction will
generate a #UD within the guest.
KVM_X86_QUIRK_FIX_HYPERCALL_INSN By default, KVM rewrites guest
VMMCALL/VMCALL instructions to match the
vendor's hypercall instruction for the
system. When this quirk is disabled, KVM
will no longer rewrite invalid guest
hypercall instructions. Executing the
incorrect hypercall instruction will
generate a #UD within the guest.
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if
they are intercepted) as NOPs regardless of
whether or not MONITOR/MWAIT are supported
according to guest CPUID. When this quirk
is disabled and KVM_X86_DISABLE_EXITS_MWAIT
is not set (MONITOR/MWAIT are intercepted),
KVM will inject a #UD on MONITOR/MWAIT if
they're unsupported per guest CPUID. Note,
KVM will modify MONITOR/MWAIT support in
guest CPUID on writes to MISC_ENABLE if
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is
disabled.
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if
they are intercepted) as NOPs regardless of
whether or not MONITOR/MWAIT are supported
according to guest CPUID. When this quirk
is disabled and KVM_X86_DISABLE_EXITS_MWAIT
is not set (MONITOR/MWAIT are intercepted),
KVM will inject a #UD on MONITOR/MWAIT if
they're unsupported per guest CPUID. Note,
KVM will modify MONITOR/MWAIT support in
guest CPUID on writes to MISC_ENABLE if
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is
disabled.
KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM
invalidates all SPTEs in all memslots and
address spaces when a memslot is deleted or
moved. When this quirk is disabled (or the
VM type isn't KVM_X86_DEFAULT_VM), KVM only
ensures the backing memory of the deleted
or moved memslot isn't reachable, i.e KVM
_may_ invalidate only SPTEs related to the
memslot.
KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM
invalidates all SPTEs in all memslots and
address spaces when a memslot is deleted or
moved. When this quirk is disabled (or the
VM type isn't KVM_X86_DEFAULT_VM), KVM only
ensures the backing memory of the deleted
or moved memslot isn't reachable, i.e KVM
_may_ invalidate only SPTEs related to the
memslot.
KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the
vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
MSR_IA32_ARCH_CAPABILITIES (0x10a),
MSR_PLATFORM_INFO (0xce), and all VMX MSRs
(0x480..0x492) to the maximal capabilities
supported by KVM. KVM also sets
MSR_IA32_UCODE_REV (0x8b) to an arbitrary
value (which is different for Intel vs.
AMD). Lastly, when guest CPUID is set (by
userspace), KVM modifies select VMX MSR
fields to force consistency between guest
CPUID and L2's effective ISA. When this
quirk is disabled, KVM zeroes the vCPU's MSR
values (with two exceptions, see below),
i.e. treats the feature MSRs like CPUID
leaves and gives userspace full control of
the vCPU model definition. This quirk does
not affect VMX MSRs CR0/CR4_FIXED1 (0x487
and 0x489), as KVM does now allow them to
be set by userspace (KVM sets them based on
guest CPUID, for safety purposes).
KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the
vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
MSR_IA32_ARCH_CAPABILITIES (0x10a),
MSR_PLATFORM_INFO (0xce), and all VMX MSRs
(0x480..0x492) to the maximal capabilities
supported by KVM. KVM also sets
MSR_IA32_UCODE_REV (0x8b) to an arbitrary
value (which is different for Intel vs.
AMD). Lastly, when guest CPUID is set (by
userspace), KVM modifies select VMX MSR
fields to force consistency between guest
CPUID and L2's effective ISA. When this
quirk is disabled, KVM zeroes the vCPU's MSR
values (with two exceptions, see below),
i.e. treats the feature MSRs like CPUID
leaves and gives userspace full control of
the vCPU model definition. This quirk does
not affect VMX MSRs CR0/CR4_FIXED1 (0x487
and 0x489), as KVM does now allow them to
be set by userspace (KVM sets them based on
guest CPUID, for safety purposes).
KVM_X86_QUIRK_IGNORE_GUEST_PAT By default, on Intel platforms, KVM ignores
guest PAT and forces the effective memory
type to WB in EPT. The quirk is not available
on Intel platforms which are incapable of
safely honoring guest PAT (i.e., without CPU
self-snoop, KVM always ignores guest PAT and
forces effective memory type to WB). It is
also ignored on AMD platforms or, on Intel,
when a VM has non-coherent DMA devices
assigned; KVM always honors guest PAT in
such case. The quirk is needed to avoid
slowdowns on certain Intel Xeon platforms
(e.g. ICX, SPR) where self-snoop feature is
supported but UC is slow enough to cause
issues with some older guests that use
UC instead of WC to map the video RAM.
Userspace can disable the quirk to honor
guest PAT if it knows that there is no such
guest software, for example if it does not
expose a bochs graphics device (which is
known to have had a buggy driver).
=================================== ============================================
KVM_X86_QUIRK_IGNORE_GUEST_PAT By default, on Intel platforms, KVM ignores
guest PAT and forces the effective memory
type to WB in EPT. The quirk is not available
on Intel platforms which are incapable of
safely honoring guest PAT (i.e., without CPU
self-snoop, KVM always ignores guest PAT and
forces effective memory type to WB). It is
also ignored on AMD platforms or, on Intel,
when a VM has non-coherent DMA devices
assigned; KVM always honors guest PAT in
such case. The quirk is needed to avoid
slowdowns on certain Intel Xeon platforms
(e.g. ICX, SPR) where self-snoop feature is
supported but UC is slow enough to cause
issues with some older guests that use
UC instead of WC to map the video RAM.
Userspace can disable the quirk to honor
guest PAT if it knows that there is no such
guest software, for example if it does not
expose a bochs graphics device (which is
known to have had a buggy driver).
KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM By default, KVM relaxes the consistency
check for GUEST_IA32_DEBUGCTL in vmcs12
to allow FREEZE_IN_SMM to be set. When
this quirk is disabled, KVM requires this
bit to be cleared. Note that the vmcs02
bit is still completely controlled by the
host, regardless of the quirk setting.
======================================== ================================================
7.32 KVM_CAP_MAX_VCPU_ID
------------------------

View File

@@ -17,6 +17,8 @@ The acquisition orders for mutexes are as follows:
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
- vcpu->mutex is taken outside kvm->slots_lock and kvm->slots_arch_lock
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.

View File

@@ -993,10 +993,8 @@ F: Documentation/devicetree/bindings/thermal/amazon,al-thermal.yaml
F: drivers/thermal/thermal_mmio.c
AMAZON ETHERNET DRIVERS
M: Shay Agroskin <shayagr@amazon.com>
M: Arthur Kiyanovski <akiyano@amazon.com>
R: David Arinzon <darinzon@amazon.com>
R: Saeed Bishara <saeedb@amazon.com>
M: David Arinzon <darinzon@amazon.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst
@@ -1292,8 +1290,8 @@ F: include/trace/events/amdxdna.h
F: include/uapi/drm/amdxdna_accel.h
AMD XGBE DRIVER
M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
M: Raju Rangoju <Raju.Rangoju@amd.com>
M: Prashanth Kumar K R <PrashanthKumar.K.R@amd.com>
L: netdev@vger.kernel.org
S: Maintained
F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi
@@ -3989,7 +3987,7 @@ F: drivers/hwmon/asus-ec-sensors.c
ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS
M: Corentin Chary <corentin.chary@gmail.com>
M: Luke D. Jones <luke@ljones.dev>
M: Denis Benato <benato.denis96@gmail.com>
M: Denis Benato <denis.benato@linux.dev>
L: platform-driver-x86@vger.kernel.org
S: Maintained
W: https://asus-linux.org/
@@ -4025,7 +4023,7 @@ F: drivers/hwmon/asus_wmi_sensors.c
ASYMMETRIC KEYS
M: David Howells <dhowells@redhat.com>
M: Lukas Wunner <lukas@wunner.de>
M: Ignat Korchagin <ignat@cloudflare.com>
M: Ignat Korchagin <ignat@linux.win>
L: keyrings@vger.kernel.org
L: linux-crypto@vger.kernel.org
S: Maintained
@@ -4038,7 +4036,7 @@ F: include/linux/verification.h
ASYMMETRIC KEYS - ECDSA
M: Lukas Wunner <lukas@wunner.de>
M: Ignat Korchagin <ignat@cloudflare.com>
M: Ignat Korchagin <ignat@linux.win>
R: Stefan Berger <stefanb@linux.ibm.com>
L: linux-crypto@vger.kernel.org
S: Maintained
@@ -4048,14 +4046,14 @@ F: include/crypto/ecc*
ASYMMETRIC KEYS - GOST
M: Lukas Wunner <lukas@wunner.de>
M: Ignat Korchagin <ignat@cloudflare.com>
M: Ignat Korchagin <ignat@linux.win>
L: linux-crypto@vger.kernel.org
S: Odd fixes
F: crypto/ecrdsa*
ASYMMETRIC KEYS - RSA
M: Lukas Wunner <lukas@wunner.de>
M: Ignat Korchagin <ignat@cloudflare.com>
M: Ignat Korchagin <ignat@linux.win>
L: linux-crypto@vger.kernel.org
S: Maintained
F: crypto/rsa*
@@ -4618,7 +4616,6 @@ F: drivers/bluetooth/
BLUETOOTH SUBSYSTEM
M: Marcel Holtmann <marcel@holtmann.org>
M: Johan Hedberg <johan.hedberg@gmail.com>
M: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
L: linux-bluetooth@vger.kernel.org
S: Supported
@@ -6213,20 +6210,20 @@ F: drivers/scsi/fnic/
CISCO SCSI HBA DRIVER
M: Karan Tilak Kumar <kartilak@cisco.com>
M: Narsimhulu Musini <nmusini@cisco.com>
M: Sesidhar Baddela <sebaddel@cisco.com>
L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/snic/
CISCO VIC ETHERNET NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Satish Kharat <satishkh@cisco.com>
S: Maintained
F: drivers/net/ethernet/cisco/enic/
CISCO VIC LOW LATENCY NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Nelson Escobar <neescoba@cisco.com>
M: Satish Kharat <satishkh@cisco.com>
S: Supported
F: drivers/infiniband/hw/usnic/
@@ -6280,7 +6277,7 @@ S: Maintained
F: include/linux/clk.h
CLOCKSOURCE, CLOCKEVENT DRIVERS
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Daniel Lezcano <daniel.lezcano@kernel.org>
M: Thomas Gleixner <tglx@kernel.org>
L: linux-kernel@vger.kernel.org
S: Supported
@@ -6669,7 +6666,7 @@ F: rust/kernel/cpu.rs
CPU IDLE TIME MANAGEMENT FRAMEWORK
M: "Rafael J. Wysocki" <rafael@kernel.org>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Daniel Lezcano <daniel.lezcano@kernel.org>
R: Christian Loehle <christian.loehle@arm.com>
L: linux-pm@vger.kernel.org
S: Maintained
@@ -6699,7 +6696,7 @@ F: arch/x86/kernel/msr.c
CPUIDLE DRIVER - ARM BIG LITTLE
M: Lorenzo Pieralisi <lpieralisi@kernel.org>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Daniel Lezcano <daniel.lezcano@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
@@ -6707,7 +6704,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
F: drivers/cpuidle/cpuidle-big_little.c
CPUIDLE DRIVER - ARM EXYNOS
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Daniel Lezcano <daniel.lezcano@kernel.org>
M: Kukjin Kim <kgene@kernel.org>
R: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
@@ -8002,7 +7999,9 @@ F: Documentation/devicetree/bindings/display/himax,hx8357.yaml
F: drivers/gpu/drm/tiny/hx8357d.c
DRM DRIVER FOR HYPERV SYNTHETIC VIDEO DEVICE
M: Deepak Rawat <drawat.floss@gmail.com>
M: Dexuan Cui <decui@microsoft.com>
M: Long Li <longli@microsoft.com>
M: Saurabh Sengar <ssengar@linux.microsoft.com>
L: linux-hyperv@vger.kernel.org
L: dri-devel@lists.freedesktop.org
S: Maintained
@@ -8630,9 +8629,14 @@ F: drivers/gpu/drm/lima/
F: include/uapi/drm/lima_drm.h
DRM DRIVERS FOR LOONGSON
M: Sui Jingfeng <suijingfeng@loongson.cn>
M: Jianmin Lv <lvjianmin@loongson.cn>
M: Qianhai Wu <wuqianhai@loongson.cn>
R: Huacai Chen <chenhuacai@kernel.org>
R: Mingcong Bai <jeffbai@aosc.io>
R: Xi Ruoyao <xry111@xry111.site>
R: Icenowy Zheng <zhengxingda@iscas.ac.cn>
L: dri-devel@lists.freedesktop.org
S: Supported
S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: drivers/gpu/drm/loongson/
@@ -9616,7 +9620,12 @@ F: include/linux/ext2*
EXT4 FILE SYSTEM
M: "Theodore Ts'o" <tytso@mit.edu>
M: Andreas Dilger <adilger.kernel@dilger.ca>
R: Andreas Dilger <adilger.kernel@dilger.ca>
R: Baokun Li <libaokun@linux.alibaba.com>
R: Jan Kara <jack@suse.cz>
R: Ojaswin Mujoo <ojaswin@linux.ibm.com>
R: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
R: Zhang Yi <yi.zhang@huawei.com>
L: linux-ext4@vger.kernel.org
S: Maintained
W: http://ext4.wiki.kernel.org
@@ -10172,8 +10181,8 @@ F: drivers/i2c/busses/i2c-cpm.c
FREESCALE IMX / MXC FEC DRIVER
M: Wei Fang <wei.fang@nxp.com>
R: Frank Li <frank.li@nxp.com>
R: Shenwei Wang <shenwei.wang@nxp.com>
R: Clark Wang <xiaoning.wang@nxp.com>
L: imx@lists.linux.dev
L: netdev@vger.kernel.org
S: Maintained
@@ -10485,7 +10494,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
F: Documentation/trace/ftrace*
F: arch/*/*/*/*ftrace*
F: arch/*/*/*ftrace*
F: include/*/ftrace.h
F: include/*/*ftrace*
F: kernel/trace/fgraph.c
F: kernel/trace/ftrace*
F: samples/ftrace
@@ -12012,7 +12021,6 @@ I2C SUBSYSTEM
M: Wolfram Sang <wsa+renesas@sang-engineering.com>
L: linux-i2c@vger.kernel.org
S: Maintained
W: https://i2c.wiki.kernel.org/
Q: https://patchwork.ozlabs.org/project/linux-i2c/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
F: Documentation/i2c/
@@ -12038,7 +12046,6 @@ I2C SUBSYSTEM HOST DRIVERS
M: Andi Shyti <andi.shyti@kernel.org>
L: linux-i2c@vger.kernel.org
S: Maintained
W: https://i2c.wiki.kernel.org/
Q: https://patchwork.ozlabs.org/project/linux-i2c/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git
F: Documentation/devicetree/bindings/i2c/
@@ -12217,7 +12224,6 @@ IBM Power SRIOV Virtual NIC Device Driver
M: Haren Myneni <haren@linux.ibm.com>
M: Rick Lindsley <ricklind@linux.ibm.com>
R: Nick Child <nnac123@linux.ibm.com>
R: Thomas Falcon <tlfalcon@linux.ibm.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/ibm/ibmvnic.*
@@ -13943,7 +13949,7 @@ F: fs/smb/server/
KERNEL UNIT TESTING FRAMEWORK (KUnit)
M: Brendan Higgins <brendan.higgins@linux.dev>
M: David Gow <davidgow@google.com>
M: David Gow <david@davidgow.net>
R: Rae Moar <raemoar63@gmail.com>
L: linux-kselftest@vger.kernel.org
L: kunit-dev@googlegroups.com
@@ -14412,9 +14418,9 @@ LANTIQ PEF2256 DRIVER
M: Herve Codina <herve.codina@bootlin.com>
S: Maintained
F: Documentation/devicetree/bindings/net/lantiq,pef2256.yaml
F: drivers/net/wan/framer/pef2256/
F: drivers/net/wan/framer/
F: drivers/pinctrl/pinctrl-pef2256.c
F: include/linux/framer/pef2256.h
F: include/linux/framer/
LASI 53c700 driver for PARISC
M: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
@@ -14763,7 +14769,7 @@ F: drivers/misc/lis3lv02d/
F: drivers/platform/x86/hp/hp_accel.c
LIST KUNIT TEST
M: David Gow <davidgow@google.com>
M: David Gow <david@davidgow.net>
L: linux-kselftest@vger.kernel.org
L: kunit-dev@googlegroups.com
S: Maintained
@@ -15376,10 +15382,8 @@ F: drivers/crypto/marvell/
F: include/linux/soc/marvell/octeontx2/
MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2)
M: Mirko Lindner <mlindner@marvell.com>
M: Stephen Hemminger <stephen@networkplumber.org>
L: netdev@vger.kernel.org
S: Odd fixes
S: Orphan
F: drivers/net/ethernet/marvell/sk*
MARVELL LIBERTAS WIRELESS DRIVER
@@ -15476,7 +15480,6 @@ MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER
M: Sunil Goutham <sgoutham@marvell.com>
M: Linu Cherian <lcherian@marvell.com>
M: Geetha sowjanya <gakula@marvell.com>
M: Jerin Jacob <jerinj@marvell.com>
M: hariprasad <hkelam@marvell.com>
M: Subbaraya Sundeep <sbhatta@marvell.com>
L: netdev@vger.kernel.org
@@ -15491,7 +15494,7 @@ S: Supported
F: drivers/perf/marvell_pem_pmu.c
MARVELL PRESTERA ETHERNET SWITCH DRIVER
M: Taras Chornyi <taras.chornyi@plvision.eu>
M: Elad Nachman <enachman@marvell.com>
S: Supported
W: https://github.com/Marvell-switching/switchdev-prestera
F: drivers/net/ethernet/marvell/prestera/
@@ -16165,7 +16168,6 @@ F: drivers/dma/mediatek/
MEDIATEK ETHERNET DRIVER
M: Felix Fietkau <nbd@nbd.name>
M: Sean Wang <sean.wang@mediatek.com>
M: Lorenzo Bianconi <lorenzo@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
@@ -16358,8 +16360,6 @@ F: include/soc/mediatek/smi.h
MEDIATEK SWITCH DRIVER
M: Chester A. Unal <chester.a.unal@arinc9.com>
M: Daniel Golle <daniel@makrotopia.org>
M: DENG Qingfang <dqfext@gmail.com>
M: Sean Wang <sean.wang@mediatek.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/dsa/mt7530-mdio.c
@@ -16369,7 +16369,6 @@ F: net/dsa/tag_mtk.c
MEDIATEK T7XX 5G WWAN MODEM DRIVER
M: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
R: Chiranjeevi Rapolu <chiranjeevi.rapolu@linux.intel.com>
R: Liu Haijun <haijun.liu@mediatek.com>
R: Ricardo Martinez <ricardo.martinez@linux.intel.com>
L: netdev@vger.kernel.org
@@ -16654,9 +16653,9 @@ F: mm/balloon.c
MEMORY MANAGEMENT - CORE
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Lorenzo Stoakes <ljs@kernel.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Vlastimil Babka <vbabka@kernel.org>
R: Mike Rapoport <rppt@kernel.org>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
@@ -16784,9 +16783,9 @@ F: mm/workingset.c
MEMORY MANAGEMENT - MISC
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Lorenzo Stoakes <ljs@kernel.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Vlastimil Babka <vbabka@kernel.org>
R: Mike Rapoport <rppt@kernel.org>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
@@ -16841,7 +16840,7 @@ F: mm/oom_kill.c
MEMORY MANAGEMENT - PAGE ALLOCATOR
M: Andrew Morton <akpm@linux-foundation.org>
M: Vlastimil Babka <vbabka@suse.cz>
M: Vlastimil Babka <vbabka@kernel.org>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
R: Brendan Jackman <jackmanb@google.com>
@@ -16875,7 +16874,7 @@ R: David Hildenbrand <david@kernel.org>
R: Michal Hocko <mhocko@kernel.org>
R: Qi Zheng <zhengqi.arch@bytedance.com>
R: Shakeel Butt <shakeel.butt@linux.dev>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Lorenzo Stoakes <ljs@kernel.org>
L: linux-mm@kvack.org
S: Maintained
F: mm/vmscan.c
@@ -16884,11 +16883,11 @@ F: mm/workingset.c
MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@kernel.org>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
M: Lorenzo Stoakes <ljs@kernel.org>
R: Rik van Riel <riel@surriel.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Harry Yoo <harry.yoo@oracle.com>
R: Vlastimil Babka <vbabka@kernel.org>
R: Harry Yoo <harry@kernel.org>
R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
S: Maintained
@@ -16929,7 +16928,7 @@ F: mm/swapfile.c
MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@kernel.org>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
M: Lorenzo Stoakes <ljs@kernel.org>
R: Zi Yan <ziy@nvidia.com>
R: Baolin Wang <baolin.wang@linux.alibaba.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
@@ -16969,7 +16968,7 @@ F: tools/testing/selftests/mm/uffd-*.[ch]
MEMORY MANAGEMENT - RUST
M: Alice Ryhl <aliceryhl@google.com>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Lorenzo Stoakes <ljs@kernel.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
L: linux-mm@kvack.org
L: rust-for-linux@vger.kernel.org
@@ -16985,8 +16984,8 @@ F: rust/kernel/page.rs
MEMORY MAPPING
M: Andrew Morton <akpm@linux-foundation.org>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
M: Lorenzo Stoakes <ljs@kernel.org>
R: Vlastimil Babka <vbabka@kernel.org>
R: Jann Horn <jannh@google.com>
R: Pedro Falcato <pfalcato@suse.de>
L: linux-mm@kvack.org
@@ -17015,8 +17014,8 @@ MEMORY MAPPING - LOCKING
M: Andrew Morton <akpm@linux-foundation.org>
M: Suren Baghdasaryan <surenb@google.com>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
M: Lorenzo Stoakes <ljs@kernel.org>
R: Vlastimil Babka <vbabka@kernel.org>
R: Shakeel Butt <shakeel.butt@linux.dev>
L: linux-mm@kvack.org
S: Maintained
@@ -17030,9 +17029,9 @@ F: mm/mmap_lock.c
MEMORY MAPPING - MADVISE (MEMORY ADVICE)
M: Andrew Morton <akpm@linux-foundation.org>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
M: Lorenzo Stoakes <ljs@kernel.org>
M: David Hildenbrand <david@kernel.org>
R: Vlastimil Babka <vbabka@suse.cz>
R: Vlastimil Babka <vbabka@kernel.org>
R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
S: Maintained
@@ -19227,8 +19226,6 @@ F: tools/objtool/
OCELOT ETHERNET SWITCH DRIVER
M: Vladimir Oltean <vladimir.oltean@nxp.com>
M: Claudiu Manoil <claudiu.manoil@nxp.com>
M: Alexandre Belloni <alexandre.belloni@bootlin.com>
M: UNGLinuxDriver@microchip.com
L: netdev@vger.kernel.org
S: Supported
@@ -19814,7 +19811,6 @@ F: arch/*/boot/dts/
F: include/dt-bindings/
OPENCOMPUTE PTP CLOCK DRIVER
M: Jonathan Lemon <jonathan.lemon@gmail.com>
M: Vadim Fedorenko <vadim.fedorenko@linux.dev>
L: netdev@vger.kernel.org
S: Maintained
@@ -20122,9 +20118,8 @@ F: Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml
F: drivers/pci/controller/pci-aardvark.c
PCI DRIVER FOR ALTERA PCIE IP
M: Joyce Ooi <joyce.ooi@intel.com>
L: linux-pci@vger.kernel.org
S: Supported
S: Orphan
F: Documentation/devicetree/bindings/pci/altr,pcie-root-port.yaml
F: drivers/pci/controller/pcie-altera.c
@@ -20369,9 +20364,8 @@ S: Supported
F: Documentation/PCI/pci-error-recovery.rst
PCI MSI DRIVER FOR ALTERA MSI IP
M: Joyce Ooi <joyce.ooi@intel.com>
L: linux-pci@vger.kernel.org
S: Supported
S: Orphan
F: Documentation/devicetree/bindings/interrupt-controller/altr,msi-controller.yaml
F: drivers/pci/controller/pcie-altera-msi.c
@@ -20509,7 +20503,7 @@ F: Documentation/devicetree/bindings/pci/hisilicon,kirin-pcie.yaml
F: drivers/pci/controller/dwc/pcie-kirin.c
PCIE DRIVER FOR HISILICON STB
M: Shawn Guo <shawn.guo@linaro.org>
M: Shawn Guo <shawnguo@kernel.org>
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt
@@ -21082,8 +21076,7 @@ F: include/uapi/linux/atmppp.h
F: net/atm/pppoatm.c
PPP OVER ETHERNET
M: Michal Ostrowski <mostrows@earthlink.net>
S: Maintained
S: Orphan
F: drivers/net/ppp/pppoe.c
F: drivers/net/ppp/pppox.c
@@ -21458,9 +21451,8 @@ S: Supported
F: drivers/scsi/qedi/
QLOGIC QL4xxx ETHERNET DRIVER
M: Manish Chopra <manishc@marvell.com>
L: netdev@vger.kernel.org
S: Maintained
S: Orphan
F: drivers/net/ethernet/qlogic/qed/
F: drivers/net/ethernet/qlogic/qede/
F: include/linux/qed/
@@ -21695,7 +21687,7 @@ S: Maintained
F: drivers/net/ethernet/qualcomm/emac/
QUALCOMM ETHQOS ETHERNET DRIVER
M: Vinod Koul <vkoul@kernel.org>
M: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
L: netdev@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
@@ -21955,7 +21947,7 @@ F: drivers/media/radio/radio-tea5777.c
RADOS BLOCK DEVICE (RBD)
M: Ilya Dryomov <idryomov@gmail.com>
R: Dongsheng Yang <dongsheng.yang@easystack.cn>
R: Dongsheng Yang <dongsheng.yang@linux.dev>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
@@ -22138,7 +22130,7 @@ S: Supported
F: drivers/infiniband/sw/rdmavt
RDS - RELIABLE DATAGRAM SOCKETS
M: Allison Henderson <allison.henderson@oracle.com>
M: Allison Henderson <achender@kernel.org>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
L: rds-devel@oss.oracle.com (moderated for non-subscribers)
@@ -22284,6 +22276,16 @@ L: linux-wireless@vger.kernel.org
S: Orphan
F: drivers/net/wireless/rsi/
RELAY
M: Andrew Morton <akpm@linux-foundation.org>
M: Jens Axboe <axboe@kernel.dk>
M: Jason Xing <kernelxing@tencent.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/filesystems/relay.rst
F: include/linux/relay.h
F: kernel/relay.c
REGISTER MAP ABSTRACTION
M: Mark Brown <broonie@kernel.org>
L: linux-kernel@vger.kernel.org
@@ -23173,8 +23175,8 @@ K: \b(?i:rust)\b
RUST [ALLOC]
M: Danilo Krummrich <dakr@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Lorenzo Stoakes <ljs@kernel.org>
R: Vlastimil Babka <vbabka@kernel.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Uladzislau Rezki <urezki@gmail.com>
L: rust-for-linux@vger.kernel.org
@@ -24337,7 +24339,6 @@ F: Documentation/devicetree/bindings/interrupt-controller/kontron,sl28cpld-intc.
F: Documentation/devicetree/bindings/pwm/kontron,sl28cpld-pwm.yaml
F: Documentation/devicetree/bindings/watchdog/kontron,sl28cpld-wdt.yaml
F: drivers/gpio/gpio-sl28cpld.c
F: drivers/hwmon/sa67mcu-hwmon.c
F: drivers/hwmon/sl28cpld-hwmon.c
F: drivers/irqchip/irq-sl28cpld.c
F: drivers/pwm/pwm-sl28cpld.c
@@ -24350,12 +24351,13 @@ F: Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml
F: drivers/nvmem/layouts/sl28vpd.c
SLAB ALLOCATOR
M: Vlastimil Babka <vbabka@suse.cz>
M: Vlastimil Babka <vbabka@kernel.org>
M: Harry Yoo <harry@kernel.org>
M: Andrew Morton <akpm@linux-foundation.org>
R: Hao Li <hao.li@linux.dev>
R: Christoph Lameter <cl@gentwo.org>
R: David Rientjes <rientjes@google.com>
R: Roman Gushchin <roman.gushchin@linux.dev>
R: Harry Yoo <harry.yoo@oracle.com>
L: linux-mm@kvack.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
@@ -24909,9 +24911,9 @@ F: drivers/clk/spear/
F: drivers/pinctrl/spear/
SPI NOR SUBSYSTEM
M: Tudor Ambarus <tudor.ambarus@linaro.org>
M: Pratyush Yadav <pratyush@kernel.org>
M: Michael Walle <mwalle@kernel.org>
R: Takahiro Kuwano <takahiro.kuwano@infineon.com>
L: linux-mtd@lists.infradead.org
S: Maintained
W: http://www.linux-mtd.infradead.org/
@@ -25766,6 +25768,7 @@ F: include/net/pkt_cls.h
F: include/net/pkt_sched.h
F: include/net/sch_priv.h
F: include/net/tc_act/
F: include/net/tc_wrapper.h
F: include/uapi/linux/pkt_cls.h
F: include/uapi/linux/pkt_sched.h
F: include/uapi/linux/tc_act/
@@ -26217,7 +26220,7 @@ F: drivers/media/radio/radio-raremono.c
THERMAL
M: Rafael J. Wysocki <rafael@kernel.org>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Daniel Lezcano <daniel.lezcano@kernel.org>
R: Zhang Rui <rui.zhang@intel.com>
R: Lukasz Luba <lukasz.luba@arm.com>
L: linux-pm@vger.kernel.org
@@ -26247,7 +26250,7 @@ F: drivers/thermal/amlogic_thermal.c
THERMAL/CPU_COOLING
M: Amit Daniel Kachhap <amit.kachhap@gmail.com>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Daniel Lezcano <daniel.lezcano@kernel.org>
M: Viresh Kumar <viresh.kumar@linaro.org>
R: Lukasz Luba <lukasz.luba@arm.com>
L: linux-pm@vger.kernel.org
@@ -29186,7 +29189,7 @@ K: zstd
ZSWAP COMPRESSED SWAP CACHING
M: Johannes Weiner <hannes@cmpxchg.org>
M: Yosry Ahmed <yosry.ahmed@linux.dev>
M: Yosry Ahmed <yosry@kernel.org>
M: Nhat Pham <nphamcs@gmail.com>
R: Chengming Zhou <chengming.zhou@linux.dev>
L: linux-mm@kvack.org

View File

@@ -2,7 +2,7 @@
VERSION = 7
PATCHLEVEL = 0
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc6
NAME = Baby Opossum Posse
# *DOCUMENTATION*
@@ -476,6 +476,7 @@ KBUILD_USERLDFLAGS := $(USERLDFLAGS)
export rust_common_flags := --edition=2021 \
-Zbinary_dep_depinfo=y \
-Astable_features \
-Aunused_features \
-Dnon_ascii_idents \
-Dunsafe_op_in_unsafe_fn \
-Wmissing_docs \
@@ -1113,6 +1114,9 @@ KBUILD_CFLAGS += -fno-builtin-wcslen
# change __FILE__ to the relative path to the source directory
ifdef building_out_of_srctree
KBUILD_CPPFLAGS += -fmacro-prefix-map=$(srcroot)/=
ifeq ($(call rustc-option-yn, --remap-path-scope=macro),y)
KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= --remap-path-scope=macro
endif
endif
# include additional Makefiles when needed
@@ -1497,13 +1501,13 @@ ifneq ($(wildcard $(resolve_btfids_O)),)
$(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean
endif
PHONY += objtool_clean
PHONY += objtool_clean objtool_mrproper
objtool_O = $(abspath $(objtree))/tools/objtool
objtool_clean:
objtool_clean objtool_mrproper:
ifneq ($(wildcard $(objtool_O)),)
$(Q)$(MAKE) -sC $(abs_srctree)/tools/objtool O=$(objtool_O) srctree=$(abs_srctree) clean
$(Q)$(MAKE) -sC $(abs_srctree)/tools/objtool O=$(objtool_O) srctree=$(abs_srctree) $(patsubst objtool_%,%,$@)
endif
tools/: FORCE
@@ -1650,7 +1654,7 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \
modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \
compile_commands.json rust/test \
rust-project.json .vmlinux.objs .vmlinux.export.c \
.builtin-dtbs-list .builtin-dtb.S
.builtin-dtbs-list .builtin-dtbs.S
# Directories & files removed with 'make mrproper'
MRPROPER_FILES += include/config include/generated \
@@ -1686,7 +1690,7 @@ PHONY += $(mrproper-dirs) mrproper
$(mrproper-dirs):
$(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)
mrproper: clean $(mrproper-dirs)
mrproper: clean objtool_mrproper $(mrproper-dirs)
$(call cmd,rmfiles)
@find . $(RCS_FIND_IGNORE) \
\( -name '*.rmeta' \) \

View File

@@ -71,6 +71,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
MODINFO
ELF_DETAILS
DISCARDS

View File

@@ -123,6 +123,7 @@ SECTIONS
_end = . ;
STABS_DEBUG
MODINFO
ELF_DETAILS
DISCARDS

View File

@@ -21,6 +21,7 @@ SECTIONS
COMMON_DISCARDS
*(.ARM.exidx*)
*(.ARM.extab*)
*(.modinfo)
*(.note.*)
*(.rel.*)
*(.printk_index)

View File

@@ -279,7 +279,6 @@ CONFIG_TI_CPSW_SWITCHDEV=y
CONFIG_TI_CPTS=y
CONFIG_TI_KEYSTONE_NETCP=y
CONFIG_TI_KEYSTONE_NETCP_ETHSS=y
CONFIG_TI_PRUSS=m
CONFIG_TI_PRUETH=m
CONFIG_XILINX_EMACLITE=y
CONFIG_SFP=m

View File

@@ -154,6 +154,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
MODINFO
ARM_DETAILS
ARM_ASSERTS

View File

@@ -153,6 +153,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
MODINFO
ARM_DETAILS
ARM_ASSERTS

View File

@@ -698,7 +698,7 @@
compatible = "renesas,scif-r8a78000",
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0700000 0 0x40>;
interrupts = <GIC_SPI 4074 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 10 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
@@ -708,7 +708,7 @@
compatible = "renesas,scif-r8a78000",
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0704000 0 0x40>;
interrupts = <GIC_SPI 4075 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 11 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
@@ -718,7 +718,7 @@
compatible = "renesas,scif-r8a78000",
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0708000 0 0x40>;
interrupts = <GIC_SPI 4076 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 12 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
@@ -728,7 +728,7 @@
compatible = "renesas,scif-r8a78000",
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc070c000 0 0x40>;
interrupts = <GIC_SPI 4077 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 13 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
@@ -738,7 +738,7 @@
compatible = "renesas,hscif-r8a78000",
"renesas,rcar-gen5-hscif", "renesas,hscif";
reg = <0 0xc0710000 0 0x60>;
interrupts = <GIC_SPI 4078 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 14 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
@@ -748,7 +748,7 @@
compatible = "renesas,hscif-r8a78000",
"renesas,rcar-gen5-hscif", "renesas,hscif";
reg = <0 0xc0714000 0 0x60>;
interrupts = <GIC_SPI 4079 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 15 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
@@ -758,7 +758,7 @@
compatible = "renesas,hscif-r8a78000",
"renesas,rcar-gen5-hscif", "renesas,hscif";
reg = <0 0xc0718000 0 0x60>;
interrupts = <GIC_SPI 4080 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 16 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
@@ -768,7 +768,7 @@
compatible = "renesas,hscif-r8a78000",
"renesas,rcar-gen5-hscif", "renesas,hscif";
reg = <0 0xc071c000 0 0x60>;
interrupts = <GIC_SPI 4081 IRQ_TYPE_LEVEL_HIGH>;
interrupts = <GIC_ESPI 17 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";

View File

@@ -581,16 +581,6 @@
status = "disabled";
};
wdt0: watchdog@11c00400 {
compatible = "renesas,r9a09g057-wdt";
reg = <0 0x11c00400 0 0x400>;
clocks = <&cpg CPG_MOD 0x4b>, <&cpg CPG_MOD 0x4c>;
clock-names = "pclk", "oscclk";
resets = <&cpg 0x75>;
power-domains = <&cpg>;
status = "disabled";
};
wdt1: watchdog@14400000 {
compatible = "renesas,r9a09g057-wdt";
reg = <0 0x14400000 0 0x400>;
@@ -601,26 +591,6 @@
status = "disabled";
};
wdt2: watchdog@13000000 {
compatible = "renesas,r9a09g057-wdt";
reg = <0 0x13000000 0 0x400>;
clocks = <&cpg CPG_MOD 0x4f>, <&cpg CPG_MOD 0x50>;
clock-names = "pclk", "oscclk";
resets = <&cpg 0x77>;
power-domains = <&cpg>;
status = "disabled";
};
wdt3: watchdog@13000400 {
compatible = "renesas,r9a09g057-wdt";
reg = <0 0x13000400 0 0x400>;
clocks = <&cpg CPG_MOD 0x51>, <&cpg CPG_MOD 0x52>;
clock-names = "pclk", "oscclk";
resets = <&cpg 0x78>;
power-domains = <&cpg>;
status = "disabled";
};
rtc: rtc@11c00800 {
compatible = "renesas,r9a09g057-rtca3", "renesas,rz-rtca3";
reg = <0 0x11c00800 0 0x400>;

View File

@@ -974,8 +974,8 @@
cpg: clock-controller@80280000 {
compatible = "renesas,r9a09g077-cpg-mssr";
reg = <0 0x80280000 0 0x1000>,
<0 0x81280000 0 0x9000>;
reg = <0 0x80280000 0 0x10000>,
<0 0x81280000 0 0x10000>;
clocks = <&extal_clk>;
clock-names = "extal";
#clock-cells = <2>;

View File

@@ -977,8 +977,8 @@
cpg: clock-controller@80280000 {
compatible = "renesas,r9a09g087-cpg-mssr";
reg = <0 0x80280000 0 0x1000>,
<0 0x81280000 0 0x9000>;
reg = <0 0x80280000 0 0x10000>,
<0 0x81280000 0 0x10000>;
clocks = <&extal_clk>;
clock-names = "extal";
#clock-cells = <2>;

View File

@@ -162,7 +162,7 @@
<100000000>;
renesas,settings = [
80 00 11 19 4c 42 dc 2f 06 7d 20 1a 5f 1e f2 27
00 40 00 00 00 00 00 00 06 0c 19 02 3f f0 90 86
00 40 00 00 00 00 00 00 06 0c 19 02 3b f0 90 86
a0 80 30 30 9c
];
};

View File

@@ -53,6 +53,7 @@
regulator-max-microvolt = <3300000>;
gpios-states = <0>;
states = <3300000 0>, <1800000 1>;
regulator-ramp-delay = <60>;
};
#endif

View File

@@ -25,6 +25,7 @@
regulator-max-microvolt = <3300000>;
gpios-states = <0>;
states = <3300000 0>, <1800000 1>;
regulator-ramp-delay = <60>;
};
};

View File

@@ -76,19 +76,24 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
struct crypto_aes_ctx rk;
struct crypto_aes_ctx *rk;
int err;
err = aes_expandkey(&rk, in_key, key_len);
rk = kmalloc(sizeof(*rk), GFP_KERNEL);
if (!rk)
return -ENOMEM;
err = aes_expandkey(rk, in_key, key_len);
if (err)
return err;
goto out;
ctx->rounds = 6 + key_len / 4;
scoped_ksimd()
aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
return 0;
aesbs_convert_key(ctx->rk, rk->key_enc, ctx->rounds);
out:
kfree_sensitive(rk);
return err;
}
static int __ecb_crypt(struct skcipher_request *req,
@@ -133,22 +138,26 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
struct crypto_aes_ctx rk;
struct crypto_aes_ctx *rk;
int err;
err = aes_expandkey(&rk, in_key, key_len);
rk = kmalloc(sizeof(*rk), GFP_KERNEL);
if (!rk)
return -ENOMEM;
err = aes_expandkey(rk, in_key, key_len);
if (err)
return err;
goto out;
ctx->key.rounds = 6 + key_len / 4;
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
memcpy(ctx->enc, rk->key_enc, sizeof(ctx->enc));
scoped_ksimd()
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
memzero_explicit(&rk, sizeof(rk));
return 0;
aesbs_convert_key(ctx->key.rk, rk->key_enc, ctx->key.rounds);
out:
kfree_sensitive(rk);
return err;
}
static int cbc_encrypt(struct skcipher_request *req)

View File

@@ -91,8 +91,9 @@ __XCHG_GEN(_mb)
#define __xchg_wrapper(sfx, ptr, x) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__arch_xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret = (__force __typeof__(*(ptr))) \
__arch_xchg##sfx((__force unsigned long)(x), (ptr), \
sizeof(*(ptr))); \
__ret; \
})
@@ -175,9 +176,10 @@ __CMPXCHG_GEN(_mb)
#define __cmpxchg_wrapper(sfx, ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg##sfx((ptr), (unsigned long)(o), \
(unsigned long)(n), sizeof(*(ptr))); \
__ret = (__force __typeof__(*(ptr))) \
__cmpxchg##sfx((ptr), (__force unsigned long)(o), \
(__force unsigned long)(n), \
sizeof(*(ptr))); \
__ret; \
})

View File

@@ -264,19 +264,33 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count)
typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
pgprot_t *prot);
int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot);
static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size,
pgprot_t user_prot)
{
pgprot_t prot;
ptdesc_t user_prot_val = pgprot_val(user_prot);
if (WARN_ON_ONCE(!(user_prot_val & PTE_USER)))
return NULL;
prot = __pgprot_modify(PAGE_KERNEL, PTE_ATTRINDX_MASK,
user_prot_val & PTE_ATTRINDX_MASK);
return __ioremap_prot(phys, size, prot);
}
#define ioremap_prot ioremap_prot
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
#define ioremap(addr, size) \
__ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) \
ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
__ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
#define ioremap_np(addr, size) \
ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
__ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
#define ioremap_encrypted(addr, size) \
ioremap_prot((addr), (size), PAGE_KERNEL)
__ioremap_prot((addr), (size), PAGE_KERNEL)
/*
* io{read,write}{16,32,64}be() macros
@@ -297,7 +311,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
if (pfn_is_map_memory(__phys_to_pfn(addr)))
return (void __iomem *)__phys_to_virt(addr);
return ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
return __ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
}
/*

View File

@@ -784,6 +784,9 @@ struct kvm_host_data {
/* Number of debug breakpoints/watchpoints for this CPU (minus 1) */
unsigned int debug_brps;
unsigned int debug_wrps;
/* Last vgic_irq part of the AP list recorded in an LR */
struct vgic_irq *last_lr_irq;
};
struct kvm_host_psci_config {
@@ -1616,7 +1619,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP))
#define kvm_has_s1poe(k) \
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
(system_supports_poe() && \
kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
#define kvm_has_ras(k) \
(kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))

View File

@@ -397,6 +397,8 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime);
#define vncr_fixmap(c) \
({ \
u32 __c = (c); \

View File

@@ -50,11 +50,11 @@
#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define _PAGE_KERNEL (PROT_NORMAL)
#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN)
#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
#define _PAGE_KERNEL (PROT_NORMAL | PTE_DIRTY)
#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY | PTE_DIRTY)
#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY | PTE_DIRTY)
#define _PAGE_KERNEL_EXEC ((PROT_NORMAL & ~PTE_PXN) | PTE_DIRTY)
#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT | PTE_DIRTY)
#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
@@ -164,9 +164,6 @@ static inline bool __pure lpa2_is_enabled(void)
#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER)
#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER)
#define PAGE_GCS __pgprot(_PAGE_GCS)
#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
#define PIE_E0 ( \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \

View File

@@ -2,6 +2,10 @@
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H
#ifdef MODULE
#error "Cannot use runtime-const infrastructure from modules"
#endif
#include <asm/cacheflush.h>
/* Sigh. You can still run arm64 in BE mode */

View File

@@ -31,19 +31,11 @@
*/
#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \
"tlbi " #op "\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op, \
ARM64_WORKAROUND_REPEAT_TLBI, \
CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )
#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \
"tlbi " #op ", %0\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op ", %0", \
ARM64_WORKAROUND_REPEAT_TLBI, \
CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : "r" (arg))
"tlbi " #op ", %x0\n" \
: : "rZ" (arg))
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
@@ -181,6 +173,34 @@ static inline unsigned long get_trans_granule(void)
(__pages >> (5 * (scale) + 1)) - 1; \
})
#define __repeat_tlbi_sync(op, arg...) \
do { \
if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) \
break; \
__tlbi(op, ##arg); \
dsb(ish); \
} while (0)
/*
* Complete broadcast TLB maintenance issued by the host which invalidates
* stage 1 information in the host's own translation regime.
*/
static inline void __tlbi_sync_s1ish(void)
{
dsb(ish);
__repeat_tlbi_sync(vale1is, 0);
}
/*
* Complete broadcast TLB maintenance issued by hyp code which invalidates
* stage 1 translation information in any translation regime.
*/
static inline void __tlbi_sync_s1ish_hyp(void)
{
dsb(ish);
__repeat_tlbi_sync(vale2is, 0);
}
/*
* TLB Invalidation
* ================
@@ -279,7 +299,7 @@ static inline void flush_tlb_all(void)
{
dsb(ishst);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish();
isb();
}
@@ -291,7 +311,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
asid = __TLBI_VADDR(0, ASID(mm));
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
__tlbi_sync_s1ish();
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
@@ -345,20 +365,11 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
flush_tlb_page_nosync(vma, uaddr);
dsb(ish);
__tlbi_sync_s1ish();
}
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
/*
* TLB flush deferral is not required on systems which are affected by
* ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
* will have two consecutive TLBI instructions with a dsb(ish) in between
* defeating the purpose (i.e save overall 'dsb ish' cost).
*/
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
return false;
return true;
}
@@ -374,7 +385,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
*/
static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
dsb(ish);
__tlbi_sync_s1ish();
}
/*
@@ -509,7 +520,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
{
__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
last_level, tlb_level);
dsb(ish);
__tlbi_sync_s1ish();
}
static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
@@ -557,7 +568,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
dsb(ishst);
__flush_tlb_range_op(vaale1is, start, pages, stride, 0,
TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
dsb(ish);
__tlbi_sync_s1ish();
isb();
}
@@ -571,7 +582,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
dsb(ishst);
__tlbi(vaae1is, addr);
dsb(ish);
__tlbi_sync_s1ish();
isb();
}

View File

@@ -377,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
return ioremap_prot(phys, size, prot);
return __ioremap_prot(phys, size, prot);
}
/*

View File

@@ -2345,6 +2345,15 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
!is_midr_in_range_list(has_vgic_v3))
return false;
/*
* pKVM prevents late onlining of CPUs. This means that whatever
* state the capability is in after deprivilege cannot be affected
* by a new CPU booting -- this is garanteed to be a CPU we have
* already seen, and the cap is therefore unchanged.
*/
if (system_capabilities_finalized() && is_protected_kvm_enabled())
return cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR);
if (is_kernel_in_hyp_mode())
res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
else

View File

@@ -192,6 +192,14 @@ static int scs_handle_fde_frame(const struct eh_frame *frame,
size -= 2;
break;
case DW_CFA_advance_loc4:
loc += *opcode++ * code_alignment_factor;
loc += (*opcode++ << 8) * code_alignment_factor;
loc += (*opcode++ << 16) * code_alignment_factor;
loc += (*opcode++ << 24) * code_alignment_factor;
size -= 4;
break;
case DW_CFA_def_cfa:
case DW_CFA_offset_extended:
size = skip_xleb128(&opcode, size);

View File

@@ -12,6 +12,7 @@
#include <asm/io.h>
#include <asm/mem_encrypt.h>
#include <asm/pgtable.h>
#include <asm/rsi.h>
static struct realm_config config;
@@ -146,7 +147,7 @@ void __init arm64_rsi_init(void)
return;
if (WARN_ON(rsi_get_realm_config(&config)))
return;
prot_ns_shared = BIT(config.ipa_bits - 1);
prot_ns_shared = __phys_to_pte_val(BIT(config.ipa_bits - 1));
if (arm64_ioremap_prot_hook_register(realm_ioremap_hook))
return;

View File

@@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
* We pick the reserved-ASID to minimise the impact.
*/
__tlbi(aside1is, __TLBI_VADDR(0, 0));
dsb(ish);
__tlbi_sync_s1ish();
}
ret = caches_clean_inval_user_pou(start, start + chunk);

View File

@@ -400,16 +400,25 @@ static inline
int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
{
/*
* Abort call on counterless CPU or when interrupts are
* disabled - can lead to deadlock in smp sync call.
* Abort call on counterless CPU.
*/
if (!cpu_has_amu_feat(cpu))
return -EOPNOTSUPP;
if (WARN_ON_ONCE(irqs_disabled()))
return -EPERM;
smp_call_function_single(cpu, func, val, 1);
if (irqs_disabled()) {
/*
* When IRQs are disabled (tick path: sched_tick ->
* topology_scale_freq_tick or cppc_scale_freq_tick), only local
* CPU counter reads are allowed. Remote CPU counter read would
* require smp_call_function_single() which is unsafe with IRQs
* disabled.
*/
if (WARN_ON_ONCE(cpu != smp_processor_id()))
return -EPERM;
func(val);
} else {
smp_call_function_single(cpu, func, val, 1);
}
return 0;
}

View File

@@ -349,6 +349,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
MODINFO
ELF_DETAILS
HEAD_SYMBOLS

View File

@@ -21,7 +21,6 @@ menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
select KVM_COMMON
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_MMU_NOTIFIER
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT

View File

@@ -358,7 +358,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_IOEVENTFD:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:

View File

@@ -540,31 +540,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
if (wr->nG) {
u64 asid_ttbr, tcr;
switch (wi->regime) {
case TR_EL10:
tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
asid_ttbr = ((tcr & TCR_A1) ?
vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
vcpu_read_sys_reg(vcpu, TTBR0_EL1));
break;
case TR_EL20:
tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
asid_ttbr = ((tcr & TCR_A1) ?
vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
vcpu_read_sys_reg(vcpu, TTBR0_EL2));
break;
default:
BUG();
}
wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
!(tcr & TCR_ASID16))
wr->asid &= GENMASK(7, 0);
}
if (wr->nG)
wr->asid = get_asid_by_regime(vcpu, wi->regime);
return 0;
@@ -1527,8 +1504,6 @@ int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail = true;
}
isb();
if (!fail)
par = read_sysreg_par();
@@ -1778,7 +1753,7 @@ int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
if (!writable)
return -EPERM;
ptep = (u64 __user *)hva + offset;
ptep = (void __user *)hva + offset;
if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
r = __lse_swap_desc(ptep, old, new);
else

View File

@@ -29,7 +29,7 @@
#include "trace.h"
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
const struct kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS()
};
@@ -42,7 +42,7 @@ const struct kvm_stats_header kvm_vm_stats_header = {
sizeof(kvm_vm_stats_desc),
};
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
const struct kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
STATS_DESC_COUNTER(VCPU, wfe_exit_stat),

View File

@@ -518,7 +518,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
granule = kvm_granule_size(level);
cur.start = ALIGN_DOWN(addr, granule);
cur.end = cur.start + granule;
if (!range_included(&cur, range))
if (!range_included(&cur, range) && level < KVM_PGTABLE_LAST_LEVEL)
continue;
*range = cur;
return 0;

View File

@@ -271,7 +271,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
*/
dsb(ishst);
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
}

View File

@@ -342,6 +342,7 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
/* No restrictions for non-protected VMs. */
if (!kvm_vm_is_protected(kvm)) {
hyp_vm->kvm.arch.flags = host_arch_flags;
hyp_vm->kvm.arch.flags &= ~BIT_ULL(KVM_ARCH_FLAG_ID_REGS_INITIALIZED);
bitmap_copy(kvm->arch.vcpu_features,
host_kvm->arch.vcpu_features,
@@ -391,7 +392,7 @@ static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
return;
sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
sve_state = hyp_vcpu->vcpu.arch.sve_state;
hyp_unpin_shared_mem(sve_state,
sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
}
@@ -471,6 +472,35 @@ err:
return ret;
}
static int vm_copy_id_regs(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
const struct kvm *host_kvm = hyp_vm->host_kvm;
struct kvm *kvm = &hyp_vm->kvm;
if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &host_kvm->arch.flags))
return -EINVAL;
if (test_and_set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
return 0;
memcpy(kvm->arch.id_regs, host_kvm->arch.id_regs, sizeof(kvm->arch.id_regs));
return 0;
}
static int pkvm_vcpu_init_sysregs(struct pkvm_hyp_vcpu *hyp_vcpu)
{
int ret = 0;
if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
else
ret = vm_copy_id_regs(hyp_vcpu);
return ret;
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
struct pkvm_hyp_vm *hyp_vm,
struct kvm_vcpu *host_vcpu)
@@ -490,8 +520,9 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
ret = pkvm_vcpu_init_sysregs(hyp_vcpu);
if (ret)
goto done;
ret = pkvm_vcpu_init_traps(hyp_vcpu);
if (ret)

View File

@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -226,7 +226,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -240,7 +240,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -266,5 +266,5 @@ void __kvm_flush_vm_context(void)
/* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
}

View File

@@ -501,7 +501,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
*unmapped += granule;
}
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
mm_ops->put_page(ctx->ptep);

View File

@@ -115,7 +115,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -176,7 +176,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -192,7 +192,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -217,7 +217,7 @@ void __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
}
/*
@@ -358,7 +358,7 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding)
default:
ret = -EINVAL;
}
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
if (mmu)

View File

@@ -1751,17 +1751,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
force_pte = (max_map_size == PAGE_SIZE);
vma_pagesize = min_t(long, vma_pagesize, max_map_size);
vma_shift = __ffs(vma_pagesize);
}
/*
* Both the canonical IPA and fault IPA must be hugepage-aligned to
* ensure we find the right PFN and lay down the mapping in the right
* place.
* Both the canonical IPA and fault IPA must be aligned to the
* mapping size to ensure we find the right PFN and lay down the
* mapping in the right place.
*/
if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) {
fault_ipa &= ~(vma_pagesize - 1);
ipa &= ~(vma_pagesize - 1);
}
fault_ipa = ALIGN_DOWN(fault_ipa, vma_pagesize);
ipa = ALIGN_DOWN(ipa, vma_pagesize);
gfn = ipa >> PAGE_SHIFT;
mte_allowed = kvm_vma_mte_allowed(vma);
@@ -1839,10 +1838,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault && s2_force_noncacheable)
ret = -ENOEXEC;
if (ret) {
kvm_release_page_unused(page);
return ret;
}
if (ret)
goto out_put_page;
/*
* Guest performs atomic/exclusive operations on memory with unsupported
@@ -1852,7 +1849,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
*/
if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) {
kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu));
return 1;
ret = 1;
goto out_put_page;
}
if (nested)
@@ -1938,6 +1936,10 @@ out_unlock:
mark_page_dirty_in_slot(kvm, memslot, gfn);
return ret != -EAGAIN ? ret : 0;
out_put_page:
kvm_release_page_unused(page);
return ret;
}
/* Resolve the access fault by making the page young again. */

View File

@@ -152,31 +152,31 @@ static int get_ia_size(struct s2_walk_info *wi)
return 64 - wi->t0sz;
}
static int check_base_s2_limits(struct s2_walk_info *wi,
static int check_base_s2_limits(struct kvm_vcpu *vcpu, struct s2_walk_info *wi,
int level, int input_size, int stride)
{
int start_size, ia_size;
int start_size, pa_max;
ia_size = get_ia_size(wi);
pa_max = kvm_get_pa_bits(vcpu->kvm);
/* Check translation limits */
switch (BIT(wi->pgshift)) {
case SZ_64K:
if (level == 0 || (level == 1 && ia_size <= 42))
if (level == 0 || (level == 1 && pa_max <= 42))
return -EFAULT;
break;
case SZ_16K:
if (level == 0 || (level == 1 && ia_size <= 40))
if (level == 0 || (level == 1 && pa_max <= 40))
return -EFAULT;
break;
case SZ_4K:
if (level < 0 || (level == 0 && ia_size <= 42))
if (level < 0 || (level == 0 && pa_max <= 42))
return -EFAULT;
break;
}
/* Check input size limits */
if (input_size > ia_size)
if (input_size > pa_max)
return -EFAULT;
/* Check number of entries in starting level table */
@@ -269,16 +269,19 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
if (input_size > 48 || input_size < 25)
return -EFAULT;
ret = check_base_s2_limits(wi, level, input_size, stride);
if (WARN_ON(ret))
ret = check_base_s2_limits(vcpu, wi, level, input_size, stride);
if (WARN_ON(ret)) {
out->esr = compute_fsc(0, ESR_ELx_FSC_FAULT);
return ret;
}
base_lower_bound = 3 + input_size - ((3 - level) * stride +
wi->pgshift);
base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound);
if (check_output_size(wi, base_addr)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
/* R_BFHQH */
out->esr = compute_fsc(0, ESR_ELx_FSC_ADDRSZ);
return 1;
}
@@ -293,8 +296,10 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
paddr = base_addr | index;
ret = read_guest_s2_desc(vcpu, paddr, &desc, wi);
if (ret < 0)
if (ret < 0) {
out->esr = ESR_ELx_FSC_SEA_TTW(level);
return ret;
}
new_desc = desc;
@@ -854,6 +859,33 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
return kvm_inject_nested_sync(vcpu, esr_el2);
}
u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
enum vcpu_sysreg ttbr_elx;
u64 tcr;
u16 asid;
switch (regime) {
case TR_EL10:
tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL1 : TTBR0_EL1;
break;
case TR_EL20:
tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL2 : TTBR0_EL2;
break;
default:
BUG();
}
asid = FIELD_GET(TTBRx_EL1_ASID, vcpu_read_sys_reg(vcpu, ttbr_elx));
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
!(tcr & TCR_ASID16))
asid &= GENMASK(7, 0);
return asid;
}
static void invalidate_vncr(struct vncr_tlb *vt)
{
vt->valid = false;
@@ -1154,9 +1186,6 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
int i;
if (!kvm->arch.nested_mmus_size)
return;
for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
@@ -1336,20 +1365,8 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
if (read_vncr_el2(vcpu) != vt->gva)
return false;
if (vt->wr.nG) {
u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
u64 ttbr = ((tcr & TCR_A1) ?
vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
vcpu_read_sys_reg(vcpu, TTBR0_EL2));
u16 asid;
asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
!(tcr & TCR_ASID16))
asid &= GENMASK(7, 0);
return asid == vt->wr.asid;
}
if (vt->wr.nG)
return get_asid_by_regime(vcpu, TR_EL20) == vt->wr.asid;
return true;
}
@@ -1452,21 +1469,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
if (read_vncr_el2(vcpu) != vt->gva)
return;
if (vt->wr.nG) {
u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
u64 ttbr = ((tcr & TCR_A1) ?
vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
vcpu_read_sys_reg(vcpu, TTBR0_EL2));
u16 asid;
asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
!(tcr & TCR_ASID16))
asid &= GENMASK(7, 0);
if (asid != vt->wr.asid)
return;
}
if (vt->wr.nG && get_asid_by_regime(vcpu, TR_EL20) != vt->wr.asid)
return;
vt->cpu = smp_processor_id();

View File

@@ -247,6 +247,20 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_vcpu_set_be(vcpu);
*vcpu_pc(vcpu) = target_pc;
/*
* We may come from a state where either a PC update was
* pending (SMC call resulting in PC being increpented to
* skip the SMC) or a pending exception. Make sure we get
* rid of all that, as this cannot be valid out of reset.
*
* Note that clearing the exception mask also clears PC
* updates, but that's an implementation detail, and we
* really want to make it explicit.
*/
vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
vcpu_clear_flag(vcpu, EXCEPT_MASK);
vcpu_clear_flag(vcpu, INCREMENT_PC);
vcpu_set_reg(vcpu, 0, reset_state.r0);
}

View File

@@ -1816,6 +1816,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
ID_AA64MMFR3_EL1_SCTLRX |
ID_AA64MMFR3_EL1_S1POE |
ID_AA64MMFR3_EL1_S1PIE;
if (!system_supports_poe())
val &= ~ID_AA64MMFR3_EL1_S1POE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ID_MMFR4_EL1_CCIDX;

View File

@@ -143,23 +143,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;
kvm_for_each_vcpu(i, vcpu, kvm) {
ret = vgic_allocate_private_irqs_locked(vcpu, type);
if (ret)
break;
}
if (ret) {
kvm_for_each_vcpu(i, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
}
goto out_unlock;
}
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
@@ -176,6 +159,23 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
kvm_for_each_vcpu(i, vcpu, kvm) {
ret = vgic_allocate_private_irqs_locked(vcpu, type);
if (ret)
break;
}
if (ret) {
kvm_for_each_vcpu(i, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
}
kvm->arch.vgic.vgic_model = 0;
goto out_unlock;
}
if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
kvm->arch.vgic.nassgicap = system_supports_direct_sgis();

View File

@@ -115,7 +115,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
u32 eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr);
struct vgic_irq *irq;
struct vgic_irq *irq = *host_data_ptr(last_lr_irq);
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
@@ -123,7 +123,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]);
/* See the GICv3 equivalent for the EOIcount handling rationale */
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
list_for_each_entry_continue(irq, &vgic_cpu->ap_list_head, ap_list) {
u32 lr;
if (!eoicount) {

View File

@@ -148,7 +148,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr);
struct vgic_irq *irq;
struct vgic_irq *irq = *host_data_ptr(last_lr_irq);
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
@@ -158,12 +158,12 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
/*
* EOIMode=0: use EOIcount to emulate deactivation. We are
* guaranteed to deactivate in reverse order of the activation, so
* just pick one active interrupt after the other in the ap_list,
* and replay the deactivation as if the CPU was doing it. We also
* rely on priority drop to have taken place, and the list to be
* sorted by priority.
* just pick one active interrupt after the other in the tail part
* of the ap_list, past the LRs, and replay the deactivation as if
* the CPU was doing it. We also rely on priority drop to have taken
* place, and the list to be sorted by priority.
*/
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
list_for_each_entry_continue(irq, &vgic_cpu->ap_list_head, ap_list) {
u64 lr;
/*

View File

@@ -814,6 +814,9 @@ retry:
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
if (!*host_data_ptr(last_lr_irq))
return;
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_fold_lr_state(vcpu);
else
@@ -960,10 +963,13 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
if (irqs_outside_lrs(&als))
vgic_sort_ap_list(vcpu);
*host_data_ptr(last_lr_irq) = NULL;
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
scoped_guard(raw_spinlock, &irq->irq_lock) {
if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
*host_data_ptr(last_lr_irq) = irq;
}
}

View File

@@ -32,7 +32,11 @@ static inline unsigned long xloops_to_cycles(unsigned long xloops)
* Note that userspace cannot change the offset behind our back either,
* as the vcpu mutex is held as long as KVM_RUN is in progress.
*/
#define __delay_cycles() __arch_counter_get_cntvct_stable()
static cycles_t notrace __delay_cycles(void)
{
guard(preempt_notrace)();
return __arch_counter_get_cntvct_stable();
}
void __delay(unsigned long cycles)
{

View File

@@ -599,6 +599,27 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
}
EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
static bool contpte_all_subptes_match_access_flags(pte_t *ptep, pte_t entry)
{
pte_t *cont_ptep = contpte_align_down(ptep);
/*
* PFNs differ per sub-PTE. Match only bits consumed by
* __ptep_set_access_flags(): AF, DIRTY and write permission.
*/
const pteval_t cmp_mask = PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;
pteval_t entry_cmp = pte_val(entry) & cmp_mask;
int i;
for (i = 0; i < CONT_PTES; i++) {
pteval_t pte_cmp = pte_val(__ptep_get(cont_ptep + i)) & cmp_mask;
if (pte_cmp != entry_cmp)
return false;
}
return true;
}
int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t entry, int dirty)
@@ -608,13 +629,37 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
int i;
/*
* Gather the access/dirty bits for the contiguous range. If nothing has
* changed, its a noop.
* Check whether all sub-PTEs in the CONT block already match the
* requested access flags/write permission, using raw per-PTE values
* rather than the gathered ptep_get() view.
*
* __ptep_set_access_flags() can update AF, dirty and write
* permission, but only to make the mapping more permissive.
*
* ptep_get() gathers AF/dirty state across the whole CONT block,
* which is correct for a CPU with FEAT_HAFDBS. But page-table
* walkers that evaluate each descriptor individually (e.g. a CPU
* without DBM support, or an SMMU without HTTU, or with HA/HD
* disabled in CD.TCR) can keep faulting on the target sub-PTE if
* only a sibling has been updated. Gathering can therefore cause
* false no-ops when only a sibling has been updated:
* - write faults: target still has PTE_RDONLY (needs PTE_RDONLY cleared)
* - read faults: target still lacks PTE_AF
*
* Per Arm ARM (DDI 0487) D8.7.1, any sub-PTE in a CONT range may
* become the effective cached translation, so all entries must have
* consistent attributes. Check the full CONT block before returning
* no-op, and when any sub-PTE mismatches, proceed to update the whole
* range.
*/
orig_pte = pte_mknoncont(ptep_get(ptep));
if (pte_val(orig_pte) == pte_val(entry))
if (contpte_all_subptes_match_access_flags(ptep, entry))
return 0;
/*
* Use raw target pte (not gathered) for write-bit unfold decision.
*/
orig_pte = pte_mknoncont(__ptep_get(ptep));
/*
* We can fix up access/dirty bits without having to unfold the contig
* range. But if the write bit is changing, we must unfold.

View File

@@ -14,8 +14,8 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook)
return 0;
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
pgprot_t pgprot)
void __iomem *__ioremap_prot(phys_addr_t phys_addr, size_t size,
pgprot_t pgprot)
{
unsigned long last_addr = phys_addr + size - 1;
@@ -39,7 +39,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
return generic_ioremap_prot(phys_addr, size, pgprot);
}
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap_prot);
/*
* Must be called after early_fixmap_init

View File

@@ -34,6 +34,8 @@ static pgprot_t protection_map[16] __ro_after_init = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC
};
static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO;
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
* away in the future.
@@ -73,9 +75,11 @@ static int __init adjust_protection_map(void)
protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
}
if (lpa2_is_enabled())
if (lpa2_is_enabled()) {
for (int i = 0; i < ARRAY_SIZE(protection_map); i++)
pgprot_val(protection_map[i]) &= ~PTE_SHARED;
gcs_page_prot &= ~PTE_SHARED;
}
return 0;
}
@@ -87,7 +91,11 @@ pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
/* Short circuit GCS to avoid bloating the table. */
if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
prot = _PAGE_GCS_RO;
/* Honour mprotect(PROT_NONE) on shadow stack mappings */
if (vm_flags & VM_ACCESS_FLAGS)
prot = gcs_page_prot;
else
prot = pgprot_val(protection_map[VM_NONE]);
} else {
prot = pgprot_val(protection_map[vm_flags &
(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);

View File

@@ -2119,7 +2119,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
image_size = extable_offset + extable_size;
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
sizeof(u32), &header, &image_ptr,
sizeof(u64), &header, &image_ptr,
jit_fill_hole);
if (!ro_header) {
prog = orig_prog;

View File

@@ -109,6 +109,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
MODINFO
ELF_DETAILS
DISCARDS

View File

@@ -62,6 +62,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
MODINFO
ELF_DETAILS
.hexagon.attributes 0 : { *(.hexagon.attributes) }

View File

@@ -304,6 +304,9 @@ config AS_HAS_LBT_EXTENSION
config AS_HAS_LVZ_EXTENSION
def_bool $(as-instr,hvcl 0)
config AS_HAS_SCQ_EXTENSION
def_bool $(as-instr,sc.q \$t0$(comma)\$t1$(comma)\$t2)
config CC_HAS_ANNOTATE_TABLEJUMP
def_bool $(cc-option,-mannotate-tablejump)

View File

@@ -238,6 +238,8 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int
arch_cmpxchg((ptr), (o), (n)); \
})
#ifdef CONFIG_AS_HAS_SCQ_EXTENSION
union __u128_halves {
u128 full;
struct {
@@ -290,6 +292,9 @@ union __u128_halves {
BUILD_BUG_ON(sizeof(*(ptr)) != 16); \
__arch_cmpxchg128(ptr, o, n, ""); \
})
#endif /* CONFIG_AS_HAS_SCQ_EXTENSION */
#else
#include <asm-generic/cmpxchg-local.h>
#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))

Some files were not shown because too many files have changed in this diff Show More