Skip to content

Commit 6f9c4d8

Browse files
committed
iommufd: Do not UAF during iommufd_put_object()
The mixture of kernel and user space lifecycle objects continues to be complicated inside iommufd. The obj->destroy_rwsem is used to bring order to the kernel driver destruction sequence but it cannot be sequenced right with the other refcounts so we end up possibly UAF'ing: BUG: KASAN: slab-use-after-free in __up_read+0x627/0x750 kernel/locking/rwsem.c:1342 Read of size 8 at addr ffff888073cde868 by task syz-executor934/6535 CPU: 1 PID: 6535 Comm: syz-executor934 Not tainted 6.6.0-rc7-syzkaller-00195-g2af9b20dbb39 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 __up_read+0x627/0x750 kernel/locking/rwsem.c:1342 iommufd_put_object drivers/iommu/iommufd/iommufd_private.h:149 [inline] iommufd_vfio_ioas+0x46c/0x580 drivers/iommu/iommufd/vfio_compat.c:146 iommufd_fops_ioctl+0x347/0x4d0 drivers/iommu/iommufd/main.c:398 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd There are two races here, the more obvious one: CPU 0 CPU 1 iommufd_put_object() iommufd_destroy() refcount_dec(&obj->users) iommufd_object_remove() kfree() up_read(&obj->destroy_rwsem) // Boom And there is also perhaps some possibility that the rwsem could hit an issue: CPU 0 CPU 1 iommufd_put_object() iommufd_object_destroy_user() refcount_dec(&obj->users); down_write(&obj->destroy_rwsem) up_read(&obj->destroy_rwsem); atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); tmp = atomic_long_add_return_release() rwsem_try_write_lock() iommufd_object_remove() up_write(&obj->destroy_rwsem) kfree() clear_nonspinnable() // Boom Fix this by reorganizing this again so that two refcounts are used to keep track of things with a rule that users == 0 && shortterm_users == 0 means no other threads have that memory. Put a wait_queue in the iommufd_ctx object that is triggered when any sub object reaches a 0 shortterm_users. This allows the same wait for userspace ioctls to finish behavior that the rwsem was providing. This is weaker still than the prior versions: - There is no bias on shortterm_users so if some thread is waiting to destroy other threads can continue to get new read sides - If destruction fails, eg because of an active in-kernel user, then shortterm_users will have cycled to zero momentarily blocking new users - If userspace races destroy with other userspace operations they continue to get an EBUSY since we still can't intermix looking up an ID and sleeping for its unref In all cases these are things that userspace brings on itself, correct programs will not hit them. Fixes: 99f98a7 ("iommufd: IOMMUFD_DESTROY should not increase the refcount") Link: https://lore.kernel.org/all/2-v2-ca9e00171c5b+123-iommufd_syz4_jgg@nvidia.com/ Reported-by: syzbot+d31adfb277377ef8fcba@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/00000000000055ef9a0609336580@google.com Reviewed-by: Kevin Tian <kevin.tian@intel.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent bd7a282 commit 6f9c4d8

File tree

2 files changed

+135
-78
lines changed

2 files changed

+135
-78
lines changed

drivers/iommu/iommufd/iommufd_private.h

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ struct iommufd_ctx {
2121
struct file *file;
2222
struct xarray objects;
2323
struct xarray groups;
24+
wait_queue_head_t destroy_wait;
2425

2526
u8 account_mode;
2627
/* Compatibility with VFIO no iommu */
@@ -135,18 +136,23 @@ enum iommufd_object_type {
135136

136137
/* Base struct for all objects with a userspace ID handle. */
137138
struct iommufd_object {
138-
struct rw_semaphore destroy_rwsem;
139+
refcount_t shortterm_users;
139140
refcount_t users;
140141
enum iommufd_object_type type;
141142
unsigned int id;
142143
};
143144

144145
static inline bool iommufd_lock_obj(struct iommufd_object *obj)
145146
{
146-
if (!down_read_trylock(&obj->destroy_rwsem))
147+
if (!refcount_inc_not_zero(&obj->users))
147148
return false;
148-
if (!refcount_inc_not_zero(&obj->users)) {
149-
up_read(&obj->destroy_rwsem);
149+
if (!refcount_inc_not_zero(&obj->shortterm_users)) {
150+
/*
151+
* If the caller doesn't already have a ref on obj this must be
152+
* called under the xa_lock. Otherwise the caller is holding a
153+
* ref on users. Thus it cannot be one before this decrement.
154+
*/
155+
refcount_dec(&obj->users);
150156
return false;
151157
}
152158
return true;
@@ -157,26 +163,63 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
157163
static inline void iommufd_put_object(struct iommufd_ctx *ictx,
158164
struct iommufd_object *obj)
159165
{
166+
/*
167+
* Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
168+
* a spurious !0 users with a 0 shortterm_users.
169+
*/
160170
refcount_dec(&obj->users);
161-
up_read(&obj->destroy_rwsem);
171+
if (refcount_dec_and_test(&obj->shortterm_users))
172+
wake_up_interruptible_all(&ictx->destroy_wait);
162173
}
163174

164175
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
165176
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
166177
struct iommufd_object *obj);
167178
void iommufd_object_finalize(struct iommufd_ctx *ictx,
168179
struct iommufd_object *obj);
169-
void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
170-
struct iommufd_object *obj, bool allow_fail);
180+
181+
enum {
182+
REMOVE_WAIT_SHORTTERM = 1,
183+
};
184+
int iommufd_object_remove(struct iommufd_ctx *ictx,
185+
struct iommufd_object *to_destroy, u32 id,
186+
unsigned int flags);
187+
188+
/*
189+
* The caller holds a users refcount and wants to destroy the object. At this
190+
* point the caller has no shortterm_users reference and at least the xarray
191+
* will be holding one.
192+
*/
171193
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
172194
struct iommufd_object *obj)
173195
{
174-
__iommufd_object_destroy_user(ictx, obj, false);
196+
int ret;
197+
198+
ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
199+
200+
/*
201+
* If there is a bug and we couldn't destroy the object then we did put
202+
* back the caller's users refcount and will eventually try to free it
203+
* again during close.
204+
*/
205+
WARN_ON(ret);
175206
}
176-
static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
177-
struct iommufd_object *obj)
207+
208+
/*
209+
* The HWPT allocated by autodomains is used in possibly many devices and
210+
* is automatically destroyed when its refcount reaches zero.
211+
*
212+
* If userspace uses the HWPT manually, even for a short term, then it will
213+
* disrupt this refcounting and the auto-free in the kernel will not work.
214+
* Userspace that tries to use the automatically allocated HWPT must be careful
215+
* to ensure that it is consistently destroyed, eg by not racing accesses
216+
* and by not attaching an automatic HWPT to a device manually.
217+
*/
218+
static inline void
219+
iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
220+
struct iommufd_object *obj)
178221
{
179-
__iommufd_object_destroy_user(ictx, obj, true);
222+
iommufd_object_remove(ictx, obj, obj->id, 0);
180223
}
181224

182225
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
@@ -312,7 +355,7 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
312355
lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
313356

314357
if (hwpt_paging->auto_domain) {
315-
iommufd_object_deref_user(ictx, &hwpt->obj);
358+
iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
316359
return;
317360
}
318361
}

drivers/iommu/iommufd/main.c

Lines changed: 80 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,15 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
3333
size_t size,
3434
enum iommufd_object_type type)
3535
{
36-
static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX];
3736
struct iommufd_object *obj;
3837
int rc;
3938

4039
obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
4140
if (!obj)
4241
return ERR_PTR(-ENOMEM);
4342
obj->type = type;
44-
/*
45-
* In most cases the destroy_rwsem is obtained with try so it doesn't
46-
* interact with lockdep, however on destroy we have to sleep. This
47-
* means if we have to destroy an object while holding a get on another
48-
* object it triggers lockdep. Using one locking class per object type
49-
* is a simple and reasonable way to avoid this.
50-
*/
51-
__init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem",
52-
&obj_keys[type]);
43+
/* Starts out bias'd by 1 until it is removed from the xarray */
44+
refcount_set(&obj->shortterm_users, 1);
5345
refcount_set(&obj->users, 1);
5446

5547
/*
@@ -129,92 +121,113 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
129121
return obj;
130122
}
131123

124+
static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
125+
struct iommufd_object *to_destroy)
126+
{
127+
if (refcount_dec_and_test(&to_destroy->shortterm_users))
128+
return 0;
129+
130+
if (wait_event_timeout(ictx->destroy_wait,
131+
refcount_read(&to_destroy->shortterm_users) ==
132+
0,
133+
msecs_to_jiffies(10000)))
134+
return 0;
135+
136+
pr_crit("Time out waiting for iommufd object to become free\n");
137+
refcount_inc(&to_destroy->shortterm_users);
138+
return -EBUSY;
139+
}
140+
132141
/*
133142
* Remove the given object id from the xarray if the only reference to the
134-
* object is held by the xarray. The caller must call ops destroy().
143+
* object is held by the xarray.
135144
*/
136-
static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx,
137-
u32 id, bool extra_put)
145+
int iommufd_object_remove(struct iommufd_ctx *ictx,
146+
struct iommufd_object *to_destroy, u32 id,
147+
unsigned int flags)
138148
{
139149
struct iommufd_object *obj;
140150
XA_STATE(xas, &ictx->objects, id);
141-
142-
xa_lock(&ictx->objects);
143-
obj = xas_load(&xas);
144-
if (xa_is_zero(obj) || !obj) {
145-
obj = ERR_PTR(-ENOENT);
146-
goto out_xa;
147-
}
151+
bool zerod_shortterm = false;
152+
int ret;
148153

149154
/*
150-
* If the caller is holding a ref on obj we put it here under the
151-
* spinlock.
155+
* The purpose of the shortterm_users is to ensure deterministic
156+
* destruction of objects used by external drivers and destroyed by this
157+
* function. Any temporary increment of the refcount must increment
158+
* shortterm_users, such as during ioctl execution.
152159
*/
153-
if (extra_put)
160+
if (flags & REMOVE_WAIT_SHORTTERM) {
161+
ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy);
162+
if (ret) {
163+
/*
164+
* We have a bug. Put back the callers reference and
165+
* defer cleaning this object until close.
166+
*/
167+
refcount_dec(&to_destroy->users);
168+
return ret;
169+
}
170+
zerod_shortterm = true;
171+
}
172+
173+
xa_lock(&ictx->objects);
174+
obj = xas_load(&xas);
175+
if (to_destroy) {
176+
/*
177+
* If the caller is holding a ref on obj we put it here under
178+
* the spinlock.
179+
*/
154180
refcount_dec(&obj->users);
155181

182+
if (WARN_ON(obj != to_destroy)) {
183+
ret = -ENOENT;
184+
goto err_xa;
185+
}
186+
} else if (xa_is_zero(obj) || !obj) {
187+
ret = -ENOENT;
188+
goto err_xa;
189+
}
190+
156191
if (!refcount_dec_if_one(&obj->users)) {
157-
obj = ERR_PTR(-EBUSY);
158-
goto out_xa;
192+
ret = -EBUSY;
193+
goto err_xa;
159194
}
160195

161196
xas_store(&xas, NULL);
162197
if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
163198
ictx->vfio_ioas = NULL;
164-
165-
out_xa:
166199
xa_unlock(&ictx->objects);
167200

168-
/* The returned object reference count is zero */
169-
return obj;
170-
}
171-
172-
/*
173-
* The caller holds a users refcount and wants to destroy the object. Returns
174-
* true if the object was destroyed. In all cases the caller no longer has a
175-
* reference on obj.
176-
*/
177-
void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
178-
struct iommufd_object *obj, bool allow_fail)
179-
{
180-
struct iommufd_object *ret;
181-
182201
/*
183-
* The purpose of the destroy_rwsem is to ensure deterministic
184-
* destruction of objects used by external drivers and destroyed by this
185-
* function. Any temporary increment of the refcount must hold the read
186-
* side of this, such as during ioctl execution.
187-
*/
188-
down_write(&obj->destroy_rwsem);
189-
ret = iommufd_object_remove(ictx, obj->id, true);
190-
up_write(&obj->destroy_rwsem);
191-
192-
if (allow_fail && IS_ERR(ret))
193-
return;
194-
195-
/*
196-
* If there is a bug and we couldn't destroy the object then we did put
197-
* back the caller's refcount and will eventually try to free it again
198-
* during close.
202+
* Since users is zero any positive users_shortterm must be racing
203+
* iommufd_put_object(), or we have a bug.
199204
*/
200-
if (WARN_ON(IS_ERR(ret)))
201-
return;
205+
if (!zerod_shortterm) {
206+
ret = iommufd_object_dec_wait_shortterm(ictx, obj);
207+
if (WARN_ON(ret))
208+
return ret;
209+
}
202210

203211
iommufd_object_ops[obj->type].destroy(obj);
204212
kfree(obj);
213+
return 0;
214+
215+
err_xa:
216+
if (zerod_shortterm) {
217+
/* Restore the xarray owned reference */
218+
refcount_set(&obj->shortterm_users, 1);
219+
}
220+
xa_unlock(&ictx->objects);
221+
222+
/* The returned object reference count is zero */
223+
return ret;
205224
}
206225

207226
static int iommufd_destroy(struct iommufd_ucmd *ucmd)
208227
{
209228
struct iommu_destroy *cmd = ucmd->cmd;
210-
struct iommufd_object *obj;
211229

212-
obj = iommufd_object_remove(ucmd->ictx, cmd->id, false);
213-
if (IS_ERR(obj))
214-
return PTR_ERR(obj);
215-
iommufd_object_ops[obj->type].destroy(obj);
216-
kfree(obj);
217-
return 0;
230+
return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0);
218231
}
219232

220233
static int iommufd_fops_open(struct inode *inode, struct file *filp)
@@ -238,6 +251,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
238251
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
239252
xa_init(&ictx->groups);
240253
ictx->file = filp;
254+
init_waitqueue_head(&ictx->destroy_wait);
241255
filp->private_data = ictx;
242256
return 0;
243257
}

0 commit comments

Comments
 (0)