
Commit 8a2278c

Stefan Haberland authored and axboe committed
s390/dasd: fix hanging device after request requeue
The DASD device driver has a function to requeue requests to the block
layer. This function is used in various cases when basic settings for
the device have to be changed, like High Performance Ficon related
parameters or copy pair settings.

The function iterates over the device->ccw_queue and also removes the
requests from the block->ccw_queue. In case a request was started on an
alias device instead of the base device, it might be removed from the
block->ccw_queue without having been canceled properly before. This can
lead to a hanging device, since the request is no longer on any queue
and cannot be handled properly.

Fix by iterating over the block->ccw_queue instead of the
device->ccw_queue. This takes care of all block layer related requests
and handles them on all associated DASD devices.

Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Link: https://lore.kernel.org/r/20230721193647.3889634-4-sth@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent acea28a commit 8a2278c
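
To make the failure mode concrete, here is a minimal user-space model of the bug. This is illustrative only, not the driver's actual data structures: the struct, field names, and queues below are invented stand-ins. The point it demonstrates is that block->ccw_queue spans all devices associated with a dasd_block, while each request is started on exactly one of them (base or alias), so walking only the base device's queue misses requests started on an alias.

/*
 * Simplified model -- not kernel code. One block-layer queue holds
 * all requests; each request was started on either the base device
 * or an alias device.
 */
#include <stdio.h>
#include <string.h>

struct request { int id; const char *started_on; };

int main(void)
{
        struct request block_queue[] = {
                { 1, "base" }, { 2, "alias" }, { 3, "base" }, { 4, "alias" },
        };
        int n = sizeof(block_queue) / sizeof(block_queue[0]);

        /* Buggy approach: walk only the base device's queue. */
        printf("walking device->ccw_queue of the base device:\n");
        for (int i = 0; i < n; i++)
                if (strcmp(block_queue[i].started_on, "base") == 0)
                        printf("  requeued request %d\n", block_queue[i].id);
        /* Requests 2 and 4 were never canceled -> the device hangs. */

        /* Fixed approach: walk the block-layer queue instead. */
        printf("walking block->ccw_queue:\n");
        for (int i = 0; i < n; i++)
                printf("  requeued request %d\n", block_queue[i].id);
        return 0;
}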

File tree

1 file changed: +48 -77 lines


drivers/s390/block/dasd.c

Lines changed: 48 additions & 77 deletions
@@ -2943,41 +2943,32 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
  * Requeue a request back to the block request queue
  * only works for block requests
  */
-static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
+static void _dasd_requeue_request(struct dasd_ccw_req *cqr)
 {
-        struct dasd_block *block = cqr->block;
         struct request *req;
 
-        if (!block)
-                return -EINVAL;
         /*
          * If the request is an ERP request there is nothing to requeue.
          * This will be done with the remaining original request.
          */
         if (cqr->refers)
-                return 0;
+                return;
         spin_lock_irq(&cqr->dq->lock);
         req = (struct request *) cqr->callback_data;
         blk_mq_requeue_request(req, true);
         spin_unlock_irq(&cqr->dq->lock);
 
-        return 0;
+        return;
 }
 
-/*
- * Go through all request on the dasd_block request queue, cancel them
- * on the respective dasd_device, and return them to the generic
- * block layer.
- */
-static int dasd_flush_block_queue(struct dasd_block *block)
+static int _dasd_requests_to_flushqueue(struct dasd_block *block,
+                                        struct list_head *flush_queue)
 {
         struct dasd_ccw_req *cqr, *n;
-        int rc, i;
-        struct list_head flush_queue;
         unsigned long flags;
+        int rc, i;
 
-        INIT_LIST_HEAD(&flush_queue);
-        spin_lock_bh(&block->queue_lock);
+        spin_lock_irqsave(&block->queue_lock, flags);
         rc = 0;
 restart:
         list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
@@ -2992,13 +2983,32 @@ static int dasd_flush_block_queue(struct dasd_block *block)
                  * is returned from the dasd_device layer.
                  */
                 cqr->callback = _dasd_wake_block_flush_cb;
-                for (i = 0; cqr != NULL; cqr = cqr->refers, i++)
-                        list_move_tail(&cqr->blocklist, &flush_queue);
+                for (i = 0; cqr; cqr = cqr->refers, i++)
+                        list_move_tail(&cqr->blocklist, flush_queue);
                 if (i > 1)
                         /* moved more than one request - need to restart */
                         goto restart;
         }
-        spin_unlock_bh(&block->queue_lock);
+        spin_unlock_irqrestore(&block->queue_lock, flags);
+
+        return rc;
+}
+
+/*
+ * Go through all request on the dasd_block request queue, cancel them
+ * on the respective dasd_device, and return them to the generic
+ * block layer.
+ */
+static int dasd_flush_block_queue(struct dasd_block *block)
+{
+        struct dasd_ccw_req *cqr, *n;
+        struct list_head flush_queue;
+        unsigned long flags;
+        int rc;
+
+        INIT_LIST_HEAD(&flush_queue);
+        rc = _dasd_requests_to_flushqueue(block, &flush_queue);
+
         /* Now call the callback function of flushed requests */
 restart_cb:
         list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
@@ -3881,75 +3891,36 @@ EXPORT_SYMBOL_GPL(dasd_generic_space_avail);
  */
 int dasd_generic_requeue_all_requests(struct dasd_device *device)
 {
+        struct dasd_block *block = device->block;
         struct list_head requeue_queue;
         struct dasd_ccw_req *cqr, *n;
-        struct dasd_ccw_req *refers;
         int rc;
 
-        INIT_LIST_HEAD(&requeue_queue);
-        spin_lock_irq(get_ccwdev_lock(device->cdev));
-        rc = 0;
-        list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
-                /* Check status and move request to flush_queue */
-                if (cqr->status == DASD_CQR_IN_IO) {
-                        rc = device->discipline->term_IO(cqr);
-                        if (rc) {
-                                /* unable to terminate requeust */
-                                dev_err(&device->cdev->dev,
-                                        "Unable to terminate request %p "
-                                        "on suspend\n", cqr);
-                                spin_unlock_irq(get_ccwdev_lock(device->cdev));
-                                dasd_put_device(device);
-                                return rc;
-                        }
-                }
-                list_move_tail(&cqr->devlist, &requeue_queue);
-        }
-        spin_unlock_irq(get_ccwdev_lock(device->cdev));
-
-        list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
-                wait_event(dasd_flush_wq,
-                           (cqr->status != DASD_CQR_CLEAR_PENDING));
+        if (!block)
+                return 0;
 
-                /*
-                 * requeue requests to blocklayer will only work
-                 * for block device requests
-                 */
-                if (_dasd_requeue_request(cqr))
-                        continue;
+        INIT_LIST_HEAD(&requeue_queue);
+        rc = _dasd_requests_to_flushqueue(block, &requeue_queue);
 
-                /* remove requests from device and block queue */
-                list_del_init(&cqr->devlist);
-                while (cqr->refers != NULL) {
-                        refers = cqr->refers;
-                        /* remove the request from the block queue */
-                        list_del(&cqr->blocklist);
-                        /* free the finished erp request */
-                        dasd_free_erp_request(cqr, cqr->memdev);
-                        cqr = refers;
+        /* Now call the callback function of flushed requests */
+restart_cb:
+        list_for_each_entry_safe(cqr, n, &requeue_queue, blocklist) {
+                wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
+                /* Process finished ERP request. */
+                if (cqr->refers) {
+                        spin_lock_bh(&block->queue_lock);
+                        __dasd_process_erp(block->base, cqr);
+                        spin_unlock_bh(&block->queue_lock);
+                        /* restart list_for_xx loop since dasd_process_erp
+                         * might remove multiple elements
+                         */
+                        goto restart_cb;
                 }
-
-                /*
-                 * _dasd_requeue_request already checked for a valid
-                 * blockdevice, no need to check again
-                 * all erp requests (cqr->refers) have a cqr->block
-                 * pointer copy from the original cqr
-                 */
+                _dasd_requeue_request(cqr);
                 list_del_init(&cqr->blocklist);
                 cqr->block->base->discipline->free_cp(
                         cqr, (struct request *) cqr->callback_data);
         }
-
-        /*
-         * if requests remain then they are internal request
-         * and go back to the device queue
-         */
-        if (!list_empty(&requeue_queue)) {
-                /* move freeze_queue to start of the ccw_queue */
-                spin_lock_irq(get_ccwdev_lock(device->cdev));
-                list_splice_tail(&requeue_queue, &device->ccw_queue);
-                spin_unlock_irq(get_ccwdev_lock(device->cdev));
-        }
         dasd_schedule_device_bh(device);
         return rc;
 }
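
Beyond the bug fix, the diff factors the "collect requests onto a private flush list under the queue lock" step into _dasd_requests_to_flushqueue(), now shared by dasd_flush_block_queue() and dasd_generic_requeue_all_requests(). The following user-space sketch shows only the general shape of that pattern; the list and lock types are simplified stand-ins, and unlike the real helper it steals the whole chain at once rather than moving requests (and their ERP chains) one by one.

/*
 * Sketch of the shared-helper pattern -- not kernel code. Both
 * callers first move all queued requests to a caller-owned list
 * under the lock, then process that list without holding it.
 */
#include <pthread.h>
#include <stdio.h>

struct node { int id; struct node *next; };

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Move every request from the shared queue to a private flush list. */
static void requests_to_flushqueue(struct node **queue, struct node **flush)
{
        pthread_mutex_lock(&queue_lock);
        *flush = *queue;        /* take the whole chain atomically */
        *queue = NULL;
        pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
        struct node r2 = { 2, NULL }, r1 = { 1, &r2 };
        struct node *ccw_queue = &r1, *flush_queue = NULL;

        /* Both "flush" and "requeue all" begin with the same step... */
        requests_to_flushqueue(&ccw_queue, &flush_queue);

        /* ...then each caller walks its private list without the lock. */
        for (struct node *n = flush_queue; n; n = n->next)
                printf("processing request %d off the flush queue\n", n->id);
        return 0;
}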
