@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
+DEFINE_IDA(blk_queue_ida);
+
 /*
  * For the allocated request tables
  */
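For context on the new blk_queue_ida: DEFINE_IDA declares a statically initialized ID allocator, and blk_alloc_queue_node (further down) uses ida_simple_get()/ida_simple_remove() to give each request_queue a small unique q->id. Below is a minimal userspace sketch of the same allocate-lowest-free-ID / release pattern, using a plain bitmap rather than the kernel's IDA; the names are illustrative, not kernel API.

```c
/* Userspace sketch of the ID-allocator pattern behind DEFINE_IDA /
 * ida_simple_get / ida_simple_remove.  Not the kernel implementation. */
#include <stdio.h>

#define MAX_IDS 32

static unsigned long id_bitmap;		/* bit n set => id n in use */

/* hand out the lowest free id, or -1 if the space is exhausted */
static int id_get(void)
{
	for (int id = 0; id < MAX_IDS; id++) {
		if (!(id_bitmap & (1UL << id))) {
			id_bitmap |= 1UL << id;
			return id;
		}
	}
	return -1;
}

/* return an id to the pool so it can be reused */
static void id_put(int id)
{
	id_bitmap &= ~(1UL << id);
}

int main(void)
{
	int a = id_get(), b = id_get();
	printf("a=%d b=%d\n", a, b);		/* a=0 b=1 */
	id_put(a);
	printf("reused=%d\n", id_get());	/* lowest free id, 0 again */
	return 0;
}
```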
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue);
 void blk_drain_queue(struct request_queue *q, bool drain_all)
 {
 	while (true) {
-		int nr_rqs;
+		bool drain = false;
+		int i;
 
 		spin_lock_irq(q->queue_lock);
 
@@ -375,14 +378,25 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (!list_empty(&q->queue_head))
 			__blk_run_queue(q);
 
-		if (drain_all)
-			nr_rqs = q->rq.count[0] + q->rq.count[1];
-		else
-			nr_rqs = q->rq.elvpriv;
+		drain |= q->rq.elvpriv;
+
+		/*
+		 * Unfortunately, requests are queued at and tracked from
+		 * multiple places and there's no single counter which can
+		 * be drained.  Check all the queues and counters.
+		 */
+		if (drain_all) {
+			drain |= !list_empty(&q->queue_head);
+			for (i = 0; i < 2; i++) {
+				drain |= q->rq.count[i];
+				drain |= q->in_flight[i];
+				drain |= !list_empty(&q->flush_queue[i]);
+			}
+		}
 
 		spin_unlock_irq(q->queue_lock);
 
-		if (!nr_rqs)
+		if (!drain)
 			break;
 		msleep(10);
 	}
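The rewritten loop no longer trusts a single request count; it ORs every queue and counter that can still hold work into one drain flag and keeps polling until everything reads empty. A hedged userspace sketch of that poll-until-idle shape follows; the counters, the simulated progress, and the 10 ms sleep are stand-ins, not the block-layer state.

```c
/* Userspace sketch of the drain-until-idle loop: check every source of
 * pending work under the lock, back off, repeat.  Counters are stand-ins. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int rq_count[2] = { 3, 2 };	/* pretend queued async/sync requests */
static int in_flight[2] = { 1, 0 };	/* pretend dispatched requests */

static void drain_queue(void)
{
	while (true) {
		bool drain = false;

		pthread_mutex_lock(&lock);
		/* no single counter covers everything: OR them all */
		for (int i = 0; i < 2; i++) {
			drain |= rq_count[i] > 0;
			drain |= in_flight[i] > 0;
		}
		/* simulate forward progress so the sketch terminates */
		for (int i = 0; i < 2; i++) {
			if (rq_count[i] > 0)
				rq_count[i]--;
			else if (in_flight[i] > 0)
				in_flight[i]--;
		}
		pthread_mutex_unlock(&lock);

		if (!drain)
			break;
		usleep(10 * 1000);	/* back off, as msleep(10) does above */
	}
	printf("queue drained\n");
}

int main(void)
{
	drain_queue();
	return 0;
}
```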
@@ -469,6 +483,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	if (q->id < 0)
+		goto fail_q;
+
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
@@ -477,20 +495,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->node = node_id;
 
 	err = bdi_init(&q->backing_dev_info);
-	if (err) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (err)
+		goto fail_id;
 
-	if (blk_throtl_init(q)) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (blk_throtl_init(q))
+		goto fail_id;
 
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->icq_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
 	INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -508,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->queue_lock = &q->__queue_lock;
 
 	return q;
+
+fail_id:
+	ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+	kmem_cache_free(blk_requestq_cachep, q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 
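The allocation path now unwinds through the fail_id/fail_q labels instead of repeating the cleanup at each failure site, the usual kernel idiom once a constructor holds more than one resource. A small userspace sketch of that goto-unwind shape; the two "resources" here are just mallocs and the types are purely illustrative.

```c
/* Userspace sketch of the goto-based unwind used in blk_alloc_queue_node:
 * each failure jumps to the label that frees exactly what already exists. */
#include <stdio.h>
#include <stdlib.h>

struct queue {
	int id;
	char *buf;
};

static struct queue *queue_alloc(size_t bufsize)
{
	struct queue *q = malloc(sizeof(*q));	/* first resource */
	if (!q)
		return NULL;

	q->buf = malloc(bufsize);		/* second resource */
	if (!q->buf)
		goto fail_q;			/* only the struct exists so far */

	q->id = 1;				/* pretend later init can't fail */
	return q;

fail_q:
	free(q);
	return NULL;
}

int main(void)
{
	struct queue *q = queue_alloc(4096);
	if (q) {
		printf("allocated queue id=%d\n", q->id);
		free(q->buf);
		free(q);
	}
	return 0;
}
```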
@@ -605,26 +626,31 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
 
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
-		kobject_get(&q->kobj);
-		return 0;
+	if (likely(!blk_queue_dead(q))) {
+		__blk_get_queue(q);
+		return true;
 	}
 
-	return 1;
+	return false;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -635,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
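blk_alloc_request now pins the io_context through the icq it attaches: once rq->elv.icq is set, a reference on icq->ioc is taken so the context cannot disappear before the request is freed, and blk_free_request above drops that reference. A hedged userspace sketch of the take-a-reference-when-attaching pattern, with toy refcounted types rather than the block layer's.

```c
/* Userspace sketch: an object takes a reference on the context it points to
 * for as long as it holds the pointer, mirroring get_io_context() on attach
 * and put_io_context() on free.  Toy types, not the block layer's. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct ioc {
	int refcount;
};

struct request {
	struct ioc *ioc;		/* NULL if no context attached */
};

static void ioc_get(struct ioc *ioc) { ioc->refcount++; }

static void ioc_put(struct ioc *ioc)
{
	if (--ioc->refcount == 0)
		free(ioc);
}

static struct request *request_alloc(struct ioc *ioc)
{
	struct request *rq = malloc(sizeof(*rq));
	if (!rq)
		return NULL;
	rq->ioc = ioc;
	if (ioc)
		ioc_get(ioc);		/* rq holds on to ioc until it is freed */
	return rq;
}

static void request_free(struct request *rq)
{
	if (rq->ioc)
		ioc_put(rq->ioc);	/* drop the reference taken at alloc */
	free(rq);
}

int main(void)
{
	struct ioc *ioc = calloc(1, sizeof(*ioc));
	ioc->refcount = 1;			/* caller's own reference */

	struct request *rq = request_alloc(ioc);
	ioc_put(ioc);				/* caller done; rq still holds it */
	assert(ioc->refcount == 1);

	request_free(rq);			/* last reference gone, ioc freed */
	printf("done\n");
	return 0;
}
```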
@@ -750,11 +781,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = NULL;
+	struct elevator_type *et;
+	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	bool retried = false;
 	int may_queue;
+retry:
+	et = q->elevator->type;
+	ioc = current->io_context;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -763,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC, q->node);
+			/*
+			 * We want ioc to record batching state.  If it's
+			 * not already there, creating a new one requires
+			 * dropping queue_lock, which in turn requires
+			 * retesting conditions to avoid queue hang.
+			 */
+			if (!ioc && !retried) {
+				spin_unlock_irq(q->queue_lock);
+				create_io_context(current, gfp_mask, q->node);
+				spin_lock_irq(q->queue_lock);
+				retried = true;
+				goto retry;
+			}
+
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
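The new block in get_request drops queue_lock to allocate an io_context (which may sleep), retakes the lock, and jumps back to retry so every condition is re-evaluated; retried prevents a second trip. A hedged userspace sketch of the drop-lock / allocate / relock / retest pattern follows; the ordering is simplified relative to the kernel code and the "context" is a toy, not the io_context machinery.

```c
/* Userspace sketch of the drop-lock / allocate / retry pattern used when
 * creating the io_context in get_request.  Toy context, not kernel API. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static void *cur_ctx;			/* stands in for current->io_context */

static void *try_get_resource(void)
{
	bool retried = false;
	void *res = NULL;

retry:
	pthread_mutex_lock(&queue_lock);

	if (!cur_ctx && !retried) {
		/* the allocation may sleep, so it can't happen under the
		 * lock: drop it, allocate, then start over and retest */
		pthread_mutex_unlock(&queue_lock);
		cur_ctx = malloc(64);
		retried = true;
		goto retry;		/* retake the lock, re-check everything */
	}

	if (cur_ctx)
		res = malloc(32);	/* stand-in for the real allocation */

	pthread_mutex_unlock(&queue_lock);
	return res;
}

int main(void)
{
	void *r = try_get_resource();
	printf("resource %s\n", r ? "allocated" : "unavailable");
	free(r);
	free(cur_ctx);
	return 0;
}
```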
@@ -799,17 +849,36 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator.  If
+	 * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed.  This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
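This hunk looks up an existing icq while queue_lock is held and only after dropping the lock tries to create one; the elvpriv count taken under the lock is what keeps the elevator, and with it the icq cache, alive across that window. A hedged userspace sketch of the lookup-under-lock / create-outside-lock split; the hash lookup is replaced by a single cached pointer and everything here is illustrative.

```c
/* Userspace sketch of the two-step icq pattern in get_request: cheap lookup
 * while the lock is held, potentially-sleeping creation after it is dropped. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static void *cached_ctx;		/* stand-in for the icq found by lookup */

static void *lookup_ctx(void)		/* call with queue_lock held */
{
	return cached_ctx;
}

static void *create_ctx(void)		/* may sleep: must not hold queue_lock */
{
	void *ctx = malloc(64);
	if (ctx)
		cached_ctx = ctx;
	return ctx;
}

static int submit(void)
{
	void *ctx;

	pthread_mutex_lock(&queue_lock);
	ctx = lookup_ctx();		/* fast path: reuse an existing context */
	pthread_mutex_unlock(&queue_lock);

	if (!ctx)
		ctx = create_ctx();	/* slow path, outside the lock */
	if (!ctx)
		return -1;		/* allocation failed: caller unwinds */

	printf("submitting with ctx %p\n", ctx);
	return 0;
}

int main(void)
{
	submit();			/* first call creates the context */
	submit();			/* second call finds it via lookup */
	free(cached_ctx);
	return 0;
}
```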
@@ -871,10 +940,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
-		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
-		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		if (unlikely(blk_queue_dead(q)))
			return NULL;
 
 		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
@@ -891,8 +959,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		 * up to a big batch of them for a small period time.
 		 * See ioc_batching, ioc_set_batching
 		 */
-		ioc = current_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, ioc);
+		create_io_context(current, GFP_NOIO, q->node);
+		ioc_set_batching(q, current->io_context);
 
 		spin_lock_irq(q->queue_lock);
 		finish_wait(&rl->wait[is_sync], &wait);
@@ -1009,54 +1077,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
 	__elv_add_request(q, rq, where);
 }
 
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q:	request queue where request should be inserted
- * @rq:	request to be inserted
- * @at_head: insert request at head or tail of queue
- * @data: private data
- *
- * Description:
- *    Many block devices need to execute commands asynchronously, so they don't
- *    block the whole kernel from preemption during request execution.  This is
- *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- *    be scheduled for actual execution by the request queue.
- *
- *    We have the option of inserting the head or the tail of the queue.
- *    Typically we use the tail for new ioctls and so forth.  We use the head
- *    of the queue for things like a QUEUE_FULL message from a device, or a
- *    host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
-			int at_head, void *data)
-{
-	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-	unsigned long flags;
-
-	/*
-	 * tell I/O scheduler that this isn't a regular read/write (ie it
-	 * must not attempt merges on this) and that it acts as a soft
-	 * barrier
-	 */
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-
-	rq->special = data;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
-	/*
-	 * If command is tagged, release the tag
-	 */
-	if (blk_rq_tagged(rq))
-		blk_queue_end_tag(q, rq);
-
-	add_acct_request(q, rq, where);
-	__blk_run_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
@@ -1766,6 +1786,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock_irqrestore(q->queue_lock, flags);
+		return -ENODEV;
+	}
 
 	/*
 	 * Submitting request must be dequeued before calling this function
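The new check in blk_insert_cloned_request tests the dead flag only after queue_lock is taken, so a queue being torn down concurrently is refused with -ENODEV instead of being used. A hedged userspace sketch of the check-under-the-same-lock idea; a plain bool guarded by a mutex stands in for blk_queue_dead().

```c
/* Userspace sketch: the "is it dead?" test is only meaningful under the same
 * lock the teardown path takes, otherwise the answer can change under us. */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static bool queue_dead;

static int insert_request(const char *name)
{
	pthread_mutex_lock(&queue_lock);
	if (queue_dead) {			/* checked under the lock */
		pthread_mutex_unlock(&queue_lock);
		return -ENODEV;
	}
	printf("queued %s\n", name);		/* teardown can't race us here */
	pthread_mutex_unlock(&queue_lock);
	return 0;
}

static void kill_queue(void)
{
	pthread_mutex_lock(&queue_lock);
	queue_dead = true;
	pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
	insert_request("rq1");			/* accepted */
	kill_queue();
	int ret = insert_request("rq2");	/* refused with -ENODEV */
	printf("second insert: %d\n", ret);
	return 0;
}
```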
@@ -2739,6 +2763,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 {
 	trace_block_unplug(q, depth, !from_schedule);
 
+	/*
+	 * Don't mess with dead queue.
+	 */
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock(q->queue_lock);
+		return;
+	}
+
 	/*
 	 * If we are punting this to kblockd, then we can safely drop
 	 * the queue_lock before waking kblockd (which needs to take
@@ -2815,6 +2847,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
+
+		/*
+		 * Short-circuit if @q is dead
+		 */
+		if (unlikely(blk_queue_dead(q))) {
+			__blk_end_request_all(rq, -ENODEV);
+			continue;
+		}
+
 		/*
 		 * rq is already accounted, so use raw insert
 		 */