@@ -694,6 +694,108 @@ <h2>Intel® GPU Occupancy Calculator</h2>
694
694
"Max_Num_Of_Barrier_Registers" : 32
695
695
}
696
696
} ,
697
+ //RPL-P
698
+ {
699
+ "pci_id" : [ "A7A1" , "A7A0" , "A721" , "A720" ] ,
700
+ "name" : "Integrated GPU (Xe LP)" ,
701
+ "product_name" : "Intel® Iris® Xe Max Graphics" ,
702
+ "code" : "gen12" ,
703
+ "device_info" : {
704
+ "EU_Per_Sub_Slice" : 16 ,
705
+ "Threads_Per_EU" : 7 ,
706
+ "EU_Count" : 96 ,
707
+ "Max_Threads_Per_Sub_Slice" : 112 ,
708
+ "Large_GRF_Mode" : false ,
709
+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
710
+ "SLM_Size_Per_Sub_Slice" : 64 ,
711
+ "SLM_Size_Per_Work_Group" : 64 ,
712
+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] ,
713
+ "Max_Work_Group_Size" : 512 ,
714
+ "Max_Num_Of_Workgroups" : 112 ,
715
+ "Max_Num_Of_Barrier_Registers" : 32
716
+ }
717
+ } ,
718
+ {
719
+ "pci_id" : [ "A7A9" , "A7A8" ] ,
720
+ "name" : "Integrated GPU (Xe LP)" ,
721
+ "product_name" : "Intel® Iris® Xe Max Graphics" ,
722
+ "code" : "gen12" ,
723
+ "device_info" : {
724
+ "EU_Per_Sub_Slice" : 16 ,
725
+ "Threads_Per_EU" : 7 ,
726
+ "EU_Count" : 64 ,
727
+ "Max_Threads_Per_Sub_Slice" : 112 ,
728
+ "Large_GRF_Mode" : false ,
729
+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
730
+ "SLM_Size_Per_Sub_Slice" : 64 ,
731
+ "SLM_Size_Per_Work_Group" : 64 ,
732
+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] ,
733
+ "Max_Work_Group_Size" : 512 ,
734
+ "Max_Num_Of_Workgroups" : 112 ,
735
+ "Max_Num_Of_Barrier_Registers" : 32
736
+ }
737
+ } ,
738
+ //RPL-S
739
+ {
740
+ "pci_id" : [ "A780" , "A781" , "A788" , "A789" ] ,
741
+ "name" : "Integrated GPU (Xe LP)" ,
742
+ "product_name" : "Intel® UHD Graphics" ,
743
+ "code" : "gen12" ,
744
+ "device_info" : {
745
+ "EU_Per_Sub_Slice" : 16 ,
746
+ "Threads_Per_EU" : 7 ,
747
+ "EU_Count" : 32 ,
748
+ "Max_Threads_Per_Sub_Slice" : 112 ,
749
+ "Large_GRF_Mode" : false ,
750
+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
751
+ "SLM_Size_Per_Sub_Slice" : 64 ,
752
+ "SLM_Size_Per_Work_Group" : 64 ,
753
+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] ,
754
+ "Max_Work_Group_Size" : 512 ,
755
+ "Max_Num_Of_Workgroups" : 112 ,
756
+ "Max_Num_Of_Barrier_Registers" : 32
757
+ }
758
+ } ,
759
+ {
760
+ "pci_id" : [ "A782" , "A78A" ] ,
761
+ "name" : "Integrated GPU (Xe LP)" ,
762
+ "product_name" : "Intel® UHD Graphics" ,
763
+ "code" : "gen12" ,
764
+ "device_info" : {
765
+ "EU_Per_Sub_Slice" : 12 ,
766
+ "Threads_Per_EU" : 7 ,
767
+ "EU_Count" : 24 ,
768
+ "Max_Threads_Per_Sub_Slice" : 112 ,
769
+ "Large_GRF_Mode" : false ,
770
+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
771
+ "SLM_Size_Per_Sub_Slice" : 64 ,
772
+ "SLM_Size_Per_Work_Group" : 64 ,
773
+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] ,
774
+ "Max_Work_Group_Size" : 512 ,
775
+ "Max_Num_Of_Workgroups" : 112 ,
776
+ "Max_Num_Of_Barrier_Registers" : 32
777
+ }
778
+ } ,
779
+ {
780
+ "pci_id" : [ "A783" , "A78B" ] ,
781
+ "name" : "Integrated GPU (Xe LP)" ,
782
+ "product_name" : "Intel® UHD Graphics" ,
783
+ "code" : "gen12" ,
784
+ "device_info" : {
785
+ "EU_Per_Sub_Slice" : 16 ,
786
+ "Threads_Per_EU" : 7 ,
787
+ "EU_Count" : 16 ,
788
+ "Max_Threads_Per_Sub_Slice" : 112 ,
789
+ "Large_GRF_Mode" : false ,
790
+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
791
+ "SLM_Size_Per_Sub_Slice" : 64 ,
792
+ "SLM_Size_Per_Work_Group" : 64 ,
793
+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] ,
794
+ "Max_Work_Group_Size" : 512 ,
795
+ "Max_Num_Of_Workgroups" : 112 ,
796
+ "Max_Num_Of_Barrier_Registers" : 32
797
+ }
798
+ } ,
697
799
//ARC
698
800
{
699
801
"pci_id" : [ "56A5" , "5694" ] ,
@@ -1256,23 +1358,21 @@ <h2>Intel® GPU Occupancy Calculator</h2>
1256
1358
}
1257
1359
1258
1360
// Calculate GPU Occupancy
1361
+
1259
1362
/**
 * Calculate GPU occupancy for a launch of `global_range` work-items.
 *
 * The work is split into "passes": each pass can hold at most
 * `num_ss * num_wg * wg` work-items. Every full pass contributes `ss_occ`;
 * a trailing partial pass contributes `ss_occ` scaled by the fraction of
 * the pass it fills. The result is the average over all passes.
 *
 * @param {number} wg           Work-group size (work-items per work-group).
 * @param {number} num_wg       Number of work-groups per ss.
 * @param {number} ss_occ       Occupancy value of a single ss; returned unchanged.
 * @param {number} num_ss       Number of ss in the GPU.
 * @param {number} global_range Total number of work-items in the launch.
 * @returns {{gpu_occ: number, ss_occ: number}} Averaged GPU occupancy and
 *          the `ss_occ` that was passed in.
 */
function compute_gpu_occupancy ( wg , num_wg , ss_occ , num_ss , global_range ) {
    // Calculate max num of work-items in all ss of gpu
    const num_wi = num_ss * num_wg * wg ;

    // Nothing to run, or no capacity to run it on: report zero occupancy
    // instead of letting 0 / 0 (or x / 0) turn into NaN below.
    if ( ! ( num_wi > 0 ) || ! ( global_range > 0 ) ) {
        const gpu_occ = 0 ;
        return { gpu_occ, ss_occ} ;
    }

    // Number of completely filled passes. Math.floor is used rather than
    // parseInt because parseInt stringifies its argument first, and a tiny
    // ratio such as 1.3e-7 would be parsed as 1.
    let num_pass = Math.floor ( global_range / num_wi ) ;
    let gpu_pass = ss_occ * num_pass ;

    // Work-items left over for a final, partially filled pass
    const num_wi_left = global_range % num_wi ;
    if ( num_wi_left !== 0 ) {
        gpu_pass += ( num_wi_left / num_wi ) * ss_occ ;
        num_pass += 1 ;
    }

    // Average occupancy over all passes
    const gpu_occ = gpu_pass / num_pass ;
    return { gpu_occ, ss_occ} ;
}
1277
1377
1278
1378
// Generate Graphs and Optimal Occupancy Config table
0 commit comments