microsoft · pow2clk · Nov 21, 2024 · Dec 5, 2024 · Feb 18, 2025 · Mar 3, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -686,6 +686,8 @@ add_subdirectory(include/dxc)
 # really depend on anything else in the build it is safe.
 list(APPEND LLVM_COMMON_DEPENDS HCTGen) 
 
+add_subdirectory(utils/hct)
+
 if(EXISTS "${LLVM_MAIN_SRC_DIR}/external")
   add_subdirectory(external) # SPIRV change
 endif()

diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h
@@ -147,12 +147,19 @@ const unsigned kMaxMSTotalSigRows = 32;
 const unsigned kMaxMSSMSize = 1024 * 28;
 const unsigned kMinWaveSize = 4;
 const unsigned kMaxWaveSize = 128;
+const unsigned kDefaultMaxVectorLength = 4; // assumed pre-SM6.9 cap; confirm
+const unsigned kSM69MaxVectorLength = 1024; // assumed SM6.9 cap; confirm
 
 const float kMaxMipLodBias = 15.99f;
 const float kMinMipLodBias = -16.0f;
 
 const unsigned kResRetStatusIndex = 4;
 
+/* <py::lines('OLOAD_DIMS-TEXT')>hctdb_instrhelp.get_max_oload_dims()</py>*/
+// OLOAD_DIMS-TEXT:BEGIN
+const unsigned kDxilMaxOloadDims = 2; // max dimensions of an extended overload
+// OLOAD_DIMS-TEXT:END
+
 enum class ComponentType : uint32_t {
   Invalid = 0,
   I1,
@@ -463,6 +470,11 @@ inline bool IsTBuffer(DXIL::ResourceKind ResourceKind) {
   return ResourceKind == DXIL::ResourceKind::TBuffer;
 }
 
+inline bool IsCTBuffer(DXIL::ResourceKind ResourceKind) {
+  // True for exactly two kinds, TBuffer and CBuffer; false for all others.
+  return IsTBuffer(ResourceKind) || ResourceKind == DXIL::ResourceKind::CBuffer;
+}
+
 /// Whether the resource kind is a FeedbackTexture.
 inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) {
   return ResourceKind == DXIL::ResourceKind::FeedbackTexture2D ||
@@ -475,6 +487,9 @@ inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) {
 // Enumeration for operations specified by DXIL
 enum class OpCode : unsigned {
   //
+  RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer
+  RawBufferVectorStore =
+      304,           // writes to a RWByteAddressBuffer or RWStructuredBuffer
   Reserved0 = 226,   // Reserved
   Reserved1 = 227,   // Reserved
   Reserved10 = 236,  // Reserved
@@ -1029,8 +1044,9 @@ enum class OpCode : unsigned {
   NumOpCodes_Dxil_1_6 = 222,
   NumOpCodes_Dxil_1_7 = 226,
   NumOpCodes_Dxil_1_8 = 258,
+  NumOpCodes_Dxil_1_9 = 305,
 
-  NumOpCodes = 303 // exclusive last value of enumeration
+  NumOpCodes = 305 // exclusive last value of enumeration
 };
 // OPCODE-ENUM:END
 
@@ -1042,6 +1058,8 @@ enum class OpCode : unsigned {
 // Groups for DXIL operations with equivalent function templates
 enum class OpCodeClass : unsigned {
   //
+  RawBufferVectorLoad,
+  RawBufferVectorStore,
   Reserved,
 
   // Amplification shader instructions
@@ -1337,8 +1355,9 @@ enum class OpCodeClass : unsigned {
   NumOpClasses_Dxil_1_6 = 149,
   NumOpClasses_Dxil_1_7 = 153,
   NumOpClasses_Dxil_1_8 = 174,
+  NumOpClasses_Dxil_1_9 = 177,
 
-  NumOpClasses = 175 // exclusive last value of enumeration
+  NumOpClasses = 177 // exclusive last value of enumeration
 };
 // OPCODECLASS-ENUM:END
 

diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h
@@ -645,6 +645,42 @@ struct LlvmInst_VAArg {
   bool isAllowed() const { return false; }
 };
 
+/// This instruction extracts an element from a vector
+struct LlvmInst_ExtractElement {
+  llvm::Instruction *Instr;
+  // Construction and identification (operator bool dereferences Instr)
+  LlvmInst_ExtractElement(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return Instr->getOpcode() == llvm::Instruction::ExtractElement;
+  }
+  // Validation support: always reports this instruction as allowed
+  bool isAllowed() const { return true; }
+};
+
+/// This instruction inserts an element into a vector
+struct LlvmInst_InsertElement {
+  llvm::Instruction *Instr;
+  // Construction and identification (operator bool dereferences Instr)
+  LlvmInst_InsertElement(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return Instr->getOpcode() == llvm::Instruction::InsertElement;
+  }
+  // Validation support: always reports this instruction as allowed
+  bool isAllowed() const { return true; }
+};
+
+/// This instruction shuffles two vectors
+struct LlvmInst_ShuffleVector {
+  llvm::Instruction *Instr;
+  // Construction and identification (operator bool dereferences Instr)
+  LlvmInst_ShuffleVector(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return Instr->getOpcode() == llvm::Instruction::ShuffleVector;
+  }
+  // Validation support: always reports this instruction as allowed
+  bool isAllowed() const { return true; }
+};
+
 /// This instruction extracts from aggregate
 struct LlvmInst_ExtractValue {
   llvm::Instruction *Instr;
@@ -8813,5 +8849,98 @@ struct DxilInst_AllocateRayQuery2 {
                              llvm::APInt(32, (uint64_t)val)));
   }
 };
+
+/// This instruction reads from a raw buffer or structured buffer
+struct DxilInst_RawBufferVectorLoad {
+  llvm::Instruction *Instr;
+  // Construction and identification (operator bool uses IsDxilOpFuncCallInst)
+  DxilInst_RawBufferVectorLoad(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(
+        Instr, hlsl::OP::OpCode::RawBufferVectorLoad);
+  }
+  // Validation support: always allowed; a valid call has exactly 5 arguments
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    // dyn_cast yields null when Instr is not a call; treat that as invalid.
+    const auto *Call = llvm::dyn_cast<llvm::CallInst>(Instr);
+    return Call && 5 == Call->getNumArgOperands();
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes (operand 0 has no named index here)
+  enum OperandIdx {
+    arg_srv = 1,
+    arg_index = 2,
+    arg_elementOffset = 3,
+    arg_alignment = 4,
+  };
+  // Accessors; get_alignment_val requires a ConstantInt alignment (cast<>)
+  llvm::Value *get_srv() const { return Instr->getOperand(1); }
+  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_index() const { return Instr->getOperand(2); }
+  void set_index(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_elementOffset() const { return Instr->getOperand(3); }
+  void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_alignment() const { return Instr->getOperand(4); }
+  void set_alignment(llvm::Value *val) { Instr->setOperand(4, val); }
+  int32_t get_alignment_val() const {
+    return static_cast<int32_t>(
+        llvm::cast<llvm::ConstantInt>(Instr->getOperand(4))->getZExtValue());
+  }
+  void set_alignment_val(int32_t val) {
+    Instr->setOperand(4, llvm::Constant::getIntegerValue(
+                             llvm::IntegerType::get(Instr->getContext(), 32),
+                             llvm::APInt(32, (uint64_t)val)));
+  }
+};
+
+/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer
+struct DxilInst_RawBufferVectorStore {
+  llvm::Instruction *Instr;
+  // Construction and identification (operator bool uses IsDxilOpFuncCallInst)
+  DxilInst_RawBufferVectorStore(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(
+        Instr, hlsl::OP::OpCode::RawBufferVectorStore);
+  }
+  // Validation support: always allowed; a valid call has exactly 6 arguments
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    // dyn_cast yields null when Instr is not a call; treat that as invalid.
+    const auto *Call = llvm::dyn_cast<llvm::CallInst>(Instr);
+    return Call && 6 == Call->getNumArgOperands();
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes (operand 0 has no named index here)
+  enum OperandIdx {
+    arg_uav = 1,
+    arg_index = 2,
+    arg_elementOffset = 3,
+    arg_value0 = 4,
+    arg_alignment = 5,
+  };
+  // Accessors; get_alignment_val requires a ConstantInt alignment (cast<>)
+  llvm::Value *get_uav() const { return Instr->getOperand(1); }
+  void set_uav(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_index() const { return Instr->getOperand(2); }
+  void set_index(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_elementOffset() const { return Instr->getOperand(3); }
+  void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_value0() const { return Instr->getOperand(4); }
+  void set_value0(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_alignment() const { return Instr->getOperand(5); }
+  void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); }
+  int32_t get_alignment_val() const {
+    return static_cast<int32_t>(
+        llvm::cast<llvm::ConstantInt>(Instr->getOperand(5))->getZExtValue());
+  }
+  void set_alignment_val(int32_t val) {
+    Instr->setOperand(5, llvm::Constant::getIntegerValue(
+                             llvm::IntegerType::get(Instr->getContext(), 32),
+                             llvm::APInt(32, (uint64_t)val)));
+  }
+};
 // INSTR-HELPER:END
 } // namespace hlsl
diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h
@@ -58,11 +58,14 @@ class OP {
   void RefreshCache();
 
   llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType);
+  llvm::Function *GetOpFunc(OpCode OpCode,
+                            llvm::ArrayRef<llvm::Type *> ExtendedOverloads);
   const llvm::SmallMapVector<llvm::Type *, llvm::Function *, 8> &
   GetOpFuncList(OpCode OpCode) const;
   bool IsDxilOpUsed(OpCode opcode) const;
   void RemoveFunction(llvm::Function *F);
   llvm::LLVMContext &GetCtx() { return m_Ctx; }
+  llvm::Module *GetModule() { return m_pModule; } // hands back m_pModule as-is
   llvm::Type *GetHandleType() const;
   llvm::Type *GetNodeHandleType() const;
   llvm::Type *GetNodeRecordHandleType() const;
@@ -83,6 +86,10 @@ class OP {
   llvm::Type *GetVectorType(unsigned numElements, llvm::Type *pOverloadType);
   bool IsResRetType(llvm::Type *Ty);
 
+  // Construct an unnamed struct type containing the set of member types.
+  llvm::StructType *
+  GetExtendedOverloadType(llvm::ArrayRef<llvm::Type *> OverloadTypes);
+
   // Try to get the opcode class for a function.
   // Return true and set `opClass` if the given function is a dxil function.
   // Return false if the given function is not a dxil function.
@@ -140,6 +147,8 @@ class OP {
                                        unsigned valMinor, unsigned &major,
                                        unsigned &minor, unsigned &mask);
 
+  static bool IsDxilOpExtendedOverload(OpCode C);
+
 private:
   // Per-module properties.
   llvm::LLVMContext &m_Ctx;
@@ -164,8 +173,10 @@ class OP {
 
   static const unsigned kUserDefineTypeSlot = 9;
   static const unsigned kObjectTypeSlot = 10;
+  static const unsigned kVectorTypeSlot = 11;   // "vec" in the slot list below
+  static const unsigned kExtendedTypeSlot = 12; // "extended" in the list below
   static const unsigned kNumTypeOverloads =
-      11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj
+      13; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj, vec, extended
 
   llvm::Type *m_pResRetType[kNumTypeOverloads];
   llvm::Type *m_pCBufferRetType[kNumTypeOverloads];
@@ -179,14 +190,39 @@ class OP {
 
 private:
   // Static properties.
+  struct OverloadMask {
+    // One bit per type slot: slot N is allowed when bit (1 << N) is set.
+    uint16_t SlotMask;
+    static_assert(kNumTypeOverloads <= (sizeof(SlotMask) * 8));
+    bool operator[](unsigned TypeSlot) const {
+      // Slots at or beyond kNumTypeOverloads always read as not allowed.
+      return TypeSlot < kNumTypeOverloads && (SlotMask & (1u << TypeSlot)) != 0;
+    }
+    operator bool() const { return SlotMask != 0; }
+  };
   struct OpCodeProperty {
     OpCode opCode;
     const char *pOpCodeName;
     OpCodeClass opCodeClass;
     const char *pOpCodeClassName;
     bool bAllowOverload[kNumTypeOverloads]; // void, h,f,d, i1, i8,i16,i32,i64,
-                                            // udt
+                                            // udt, obj, vec, extended
     llvm::Attribute::AttrKind FuncAttr;
+
+    // Extended Type Overloads:
+    // This is an encoding for a multi-dimensional overload.
+    // 1. Only bAllowOverload[kExtendedTypeSlot] is set to true.
+    // 2. ExtendedOverloads[i] defines the allowed types for overload index i.
+    // 3. AllowedVectorElements[i] defines the allowed vector component types
+    //    when the kVectorTypeSlot bit is set for overload index i.
+    //    This includes when a single vector overload type is specified with
+    //    bAllowOverload[kVectorTypeSlot].
+
+    // A bit mask of allowed type slots per extended overload index.
+    OverloadMask ExtendedOverloads[DXIL::kDxilMaxOloadDims];
+    // A bit mask of allowed vector element types for the vector overload
+    // or for each corresponding extended vector overload index.
+    OverloadMask AllowedVectorElements[DXIL::kDxilMaxOloadDims];
   };
   static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes];