Update gptq.py: collect per-group scale/zero in fasterquant and return them

qwopqwop200 · web-flow · commit 7a06733ca2ac · 2023-03-20T02:00:19.000+09:00
diff --git a/gptq.py b/gptq.py
@@ -15,7 +15,6 @@
 
 
 class GPTQ:
-
     def __init__(self, layer):
         self.layer = layer
         self.dev = self.layer.weight.device
@@ -88,6 +87,10 @@ def fasterquant(
         H = torch.cholesky_inverse(H)
         H = torch.linalg.cholesky(H, upper=True)
         Hinv = H
+        
+        scale = []
+        zero = []
+        now_idx = 1
 
         for i1 in range(0, self.columns, blocksize):
             i2 = min(i1 + blocksize, self.columns)
@@ -106,6 +109,11 @@ def fasterquant(
                 if groupsize != -1:
                     if (i1 + i) % groupsize == 0:
                         self.quantizer.find_params(W[:, (i1 + i):(i1 + i + groupsize)], weight=True)
+                    
+                    if ((i1 + i) // groupsize) - now_idx == -1:
+                        scale.append(self.quantizer.scale)
+                        zero.append(self.quantizer.zero)
+                        now_idx += 1
 
                 q = quantize(
                     w.unsqueeze(1), self.quantizer.scale, self.quantizer.zero, self.quantizer.maxq
@@ -137,7 +145,14 @@ def fasterquant(
         self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(self.layer.weight.data.dtype)
         if DEBUG:
             print(torch.sum((self.layer(self.inp1) - self.out1) ** 2))
-
+            
+        if scale == []:
+            scale.append(self.quantizer.scale)
+            zero.append(self.quantizer.zero)
+        scale = torch.cat(scale,dim=1)
+        zero = torch.cat(zero,dim=1)
+        return scale,zero
+            
     def free(self):
         if DEBUG:
             self.inp1 = None