@@ -258,27 +258,35 @@ def __init__(self, config: UltravoxConfig):
258
258
super ().__init__ ()
259
259
self .hidden_dim = config .hidden_size
260
260
self ._pad_and_stack = StackAudioFrames (config .stack_factor )
261
- dim = config .audio_config .hidden_size * config .stack_factor
262
- self .ln_pre = RMSNorm (dim )
263
- self .linear_1 = nn .Linear (dim , self .hidden_dim , bias = False )
264
- dim = self .hidden_dim
261
+ dim_in = config .audio_config .hidden_size * config .stack_factor
262
+ self .ln_pre = RMSNorm (dim_in )
263
+ self .linear_1 = nn .Linear (dim_in , self .hidden_dim , bias = False )
264
+ dim_mid = self .hidden_dim
265
265
266
266
if config .projector_act == "swiglu" :
267
267
self .act = MulAndSilu ()
268
- dim = dim // 2
268
+ dim_mid = dim_mid // 2
269
269
else :
270
270
self .act = get_act_fn (config .projector_act )
271
271
272
- self .linear_2 = nn .Linear (dim ,
273
- config .text_config .hidden_size ,
274
- bias = False )
275
- self .ln_post = RMSNorm (config .text_config .hidden_size )
272
+ dim_out = config .text_config .hidden_size
273
+ self .linear_2 = nn .Linear (dim_mid , dim_out , bias = False )
274
+
275
+ # Ultravox v0.4.1 and below use layer_norm after the second linear layer
276
+ # while v0.5.0 and above use layer_norm after the first linear layer's activation.
277
+ if config .projector_ln_mid :
278
+ self .ln_mid : nn .Module = RMSNorm (dim_mid )
279
+ self .ln_post = nn .Identity ()
280
+ else :
281
+ self .ln_mid = nn .Identity ()
282
+ self .ln_post = RMSNorm (dim_out )
276
283
277
284
def forward(self, audio_features: torch.Tensor) -> torch.Tensor:
    """Project audio encoder features through the projector stack.

    The input is passed, in order, through the frame-stacking module,
    the pre-norm, the first linear layer, the activation, the mid norm,
    the second linear layer and the post norm. Which of ``ln_mid`` /
    ``ln_post`` is a real norm and which is ``nn.Identity`` is decided
    in ``__init__`` from ``config.projector_ln_mid``.

    Args:
        audio_features: Features to project.

    Returns:
        The output of the final stage (``ln_post``).
    """
    # Order matters: each stage consumes the previous stage's output.
    stages = (
        self._pad_and_stack,
        self.ln_pre,
        self.linear_1,
        self.act,
        self.ln_mid,
        self.linear_2,
        self.ln_post,
    )
    projected = audio_features
    for stage in stages:
        projected = stage(projected)
    return projected
0 commit comments