fix bug when fine-tuning without flash-attention
modeling_telechat.py +1 -0
modeling_telechat.py CHANGED
@@ -270,6 +270,7 @@ class TELECHATAttention(nn.Module):
         self.pruned_heads = set()

         self.use_flash_attn = False
+        self.is_cross_attention = False


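For context, a minimal, hypothetical sketch (not the actual TeleChat code) of why the added attribute matters: when flash-attention is disabled for fine-tuning, the standard attention branch reads self.is_cross_attention, so a module constructed without it would fail with an AttributeError. The ToyAttention class, its parameters, and the branch structure below are illustrative assumptions only.

import torch
import torch.nn as nn


class ToyAttention(nn.Module):
    def __init__(self, hidden_size: int = 64, use_flash_attn: bool = False):
        super().__init__()
        self.qkv = nn.Linear(hidden_size, 3 * hidden_size)
        self.use_flash_attn = use_flash_attn
        # The attribute this commit adds; without it, the non-flash branch
        # below would raise AttributeError at the first forward pass.
        self.is_cross_attention = False

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        if not self.use_flash_attn and not self.is_cross_attention:
            # Plain (non-flash) self-attention fallback used during fine-tuning.
            scores = q @ k.transpose(-2, -1) / q.shape[-1] ** 0.5
            return scores.softmax(dim=-1) @ v
        # Placeholder for the flash-attention / cross-attention branches.
        return v


x = torch.randn(2, 8, 64)
print(ToyAttention()(x).shape)  # torch.Size([2, 8, 64])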