Spaces:
Runtime error
Runtime error
compiled Conv and BatchNorm
Browse files
unsloth_compiled_cache/BatchNorm1d.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Unsloth Zoo - Utilities for Unsloth
|
| 3 |
+
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# This program is free software: you can redistribute it and/or modify
|
| 6 |
+
# it under the terms of the GNU Lesser General Public License as published by
|
| 7 |
+
# the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
# (at your option) any later version.
|
| 9 |
+
#
|
| 10 |
+
# This program is distributed in the hope that it will be useful,
|
| 11 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
# GNU General Public License for more details.
|
| 14 |
+
#
|
| 15 |
+
# You should have received a copy of the GNU Lesser General Public License
|
| 16 |
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
| 17 |
+
torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
|
| 18 |
+
from torch import Tensor
|
| 19 |
+
import torch
|
| 20 |
+
from torch.nn import functional as F
|
| 21 |
+
from transformers.models.mllama.modeling_mllama import (F, nn)
|
| 22 |
+
|
| 23 |
+
def forward(self, input: Tensor) -> Tensor:
    """Batch-normalize ``input`` and cast the result back to ``input.dtype``.

    Forward pass emitted for ``BatchNorm1d`` (per the cache file name).

    Args:
        input: Tensor to normalize. It is first handed to
            ``self._check_input_dim`` (presumably a rank check; confirm
            against the owning module).

    Returns:
        The output of ``F.batch_norm`` converted to the dtype of ``input``.

    Side effects:
        In training mode with ``track_running_stats`` enabled,
        ``self.num_batches_tracked`` (when not ``None``) is incremented
        in place.
    """
    self._check_input_dim(input)

    # Start from the configured momentum; 0.0 stands in when momentum is None.
    avg_factor = 0.0 if self.momentum is None else self.momentum

    if self.training and self.track_running_stats:
        # The None check is kept so the counter update is skipped when absent.
        if self.num_batches_tracked is not None:
            self.num_batches_tracked.add_(1)
            if self.momentum is None:
                # Cumulative moving average: weight shrinks as 1 / batches seen.
                avg_factor = 1.0 / float(self.num_batches_tracked)
            else:
                # Exponential moving average.
                avg_factor = self.momentum

    # Mini-batch statistics are used in training mode, and in eval mode when
    # neither running buffer exists.
    use_batch_stats = (
        True
        if self.training
        else (self.running_mean is None) and (self.running_var is None)
    )

    # Buffers are handed over only when they must be updated (training with
    # tracking) or read for normalization (eval); otherwise pass None so they
    # cannot be modified.
    hand_over_buffers = not self.training or self.track_running_stats
    mean_buffer = self.running_mean if hand_over_buffers else None
    var_buffer = self.running_var if hand_over_buffers else None

    normalized = F.batch_norm(
        input,
        mean_buffer,
        var_buffer,
        self.weight,
        self.bias,
        use_batch_stats,
        avg_factor,
        self.eps,
    )
    return normalized.to(input.dtype)
|
unsloth_compiled_cache/BatchNorm2d.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Unsloth Zoo - Utilities for Unsloth
|
| 3 |
+
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# This program is free software: you can redistribute it and/or modify
|
| 6 |
+
# it under the terms of the GNU Lesser General Public License as published by
|
| 7 |
+
# the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
# (at your option) any later version.
|
| 9 |
+
#
|
| 10 |
+
# This program is distributed in the hope that it will be useful,
|
| 11 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
# GNU General Public License for more details.
|
| 14 |
+
#
|
| 15 |
+
# You should have received a copy of the GNU Lesser General Public License
|
| 16 |
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
| 17 |
+
torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
|
| 18 |
+
from torch import Tensor
|
| 19 |
+
import torch
|
| 20 |
+
from torch.nn import functional as F
|
| 21 |
+
from transformers.models.mllama.modeling_mllama import (F, nn)
|
| 22 |
+
|
| 23 |
+
def forward(self, input: Tensor) -> Tensor:
    """Batch-normalize ``input`` and cast the result back to ``input.dtype``.

    Forward pass emitted for ``BatchNorm2d`` (per the cache file name).

    Args:
        input: Tensor to normalize. It is first handed to
            ``self._check_input_dim`` (presumably a rank check; confirm
            against the owning module).

    Returns:
        The output of ``F.batch_norm`` converted to the dtype of ``input``.

    Side effects:
        In training mode with ``track_running_stats`` enabled,
        ``self.num_batches_tracked`` (when not ``None``) is incremented
        in place.
    """
    self._check_input_dim(input)

    # Start from the configured momentum; 0.0 stands in when momentum is None.
    avg_factor = 0.0 if self.momentum is None else self.momentum

    if self.training and self.track_running_stats:
        # The None check is kept so the counter update is skipped when absent.
        if self.num_batches_tracked is not None:
            self.num_batches_tracked.add_(1)
            if self.momentum is None:
                # Cumulative moving average: weight shrinks as 1 / batches seen.
                avg_factor = 1.0 / float(self.num_batches_tracked)
            else:
                # Exponential moving average.
                avg_factor = self.momentum

    # Mini-batch statistics are used in training mode, and in eval mode when
    # neither running buffer exists.
    use_batch_stats = (
        True
        if self.training
        else (self.running_mean is None) and (self.running_var is None)
    )

    # Buffers are handed over only when they must be updated (training with
    # tracking) or read for normalization (eval); otherwise pass None so they
    # cannot be modified.
    hand_over_buffers = not self.training or self.track_running_stats
    mean_buffer = self.running_mean if hand_over_buffers else None
    var_buffer = self.running_var if hand_over_buffers else None

    normalized = F.batch_norm(
        input,
        mean_buffer,
        var_buffer,
        self.weight,
        self.bias,
        use_batch_stats,
        avg_factor,
        self.eps,
    )
    return normalized.to(input.dtype)
|
unsloth_compiled_cache/BatchNorm3d.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Unsloth Zoo - Utilities for Unsloth
|
| 3 |
+
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# This program is free software: you can redistribute it and/or modify
|
| 6 |
+
# it under the terms of the GNU Lesser General Public License as published by
|
| 7 |
+
# the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
# (at your option) any later version.
|
| 9 |
+
#
|
| 10 |
+
# This program is distributed in the hope that it will be useful,
|
| 11 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
# GNU General Public License for more details.
|
| 14 |
+
#
|
| 15 |
+
# You should have received a copy of the GNU Lesser General Public License
|
| 16 |
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
| 17 |
+
torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
|
| 18 |
+
from torch import Tensor
|
| 19 |
+
import torch
|
| 20 |
+
from torch.nn import functional as F
|
| 21 |
+
from transformers.models.mllama.modeling_mllama import (F, nn)
|
| 22 |
+
|
| 23 |
+
def forward(self, input: Tensor) -> Tensor:
    """Batch-normalize ``input`` and cast the result back to ``input.dtype``.

    Forward pass emitted for ``BatchNorm3d`` (per the cache file name).

    Args:
        input: Tensor to normalize. It is first handed to
            ``self._check_input_dim`` (presumably a rank check; confirm
            against the owning module).

    Returns:
        The output of ``F.batch_norm`` converted to the dtype of ``input``.

    Side effects:
        In training mode with ``track_running_stats`` enabled,
        ``self.num_batches_tracked`` (when not ``None``) is incremented
        in place.
    """
    self._check_input_dim(input)

    # Start from the configured momentum; 0.0 stands in when momentum is None.
    avg_factor = 0.0 if self.momentum is None else self.momentum

    if self.training and self.track_running_stats:
        # The None check is kept so the counter update is skipped when absent.
        if self.num_batches_tracked is not None:
            self.num_batches_tracked.add_(1)
            if self.momentum is None:
                # Cumulative moving average: weight shrinks as 1 / batches seen.
                avg_factor = 1.0 / float(self.num_batches_tracked)
            else:
                # Exponential moving average.
                avg_factor = self.momentum

    # Mini-batch statistics are used in training mode, and in eval mode when
    # neither running buffer exists.
    use_batch_stats = (
        True
        if self.training
        else (self.running_mean is None) and (self.running_var is None)
    )

    # Buffers are handed over only when they must be updated (training with
    # tracking) or read for normalization (eval); otherwise pass None so they
    # cannot be modified.
    hand_over_buffers = not self.training or self.track_running_stats
    mean_buffer = self.running_mean if hand_over_buffers else None
    var_buffer = self.running_var if hand_over_buffers else None

    normalized = F.batch_norm(
        input,
        mean_buffer,
        var_buffer,
        self.weight,
        self.bias,
        use_batch_stats,
        avg_factor,
        self.eps,
    )
    return normalized.to(input.dtype)
|
unsloth_compiled_cache/ConvTranspose3d.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Unsloth Zoo - Utilities for Unsloth
|
| 3 |
+
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# This program is free software: you can redistribute it and/or modify
|
| 6 |
+
# it under the terms of the GNU Lesser General Public License as published by
|
| 7 |
+
# the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
# (at your option) any later version.
|
| 9 |
+
#
|
| 10 |
+
# This program is distributed in the hope that it will be useful,
|
| 11 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
# GNU General Public License for more details.
|
| 14 |
+
#
|
| 15 |
+
# You should have received a copy of the GNU Lesser General Public License
|
| 16 |
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
| 17 |
+
torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
|
| 18 |
+
from torch import Tensor
|
| 19 |
+
import torch
|
| 20 |
+
from torch.nn import functional as F
|
| 21 |
+
from transformers.models.mllama.modeling_mllama import (F, List, Optional, Tuple, nn)
|
| 22 |
+
|
| 23 |
+
def forward(self, input: Tensor, output_size: Optional[List[int]] = None) -> Tensor:
    """Run a 3-D transposed convolution and cast the result to ``input.dtype``.

    Args:
        input: Tensor passed to ``F.conv_transpose3d``.
        output_size: Optional requested output size, forwarded to
            ``self._output_padding`` to derive the output padding.

    Returns:
        The transposed-convolution result converted to the dtype of ``input``.

    Raises:
        ValueError: If ``self.padding_mode`` is anything other than ``'zeros'``.
    """
    if self.padding_mode != 'zeros':
        raise ValueError('Only `zeros` padding mode is supported for ConvTranspose3d')

    assert isinstance(self.padding, tuple)

    # `output_size` stays a List (not Tuple/Sequence) because TorchScript does
    # not support `Sequence[T]` or `Tuple[T, ...]` in `_output_padding`.
    spatial_rank = 3
    extra_padding = self._output_padding(
        input,
        output_size,
        self.stride,
        self.padding,
        self.kernel_size,
        spatial_rank,
        self.dilation,
    )

    result = F.conv_transpose3d(
        input,
        self.weight,
        self.bias,
        self.stride,
        self.padding,
        extra_padding,
        self.groups,
        self.dilation,
    )
    return result.to(input.dtype)
|
unsloth_compiled_cache/GroupNorm.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Unsloth Zoo - Utilities for Unsloth
|
| 3 |
+
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# This program is free software: you can redistribute it and/or modify
|
| 6 |
+
# it under the terms of the GNU Lesser General Public License as published by
|
| 7 |
+
# the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
# (at your option) any later version.
|
| 9 |
+
#
|
| 10 |
+
# This program is distributed in the hope that it will be useful,
|
| 11 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
# GNU General Public License for more details.
|
| 14 |
+
#
|
| 15 |
+
# You should have received a copy of the GNU Lesser General Public License
|
| 16 |
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
| 17 |
+
torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
|
| 18 |
+
from torch import Tensor
|
| 19 |
+
import torch
|
| 20 |
+
from torch.nn import functional as F
|
| 21 |
+
from transformers.models.mllama.modeling_mllama import (F)
|
| 22 |
+
|
| 23 |
+
def forward(self, input: Tensor) -> Tensor:
    """Apply group normalization and cast the result back to ``input.dtype``.

    Args:
        input: Tensor passed to ``F.group_norm`` together with the module's
            ``num_groups``, ``weight``, ``bias`` and ``eps``.

    Returns:
        The normalized tensor converted to the dtype of ``input``.
    """
    normalized = F.group_norm(
        input,
        self.num_groups,
        self.weight,
        self.bias,
        self.eps,
    )
    return normalized.to(input.dtype)
|
unsloth_compiled_cache/RMSNorm.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Unsloth Zoo - Utilities for Unsloth
|
| 3 |
+
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# This program is free software: you can redistribute it and/or modify
|
| 6 |
+
# it under the terms of the GNU Lesser General Public License as published by
|
| 7 |
+
# the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
# (at your option) any later version.
|
| 9 |
+
#
|
| 10 |
+
# This program is distributed in the hope that it will be useful,
|
| 11 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
# GNU General Public License for more details.
|
| 14 |
+
#
|
| 15 |
+
# You should have received a copy of the GNU Lesser General Public License
|
| 16 |
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
| 17 |
+
torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
|
| 18 |
+
from torch import Tensor
|
| 19 |
+
import torch
|
| 20 |
+
from torch.nn import functional as F
|
| 21 |
+
from transformers.models.mllama.modeling_mllama import (F, torch)
|
| 22 |
+
|
| 23 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 24 |
+
"""
|
| 25 |
+
Runs forward pass.
|
| 26 |
+
"""
|
| 27 |
+
return F.rms_norm(x, self.normalized_shape, self.weight, self.eps).to(input.dtype)
|