Commit e35418a · alessandro trinca tornidor
Parent(s): e5d4552
feat: add device_map argument to try avoid CUDA init RuntimeError, bump lisa-on-cuda to version 1.4.5
Files changed:
- lisa_on_cuda/utils/app_helpers.py +19 -14
- pyproject.toml +2 -2
lisa_on_cuda/utils/app_helpers.py
CHANGED
@@ -118,7 +118,7 @@ def preprocess(
 
 def load_model_for_causal_llm_pretrained(
         version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
-        internal_logger: logging = None
+        internal_logger: logging = None, device_map="auto"
 ):
     if internal_logger is None:
         internal_logger = app_logger
@@ -128,8 +128,7 @@ def load_model_for_causal_llm_pretrained(
     kwargs.update(
         {
             "torch_dtype": torch.half,
-            #
-            "load_in_4bit": True,
+            # "load_in_4bit": True,
             "quantization_config": BitsAndBytesConfig(
                 load_in_4bit=True,
                 bnb_4bit_compute_dtype=torch.float16,
@@ -155,13 +154,15 @@ def load_model_for_causal_llm_pretrained(
         low_cpu_mem_usage=True,
         vision_tower=vision_tower,
         seg_token_idx=seg_token_idx,
+        # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+        device_map=device_map,
         **kwargs
     )
-    internal_logger.debug(
+    internal_logger.debug("model loaded!")
     return _model
 
 
-def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None):
+def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto"):
     if internal_logger is None:
         internal_logger = app_logger
     internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
@@ -183,7 +184,7 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
         use_fast=False,
     )
     _tokenizer.pad_token = _tokenizer.unk_token
-    internal_logger.info(
+    internal_logger.info("tokenizer ok")
    args_to_parse.seg_token_idx = _tokenizer("[SEG]", add_special_tokens=False).input_ids[0]
     torch_dtype = torch.float32
     if args_to_parse.precision == "bf16":
@@ -199,7 +200,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
             load_in_8bit=args_to_parse.load_in_8bit,
             load_in_4bit=args_to_parse.load_in_4bit,
             seg_token_idx=args_to_parse.seg_token_idx,
-            vision_tower=args_to_parse.vision_tower
+            vision_tower=args_to_parse.vision_tower,
+            device_map=device_map # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
         )) if inference_decorator else load_model_for_causal_llm_pretrained(
         args_to_parse.version,
         torch_dtype=torch_dtype,
@@ -207,8 +209,9 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
         load_in_4bit=args_to_parse.load_in_4bit,
         seg_token_idx=args_to_parse.seg_token_idx,
         vision_tower=args_to_parse.vision_tower,
+        device_map=device_map
     )
-    internal_logger.debug(
+    internal_logger.debug("causal llm loaded!")
 
     _model.config.eos_token_id = _tokenizer.eos_token_id
     _model.config.bos_token_id = _tokenizer.bos_token_id
@@ -222,11 +225,11 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
         _model, args_to_parse, torch_dtype
     )
     vision_tower.to(device=args_to_parse.local_rank)
-    internal_logger.debug(
+    internal_logger.debug("vision tower loaded, prepare clip image processor...")
     _clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
-    internal_logger.debug(
+    internal_logger.debug("clip image processor done.")
     _transform = ResizeLongestSide(args_to_parse.image_size)
-    internal_logger.debug(
+    internal_logger.debug("start model evaluation...")
     inference_decorator(_model.eval()) if inference_decorator else _model.eval()
     internal_logger.info("model preparation ok!")
     return _model, _clip_image_processor, _tokenizer, _transform
@@ -261,15 +264,17 @@ def prepare_model_vision_tower(_model, args_to_parse, torch_dtype, internal_logg
     internal_logger.debug(f"vision tower precision fp32? {args_to_parse.precision}, 3.")
     _model = _model.float().cuda()
     vision_tower = _model.get_model().get_vision_tower()
-    internal_logger.debug(
+    internal_logger.debug("vision tower ok!")
     return _model, vision_tower
 
 
-def get_inference_model_by_args(
+def get_inference_model_by_args(
+        args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto"
+):
     if internal_logger0 is None:
         internal_logger0 = app_logger
     internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
-    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse)
+    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map)
     internal_logger0.info("created model, preparing inference function")
     no_seg_out = placeholders["no_seg_out"]
 
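Note on the change: device_map is forwarded through to transformers' from_pretrained. With accelerate available, device_map="auto" lets the loader decide weight placement itself instead of the caller moving the model with .cuda(), so the main process does not touch CUDA eagerly; on HuggingFace ZeroGPU hardware that eager init is what raises the RuntimeError this commit works around. A minimal sketch of the pattern follows; the model id, logger name and helper are illustrative placeholders, not values from this repo:

# Minimal sketch of the device_map pattern above; the model id and logger
# name are illustrative placeholders, not taken from this commit.
import logging

import torch
from transformers import AutoModelForCausalLM

logger = logging.getLogger("device-map-demo")


def load_pretrained_lazy(version: str = "huggyllama/llama-7b", device_map: str = "auto"):
    # device_map="auto" (dispatched by accelerate) lets from_pretrained
    # distribute weights itself; the caller never runs model.cuda(), so
    # CUDA is not initialized eagerly in the main process, which is the
    # constraint on ZeroGPU Spaces.
    model = AutoModelForCausalLM.from_pretrained(
        version,
        torch_dtype=torch.half,
        low_cpu_mem_usage=True,
        device_map=device_map,
    )
    logger.debug("model loaded!")
    return model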
pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lisa-on-cuda"
-version = "1.4.
+version = "1.4.5"
 description = "LISA (Reasoning Segmentation via Large Language Model) on cuda, now with huggingface ZeroGPU support!"
 authors = ["alessandro trinca tornidor <alessandro@trinca.tornidor.com>"]
 license = "Apache 2.0"
@@ -8,7 +8,7 @@ readme = "README.md"
 
 [metadata]
 name = "lisa-on-cuda"
-version = "1.4.
+version = "1.4.5"
 
 [tool.poetry.urls]
 Source = "https://huggingface.co/spaces/aletrn/lisa-on-cuda/"