yichuan-huang committed
Commit fd09229 · Parent(s): d958a06

fix the bug

Files changed (2):
  1. classifier.py +10 -32
  2. config.py +5 -6
classifier.py CHANGED
@@ -1,4 +1,4 @@
-from transformers import AutoProcessor, Gemma3nForConditionalGeneration
+from transformers import AutoProcessor, AutoModelForImageTextToText
 from PIL import Image
 import torch
 import logging
@@ -34,11 +34,11 @@ class GarbageClassifier:
         )
 
         # Load model
-        self.model = Gemma3nForConditionalGeneration.from_pretrained(
+        self.model = AutoModelForImageTextToText.from_pretrained(
             self.config.MODEL_NAME,
             torch_dtype=self.config.TORCH_DTYPE,
             device_map=self.config.DEVICE_MAP,
-        ).eval()
+        )
 
         self.logger.info("Model loaded successfully")
 
@@ -138,40 +138,18 @@
             tokenize=True,
             return_dict=True,
             return_tensors="pt",
-        )
-
-        # Move inputs to model device and set dtype
-        inputs = inputs.to(self.model.device, dtype=self.model.dtype)
+        ).to(self.model.device, dtype=self.model.dtype)
         input_len = inputs["input_ids"].shape[-1]
 
-        # Generate response
-        with torch.no_grad():
-            generation_kwargs = {
-                "max_new_tokens": self.config.MAX_NEW_TOKENS,
-                "pad_token_id": self.processor.tokenizer.eos_token_id,
-                "disable_compile": True,  # Important for stability
-            }
-
-            if self.config.DO_SAMPLE:
-                generation_kwargs.update(
-                    {
-                        "do_sample": True,
-                        "temperature": self.config.TEMPERATURE,
-                        "top_p": self.config.TOP_P,
-                        "top_k": self.config.TOP_K,
-                    }
-                )
-            else:
-                generation_kwargs["do_sample"] = False
-
-            outputs = self.model.generate(**inputs, **generation_kwargs)
-
-        # Decode response
+        outputs = self.model.generate(
+            **inputs,
+            max_new_tokens=self.config.MAX_NEW_TOKENS,
+            disable_compile=True,
+        )
         response = self.processor.batch_decode(
             outputs[:, input_len:],
             skip_special_tokens=True,
-            clean_up_tokenization_spaces=True,
-        )[0]
+        )
 
         # Extract classification from response
         classification = self._extract_classification(response)
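Two of the removals in this file are behavior-neutral: from_pretrained already returns the model in eval mode, so dropping the explicit .eval() changes nothing, and generate() disables gradients internally, so the removed torch.no_grad() was redundant. For context, below is a minimal sketch of the post-commit inference path; the model and processor calls mirror the diff, while the image and chat prompt are hypothetical stand-ins for code not shown here.

# Minimal sketch of the inference path after this commit.
# The prompt and image below are illustrative assumptions, not the repo's code.
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import torch

MODEL_NAME = "google/gemma-3n-E2B-it"

processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="cuda:0" if torch.cuda.is_available() else "cpu",
)  # already in eval mode; no .eval() needed

messages = [  # hypothetical prompt; the real one lives outside this diff
    {
        "role": "user",
        "content": [
            {"type": "image", "image": Image.open("item.jpg")},  # hypothetical path
            {"type": "text", "text": "Classify this piece of garbage."},
        ],
    }
]

inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device, dtype=model.dtype)
input_len = inputs["input_ids"].shape[-1]

outputs = model.generate(  # generate() already runs without gradients
    **inputs,
    max_new_tokens=512,
    disable_compile=True,
)
response = processor.batch_decode(
    outputs[:, input_len:],
    skip_special_tokens=True,
)[0]  # batch_decode returns a list of strings
print(response)

One detail worth noting: with the )[0] index removed in this commit, the classifier's response is now the one-element list returned by batch_decode rather than a string, so _extract_classification receives a list; the sketch above keeps the explicit index.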
config.py CHANGED
@@ -9,15 +9,14 @@ class Config:
     MODEL_NAME: str = "google/gemma-3n-E2B-it"
 
     # Generation parameters
-    MAX_NEW_TOKENS: int = 256
-    TEMPERATURE: float = 0.3
-    DO_SAMPLE: bool = True
-    TOP_P: float = 0.8
-    TOP_K: int = 40
+    MAX_NEW_TOKENS: int = 512
 
     # Device configuration
     TORCH_DTYPE: str = torch.bfloat16
-    DEVICE_MAP: str = "auto"
+    if torch.cuda.is_available():
+        DEVICE_MAP: str = "cuda:0"  # Use first GPU if available
+    else:
+        DEVICE_MAP: str = "cpu"
 
     # Image preprocessing
     IMAGE_SIZE: int = 512
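For reference, here is a sketch of how the Config fields visible in this diff read after the commit (anything else in the file is omitted). The class-scope if/else runs once at import time, so the one-liner below is equivalent; note also that TORCH_DTYPE holds a torch.dtype despite its str annotation, and that with the sampling knobs gone, decoding falls back to the model's default generation config.

# Sketch of the post-commit Config (only fields shown in this diff).
import torch

class Config:
    MODEL_NAME: str = "google/gemma-3n-E2B-it"

    # Generation parameters: sampling knobs removed; generation now uses the
    # model's default generation config plus MAX_NEW_TOKENS
    MAX_NEW_TOKENS: int = 512

    # Device configuration
    TORCH_DTYPE = torch.bfloat16  # a torch.dtype, despite the upstream str annotation
    # One-line equivalent of the diff's class-scope if/else, chosen at import time
    DEVICE_MAP: str = "cuda:0" if torch.cuda.is_available() else "cpu"

    # Image preprocessing
    IMAGE_SIZE: int = 512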