Eiad Gomaa committed · Commit 6f6da11 · Parent(s): 5ab0078

new model2

Browse files
- app.py +14 -15
- requirements.txt +2 -1
app.py CHANGED

@@ -9,6 +9,12 @@ import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Display installation instructions if needed
+st.sidebar.write("### Required Packages")
+st.sidebar.code("""
+pip install transformers torch streamlit
+""")
+
 @st.cache_resource
 def load_model():
     """Load model and tokenizer with caching"""
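The context lines above show load_model() wrapped in @st.cache_resource. A minimal sketch of that caching pattern, with everything outside the diff assumed:

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource  # runs once per server process; later script reruns reuse the cached objects
def load_model():
    model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-3.2-1B")
    tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
    return model, tokenizer

model, tokenizer = load_model()

Without the decorator, Streamlit would reload the 1B-parameter weights on every widget interaction, since it reruns the whole script each time.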
@@ -16,13 +22,10 @@ def load_model():
     st.spinner("Loading model... This may take a few minutes")
     logger.info("Starting model loading...")
 
-    #
+    # Basic model loading without device map
     model = AutoModelForCausalLM.from_pretrained(
         "NousResearch/Llama-3.2-1B",
-
-        device_map="auto",  # Automatically handle device placement
-        low_cpu_mem_usage=True,
-        torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16
+        torch_dtype=torch.float32  # Use float32 for CPU
     )
 
     tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
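This hunk trades automatic device placement for a plain CPU load. A sketch of the two paths as separate calls; only the keyword arguments appear in the diff, the comments are mine:

import torch
from transformers import AutoModelForCausalLM

# Removed path: let accelerate place the weights (requires `pip install accelerate`)
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-3.2-1B",
    device_map="auto",       # spread weights across available GPU/CPU memory
    low_cpu_mem_usage=True,  # stream weights in rather than materializing two full copies
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
)

# New path: everything on CPU in full precision
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-3.2-1B",
    torch_dtype=torch.float32,  # half precision is poorly supported on CPU
)

device_map="auto" is the option that requires the accelerate package, which this commit adds to requirements.txt below even as the hunk removes the option that needs it. Note also that the bare st.spinner(...) in the context lines is a no-op: st.spinner only displays when used as a context manager (with st.spinner(...):).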
@@ -62,7 +65,7 @@ def generate_response_with_timeout(model, tokenizer, prompt, timeout_seconds=30)
         padding=True,
         truncation=True,
         max_length=256  # Reduced for CPU
-        )
+    )
 
     start_time = time.time()
 
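The change here only re-indents the closing parenthesis. For context, a runnable sketch of a tokenizer call with these arguments; the pad-token fallback is my addition, since Llama-family tokenizers ship without a pad token and padding=True fails without one:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # assumed fix; padding=True needs a pad token

inputs = tokenizer(
    "Why is the sky blue?",  # placeholder prompt
    return_tensors="pt",     # PyTorch tensors, ready for model.generate
    padding=True,
    truncation=True,
    max_length=256,          # matches the hunk: cap prompt length to bound CPU latency
)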
@@ -81,8 +84,7 @@ def generate_response_with_timeout(model, tokenizer, prompt, timeout_seconds=30)
         top_k=40,
         repetition_penalty=1.5,  # Increased repetition penalty
         no_repeat_ngram_size=3,  # Prevent 3-gram repetitions
-        early_stopping=True
-        length_penalty=1.0
+        early_stopping=True
     )
 
     generation_time = time.time() - start_time
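Besides trimming length_penalty, this hunk appears to fix a latent bug: the removed pair early_stopping=True length_penalty=1.0 has no comma between the arguments, which would be a SyntaxError in Python. A sketch of the surrounding generate() call, reusing model, tokenizer, and inputs from the sketches above; every value not visible in the diff is assumed:

import time
import torch

start_time = time.time()
with torch.no_grad():  # inference only, skip autograd bookkeeping
    output_ids = model.generate(
        **inputs,
        max_new_tokens=128,        # assumed cap on reply length
        do_sample=True,            # assumed; top_k only matters when sampling
        top_k=40,
        repetition_penalty=1.5,    # values > 1.0 penalize already-generated tokens
        no_repeat_ngram_size=3,    # forbid any 3-gram from repeating verbatim
        early_stopping=True,       # only takes effect with beam search (num_beams > 1)
        pad_token_id=tokenizer.pad_token_id,
    )
generation_time = time.time() - start_time
reply = tokenizer.decode(output_ids[0], skip_special_tokens=True)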
@@ -113,13 +115,10 @@ with st.sidebar:
     # Device and memory information
     device = "GPU" if torch.cuda.is_available() else "CPU"
     st.write(f"Running on: {device}")
-
-
-
-
-    import psutil
-    st.write(f"CPU Memory Usage: {psutil.Process().memory_info().rss / 1024**2:.2f} MB")
-    st.write("⚠️ Running on CPU - Responses may be slow")
+
+    # Warning for CPU usage
+    if not torch.cuda.is_available():
+        st.warning("⚠️ Running on CPU - Responses may be very slow. Consider using a GPU or a smaller model.")
 
     # Model settings
     st.write("### Model Settings")
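This hunk drops the mid-script import psutil and the unconditional slow-CPU message in favor of a warning shown only when CUDA is unavailable. If the memory readout is worth keeping, a sketch with the import hoisted to the top (sidebar structure assumed from the hunk's context):

import psutil
import streamlit as st
import torch

with st.sidebar:
    device = "GPU" if torch.cuda.is_available() else "CPU"
    st.write(f"Running on: {device}")

    # RSS (resident set size): physical memory the process currently holds
    rss_mb = psutil.Process().memory_info().rss / 1024**2
    st.write(f"CPU Memory Usage: {rss_mb:.2f} MB")

    if not torch.cuda.is_available():
        st.warning("⚠️ Running on CPU - Responses may be very slow. Consider using a GPU or a smaller model.")

If psutil stays, it also belongs in requirements.txt; the old code used it without ever listing it there.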
requirements.txt CHANGED

@@ -2,4 +2,5 @@ streamlit
 transformers
 torch  # If your model requires PyTorch
 # or
-tensorflow
+tensorflow
+accelerate
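Two observations on the final list. First, accelerate is only consulted by transformers when options like device_map= or low_cpu_mem_usage= are passed, and this same commit removes both from app.py, so the new dependency is likely unused. Second, pip does not read "# or" as a choice: that line is just a comment, so this file installs both torch and tensorflow even though app.py only imports torch. A leaner list (my suggestion, not part of the commit) would keep streamlit, transformers, and torch, plus psutil if the sidebar memory readout is restored.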