Eiad Gomaa committed · Commit 6f6da11 · Parent(s): 5ab0078

new model2

Browse files
- app.py +14 -15
- requirements.txt +2 -1
app.py CHANGED

@@ -9,6 +9,12 @@ import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Display installation instructions if needed
+st.sidebar.write("### Required Packages")
+st.sidebar.code("""
+pip install transformers torch streamlit
+""")
+
 @st.cache_resource
 def load_model():
     """Load model and tokenizer with caching"""
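The context lines above show load_model() wrapped in @st.cache_resource. A minimal sketch of that caching pattern, with everything outside the diff assumed:

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource  # runs once per server process; later script reruns reuse the cached objects
def load_model():
    model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-3.2-1B")
    tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
    return model, tokenizer

model, tokenizer = load_model()

Without the decorator, Streamlit would reload the 1B-parameter weights on every widget interaction, since it reruns the whole script each time.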
@@ -16,13 +22,10 @@ def load_model():
     st.spinner("Loading model... This may take a few minutes")
     logger.info("Starting model loading...")
 
-    #
+    # Basic model loading without device map
     model = AutoModelForCausalLM.from_pretrained(
         "NousResearch/Llama-3.2-1B",
-
-        device_map="auto",  # Automatically handle device placement
-        low_cpu_mem_usage=True,
-        torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16
+        torch_dtype=torch.float32  # Use float32 for CPU
     )
 
     tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
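This hunk trades automatic device placement for a plain CPU load. A sketch of the two paths as separate calls; only the keyword arguments appear in the diff, the comments are mine:

import torch
from transformers import AutoModelForCausalLM

# Removed path: let accelerate place the weights (requires `pip install accelerate`)
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-3.2-1B",
    device_map="auto",       # spread weights across available GPU/CPU memory
    low_cpu_mem_usage=True,  # stream weights in rather than materializing two full copies
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
)

# New path: everything on CPU in full precision
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-3.2-1B",
    torch_dtype=torch.float32,  # half precision is poorly supported on CPU
)

device_map="auto" is the option that requires the accelerate package, which this commit adds to requirements.txt below even as the hunk removes the option that needs it. Note also that the bare st.spinner(...) in the context lines is a no-op: st.spinner only displays when used as a context manager (with st.spinner(...):).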
@@ -62,7 +65,7 @@ def generate_response_with_timeout(model, tokenizer, prompt, timeout_seconds=30)
         padding=True,
         truncation=True,
         max_length=256  # Reduced for CPU
-        )
+    )
 
     start_time = time.time()
 
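The change here only re-indents the closing parenthesis. For context, a runnable sketch of a tokenizer call with these arguments; the pad-token fallback is my addition, since Llama-family tokenizers ship without a pad token and padding=True fails without one:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # assumed fix; padding=True needs a pad token

inputs = tokenizer(
    "Why is the sky blue?",  # placeholder prompt
    return_tensors="pt",     # PyTorch tensors, ready for model.generate
    padding=True,
    truncation=True,
    max_length=256,          # matches the hunk: cap prompt length to bound CPU latency
)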
@@ -81,8 +84,7 @@ def generate_response_with_timeout(model, tokenizer, prompt, timeout_seconds=30)
         top_k=40,
         repetition_penalty=1.5,  # Increased repetition penalty
         no_repeat_ngram_size=3,  # Prevent 3-gram repetitions
-        early_stopping=True
-        length_penalty=1.0
+        early_stopping=True
     )
 
     generation_time = time.time() - start_time
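Besides trimming length_penalty, this hunk appears to fix a latent bug: the removed pair early_stopping=True length_penalty=1.0 has no comma between the arguments, which would be a SyntaxError in Python. A sketch of the surrounding generate() call, reusing model, tokenizer, and inputs from the sketches above; every value not visible in the diff is assumed:

import time
import torch

start_time = time.time()
with torch.no_grad():  # inference only, skip autograd bookkeeping
    output_ids = model.generate(
        **inputs,
        max_new_tokens=128,        # assumed cap on reply length
        do_sample=True,            # assumed; top_k only matters when sampling
        top_k=40,
        repetition_penalty=1.5,    # values > 1.0 penalize already-generated tokens
        no_repeat_ngram_size=3,    # forbid any 3-gram from repeating verbatim
        early_stopping=True,       # only takes effect with beam search (num_beams > 1)
        pad_token_id=tokenizer.pad_token_id,
    )
generation_time = time.time() - start_time
reply = tokenizer.decode(output_ids[0], skip_special_tokens=True)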
@@ -113,13 +115,10 @@ with st.sidebar:
     # Device and memory information
     device = "GPU" if torch.cuda.is_available() else "CPU"
     st.write(f"Running on: {device}")
-
-
-
-
-    import psutil
-    st.write(f"CPU Memory Usage: {psutil.Process().memory_info().rss / 1024**2:.2f} MB")
-    st.write("⚠️ Running on CPU - Responses may be slow")
+
+    # Warning for CPU usage
+    if not torch.cuda.is_available():
+        st.warning("⚠️ Running on CPU - Responses may be very slow. Consider using a GPU or a smaller model.")
 
     # Model settings
     st.write("### Model Settings")
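This hunk drops the mid-script import psutil and the unconditional slow-CPU message in favor of a warning shown only when CUDA is unavailable. If the memory readout is worth keeping, a sketch with the import hoisted to the top (sidebar structure assumed from the hunk's context):

import psutil
import streamlit as st
import torch

with st.sidebar:
    device = "GPU" if torch.cuda.is_available() else "CPU"
    st.write(f"Running on: {device}")

    # RSS (resident set size): physical memory the process currently holds
    rss_mb = psutil.Process().memory_info().rss / 1024**2
    st.write(f"CPU Memory Usage: {rss_mb:.2f} MB")

    if not torch.cuda.is_available():
        st.warning("⚠️ Running on CPU - Responses may be very slow. Consider using a GPU or a smaller model.")

If psutil stays, it also belongs in requirements.txt; the old code used it without ever listing it there.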
requirements.txt CHANGED

@@ -2,4 +2,5 @@ streamlit
 transformers
 torch  # If your model requires PyTorch
 # or
-tensorflow
+tensorflow
+accelerate
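Two observations on the final list. First, accelerate is only consulted by transformers when options like device_map= or low_cpu_mem_usage= are passed, and this same commit removes both from app.py, so the new dependency is likely unused. Second, pip does not read "# or" as a choice: that line is just a comment, so this file installs both torch and tensorflow even though app.py only imports torch. A leaner list (my suggestion, not part of the commit) would keep streamlit, transformers, and torch, plus psutil if the sidebar memory readout is restored.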