druvx13 committed on
Commit 88a6999 · verified · 1 Parent(s): 2ab05f6

Update app.py

Files changed (1)
app.py +73 -132
app.py CHANGED
@@ -1,154 +1,95 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
  import os

- # 🔧 CPU Optimization Suite
- os.environ["OMP_NUM_THREADS"] = "4"
- torch.set_num_threads(4)
- torch.manual_seed(42)
-
- # 📦 Model Configuration
- MODEL_NAME = "openai-community/openai-gpt"
  cache_dir = "./model_cache"

- # 🧠 Load Tokenizer with Padding Fix
- tokenizer = AutoTokenizer.from_pretrained(
-     MODEL_NAME,
      cache_dir=cache_dir,
-     padding_side="left"
  )

- # ✅ Add pad_token if missing (required for batched generation)
- if tokenizer.pad_token is None:
-     # First add special token to tokenizer
-     tokenizer.add_special_tokens({'pad_token': '[PAD]'})

-     # Then resize model embeddings to accommodate new token
-     model = AutoModelForCausalLM.from_pretrained(
-         MODEL_NAME,
-         torch_dtype=torch.float32,
-         low_cpu_mem_usage=True,
-         cache_dir=cache_dir
      )
-     model.resize_token_embeddings(len(tokenizer))

-     # Finally set pad_token
-     tokenizer.pad_token = tokenizer.eos_token or '[PAD]'
- else:
-     # Load model normally if pad_token exists
-     model = AutoModelForCausalLM.from_pretrained(
-         MODEL_NAME,
-         torch_dtype=torch.float32,
-         low_cpu_mem_usage=True,
-         cache_dir=cache_dir
-     )
-
- # 🧠 Final model setup
- model = model.eval()
-
- def generate_response(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9, num_sequences=1):
-     """Optimized for 18GB CPU with strict memory control"""
-     try:
-         # 🧮 Calculate max_input_length as multiple of 8
-         MAX_CONTEXT_LENGTH = 512
-         max_input_length = MAX_CONTEXT_LENGTH - (MAX_CONTEXT_LENGTH % 8)
-
-         # 🛡️ Input Protection with strict length control
-         inputs = tokenizer(
-             prompt,
-             return_tensors="pt",
-             truncation=True,
-             max_length=max_input_length,  # Now guaranteed to be multiple of 8
-             padding="max_length",
-             pad_to_multiple_of=8,
-             add_special_tokens=False
-         )
-
-         with torch.inference_mode():
-             outputs = model.generate(
-                 **inputs,
-                 max_new_tokens=int(max_new_tokens),
-                 temperature=float(temperature),
-                 top_p=float(top_p),
-                 do_sample=True,
-                 num_return_sequences=int(num_sequences),
-                 pad_token_id=tokenizer.convert_tokens_to_ids(tokenizer.pad_token),
-                 eos_token_id=tokenizer.eos_token_id,
-                 max_length=max_input_length + int(max_new_tokens)  # Total length guardrail
-             )
-
-         return "\n\n".join([
-             f"📝 Result {i+1}:\n{tokenizer.decode(output, skip_special_tokens=True)}"
-             for i, output in enumerate(outputs)
-         ])
-     except Exception as e:
-         return f"🚨 CPU Memory Alert: {str(e)}\nTry shorter inputs or fewer results!"

- # 🎨 UI Layout with Gradio Blocks
- with gr.Blocks(theme="soft", title="GPT-1 Legacy Engine") as demo:
      gr.Markdown("""
-     # 🧠 Legacy GPT-1 Text Generator (CPU-Optimized Edition)
-     *Running the original transformer-based language model with surgical memory control*
-
-     🔥 Features:
-     - Thread-limited execution for stable performance
-     - Input length protection (512 tokens)
-     - Batch generation support
-     - Temperature-controlled creativity
-     - Interactive examples
      """)

      with gr.Row():
-         with gr.Column(scale=2):
-             prompt = gr.Textbox(
-                 label="Input Prompt",
-                 placeholder="Enter your prompt here...",
-                 lines=5
-             )
-
-             with gr.Accordion("⚙️ Generation Parameters", open=False):
-                 max_new_tokens = gr.Slider(
-                     minimum=32, maximum=256, value=128, step=16,
-                     label="Max New Tokens (Output Length)"
-                 )
-                 temperature = gr.Slider(
-                     minimum=0.1, maximum=1.5, value=0.7, step=0.1,
-                     label="Creativity Level (Temperature)"
-                 )
-                 top_p = gr.Slider(
-                     minimum=0.1, maximum=1.0, value=0.9, step=0.1,
-                     label="Top-p Sampling (Nucleus Filtering)"
-                 )
-                 num_sequences = gr.Slider(
-                     minimum=1, maximum=3, value=1, step=1,
-                     label="Number of Results to Generate"
-                 )
-
-             submit = gr.Button("✨ Generate Text", variant="primary")
-
-         with gr.Column(scale=2):
-             output = gr.Textbox(
-                 label="Generated Text",
-                 lines=15,
-                 show_copy_button=True
-             )

-     submit.click(
-         fn=generate_response,
-         inputs=[prompt, max_new_tokens, temperature, top_p, num_sequences],
          outputs=output
      )

-     gr.Examples(
-         examples=[
-             ["The future of artificial intelligence will"],
-             ["Explain quantum physics like I'm five"],
-             ["Write a haiku about machine learning"]
-         ],
-         inputs=prompt,
-         label="🚀 Try These Prompts"
-     )

- if __name__ == "__main__":
-     demo.launch()
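A note on the pad-token handling deleted above: it registers a `[PAD]` token, resizes the embeddings, and then rebinds `tokenizer.pad_token = tokenizer.eos_token or '[PAD]'`. The original GPT tokenizer ships without an EOS token, so the fallback happens to land back on `[PAD]`, but the detour is easy to get wrong. For reference, a minimal sketch of the usual pattern, assuming the same model and tokenizer as the file above:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "openai-community/openai-gpt"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

if tokenizer.pad_token is None:
    # Register the pad token first, then grow the embedding matrix so the
    # new token id has a row; nothing needs to be rebound afterwards.
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    model.resize_token_embeddings(len(tokenizer))
```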
 
  import gradio as gr
+ from transformers import pipeline, set_seed
  import torch
  import os

+ # Create cache directory for model
  cache_dir = "./model_cache"
+ os.makedirs(cache_dir, exist_ok=True)

+ # Load text generation pipeline with GPU support
+ generator = pipeline(
+     'text-generation',
+     model='openai-community/openai-gpt',
      cache_dir=cache_dir,
+     device=0 if torch.cuda.is_available() else -1
  )

+ def generate_text(prompt, max_length, num_return_sequences, seed):
+     """Generate text with optional seed control and error handling"""
+     if seed.strip():
+         try:
+             set_seed(int(seed))  # Handle user-provided seed
+         except ValueError:
+             pass  # Ignore invalid seeds

+     # Generate with sampling for better diversity
+     results = generator(
+         prompt,
+         max_length=int(max_length),
+         num_return_sequences=int(num_return_sequences),
+         do_sample=True,
+         pad_token_id=generator.tokenizer.eos_token_id
      )

+     # Format results with numbering
+     output = ""
+     for i, res in enumerate(results):
+         output += f"🔥 Result {i+1}:\n{res['generated_text']}\n\n{'-'*30}\n"
+     return output

+ # Create Gradio interface
+ with gr.Blocks(theme="soft") as demo:
      gr.Markdown("""
+     # GPT-1 Text Generator 🧠
+     ⚠️ **Warning**: This model may generate harmful stereotypes or offensive content. Use responsibly.
+     Generate text using the original OpenAI GPT model (2018).
      """)

      with gr.Row():
+         prompt = gr.Textbox(
+             label="Enter your prompt",
+             placeholder="Once upon a time...",
+             lines=3
+         )
+
+     with gr.Row():
+         max_length = gr.Slider(
+             minimum=10,
+             maximum=200,
+             value=50,
+             step=5,
+             label="Max Length"
+         )
+         num_return_sequences = gr.Slider(
+             minimum=1,
+             maximum=5,
+             value=3,
+             step=1,
+             label="Number of Results"
+         )
+         seed = gr.Textbox(
+             value="42",
+             label="Seed (leave blank for random)",
+             max_lines=1
+         )

+     generate_btn = gr.Button("Generate Text", variant="primary")
+     output = gr.Textbox(label="Generated Text", lines=15)
+
+     generate_btn.click(
+         fn=generate_text,
+         inputs=[prompt, max_length, num_return_sequences, seed],
          outputs=output
      )

+     gr.Markdown("""
+     ### Tips for Better Results
+     - Use clear, specific prompts
+     - Increase max length for longer stories
+     - Try different seeds for varied outputs
+     - GPT-1 has significant limitations compared to modern models
+     """)

+ # Launch the app
+ demo.launch()
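To sanity-check the rewritten app.py, the pipeline can be exercised outside Gradio; below is a minimal sketch whose prompt and sampling values are illustrative only, not part of the commit. Two behaviors worth noting: `max_length` counts the prompt tokens plus the continuation, unlike the old code's `max_new_tokens`, and `set_seed` seeds the Python, NumPy, and PyTorch generators globally. Also, the original GPT tokenizer defines no EOS token, so `generator.tokenizer.eos_token_id` is typically `None`, which makes the `pad_token_id=...` argument in `generate_text` a no-op for this model.

```python
from transformers import pipeline, set_seed

generator = pipeline("text-generation", model="openai-community/openai-gpt")
set_seed(42)  # reproducible sampling across Python/NumPy/PyTorch RNGs

# max_length = prompt tokens + continuation, so this short prompt leaves
# roughly 45 tokens of room to generate.
for result in generator("Once upon a time", max_length=50,
                        num_return_sequences=3, do_sample=True):
    print(result["generated_text"])
```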