# financial_advisor_gui.py — LaunchLLM
# (Hugging Face Spaces page chrome preserved as comments: uploader "Bmccloud22",
#  caption "Deploy LaunchLLM - Production AI Training Platform", commit 745ed18 verified)
"""
Financial Advisor Training GUI
A simple, user-friendly interface for training and testing your financial advisor AI.
No coding required - just click buttons and see results!
"""
import gradio as gr
import json
import threading
from pathlib import Path
from datetime import datetime
import time
import os
# Import training components
from fine_tuning import LoRATrainer, LoRAConfig
from data_aggregation import DatasetBuilder, JSONDataCollector
from data_aggregation.synthetic_generator import SyntheticDataGenerator
from data_aggregation.quality_validator import QualityValidator
from data_aggregation.hf_dataset_loader import HuggingFaceDatasetLoader
from secure_config import SecureConfig
from model_registry import get_registry
from ollama_integration import OllamaClient, test_financial_advisor_ollama
from runpod_manager import RunPodManager, DeploymentConfig, TrainingConfig
from runpod_client import PodInfo
# ---------------------------------------------------------------------------
# Module-level state shared across Gradio callbacks.
# ---------------------------------------------------------------------------

# Live snapshot of the current local training run, read by the UI.
training_status = {
    "is_training": False,
    "current_epoch": 0,
    "total_epochs": 0,
    "loss": 0.0,
    "progress": 0.0,
    "logs": [],
}

trainer_instance = None            # Active LoRATrainer once a model is loaded
secure_config = SecureConfig()     # Encrypted API-key store (.secrets/)
model_registry = get_registry()    # Catalogue of trainable models
selected_model_id = "qwen2.5-32b"  # Default model

# RunPod (cloud GPU) state
runpod_manager = None
current_pod_info = None
deployment_in_progress = False
# API Key Management Functions
def save_api_keys(hf_token, openai_key, anthropic_key, wandb_key, runpod_key, custom_keys_json):
    """Save all API keys securely.

    Args:
        hf_token, openai_key, anthropic_key, wandb_key, runpod_key:
            Individual provider keys; None/empty values are stored as "".
        custom_keys_json: Optional JSON object string with extra key/value pairs.

    Returns:
        A human-readable status message for the GUI.
    """
    try:
        api_keys = {
            "HUGGINGFACE_TOKEN": hf_token or "",
            "HF_TOKEN": hf_token or "",  # Alternative name
            "OPENAI_API_KEY": openai_key or "",
            "ANTHROPIC_API_KEY": anthropic_key or "",
            "WANDB_API_KEY": wandb_key or "",
            "RUNPOD_API_KEY": runpod_key or "",
        }
        # Parse custom keys if provided.
        # Bug fix: previously an invalid JSON payload returned *before*
        # secure_config.save_keys() ran, so the message "Other keys saved
        # successfully" was false. We now remember the parse failure, save the
        # standard keys regardless, and only then report the warning.
        custom_json_invalid = False
        if custom_keys_json and custom_keys_json.strip():
            try:
                custom_keys = json.loads(custom_keys_json)
                api_keys.update(custom_keys)
            except json.JSONDecodeError:
                custom_json_invalid = True
        # Save securely
        secure_config.save_keys(api_keys)
        if custom_json_invalid:
            return "⚠️ Invalid JSON in custom keys. Other keys saved successfully."
        # Count non-empty keys
        saved_count = sum(1 for v in api_keys.values() if v and v.strip())
        return f"βœ… Successfully saved {saved_count} API keys securely!\n\nKeys are encrypted and stored in .secrets/ directory."
    except Exception as e:
        return f"❌ Error saving keys: {e}"
def load_api_keys():
    """Load stored API keys and return masked values for display in the GUI.

    Returns a 7-tuple: masked HF / OpenAI / Anthropic / W&B / RunPod keys,
    a JSON string of masked custom keys, and a status message.
    """
    try:
        keys = secure_config.load_keys()

        # Masked values for the five individual provider fields.
        masked = [
            secure_config.get_masked_key(name) or ""
            for name in (
                "HUGGINGFACE_TOKEN",
                "OPENAI_API_KEY",
                "ANTHROPIC_API_KEY",
                "WANDB_API_KEY",
                "RUNPOD_API_KEY",
            )
        ]

        # Anything that is not one of the standard names counts as "custom".
        standard_keys = {"HUGGINGFACE_TOKEN", "HF_TOKEN", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "WANDB_API_KEY", "RUNPOD_API_KEY"}
        custom_keys = {k: secure_config.get_masked_key(k) for k in keys.keys() if k not in standard_keys}
        custom_json = json.dumps(custom_keys, indent=2) if custom_keys else ""

        status = f"πŸ“Š **Loaded {len(keys)} API keys**\n\n"
        if keys:
            status += "Keys are encrypted and loaded into environment.\n"
            status += "Masked keys shown for security."
        else:
            status += "⚠️ No API keys found. Please add your keys below."

        return (*masked, custom_json, status)
    except Exception as e:
        return "", "", "", "", "", "", f"❌ Error loading keys: {e}"
def clear_api_keys():
    """Delete every stored API key and blank out all six key fields in the GUI."""
    blank_fields = ("",) * 6
    try:
        secure_config.delete_keys()
    except Exception as e:
        return (f"❌ Error clearing keys: {e}",) + blank_fields
    return ("βœ… All API keys cleared successfully!",) + blank_fields
def check_required_keys():
    """Return (ok, message); ok is True only when a HuggingFace token is stored."""
    if secure_config.get_key("HUGGINGFACE_TOKEN"):
        return True, "βœ… Required API keys are set"
    return False, "⚠️ HuggingFace token required! Set it in the Settings tab."
# Synthetic Data Generation Functions
def generate_synthetic_data(api_provider, num_examples, difficulty, use_scenarios, selected_topics):
    """Generate synthetic Q&A training data via an LLM API and append it to the dataset file.

    Args:
        api_provider: "openai" or "anthropic" — selects which stored API key is required.
        num_examples: number of raw examples to request from the generator.
        difficulty: difficulty label forwarded to the generator (non-scenario mode only).
        use_scenarios: when truthy, use scenario-based generation (ignores topics/difficulty).
        selected_topics: optional comma-separated topic list (non-scenario mode only).

    Returns:
        (log_markdown, short_status) tuple for the GUI.
    """
    try:
        # Check the relevant API key exists before doing any work.
        if api_provider == "openai":
            api_key = secure_config.get_key("OPENAI_API_KEY")
            if not api_key:
                return "❌ OpenAI API key not found! Add it in the Settings tab.", ""
        elif api_provider == "anthropic":
            api_key = secure_config.get_key("ANTHROPIC_API_KEY")
            if not api_key:
                return "❌ Anthropic API key not found! Add it in the Settings tab.", ""
        else:
            return "❌ Invalid API provider", ""
        log = f"πŸ€– **SYNTHETIC DATA GENERATION**\n\n"
        log += f"Provider: {api_provider.upper()}\n"
        log += f"Examples: {num_examples}\n"
        log += f"Difficulty: {difficulty}\n"
        log += f"Scenarios: {use_scenarios}\n\n"
        # Initialize generator (presumably reads its API key from config/env — confirm).
        generator = SyntheticDataGenerator(api_provider=api_provider)
        # Parse selected topics (comma-separated free text from the GUI).
        topics_list = None
        if selected_topics and selected_topics.strip():
            topics_list = [t.strip() for t in selected_topics.split(",")]
            log += f"Custom topics: {topics_list}\n\n"
        log += "πŸš€ Starting generation...\n\n"
        # Generate data — scenario mode ignores topics/difficulty.
        if use_scenarios:
            generated_data = generator.generate_with_scenarios(num_examples=num_examples)
        else:
            generated_data = generator.generate_examples(
                num_examples=num_examples,
                topics=topics_list,
                difficulty=difficulty
            )
        if not generated_data:
            return log + "\n❌ No data generated. Check API keys and try again.", ""
        log += f"\nβœ… Generated {len(generated_data)} raw examples!\n\n"
        # Quality validation: filter out malformed/low-quality examples.
        log += "πŸ” **QUALITY VALIDATION**\n\n"
        validator = QualityValidator()
        validation_results = validator.validate_batch(generated_data)
        log += f"Valid: {validation_results['valid']}/{validation_results['total']} "
        log += f"({validation_results['valid']/validation_results['total']*100:.1f}%)\n"
        if validation_results['invalid'] > 0:
            log += f"⚠️ Filtered out {validation_results['invalid']} low-quality examples\n\n"
            log += "**Common Issues:**\n"
            # Summarise the three most frequent validation issues.
            from collections import Counter
            issue_counter = Counter(validation_results['issues'])
            for issue, count in issue_counter.most_common(3):
                log += f" - {issue}: {count}x\n"
            log += "\n"
        else:
            log += "βœ… All examples passed quality checks!\n\n"
        # Check for duplicates reported by the validator.
        if validation_results.get('duplicates'):
            log += f"⚠️ Found {len(validation_results['duplicates'])} duplicate questions (removed)\n\n"
        # Use only valid examples from here on.
        valid_data = validation_results['valid_examples']
        if not valid_data:
            return log + "\n❌ No valid data after quality filtering. Try again with different settings.", ""
        log += f"πŸ“Š **Final Count:** {len(valid_data)} high-quality examples\n\n"
        # Calculate average quality score (0-100 scale per example).
        avg_score = sum(validator.get_quality_score(ex) for ex in valid_data) / len(valid_data)
        log += f"⭐ **Average Quality Score:** {avg_score:.1f}/100\n\n"
        # Load existing data so new examples are appended, never overwritten.
        data_path = "data/sample_financial_advisor_data.json"
        if Path(data_path).exists():
            with open(data_path, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
        else:
            existing_data = []
        # Combine and save
        combined_data = existing_data + valid_data
        Path(data_path).parent.mkdir(parents=True, exist_ok=True)
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(combined_data, f, indent=2, ensure_ascii=False)
        log += f"πŸ’Ύ Saved to training data!\n"
        log += f"Total training examples: {len(combined_data)}\n\n"
        # Show preview of the first three kept examples with their scores.
        log += "**Sample Generated Q&A:**\n\n"
        for i, example in enumerate(valid_data[:3], 1):
            quality_score = validator.get_quality_score(example)
            log += f"{i}. [Quality: {quality_score:.0f}/100]\n"
            log += f" Q: {example['instruction']}\n"
            log += f" A: {example['output'][:150]}...\n\n"
        return log, f"βœ… Generated {len(valid_data)} high-quality examples! Total: {len(combined_data)}"
    except Exception as e:
        # Surface the full traceback in the GUI — easier to debug API failures.
        import traceback
        error_details = traceback.format_exc()
        return f"❌ Error generating data: {e}\n\n{error_details}", f"❌ Error: {e}"
def get_available_topics():
    """Return the supported financial topics as a bulleted, newline-joined string."""
    topic_catalog = (
        "Retirement Planning", "Investment Strategies", "Tax Planning",
        "Debt Management", "Emergency Funds", "Budgeting", "Insurance",
        "Estate Planning", "College Savings", "Real Estate", "Stock Market",
        "Bonds and Fixed Income", "Mutual Funds and ETFs", "Cryptocurrency",
        "Financial Independence", "Side Hustles", "Credit Scores",
        "Mortgages", "Small Business Finance", "Risk Management",
    )
    return "\n".join("β€’ " + name for name in topic_catalog)
# HuggingFace Dataset Loading Functions
def list_hf_datasets():
    """Return a markdown summary of the curated HuggingFace datasets."""
    loader = HuggingFaceDatasetLoader()
    sections = ["πŸ“¦ **AVAILABLE HUGGINGFACE DATASETS**\n\n"]
    for entry in loader.list_available_datasets():
        sections.append(
            f"**{entry['name']}**\n"
            f" Path: {entry['path']}\n"
            f" Type: {entry['type']}\n"
            f" Description: {entry['description']}\n\n"
        )
    return "".join(sections)
def preview_hf_dataset(dataset_path):
    """Return a short preview of *dataset_path*, or guidance when blank/failing."""
    if not (dataset_path and dataset_path.strip()):
        return "⚠️ Please enter a dataset path (e.g., mitulshah/transaction-categorization)"
    try:
        return HuggingFaceDatasetLoader().preview_dataset(dataset_path, num_examples=3)
    except Exception as e:
        return f"❌ Error previewing dataset: {e}\n\nMake sure:\n- Dataset path is correct\n- You're logged in to HuggingFace (run: huggingface-cli login)\n- Dataset is publicly accessible"
def load_hf_dataset(dataset_path, dataset_name, max_examples, split):
    """Load a HuggingFace dataset, quality-filter it, and append it to the training data.

    Args:
        dataset_path: custom hub path (used only when no known dataset is selected).
        dataset_name: name of a curated dataset, or "Custom Path".
        max_examples: optional cap on loaded examples (string/number from the GUI).
        split: dataset split to load (e.g. "train").

    Returns:
        (log_markdown, short_status) tuple for the GUI.
    """
    try:
        log = "πŸ“₯ **LOADING HUGGINGFACE DATASET**\n\n"
        # A known (curated) dataset name takes priority over a custom path.
        if dataset_name and dataset_name != "Custom Path":
            log += f"Loading known dataset: {dataset_name}\n\n"
            loader = HuggingFaceDatasetLoader()
            dataset_data = loader.load_dataset_by_name(
                dataset_name,
                split=split,
                max_examples=int(max_examples) if max_examples else None
            )
        elif dataset_path and dataset_path.strip():
            log += f"Loading custom dataset: {dataset_path}\n\n"
            loader = HuggingFaceDatasetLoader()
            dataset_data = loader.load_dataset_by_path(
                dataset_path,
                dataset_type="auto",  # let the loader infer the schema
                split=split,
                max_examples=int(max_examples) if max_examples else None
            )
        else:
            return "❌ Please select a dataset or enter a custom path", ""
        if not dataset_data:
            return log + "\n❌ No data loaded. Check dataset path and try again.", ""
        log += f"βœ… Loaded {len(dataset_data)} examples from HuggingFace\n\n"
        # Quality validation — same filter used for synthetic data.
        log += "πŸ” **QUALITY VALIDATION**\n\n"
        validator = QualityValidator()
        validation_results = validator.validate_batch(dataset_data)
        log += f"Valid: {validation_results['valid']}/{validation_results['total']} "
        log += f"({validation_results['valid']/validation_results['total']*100:.1f}%)\n"
        if validation_results['invalid'] > 0:
            log += f"⚠️ Filtered out {validation_results['invalid']} low-quality examples\n"
        # Use only valid examples from here on.
        valid_data = validation_results['valid_examples']
        if not valid_data:
            return log + "\n❌ No valid data after quality filtering.", ""
        log += f"\nπŸ“Š **Final Count:** {len(valid_data)} high-quality examples\n\n"
        # Calculate average quality score (0-100 scale per example).
        avg_score = sum(validator.get_quality_score(ex) for ex in valid_data) / len(valid_data)
        log += f"⭐ **Average Quality Score:** {avg_score:.1f}/100\n\n"
        # Load existing data so the new examples are appended, not overwritten.
        data_path = "data/sample_financial_advisor_data.json"
        if Path(data_path).exists():
            with open(data_path, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
        else:
            existing_data = []
        # Combine and save
        combined_data = existing_data + valid_data
        Path(data_path).parent.mkdir(parents=True, exist_ok=True)
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(combined_data, f, indent=2, ensure_ascii=False)
        log += f"πŸ’Ύ Added to training data!\n"
        log += f"Total training examples: {len(combined_data)}\n\n"
        # Show preview of the first three kept examples with their scores.
        log += "**Sample Loaded Q&A:**\n\n"
        for i, example in enumerate(valid_data[:3], 1):
            quality_score = validator.get_quality_score(example)
            log += f"{i}. [Quality: {quality_score:.0f}/100]\n"
            log += f" Q: {example['instruction'][:100]}...\n"
            log += f" A: {example['output'][:150]}...\n\n"
        return log, f"βœ… Loaded {len(valid_data)} examples from HuggingFace! Total: {len(combined_data)}"
    except Exception as e:
        # Surface the full traceback in the GUI for debugging.
        import traceback
        error_details = traceback.format_exc()
        return f"❌ Error loading dataset: {e}\n\n{error_details}", f"❌ Error: {e}"
def load_training_data():
    """Load the training set from disk for display.

    Returns:
        (preview_markdown, example_count, raw_json_string, readiness_status).
    """
    data_path = "data/sample_financial_advisor_data.json"
    if not Path(data_path).exists():
        return ("❌ No training data found!", 0, "",
                "❌ **Not Ready**: No training data found. Add examples above or use Synthetic Data/HuggingFace tabs.")
    try:
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        count = len(data)
        preview = f"πŸ“Š **Total Q&A Pairs:** {count}\n\n"
        preview += "**Sample Questions:**\n"
        for idx, item in enumerate(data[:3], start=1):
            preview += f"\n{idx}. {item['instruction']}\n"

        # Readiness verdict, tiered by dataset size.
        if count < 20:
            status = f"⚠️ **Warning**: Only {count} examples. Recommended minimum: 50-100 for good results."
        elif count < 50:
            status = f"βœ… **Ready**: {count} examples loaded. Consider adding more for better results (recommended: 100+)."
        else:
            status = f"βœ… **Ready for Training**: {count} examples loaded and ready!\n\n**Next Step**: Go to **βš™οΈ Training** tab and click 'Start Training'"

        return preview, count, json.dumps(data, indent=2), status
    except Exception as e:
        return f"❌ Error loading data: {e}", 0, "", "❌ Error loading training data"
def add_training_example(question, answer, context=""):
    """Append a new Q&A training example to the dataset file.

    Args:
        question: the user question (stored as "instruction").
        answer: the advisor answer (stored as "output").
        context: optional extra context (stored as "input").

    Returns:
        (message, total_example_count) tuple for the GUI.
    """
    data_path = "data/sample_financial_advisor_data.json"
    try:
        # Create new example
        new_example = {
            "instruction": question,
            "input": context,
            "output": answer
        }
        # Validate — the example is saved either way; validation only shapes
        # the message shown to the user.
        validator = QualityValidator()
        is_valid, issues = validator.validate_example(new_example)
        # Load existing data
        if Path(data_path).exists():
            with open(data_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        else:
            data = []
        data.append(new_example)
        # Save
        Path(data_path).parent.mkdir(parents=True, exist_ok=True)
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        if not is_valid:
            # Bug fix: the original returned *before* saving, so the message
            # "Example was still added" was false and the count was 0. The
            # example is now actually persisted, matching the message.
            issues_text = "\n".join(f" - {issue}" for issue in issues)
            return f"⚠️ Quality issues found:\n{issues_text}\n\nExample was still added, but consider improving it.", len(data)
        quality_score = validator.get_quality_score(new_example)
        return f"βœ… Added! Quality Score: {quality_score:.0f}/100\nTotal examples: {len(data)}", len(data)
    except Exception as e:
        return f"❌ Error: {e}", 0
def validate_training_data():
    """Validate all training data and return a text report (with auto-deduplication).

    Side effects: when duplicates are found, the original file is backed up to
    ``*_backup.json`` and the deduplicated data is written back in place.

    Returns:
        A plain-text report string, or an error message on failure.
    """
    data_path = "data/sample_financial_advisor_data.json"
    if not Path(data_path).exists():
        return "❌ No training data found!"
    try:
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        original_count = len(data)
        validator = QualityValidator()
        # First, check for duplicates and auto-remove them.
        deduplicated_data, num_duplicates = validator.remove_duplicates(data)
        # Save deduplicated data if duplicates were found.
        if num_duplicates > 0:
            # Create backup of the original (pre-dedup) data first.
            backup_path = data_path.replace('.json', '_backup.json')
            with open(backup_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            # Save deduplicated version over the original path.
            with open(data_path, 'w', encoding='utf-8') as f:
                json.dump(deduplicated_data, f, indent=2, ensure_ascii=False)
            data = deduplicated_data
        # Now validate the deduplicated data.
        # NOTE(review): if the file contains an empty list, 'total' is 0 and
        # the percentage lines below divide by zero — worth guarding upstream.
        validation_results = validator.validate_batch(data)
        # Generate report
        report = "=" * 60 + "\n"
        report += "QUALITY VALIDATION REPORT (WITH AUTO-DEDUPLICATION)\n"
        report += "=" * 60 + "\n\n"
        # Deduplication results
        if num_duplicates > 0:
            report += "🧹 AUTO-DEDUPLICATION COMPLETE!\n"
            report += "-" * 60 + "\n"
            report += f"Original Examples: {original_count}\n"
            report += f"Duplicates Removed: {num_duplicates}\n"
            report += f"Unique Examples: {len(data)}\n"
            report += f"Backup saved to: {backup_path}\n"
            report += "\n" + "=" * 60 + "\n\n"
        else:
            report += "βœ… NO DUPLICATES FOUND\n"
            report += "-" * 60 + "\n"
            report += f"All {len(data)} examples are unique!\n"
            report += "\n" + "=" * 60 + "\n\n"
        # Quality validation results (counts and percentages).
        report += f"Total Examples: {validation_results['total']}\n"
        report += f"Valid: {validation_results['valid']} ({validation_results['valid']/validation_results['total']*100:.1f}%)\n"
        report += f"Invalid: {validation_results['invalid']} ({validation_results['invalid']/validation_results['total']*100:.1f}%)\n\n"
        if validation_results['invalid'] > 0:
            report += "-" * 60 + "\n"
            report += "QUALITY ISSUES FOUND:\n"
            report += "-" * 60 + "\n"
            # Count issue types so the most frequent problems surface first.
            from collections import Counter
            issue_counter = Counter(validation_results['issues'])
            for issue, count in issue_counter.most_common():
                report += f" - {issue}: {count} occurrences\n"
            report += "\n"
        # Add quality score distribution over the (deduplicated) data.
        report += "\nπŸ“Š QUALITY SCORE DISTRIBUTION:\n"
        report += "-" * 60 + "\n"
        scores = [validator.get_quality_score(ex) for ex in data]
        avg_score = sum(scores) / len(scores)
        min_score = min(scores)
        max_score = max(scores)
        report += f"Average Score: {avg_score:.1f}/100\n"
        report += f"Range: {min_score:.0f} - {max_score:.0f}\n\n"
        # Score distribution buckets.
        excellent = sum(1 for s in scores if s >= 90)
        good = sum(1 for s in scores if 75 <= s < 90)
        fair = sum(1 for s in scores if 60 <= s < 75)
        poor = sum(1 for s in scores if s < 60)
        report += f"Excellent (90+): {excellent}\n"
        report += f"Good (75-89): {good}\n"
        report += f"Fair (60-74): {fair}\n"
        report += f"Poor (<60): {poor}\n\n"
        # Final status banner, tiered by valid ratio.
        report += "=" * 60 + "\n"
        if validation_results['valid'] == validation_results['total'] and num_duplicates == 0:
            report += "βœ… PERFECT! All data is unique and high quality!\n"
        elif validation_results['valid'] == validation_results['total']:
            report += f"βœ… GOOD! All unique data passed quality checks!\n"
        elif validation_results['valid'] / validation_results['total'] >= 0.9:
            report += "βœ… GOOD QUALITY (90%+ valid)\n"
        elif validation_results['valid'] / validation_results['total'] >= 0.7:
            report += "⚠️ ACCEPTABLE QUALITY (70-90% valid)\n"
        else:
            report += "❌ POOR QUALITY (<70% valid)\n"
        report += "=" * 60 + "\n"
        return report
    except Exception as e:
        return f"❌ Error validating data: {e}"
def remove_duplicates_from_data():
    """Remove duplicate questions from the training data file.

    Side effects: writes the deduplicated data back to the dataset file and
    backs up the original to ``*_with_duplicates_backup.json``.

    Returns:
        (message, example_count, raw_json_string, readiness_status) for the GUI.
    """
    data_path = "data/sample_financial_advisor_data.json"
    if not Path(data_path).exists():
        return "❌ No training data found!", 0, "", ""
    try:
        # Load data
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        original_count = len(data)
        # Remove duplicates
        validator = QualityValidator()
        deduplicated_data, num_removed = validator.remove_duplicates(data)
        if num_removed == 0:
            # Nothing to do — report the data as already clean (no file writes).
            return (
                "βœ… No duplicates found! Your data is already clean.",
                len(deduplicated_data),
                json.dumps(deduplicated_data, indent=2),
                f"βœ… **Ready for Training**: {len(deduplicated_data)} unique examples!\n\n**Next Step**: Go to **βš™οΈ Training** tab"
            )
        # Save deduplicated data over the original path.
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(deduplicated_data, f, indent=2, ensure_ascii=False)
        # Create backup of original — safe after the overwrite because the
        # pre-dedup data is still held in memory as `data`.
        backup_path = data_path.replace('.json', '_with_duplicates_backup.json')
        with open(backup_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        message = f"""βœ… **Deduplication Complete!**
**Removed:** {num_removed} duplicate questions
**Kept:** {len(deduplicated_data)} unique examples
**Original:** {original_count} total examples
**Backup saved to:** `{backup_path}`
Your training data now contains only unique questions. This will improve model quality and prevent overfitting.
"""
        # Prepare preview of the first three remaining examples.
        preview = f"πŸ“Š **Total Q&A Pairs:** {len(deduplicated_data)}\n\n"
        preview += "**Sample Questions:**\n"
        for i, item in enumerate(deduplicated_data[:3]):
            preview += f"\n{i+1}. {item['instruction']}\n"
        status = f"βœ… **Ready for Training**: {len(deduplicated_data)} unique examples loaded!\n\n**Next Step**: Go to **βš™οΈ Training** tab"
        return message, len(deduplicated_data), json.dumps(deduplicated_data, indent=2), status
    except Exception as e:
        return f"❌ Error removing duplicates: {str(e)}", 0, "", ""
# Model Selection Functions
def get_model_info(model_name):
    """Look up *model_name* in the registry, remember it as the active
    selection, and return a markdown description of the model."""
    global selected_model_id

    model_id = model_registry.get_model_id_from_name(model_name)
    if not model_id:
        return "❌ Model not found"
    selected_model_id = model_id

    model = model_registry.get_model(model_id)
    sections = [
        f"# {model.name}\n\n",
        f"**Type:** {model.type.upper()}\n",
        f"**Path:** `{model.path}`\n",
        f"**Size:** {model.size}\n",
        f"**VRAM Required:** {model.vram_required}\n",
        f"**Context Length:** {model.context_length:,} tokens\n",
        f"**Recommended Quantization:** {model.quantization}\n",
        f"**Recommended LoRA Rank:** {model.lora_rank}\n\n",
        f"**Description:**\n{model.description}\n\n",
        f"**Tags:** {', '.join(model.tags)}\n\n",
    ]
    # Availability check appended last so the user sees it after the specs.
    is_valid, message = model_registry.validate_model_selection(model_id)
    sections.append(f"\n**Status:** {message}\n")
    return "".join(sections)
def check_ollama_status():
    """Report whether Ollama is reachable and which models are installed."""
    client = OllamaClient()
    parts = ["# Ollama Status\n\n"]

    if not client.is_available():
        # Server unreachable — show start-up instructions.
        parts.append("❌ **Ollama is not running**\n\n")
        parts.append("Start Ollama with:\n")
        parts.append("```bash\n")
        parts.append("ollama serve\n")
        parts.append("```\n\n")
        parts.append("Or download from: https://ollama.com\n")
        return "".join(parts)

    parts.append("βœ… **Ollama is running**\n\n")
    models = client.list_models()
    if models:
        parts.append(f"**Installed Models ({len(models)}):**\n\n")
        for entry in models:
            label = entry.get("name", "unknown")
            gigabytes = entry.get("size", 0) / (1024**3)  # Convert to GB
            parts.append(f"- `{label}` ({gigabytes:.1f}GB)\n")
    else:
        parts.append("⚠️ No models installed\n\n")
        parts.append("Install models with: `ollama pull <model>`\n")
    return "".join(parts)
def start_cloud_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum):
    """Start cloud training on RunPod.

    Provisions a GPU pod, relies on one_click_deploy for setup/data sync,
    submits a LoRA training job, and returns a markdown progress log.  Cost
    accrues per hour until the pod is terminated from the RunPod tab.

    Returns:
        (log_markdown, short_status) tuple for the GUI.
    """
    global training_status, runpod_manager, selected_model_id
    if training_status["is_training"]:
        return "⚠️ Training already in progress!", ""
    # Check RunPod API key
    runpod_key = secure_config.get_key("RUNPOD_API_KEY")
    if not runpod_key:
        return "❌ RunPod API key required for cloud training! Add it in Settings tab.", "❌ Missing RunPod API key"
    # Check HF key
    keys_ok, keys_msg = check_required_keys()
    if not keys_ok:
        return keys_msg, "❌ Missing API keys"
    try:
        training_status["is_training"] = True
        log = "☁️ **CLOUD TRAINING ON RUNPOD**\n\n"
        # Get selected model
        model = model_registry.get_model(selected_model_id)
        if not model:
            training_status["is_training"] = False
            return "❌ No model selected!", ""
        log += f"πŸ“‹ Configuration:\n"
        log += f"- Model: {model.name}\n"
        log += f"- LoRA Rank: {lora_rank}\n"
        log += f"- Epochs: {num_epochs}\n"
        log += f"- Mode: Cloud (RunPod)\n\n"
        # Load training data (only used here for the example count in the log;
        # the actual upload is handled by one_click_deploy's data sync).
        data_path = "data/sample_financial_advisor_data.json"
        if not Path(data_path).exists():
            training_status["is_training"] = False
            return "❌ No training data found!", ""
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        log += f"βœ… Loaded {len(data)} training examples\n\n"
        # Initialize RunPod manager once and reuse it across calls.
        if not runpod_manager:
            runpod_manager = RunPodManager(runpod_key)
        log += "πŸš€ **STEP 1: Creating RunPod GPU Instance**\n"
        log += "⏳ Finding available GPU (RTX 4090 recommended)...\n\n"
        # Create pod config.
        # NOTE(review): DeploymentConfig is already imported at module level;
        # this local import is redundant but harmless.
        from runpod_manager import DeploymentConfig
        config = DeploymentConfig(
            pod_name=f"aura-training-{datetime.now().strftime('%Y%m%d-%H%M')}",
            gpu_type="NVIDIA GeForce RTX 4090",
            storage_gb=50,
            sync_data=True,
            auto_setup=True
        )
        # Deploy pod (presumably blocks until the pod is up — confirm in manager).
        pod_info = runpod_manager.one_click_deploy(config=config)
        log += f"βœ… Pod created: {pod_info.id}\n"
        log += f"πŸ“ GPU: {pod_info.gpu_type}\n"
        log += f"πŸ’° Cost: ${pod_info.cost_per_hr:.2f}/hour\n\n"
        log += "πŸš€ **STEP 2: Setting Up Training Environment**\n"
        log += "⏳ Installing dependencies on cloud GPU...\n\n"
        # Environment is auto-setup by one_click_deploy
        log += "βœ… Environment ready\n\n"
        log += "πŸš€ **STEP 3: Uploading Training Data**\n"
        log += f"⏳ Uploading {len(data)} examples to pod...\n\n"
        # Data already synced by one_click_deploy if sync_data=True
        log += "βœ… Data uploaded\n\n"
        log += "πŸš€ **STEP 4: Starting Training Job**\n"
        log += f"⏳ Training {model.name} with LoRA...\n"
        log += f"⏱️ Estimated time: {num_epochs * 30}-{num_epochs * 60} minutes\n\n"
        # Create training config (paths refer to the pod's /workspace volume).
        from runpod_manager import TrainingConfig
        train_config = TrainingConfig(
            model_name=model.path if model.type != "ollama" else None,
            dataset_path="/workspace/data/sample_financial_advisor_data.json",
            output_dir="/workspace/models/financial_advisor",
            lora_rank=int(lora_rank),
            learning_rate=float(learning_rate),
            num_epochs=int(num_epochs),
            batch_size=int(batch_size),
            gradient_accumulation_steps=int(grad_accum)
        )
        # Submit training job. NOTE(review): job_result is currently unused —
        # no polling/monitoring happens here (see note below).
        job_result = runpod_manager.submit_training_job(pod_info.id, train_config)
        log += "βœ… Training started!\n\n"
        log += "πŸ“Š **MONITORING TRAINING**\n"
        log += "⏳ Training in progress... (this will take a while)\n\n"
        # Note: In real implementation, we'd poll for completion
        # For now, return success and let user check manually
        log += f"πŸ”— **POD ACCESS**\n"
        log += f"SSH: ssh root@{pod_info.ip} -p {pod_info.ssh_port}\n"
        log += f"GUI: https://{pod_info.id}-7860.proxy.runpod.net\n\n"
        log += "⚠️ **IMPORTANT:**\n"
        log += "- Training is running on cloud GPU\n"
        log += f"- Costing ${pod_info.cost_per_hr:.2f}/hour\n"
        log += "- Go to RunPod tab to monitor or terminate\n"
        log += "- Model will be saved to pod storage\n"
        # Flag is cleared here because the job keeps running remotely; the GUI
        # is free to issue other actions while the pod trains.
        training_status["is_training"] = False
        return log, "βœ… Cloud training started!"
    except Exception as e:
        training_status["is_training"] = False
        import traceback
        error_details = traceback.format_exc()
        return f"❌ Error: {str(e)}\n\n{error_details}", f"❌ Error: {e}"
def start_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum, training_mode):
    """Start the training process (local or cloud).

    Args:
        lora_rank: LoRA rank r (alpha is derived as 2*r below).
        learning_rate: optimizer learning rate.
        num_epochs: number of training epochs.
        batch_size: per-device batch size.
        grad_accum: gradient-accumulation steps.
        training_mode: "Cloud GPU (RunPod)" routes to start_cloud_training();
            anything else trains locally on this machine.

    Returns:
        (log_markdown, short_status) tuple for the GUI.
    """
    global training_status, trainer_instance, selected_model_id
    # Route to cloud or local training
    if training_mode == "Cloud GPU (RunPod)":
        return start_cloud_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum)
    # Local training below
    if training_status["is_training"]:
        return "⚠️ Training already in progress!", ""
    # Check API keys first
    keys_ok, keys_msg = check_required_keys()
    if not keys_ok:
        return keys_msg, "❌ Missing API keys. Go to Settings tab."
    # Get selected model
    model = model_registry.get_model(selected_model_id)
    if not model:
        return "❌ No model selected!", ""
    # Validate model selection (availability etc.).
    is_valid, message = model_registry.validate_model_selection(selected_model_id)
    if not is_valid:
        return f"❌ Model validation failed: {message}", ""
    # Get model path (convert Ollama to HF if needed — local LoRA training
    # needs HuggingFace weights, not an Ollama model tag).
    if model.type == "ollama":
        from ollama_integration import get_hf_model_for_ollama
        model_path = get_hf_model_for_ollama(model.path)
        if not model_path:
            return f"❌ Cannot train with Ollama model {model.path}. No HuggingFace equivalent found.", ""
        log_model_name = f"{model.name} (using HF: {model_path})"
    else:
        model_path = model.path
        log_model_name = model.name
    try:
        training_status["is_training"] = True
        training_status["current_epoch"] = 0
        training_status["total_epochs"] = num_epochs
        training_status["logs"] = []
        log = "πŸš€ **STARTING TRAINING**\n\n"
        log += f"βš™οΈ Configuration:\n"
        log += f"- Model: {log_model_name}\n"
        log += f"- LoRA Rank: {lora_rank}\n"
        log += f"- Learning Rate: {learning_rate}\n"
        log += f"- Epochs: {num_epochs}\n"
        log += f"- Batch Size: {batch_size}\n"
        log += f"- Gradient Accumulation: {grad_accum}\n\n"
        # NOTE(review): each append below stores the *entire* growing log, so
        # "\n".join(logs) at the end repeats earlier content — worth fixing.
        training_status["logs"].append(log)
        # Load data
        log += "πŸ“‚ Loading training data...\n"
        data_path = "data/sample_financial_advisor_data.json"
        if not Path(data_path).exists():
            training_status["is_training"] = False
            return "❌ No training data found! Add some Q&A pairs first.", ""
        collector = JSONDataCollector()
        data = collector.collect(data_path)
        log += f"βœ… Loaded {len(data)} examples\n\n"
        training_status["logs"].append(log)
        # Split data into train/val/test partitions.
        log += "πŸ“Š Splitting dataset...\n"
        builder = DatasetBuilder()
        train_data, val_data, test_data = builder.train_test_split(data)
        log += f"βœ… Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}\n\n"
        training_status["logs"].append(log)
        # Configure LoRA (alpha = 2*rank is a common heuristic).
        log += "βš™οΈ Configuring LoRA...\n"
        lora_config = LoRAConfig(
            r=int(lora_rank),
            lora_alpha=int(lora_rank * 2),
            lora_dropout=0.05,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            bias="none",
            task_type="CAUSAL_LM"
        )
        log += f"βœ… LoRA configured\n\n"
        training_status["logs"].append(log)
        # Initialize trainer
        log += "πŸ€– Initializing trainer...\n"
        trainer_instance = LoRATrainer(
            model_name=model_path,
            lora_config=lora_config,
            output_dir="models/financial_advisor"
        )
        log += "βœ… Trainer ready\n\n"
        training_status["logs"].append(log)
        # Load model (4-bit quantized to fit consumer GPUs).
        log += f"πŸ“₯ Loading {model.name} (this will take a few minutes)...\n"
        log += f"⚠️ First time will download {model.size}\n\n"
        training_status["logs"].append(log)
        trainer_instance.load_model(use_4bit=True)
        log += "βœ… Model loaded successfully!\n\n"
        training_status["logs"].append(log)
        # Show trainable-parameter stats reported by the trainer.
        params = trainer_instance.get_trainable_parameters()
        log += f"πŸ“Š **Parameter Efficiency:**\n"
        log += f"- Total: {params['total']:,}\n"
        log += f"- Trainable: {params['trainable']:,}\n"
        log += f"- Percentage: {params['percentage']:.2f}%\n\n"
        training_status["logs"].append(log)
        # Prepare (tokenize) datasets.
        log += "πŸ”„ Preparing datasets...\n"
        train_dataset, val_dataset = trainer_instance.prepare_dataset(train_data, val_data)
        log += f"βœ… Datasets tokenized and ready\n\n"
        training_status["logs"].append(log)
        # Start training
        log += "🎯 **TRAINING STARTED**\n\n"
        log += f"This will take approximately {num_epochs * 30}-{num_epochs * 60} minutes\n"
        log += "You can monitor progress in TensorBoard:\n"
        log += "`tensorboard --logdir models/financial_advisor/logs`\n\n"
        training_status["logs"].append(log)
        # Train (blocking call — this will take a while).
        history = trainer_instance.train(
            train_dataset=train_dataset,
            val_dataset=val_dataset,
            num_epochs=int(num_epochs),
            batch_size=int(batch_size),
            learning_rate=float(learning_rate),
            gradient_accumulation_steps=int(grad_accum)
        )
        log += "\nβœ… **TRAINING COMPLETE!**\n\n"
        log += f"πŸ’Ύ Model saved to: models/financial_advisor/final_model\n"
        log += f"πŸ“Š Logs saved to: models/financial_advisor/logs\n\n"
        training_status["logs"].append(log)
        # Evaluate on the held-out test set, if there is one.
        if len(test_data) > 0:
            log += "πŸ” **EVALUATING ON TEST SET**\n\n"
            log += f"Running evaluation on {len(test_data)} test examples...\n"
            training_status["logs"].append(log)
            try:
                # Run evaluation (limit to 50 samples for speed).
                eval_results = trainer_instance.evaluate_on_test_set(
                    test_data=test_data,
                    model_name=log_model_name,
                    dataset_info=f"Financial Advisor Training - {len(train_data)} train examples",
                    num_samples=min(50, len(test_data))
                )
                log += "\nβœ… **EVALUATION COMPLETE**\n\n"
                log += "**Performance Metrics:**\n"
                metrics = eval_results['metrics']
                log += f"- Average Response Length: {metrics['avg_response_length']:.1f} words\n"
                log += f"- Average Generation Time: {metrics['avg_generation_time']:.2f}s\n"
                log += f"- Throughput: {metrics['examples_per_second']:.2f} examples/sec\n\n"
                log += "πŸ“Š Evaluation report saved to: models/financial_advisor/evaluation_results/\n"
                training_status["logs"].append(log)
            except Exception as eval_error:
                # Evaluation failures are non-fatal — the model is already saved.
                log += f"\n⚠️ Evaluation error (training still succeeded): {eval_error}\n"
                training_status["logs"].append(log)
        training_status["is_training"] = False
        return "\n".join(training_status["logs"]), "βœ… Training & Evaluation Complete!"
    except Exception as e:
        training_status["is_training"] = False
        error_msg = f"\n\n❌ **ERROR:** {str(e)}\n\n"
        error_msg += "**Common fixes:**\n"
        error_msg += "- Out of memory: Reduce batch_size or increase grad_accum\n"
        error_msg += "- CUDA error: Make sure you have a GPU with 18GB+ VRAM\n"
        error_msg += "- Import error: Run `pip install bitsandbytes`\n"
        training_status["logs"].append(error_msg)
        return "\n".join(training_status["logs"]), f"❌ Error: {e}"
def test_financial_advisor(question):
    """Run a single question through the currently loaded model.

    Returns the generated advisor reply, or an error string if no model
    has been loaded/trained yet or generation fails.
    """
    global trainer_instance
    if trainer_instance is None:
        return "❌ No model loaded. Train a model first or load an existing one."
    try:
        system_prompt = (
            "You are an expert financial advisor with deep knowledge of personal finance, "
            "investments, retirement planning, tax strategies, and wealth management."
        )
        # Build the ChatML-style prompt the model was trained on.
        prompt = (
            f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
            f"<|im_start|>user\n{question}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )
        tokenizer = trainer_instance.tokenizer
        model = trainer_instance.peft_model
        # Tokenize on the same device as the (PEFT-wrapped) model.
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        # Sample a response (temperature/top-p sampling, up to 512 new tokens).
        generated = model.generate(
            **encoded,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        # Keep special tokens so we can locate the assistant turn below.
        decoded = tokenizer.decode(generated[0], skip_special_tokens=False)
        marker = "<|im_start|>assistant"
        if marker in decoded:
            decoded = decoded.split(marker)[-1].strip()
        # Strip the end-of-turn marker from the reply.
        return decoded.replace("<|im_end|>", "").strip()
    except Exception as e:
        return f"❌ Error generating response: {e}"
def load_existing_model():
    """Re-create the trainer and load a previously trained LoRA adapter.

    Resolves the base model from the registry (translating Ollama entries
    to their HuggingFace equivalents), loads it in 4-bit, then attaches
    the saved adapter. Returns a human-readable status string for the GUI.
    """
    global trainer_instance, selected_model_id
    model_path = "models/financial_advisor/final_model"
    if not Path(model_path).exists():
        return "❌ No trained model found at models/financial_advisor/final_model"
    try:
        # Look up the registry entry for the model used during training.
        entry = model_registry.get_model(selected_model_id)
        if not entry:
            return "❌ Model not found in registry"
        # Resolve the actual base-model path (Ollama needs translation to HF).
        if entry.type == "ollama":
            from ollama_integration import get_hf_model_for_ollama
            base_path = get_hf_model_for_ollama(entry.path)
            if not base_path:
                return f"❌ Cannot load Ollama model {entry.path}"
        else:
            base_path = entry.path
        # Rebuild the trainer, load the quantized base model, then the adapter.
        trainer_instance = LoRATrainer(
            model_name=base_path,
            lora_config=LoRAConfig(r=entry.lora_rank, lora_alpha=entry.lora_rank * 2),
            output_dir="models/financial_advisor"
        )
        trainer_instance.load_model(use_4bit=True)
        trainer_instance.load_adapter(model_path)
        return f"✅ Model loaded successfully! ({entry.name}) You can now test it."
    except Exception as e:
        return f"❌ Error loading model: {e}"
# Model Comparison and Evaluation Functions
def load_evaluation_history():
    """Summarize the 10 most recent evaluations as a markdown string."""
    try:
        from evaluation.model_evaluator import ModelEvaluator
        evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results")
        if not evaluator.history:
            return "❌ No evaluation history found. Train a model first to see evaluations."
        # Header, then the most recent entries first.
        parts = [
            f"📊 **EVALUATION HISTORY**\n\n",
            f"Total Evaluations: {len(evaluator.history)}\n\n",
            "=" * 70 + "\n\n",
        ]
        for i, record in enumerate(reversed(evaluator.history[-10:]), 1):
            metrics = record.get('metrics', {})
            parts.append(f"**Evaluation {len(evaluator.history) - i + 1}:**\n")
            parts.append(f"- Model: {record['model_name']}\n")
            parts.append(f"- Dataset: {record['dataset_info']}\n")
            parts.append(f"- Timestamp: {record['timestamp']}\n")
            parts.append(f"- Test Examples: {record['num_test_examples']}\n")
            parts.append(f"- Avg Response Length: {metrics.get('avg_response_length', 0):.1f} words\n")
            parts.append(f"- Avg Generation Time: {metrics.get('avg_generation_time', 0):.2f}s\n")
            parts.append(f"- Throughput: {metrics.get('examples_per_second', 0):.2f} ex/s\n")
            parts.append("\n" + "-" * 70 + "\n\n")
        return "".join(parts)
    except Exception as e:
        return f"❌ Error loading evaluation history: {e}"
def compare_models(num_models):
    """Compare the most recent `num_models` evaluations; return a report string."""
    try:
        from evaluation.model_evaluator import ModelEvaluator
        ev = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results")
        if not ev.history:
            return "❌ No evaluation history found. Train models first to compare."
        # Build the comparison over the N most recent runs and render it.
        return ev.generate_comparison_report(ev.compare_models(num_recent=int(num_models)))
    except Exception as e:
        return f"❌ Error comparing models: {e}"
def view_latest_evaluation():
    """Render a detailed report for the most recent evaluation, if any."""
    try:
        from evaluation.model_evaluator import ModelEvaluator
        ev = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results")
        latest = ev.get_latest_evaluation()
        if latest:
            return ev.generate_report(latest)
        return "❌ No evaluations found. Train a model first."
    except Exception as e:
        return f"❌ Error viewing evaluation: {e}"
# RunPod Functions
def init_runpod_manager():
    """Lazily build the global RunPodManager from the stored API key."""
    global runpod_manager
    if runpod_manager is not None:
        return "✅ RunPod manager already initialized"
    # The key is read from the encrypted secure-config store.
    api_key = secure_config.get_key("RUNPOD_API_KEY")
    if not api_key:
        return "❌ RunPod API key not found! Please add it in the Settings tab."
    try:
        runpod_manager = RunPodManager(api_key)
    except Exception as e:
        return f"❌ Error initializing RunPod manager: {e}"
    return "✅ RunPod manager initialized successfully!"
def list_runpod_pods():
    """Return a markdown listing of every pod on the RunPod account."""
    global runpod_manager
    if not runpod_manager:
        # Initialize on demand; bail out if the key is missing/invalid.
        init_result = init_runpod_manager()
        if "❌" in init_result:
            return init_result
    try:
        pods = runpod_manager.list_pods()
        if not pods:
            return "No pods found. Create a new pod to get started!"
        chunks = [f"📊 **YOUR RUNPOD PODS** ({len(pods)} total)\n\n"]
        for pod in pods:
            chunks.append(f"**{pod.name}** (ID: {pod.id[:8]}...)\n")
            chunks.append(f" Status: {pod.status}\n")
            chunks.append(f" GPU: {pod.gpu_count}x {pod.gpu_type}\n")
            chunks.append(f" Cost: ${pod.cost_per_hr:.2f}/hr\n")
            # Connection details are only meaningful for running pods.
            if pod.status == "RUNNING":
                if pod.ip and pod.ssh_port:
                    chunks.append(f" SSH: {pod.ip}:{pod.ssh_port}\n")
                if pod.port:
                    chunks.append(f" GUI: https://{pod.id}-7860.proxy.runpod.net\n")
            chunks.append("\n")
        return "".join(chunks)
    except Exception as e:
        return f"❌ Error listing pods: {e}"
def create_runpod_pod(pod_name, gpu_type, storage_gb, sync_data):
    """One-click deploy a new pod; returns (log_text, status_text) for the GUI."""
    global runpod_manager, current_pod_info, deployment_in_progress
    # Guard against double-clicks while a deployment is still running.
    if deployment_in_progress:
        return "⚠️ Deployment already in progress!", ""
    if not runpod_manager:
        init_result = init_runpod_manager()
        if "❌" in init_result:
            return init_result, ""
    deployment_in_progress = True
    log_lines = []
    try:
        # Progress messages from the deploy are collected into log_lines.
        pod_info = runpod_manager.one_click_deploy(
            config=DeploymentConfig(
                pod_name=pod_name or "aura-training-pod",
                gpu_type=gpu_type,
                storage_gb=int(storage_gb),
                sync_data=sync_data,
                auto_setup=True
            ),
            progress_callback=log_lines.append
        )
        current_pod_info = pod_info
        deployment_in_progress = False
        status = (
            f"✅ Pod created successfully!\n"
            f"ID: {pod_info.id}\n"
            f"SSH: {pod_info.ip}:{pod_info.ssh_port}\n"
            f"GUI: https://{pod_info.id}-7860.proxy.runpod.net"
        )
        return "\n".join(log_lines), status
    except Exception as e:
        # Always clear the in-progress flag so the user can retry.
        deployment_in_progress = False
        return "\n".join(log_lines) + f"\n\n❌ Error: {e}", f"❌ Deployment failed: {e}"
def stop_runpod_pod(pod_id):
    """Stop (pause) a running pod; returns a status string for the GUI."""
    global runpod_manager
    if not runpod_manager:
        init_result = init_runpod_manager()
        if "❌" in init_result:
            return init_result
    try:
        if runpod_manager.stop_pod(pod_id):
            return f"✅ Pod {pod_id} stopped successfully!"
        return f"❌ Failed to stop pod {pod_id}"
    except Exception as e:
        return f"❌ Error stopping pod: {e}"
def terminate_runpod_pod(pod_id):
    """Permanently delete a pod, clearing the cached pod info if it matches."""
    global runpod_manager, current_pod_info
    if not runpod_manager:
        init_result = init_runpod_manager()
        if "❌" in init_result:
            return init_result
    try:
        if not runpod_manager.terminate_pod(pod_id):
            return f"❌ Failed to terminate pod {pod_id}"
        # Forget the cached pod if it is the one we just deleted.
        if current_pod_info and current_pod_info.id == pod_id:
            current_pod_info = None
        return f"✅ Pod {pod_id} terminated successfully!"
    except Exception as e:
        return f"❌ Error terminating pod: {e}"
def get_pod_connection_info(pod_id):
    """Build a markdown panel with SSH/GUI connection details for one pod."""
    global runpod_manager
    if not runpod_manager:
        init_result = init_runpod_manager()
        if "❌" in init_result:
            return init_result
    try:
        pod = runpod_manager.get_pod(pod_id)
        if not pod:
            return f"❌ Pod {pod_id} not found"
        parts = [
            f"# Connection Info for {pod.name}\n\n",
            f"**Status:** {pod.status}\n\n",
        ]
        if pod.status == "RUNNING":
            # SSH and proxied-GUI endpoints only exist while the pod runs.
            parts.append("## SSH Connection\n")
            parts.append(f"```bash\n")
            parts.append(f"ssh root@{pod.ip} -p {pod.ssh_port}\n")
            parts.append(f"```\n\n")
            parts.append("## GUI Access\n")
            parts.append(f"Open in browser:\n")
            parts.append(f"```\n")
            parts.append(f"https://{pod.id}-7860.proxy.runpod.net\n")
            parts.append(f"```\n\n")
            parts.append("## Details\n")
            parts.append(f"- GPU: {pod.gpu_count}x {pod.gpu_type}\n")
            parts.append(f"- Cost: ${pod.cost_per_hr:.2f}/hour\n")
        else:
            parts.append(f"⚠️ Pod is not running (Status: {pod.status})\n")
        return "".join(parts)
    except Exception as e:
        return f"❌ Error getting pod info: {e}"
# Create Gradio interface with custom CSS for scrolling
# NOTE: this string is passed verbatim to gr.Blocks(css=...) below;
# the selectors target Gradio's generated DOM classes, so changes here
# may break with Gradio upgrades.
custom_css = """
/* Make all containers properly scrollable */
.overflow-y-auto {
overflow-y: auto !important;
max-height: 600px !important;
}
/* Textbox scrolling */
.textbox-container textarea {
max-height: 400px !important;
overflow-y: auto !important;
}
/* Column scrolling for long content */
.gr-column {
overflow-y: auto !important;
max-height: 800px !important;
}
/* Markdown blocks in columns */
.gr-column .gr-markdown {
overflow-y: auto !important;
max-height: 500px !important;
}
/* Accordion-like sections */
.gr-box {
overflow-y: auto !important;
max-height: 600px !important;
}
/* Hide footer */
footer {visibility: hidden}
"""
# GUI layout: one gr.Blocks app with nine tabs (Settings, Training Data,
# Synthetic Data, HuggingFace Datasets, Training, Test Model, Evaluation,
# RunPod, Help). Event wiring for each tab sits at the end of that tab's
# `with gr.Tab(...)` block; `demo.load(...)` calls run once on page load.
with gr.Blocks(title="Financial Advisor AI Trainer", theme=gr.themes.Soft(), css=custom_css) as demo:
    gr.Markdown("""
    # 🏦 Financial Advisor AI Training Studio
    Train your own expert financial advisor AI powered by Qwen 3 30B!
    No coding required - just configure, train, and test.
    """)
    with gr.Tabs():
        # Tab 0: Settings & API Keys
        with gr.Tab("⚙️ Settings"):
            gr.Markdown("""
            # API Key Management
            Store your API keys securely. Keys are encrypted and never stored in plain text.
            **You only need to enter these once!**
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 🔑 API Keys")
                    gr.Markdown("#### HuggingFace Token (Required)")
                    gr.Markdown("Get your token at: https://huggingface.co/settings/tokens")
                    hf_token_input = gr.Textbox(
                        label="HuggingFace Token",
                        placeholder="hf_...",
                        type="password",
                        info="Required for downloading Qwen 3 model"
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### Optional API Keys")
                    openai_key_input = gr.Textbox(
                        label="OpenAI API Key (Optional)",
                        placeholder="sk-...",
                        type="password",
                        info="For testing against GPT models"
                    )
                    anthropic_key_input = gr.Textbox(
                        label="Anthropic API Key (Optional)",
                        placeholder="sk-ant-...",
                        type="password",
                        info="For testing against Claude models"
                    )
                    wandb_key_input = gr.Textbox(
                        label="Weights & Biases API Key (Optional)",
                        placeholder="...",
                        type="password",
                        info="For advanced experiment tracking"
                    )
                    runpod_key_input = gr.Textbox(
                        label="RunPod API Key (Optional)",
                        placeholder="...",
                        type="password",
                        info="For cloud GPU deployment"
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### Custom API Keys (Advanced)")
                    custom_keys_input = gr.Code(
                        label="Custom Keys (JSON format)",
                        language="json",
                        value='{\n "MY_API_KEY": "value",\n "OTHER_KEY": "value"\n}',
                        lines=5
                    )
                    gr.Markdown("---")
                    with gr.Row():
                        save_keys_btn = gr.Button("💾 Save All Keys", variant="primary", size="lg")
                        load_keys_btn = gr.Button("🔄 Load Saved Keys", variant="secondary")
                        clear_keys_btn = gr.Button("🗑️ Clear All Keys", variant="stop")
                    keys_status = gr.Markdown()
                with gr.Column():
                    gr.Markdown("### 📋 Security & Status")
                    gr.Markdown("""
                    #### 🔒 Security Features
                    - ✅ Keys are encrypted using Fernet encryption
                    - ✅ Stored in `.secrets/` directory (auto-hidden)
                    - ✅ Never logged or displayed in full
                    - ✅ Loaded into environment variables automatically
                    - ✅ Only you can decrypt with your machine
                    #### 📁 Storage Location
                    Keys are saved in: `.secrets/config.enc`
                    ⚠️ **Backup Note**: If you reinstall or move the project,
                    you'll need to re-enter your keys.
                    """)
                    gr.Markdown("---")
                    gr.Markdown("### 🔍 Current Keys Status")
                    keys_display = gr.Markdown("No keys loaded. Click 'Load Saved Keys' to check.")
                    gr.Markdown("---")
                    gr.Markdown("""
                    ### 💡 Tips
                    **HuggingFace Token:**
                    1. Go to https://huggingface.co/settings/tokens
                    2. Create a new token (read access is enough)
                    3. Copy and paste it above
                    4. Click "Save All Keys"
                    **Other Keys:**
                    Only add if you plan to use those services.
                    Training works with just HuggingFace token.
                    """)
            # Wire up settings
            save_keys_btn.click(
                fn=save_api_keys,
                inputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input],
                outputs=keys_status
            )
            load_keys_btn.click(
                fn=load_api_keys,
                outputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input, keys_display]
            )
            clear_keys_btn.click(
                fn=clear_api_keys,
                outputs=[keys_status, hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input]
            )
            # Load keys on startup
            demo.load(
                fn=load_api_keys,
                outputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input, keys_display]
            )
        # Tab 1: Data Management
        with gr.Tab("📚 Training Data"):
            gr.Markdown("""
            ### Manage Your Training Data
            **Your training data is automatically used when you click "Start Training" in the Training tab.**
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### Current Training Data")
                    data_preview = gr.Markdown()
                    data_count = gr.Number(label="Total Q&A Pairs", interactive=False)
                    gr.Markdown("---")
                    # Training readiness status
                    training_ready_status = gr.Markdown("Click 'Refresh Data' to check training readiness")
                    with gr.Row():
                        load_data_btn = gr.Button("🔄 Refresh Data", variant="secondary")
                        validate_data_btn = gr.Button("🔍 Validate Quality", variant="secondary")
                        remove_dupes_btn = gr.Button("🧹 Remove Duplicates", variant="secondary")
                    gr.Markdown("---")
                    gr.Markdown("#### Quality Report")
                    validation_report = gr.Textbox(
                        label="Data Quality Analysis",
                        lines=10,
                        max_lines=15,
                        interactive=False,
                        placeholder="Click 'Validate Quality' to analyze your training data...",
                        show_copy_button=True
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### Deduplication Status")
                    dedup_status = gr.Markdown("Click 'Remove Duplicates' to clean your data")
                    gr.Markdown("---")
                    gr.Markdown("#### View Full Dataset")
                    data_json = gr.Code(label="Full Dataset (JSON)", language="json", lines=15)
                with gr.Column():
                    gr.Markdown("#### Add New Training Example")
                    new_question = gr.Textbox(
                        label="Question",
                        placeholder="What is the difference between a Roth IRA and Traditional IRA?",
                        lines=3
                    )
                    new_context = gr.Textbox(
                        label="Context (optional)",
                        placeholder="Additional context for the question...",
                        lines=2
                    )
                    new_answer = gr.Textbox(
                        label="Expert Answer",
                        placeholder="Provide a detailed, accurate financial answer...",
                        lines=8
                    )
                    add_example_btn = gr.Button("➕ Add Example", variant="primary")
                    add_status = gr.Markdown()
            # Wire up data management
            # NOTE: .then(load_training_data, ...) chains a refresh of the
            # preview after each mutating action completes.
            load_data_btn.click(
                fn=load_training_data,
                outputs=[data_preview, data_count, data_json, training_ready_status]
            )
            validate_data_btn.click(
                fn=validate_training_data,
                outputs=validation_report
            ).then(
                fn=load_training_data,
                outputs=[data_preview, data_count, data_json, training_ready_status]
            )
            remove_dupes_btn.click(
                fn=remove_duplicates_from_data,
                outputs=[dedup_status, data_count, data_json, training_ready_status]
            ).then(
                fn=load_training_data,
                outputs=[data_preview, data_count, data_json, training_ready_status]
            )
            add_example_btn.click(
                fn=add_training_example,
                inputs=[new_question, new_answer, new_context],
                outputs=[add_status, data_count]
            ).then(
                fn=load_training_data,
                outputs=[data_preview, data_count, data_json, training_ready_status]
            )
            # Load data on startup
            demo.load(
                fn=load_training_data,
                outputs=[data_preview, data_count, data_json, training_ready_status]
            )
        # Tab 2: Synthetic Data Generation
        with gr.Tab("🤖 Synthetic Data"):
            gr.Markdown("""
            # Generate Training Data Automatically
            Use GPT-4 or Claude to automatically generate high-quality financial Q&A pairs!
            **No manual writing required - just configure and generate.**
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 🔧 Generation Settings")
                    syn_provider = gr.Radio(
                        choices=["openai", "anthropic"],
                        value="openai",
                        label="API Provider",
                        info="Choose which LLM to use for generation"
                    )
                    syn_num_examples = gr.Slider(
                        minimum=5, maximum=100, value=20, step=5,
                        label="Number of Examples to Generate",
                        info="Start with 20, then increase"
                    )
                    syn_difficulty = gr.Radio(
                        choices=["beginner", "intermediate", "advanced", "mixed"],
                        value="mixed",
                        label="Difficulty Level",
                        info="Mixed creates diverse questions"
                    )
                    syn_use_scenarios = gr.Checkbox(
                        label="Use Realistic Scenarios",
                        value=False,
                        info="Generate questions with specific user contexts (age, income, etc.)"
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### 📋 Available Topics")
                    syn_topics_display = gr.Textbox(
                        label="Default Topics (leave Custom Topics empty to use these)",
                        value=get_available_topics(),
                        lines=8,
                        max_lines=12,
                        interactive=False
                    )
                    syn_custom_topics = gr.Textbox(
                        label="Custom Topics (Optional)",
                        placeholder="401k Planning, Crypto Trading, Home Buying (comma-separated)",
                        lines=3,
                        info="Leave empty to use all default topics"
                    )
                    gr.Markdown("---")
                    syn_generate_btn = gr.Button("🚀 Generate Data", variant="primary", size="lg")
                    syn_status = gr.Markdown()
                with gr.Column():
                    gr.Markdown("#### 📊 Generation Log")
                    syn_log = gr.Textbox(
                        label="Progress & Preview",
                        lines=20,
                        max_lines=25,
                        interactive=False,
                        show_copy_button=True
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### 💡 Tips & Info")
                    syn_tips = gr.Textbox(
                        label="Important Information",
                        value="""💡 Tips:
• Make sure you've added your OpenAI or Anthropic API key in Settings tab
• Generated data is automatically added to your training dataset
• Each example costs ~$0.01-0.02 in API credits
• Quality is very high - often better than manual examples
• Start with 20 examples to test, then generate more
• Mix difficulties for best results
• Scenario mode creates more realistic, personalized questions
⚠️ API Key Required:
This feature requires an OpenAI or Anthropic API key.
Set it in the Settings tab before generating.""",
                        lines=8,
                        max_lines=12,
                        interactive=False
                    )
            # Wire up synthetic data generation
            syn_generate_btn.click(
                fn=generate_synthetic_data,
                inputs=[syn_provider, syn_num_examples, syn_difficulty, syn_use_scenarios, syn_custom_topics],
                outputs=[syn_log, syn_status]
            )
        # Tab 3: HuggingFace Datasets
        with gr.Tab("📦 HuggingFace Datasets"):
            gr.Markdown("""
            # Load Datasets from HuggingFace
            Import high-quality financial datasets directly from HuggingFace!
            **Includes transaction categorization, financial Q&A, and more.**
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 📋 Known Datasets")
                    hf_datasets_list = gr.Textbox(
                        label="Available Financial Datasets",
                        value=list_hf_datasets(),
                        lines=10,
                        max_lines=15,
                        interactive=False
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### 🔧 Load Settings")
                    hf_dataset_name = gr.Radio(
                        choices=["financial-alpaca", "fingpt-finred", "finance-qa-10k", "Custom Path"],
                        value="financial-alpaca",
                        label="Select Dataset",
                        info="Choose from publicly accessible datasets or use custom path"
                    )
                    hf_custom_path = gr.Textbox(
                        label="Custom Dataset Path (if 'Custom Path' selected)",
                        placeholder="username/dataset-name",
                        info="Full HuggingFace dataset path"
                    )
                    hf_split = gr.Radio(
                        choices=["train", "test", "validation"],
                        value="train",
                        label="Dataset Split",
                        info="Which split to load"
                    )
                    hf_max_examples = gr.Number(
                        label="Max Examples to Load (optional - leave empty for all)",
                        value=None,
                        info="Limit number of examples (helps with large datasets)"
                    )
                    gr.Markdown("---")
                    with gr.Row():
                        hf_preview_btn = gr.Button("👁️ Preview Dataset", variant="secondary")
                        hf_load_btn = gr.Button("📥 Load Dataset", variant="primary", size="lg")
                    hf_status = gr.Markdown()
                with gr.Column():
                    gr.Markdown("#### 📊 Dataset Info & Logs")
                    hf_preview = gr.Textbox(
                        label="Dataset Preview",
                        lines=10,
                        max_lines=15,
                        interactive=False,
                        placeholder="Click 'Preview Dataset' to see sample data...",
                        show_copy_button=True
                    )
                    gr.Markdown("---")
                    hf_log = gr.Textbox(
                        label="Loading Log",
                        lines=12,
                        max_lines=18,
                        interactive=False,
                        placeholder="Loading progress will appear here...",
                        show_copy_button=True
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### 💡 Tips & Info")
                    hf_tips = gr.Textbox(
                        label="Important Information",
                        value="""💡 Available Datasets:
• financial-alpaca (52K examples)
Pre-built financial Q&A in Alpaca format - publicly accessible
• fingpt-finred
Financial relation extraction dataset - publicly accessible
• finance-qa-10k
Q&A from 10-K SEC filings - publicly accessible
💡 Tips:
• Preview datasets before loading to understand structure
• Large datasets can be limited using Max Examples
• All data is automatically quality-validated before adding
• These datasets are PUBLIC and don't require special access
🔑 Authentication:
Your HuggingFace token is used automatically from Settings tab.
Some private/gated datasets may require accepting terms on HuggingFace.
📚 Finding More Datasets:
Browse: https://huggingface.co/datasets
Search: "finance", "financial", "investment", "trading\"""",
                        lines=12,
                        max_lines=18,
                        interactive=False
                    )
            # Wire up HuggingFace dataset loading
            hf_preview_btn.click(
                fn=preview_hf_dataset,
                inputs=hf_custom_path,
                outputs=hf_preview
            )
            hf_load_btn.click(
                fn=load_hf_dataset,
                inputs=[hf_custom_path, hf_dataset_name, hf_max_examples, hf_split],
                outputs=[hf_log, hf_status]
            )
        # Tab 4: Training Configuration
        with gr.Tab("⚙️ Training"):
            gr.Markdown("### Select Model and Configure Training")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 💻 Training Mode")
                    training_mode = gr.Radio(
                        choices=["Local GPU", "Cloud GPU (RunPod)"],
                        value="Cloud GPU (RunPod)",
                        label="Where to Train",
                        info="Local requires NVIDIA GPU. Cloud uses RunPod (pay per minute)."
                    )
                    cloud_cost_estimate = gr.Markdown("**Estimated Cost:** Select model to see pricing")
                    gr.Markdown("---")
                    gr.Markdown("#### 🤖 Model Selection")
                    model_choices = model_registry.get_model_choices_for_gui()
                    model_selector = gr.Dropdown(
                        choices=model_choices,
                        value=model_choices[0][1] if model_choices else None,  # Default to first model ID
                        label="Select Model",
                        info="Choose which model to train"
                    )
                    model_info_display = gr.Markdown()
                    with gr.Row():
                        check_ollama_btn = gr.Button("🔍 Check Ollama Status", variant="secondary", size="sm")
                        ollama_status_display = gr.Markdown()
                    gr.Markdown("---")
                    gr.Markdown("#### Training Configuration")
                    lora_rank = gr.Slider(
                        minimum=4, maximum=64, value=16, step=4,
                        label="LoRA Rank (Higher = More capacity, more memory)",
                        info="Recommended: 16 for 30B model"
                    )
                    learning_rate = gr.Slider(
                        minimum=1e-5, maximum=5e-4, value=1e-4, step=1e-5,
                        label="Learning Rate",
                        info="Recommended: 1e-4 for large models"
                    )
                    num_epochs = gr.Slider(
                        minimum=1, maximum=10, value=3, step=1,
                        label="Number of Epochs",
                        info="Start with 1 epoch to test"
                    )
                    batch_size = gr.Slider(
                        minimum=1, maximum=4, value=1, step=1,
                        label="Batch Size",
                        info="Keep at 1 for 30B model"
                    )
                    grad_accum = gr.Slider(
                        minimum=1, maximum=32, value=16, step=1,
                        label="Gradient Accumulation Steps",
                        info="Effective batch = batch_size × grad_accum"
                    )
                    gr.Markdown("---")
                    start_train_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
                    training_status_text = gr.Markdown()
                with gr.Column():
                    gr.Markdown("#### Training Progress & Logs")
                    training_log = gr.Textbox(
                        label="Training Log",
                        lines=20,
                        max_lines=25,
                        interactive=False,
                        show_copy_button=True
                    )
                    gr.Markdown("""
                    **💡 Tips:**
                    - First training will download ~16GB model
                    - Monitor with TensorBoard: `tensorboard --logdir models/financial_advisor/logs`
                    - Training 30B model takes 30-60 min per epoch
                    - GPU needs ~18GB VRAM minimum
                    """)
            # Wire up model selection
            model_selector.change(
                fn=get_model_info,
                inputs=model_selector,
                outputs=model_info_display
            )
            check_ollama_btn.click(
                fn=check_ollama_status,
                outputs=ollama_status_display
            )
            # Load default model info on startup
            demo.load(
                fn=get_model_info,
                inputs=model_selector,
                outputs=model_info_display
            )
            # Wire up training
            start_train_btn.click(
                fn=start_training,
                inputs=[lora_rank, learning_rate, num_epochs, batch_size, grad_accum, training_mode],
                outputs=[training_log, training_status_text]
            )
        # Tab 5: Testing
        with gr.Tab("🧪 Test Model"):
            gr.Markdown("### Test Your Trained Financial Advisor")
            with gr.Row():
                with gr.Column():
                    load_model_btn = gr.Button("📥 Load Trained Model", variant="secondary")
                    load_status = gr.Markdown()
                    gr.Markdown("---")
                    test_question = gr.Textbox(
                        label="Ask Your Financial Advisor",
                        placeholder="Should I pay off my student loans or invest in my 401k?",
                        lines=4
                    )
                    test_btn = gr.Button("💬 Get Advice", variant="primary", size="lg")
                    gr.Markdown("#### Example Questions:")
                    gr.Markdown("""
                    - What's the difference between a Roth IRA and Traditional IRA?
                    - How much should I have in my emergency fund?
                    - Should I invest in index funds or individual stocks?
                    - What is dollar-cost averaging?
                    - How do I start investing with only $100 per month?
                    """)
                with gr.Column():
                    gr.Markdown("#### Financial Advisor Response")
                    test_response = gr.Textbox(
                        label="Response",
                        lines=15,
                        max_lines=20,
                        interactive=False,
                        show_copy_button=True
                    )
            # Wire up testing
            load_model_btn.click(
                fn=load_existing_model,
                outputs=load_status
            )
            test_btn.click(
                fn=test_financial_advisor,
                inputs=test_question,
                outputs=test_response
            )
        # Tab 6: Evaluation & Comparison
        with gr.Tab("📊 Evaluation"):
            gr.Markdown("### Model Evaluation & Comparison")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 📋 Evaluation History")
                    history_refresh_btn = gr.Button("🔄 Refresh History", variant="secondary")
                    eval_history_display = gr.Textbox(
                        label="Recent Evaluations",
                        lines=15,
                        max_lines=20,
                        interactive=False,
                        show_copy_button=True,
                        placeholder="Click 'Refresh History' to see evaluation history..."
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### 🔍 Latest Evaluation Details")
                    latest_eval_btn = gr.Button("📄 View Latest Evaluation", variant="secondary")
                    latest_eval_display = gr.Textbox(
                        label="Latest Evaluation Report",
                        lines=15,
                        max_lines=20,
                        interactive=False,
                        show_copy_button=True,
                        placeholder="Click to view detailed evaluation report..."
                    )
                with gr.Column():
                    gr.Markdown("#### 📈 Model Comparison")
                    num_models_compare = gr.Slider(
                        minimum=2,
                        maximum=10,
                        value=3,
                        step=1,
                        label="Number of Models to Compare",
                        info="Compare recent model evaluations"
                    )
                    compare_btn = gr.Button("⚖️ Compare Models", variant="primary", size="lg")
                    comparison_display = gr.Textbox(
                        label="Model Comparison Report",
                        lines=20,
                        max_lines=25,
                        interactive=False,
                        show_copy_button=True,
                        placeholder="Click 'Compare Models' to see side-by-side comparison..."
                    )
                    gr.Markdown("---")
                    gr.Markdown("""
                    **💡 Tips:**
                    - Evaluations are run automatically after training
                    - Compare metrics across different training runs
                    - Use comparison to find the best model
                    - Detailed reports saved in `models/financial_advisor/evaluation_results/`
                    """)
            # Wire up evaluation functions
            history_refresh_btn.click(
                fn=load_evaluation_history,
                outputs=eval_history_display
            )
            latest_eval_btn.click(
                fn=view_latest_evaluation,
                outputs=latest_eval_display
            )
            compare_btn.click(
                fn=compare_models,
                inputs=num_models_compare,
                outputs=comparison_display
            )
            # Load history on startup
            demo.load(
                fn=load_evaluation_history,
                outputs=eval_history_display
            )
        # Tab 7: RunPod Cloud Deployment
        with gr.Tab("☁️ RunPod"):
            gr.Markdown("""
            # Cloud GPU Deployment with RunPod
            Deploy and train on powerful cloud GPUs without any manual setup!
            **One-click deployment to RunPod cloud GPUs - fully automated.**
            ## 🎯 What This Does
            This tab lets you:
            1. **Create cloud GPU pods** - Get a powerful GPU in the cloud
            2. **Auto-deploy your code** - Your AURA app runs on the cloud GPU
            3. **Train remotely** - Use the same GUI, but on cloud hardware
            ## ⚡ Quick Start
            **Before you start**, make sure you have:
            - ✅ Added RunPod API key in **⚙️ Settings** tab
            - ✅ Prepared training data in **📚 Training Data** tab
            Then:
            1. Configure pod settings below (defaults are good)
            2. Click "🚀 Create & Deploy Pod"
            3. Wait ~5 minutes for setup
            4. Access the cloud GUI via the provided URL
            5. Train using the cloud GPU!
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 🔧 Pod Configuration")
                    runpod_pod_name = gr.Textbox(
                        label="Pod Name",
                        value="aura-training-pod",
                        placeholder="my-training-pod",
                        info="Name for your RunPod instance"
                    )
                    runpod_gpu_type = gr.Dropdown(
                        choices=[
                            "NVIDIA GeForce RTX 4090",
                            "NVIDIA GeForce RTX 3090",
                            "NVIDIA A100 40GB PCIe",
                            "NVIDIA A100 80GB PCIe",
                            "NVIDIA H100 80GB HBM3",
                        ],
                        value="NVIDIA GeForce RTX 4090",
                        label="GPU Type",
                        info="RTX 4090 recommended for best value"
                    )
                    runpod_storage = gr.Slider(
                        minimum=20,
                        maximum=200,
                        value=50,
                        step=10,
                        label="Storage (GB)",
                        info="Disk space for models and data"
                    )
                    runpod_sync_data = gr.Checkbox(
                        label="Sync Training Data",
                        value=True,
                        info="Upload your local training data to the pod"
                    )
                    gr.Markdown("---")
                    with gr.Row():
                        runpod_create_btn = gr.Button("🚀 Create & Deploy Pod", variant="primary", size="lg")
                        runpod_refresh_btn = gr.Button("🔄 Refresh Pods", variant="secondary")
                    runpod_status = gr.Markdown()
                    gr.Markdown("---")
                    gr.Markdown("#### 📋 Your Pods")
                    runpod_pods_list = gr.Textbox(
                        label="Active Pods",
                        lines=10,
                        max_lines=15,
                        interactive=False,
                        placeholder="Click 'Refresh Pods' to see your RunPod instances...",
                        show_copy_button=True
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### 🔧 Pod Management")
                    runpod_pod_id = gr.Textbox(
                        label="Pod ID",
                        placeholder="Enter Pod ID for management operations",
                        info="Get from 'Your Pods' list above"
                    )
                    with gr.Row():
                        runpod_stop_btn = gr.Button("⏸️ Stop Pod", variant="secondary", size="sm")
                        runpod_terminate_btn = gr.Button("🗑️ Terminate Pod", variant="stop", size="sm")
                    runpod_mgmt_status = gr.Markdown()
                with gr.Column():
                    gr.Markdown("#### 📊 Deployment Log")
                    runpod_log = gr.Textbox(
                        label="Progress & Status",
                        lines=15,
                        max_lines=20,
                        interactive=False,
                        show_copy_button=True,
                        placeholder="Deployment progress will appear here..."
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### 🔗 Connection Info")
                    runpod_connection_info = gr.Markdown("Select a pod and click 'Get Connection Info'")
                    runpod_connect_btn = gr.Button("🔗 Get Connection Info", variant="secondary")
                    gr.Markdown("---")
                    gr.Markdown("""
                    **💰 Cost Estimates:**
                    - RTX 4090: ~$0.69/hour (Best Value)
                    - RTX 3090: ~$0.44/hour (Budget)
                    - A100 40GB: ~$1.39/hour (Production)
                    **📁 What Gets Deployed:**
                    - ✅ All your code files
                    - ✅ Your training data (if "Sync Data" checked)
                    - ✅ Python dependencies (auto-installed)
                    - ✅ Same GUI you're using now
                    **After Deployment:**
                    - Access GUI in browser (URL provided below)
                    - Train on the cloud GPU (same interface)
                    - Stop pod when done to save money
                    - Your trained models stay on the pod
                    **🔑 Get RunPod API Key:**
                    1. Go to https://www.runpod.io/console/user/settings
                    2. Click "API Keys" → "Create API Key"
                    3. Copy the key
                    4. Add to **⚙️ Settings** tab above
                    5. Come back here and create a pod!
                    """)
            # Wire up RunPod functions
            runpod_create_btn.click(
                fn=create_runpod_pod,
                inputs=[runpod_pod_name, runpod_gpu_type, runpod_storage, runpod_sync_data],
                outputs=[runpod_log, runpod_status]
            )
            runpod_refresh_btn.click(
                fn=list_runpod_pods,
                outputs=runpod_pods_list
            )
            runpod_stop_btn.click(
                fn=stop_runpod_pod,
                inputs=runpod_pod_id,
                outputs=runpod_mgmt_status
            )
            runpod_terminate_btn.click(
                fn=terminate_runpod_pod,
                inputs=runpod_pod_id,
                outputs=runpod_mgmt_status
            )
            runpod_connect_btn.click(
                fn=get_pod_connection_info,
                inputs=runpod_pod_id,
                outputs=runpod_connection_info
            )
        # Tab 8: Help
        with gr.Tab("❓ Help"):
            gr.Markdown("""
            # Getting Started Guide
            ## Step 1: Prepare Training Data
            1. Go to **Training Data** tab
            2. Review the sample financial Q&A pairs
            3. Add your own examples using the form
            4. Aim for at least 50-100 high-quality examples
            ## Step 2: Configure Training
            1. Go to **Training** tab
            2. Adjust settings (defaults are good to start):
            - LoRA Rank: 16 (higher = more capacity)
            - Learning Rate: 1e-4 (standard for large models)
            - Epochs: 1-3 (start with 1 to test)
            3. Click **Start Training**
            4. Wait 30-60 minutes per epoch
            ## Step 3: Test Your Model
            1. Go to **Test Model** tab
            2. Click **Load Trained Model**
            3. Ask financial questions
            4. Get expert advice!
            ## Requirements
            - **GPU**: NVIDIA GPU with 18GB+ VRAM (RTX 3090, 4090, A100, etc.)
            - **RAM**: 32GB+ system RAM recommended
            - **Storage**: 30GB free space for model
            - **Internet**: Fast connection for first-time model download
            - **HuggingFace Token**: Required for model access (set in Settings tab)
            ## Troubleshooting
            ### Out of Memory (OOM)
            - Reduce batch_size to 1
            - Increase gradient_accumulation_steps
            - Close other applications
            ### Slow Training
            - This is normal for 30B model
            - Each epoch takes 30-60 minutes
            - Be patient!
            ### Import Errors
            ```bash
            pip install torch transformers peft accelerate bitsandbytes gradio cryptography
            ```
            ### HuggingFace Authentication Error
            - Go to **Settings** tab
            - Add your HuggingFace token
            - Click **Save All Keys**
            - Get token at: https://huggingface.co/settings/tokens
            ### Model Not Learning
            - Check learning rate (try 1e-4 to 5e-4)
            - Verify training data quality
            - Train for more epochs
            ## Advanced Tips
            1. **More Data = Better Results**: Aim for 500+ examples
            2. **Diverse Topics**: Cover various financial areas
            3. **Quality > Quantity**: Accurate answers matter
            4. **Monitor TensorBoard**: Track loss curves
            5. **Test Regularly**: Evaluate after each epoch
            ## Support
            - Check logs in Training tab
            - Review error messages carefully
            - Verify GPU has enough memory
            - Ensure all dependencies installed
            """)
    gr.Markdown("""
    ---
    💡 **Pro Tip**: Start with the sample data and 1 epoch to test everything works, then add more data and train for 3+ epochs.
    """)
if __name__ == "__main__":
    """Entry point: parse CLI flags and launch the Gradio server."""
    import argparse

    # NOTE: `os` is already imported at module level; the previous duplicate
    # `import os` here was redundant and has been removed.
    # Parse command-line arguments for cloud deployment compatibility.
    parser = argparse.ArgumentParser(description="Launch AURA AI Training Studio")
    parser.add_argument("--server_name", type=str, default="0.0.0.0", help="Server host")
    # PORT env var (set by hosting platforms such as HF Spaces) overrides the
    # default Gradio port 7860.
    parser.add_argument("--server_port", type=int, default=int(os.getenv("PORT", 7860)), help="Server port")
    parser.add_argument("--share", action="store_true", help="Create public share link")
    args = parser.parse_args()

    print("=" * 70)
    print("Financial Advisor AI Training Studio")
    print("=" * 70)
    print("\nStarting GUI server...")
    print(f"Server: http://{args.server_name}:{args.server_port}")
    if args.share:
        print("Share link will be generated...")
    print("\nPress Ctrl+C to stop the server")
    print("=" * 70)

    # Blocks until the server is stopped (Ctrl+C).
    demo.launch(
        server_name=args.server_name,
        server_port=args.server_port,
        share=args.share,
        show_error=True
    )