""" Financial Advisor Training GUI A simple, user-friendly interface for training and testing your financial advisor AI. No coding required - just click buttons and see results! """ import gradio as gr import json import threading from pathlib import Path from datetime import datetime import time import os # Import training components from fine_tuning import LoRATrainer, LoRAConfig from data_aggregation import DatasetBuilder, JSONDataCollector from data_aggregation.synthetic_generator import SyntheticDataGenerator from data_aggregation.quality_validator import QualityValidator from data_aggregation.hf_dataset_loader import HuggingFaceDatasetLoader from secure_config import SecureConfig from model_registry import get_registry from ollama_integration import OllamaClient, test_financial_advisor_ollama from runpod_manager import RunPodManager, DeploymentConfig, TrainingConfig from runpod_client import PodInfo # Global variables to track training state training_status = { "is_training": False, "current_epoch": 0, "total_epochs": 0, "loss": 0.0, "progress": 0.0, "logs": [] } trainer_instance = None secure_config = SecureConfig() model_registry = get_registry() selected_model_id = "qwen2.5-32b" # Default model # RunPod state runpod_manager = None current_pod_info = None deployment_in_progress = False # API Key Management Functions def save_api_keys(hf_token, openai_key, anthropic_key, wandb_key, runpod_key, custom_keys_json): """Save all API keys securely""" try: api_keys = { "HUGGINGFACE_TOKEN": hf_token or "", "HF_TOKEN": hf_token or "", # Alternative name "OPENAI_API_KEY": openai_key or "", "ANTHROPIC_API_KEY": anthropic_key or "", "WANDB_API_KEY": wandb_key or "", "RUNPOD_API_KEY": runpod_key or "", } # Parse custom keys if provided if custom_keys_json and custom_keys_json.strip(): try: custom_keys = json.loads(custom_keys_json) api_keys.update(custom_keys) except json.JSONDecodeError: return "⚠️ Invalid JSON in custom keys. Other keys saved successfully." # Save securely secure_config.save_keys(api_keys) # Count non-empty keys saved_count = sum(1 for v in api_keys.values() if v and v.strip()) return f"βœ… Successfully saved {saved_count} API keys securely!\n\nKeys are encrypted and stored in .secrets/ directory." except Exception as e: return f"❌ Error saving keys: {e}" def load_api_keys(): """Load API keys and return masked versions for display""" try: keys = secure_config.load_keys() hf_masked = secure_config.get_masked_key("HUGGINGFACE_TOKEN") or "" openai_masked = secure_config.get_masked_key("OPENAI_API_KEY") or "" anthropic_masked = secure_config.get_masked_key("ANTHROPIC_API_KEY") or "" wandb_masked = secure_config.get_masked_key("WANDB_API_KEY") or "" runpod_masked = secure_config.get_masked_key("RUNPOD_API_KEY") or "" # Show custom keys standard_keys = {"HUGGINGFACE_TOKEN", "HF_TOKEN", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "WANDB_API_KEY", "RUNPOD_API_KEY"} custom_keys = {k: secure_config.get_masked_key(k) for k in keys.keys() if k not in standard_keys} custom_json = json.dumps(custom_keys, indent=2) if custom_keys else "" status = f"πŸ“Š **Loaded {len(keys)} API keys**\n\n" if keys: status += "Keys are encrypted and loaded into environment.\n" status += "Masked keys shown for security." else: status += "⚠️ No API keys found. Please add your keys below." 
# Synthetic Data Generation Functions

def generate_synthetic_data(api_provider, num_examples, difficulty, use_scenarios, selected_topics):
    """Generate synthetic training data"""
    try:
        # Check if API key exists
        if api_provider == "openai":
            api_key = secure_config.get_key("OPENAI_API_KEY")
            if not api_key:
                return "❌ OpenAI API key not found! Add it in the Settings tab.", ""
        elif api_provider == "anthropic":
            api_key = secure_config.get_key("ANTHROPIC_API_KEY")
            if not api_key:
                return "❌ Anthropic API key not found! Add it in the Settings tab.", ""
        else:
            return "❌ Invalid API provider", ""

        log = "🤖 **SYNTHETIC DATA GENERATION**\n\n"
        log += f"Provider: {api_provider.upper()}\n"
        log += f"Examples: {num_examples}\n"
        log += f"Difficulty: {difficulty}\n"
        log += f"Scenarios: {use_scenarios}\n\n"

        # Initialize generator
        generator = SyntheticDataGenerator(api_provider=api_provider)

        # Parse selected topics
        topics_list = None
        if selected_topics and selected_topics.strip():
            topics_list = [t.strip() for t in selected_topics.split(",")]
            log += f"Custom topics: {topics_list}\n\n"

        log += "🚀 Starting generation...\n\n"

        # Generate data
        if use_scenarios:
            generated_data = generator.generate_with_scenarios(num_examples=num_examples)
        else:
            generated_data = generator.generate_examples(
                num_examples=num_examples,
                topics=topics_list,
                difficulty=difficulty
            )

        if not generated_data:
            return log + "\n❌ No data generated. Check API keys and try again.", ""

        log += f"\n✅ Generated {len(generated_data)} raw examples!\n\n"

        # Quality validation
        log += "🔍 **QUALITY VALIDATION**\n\n"
        validator = QualityValidator()
        validation_results = validator.validate_batch(generated_data)

        log += f"Valid: {validation_results['valid']}/{validation_results['total']} "
        log += f"({validation_results['valid']/validation_results['total']*100:.1f}%)\n"

        if validation_results['invalid'] > 0:
            log += f"⚠️ Filtered out {validation_results['invalid']} low-quality examples\n\n"
            log += "**Common Issues:**\n"
            from collections import Counter
            issue_counter = Counter(validation_results['issues'])
            for issue, count in issue_counter.most_common(3):
                log += f"  - {issue}: {count}x\n"
            log += "\n"
        else:
            log += "✅ All examples passed quality checks!\n\n"

        # Check for duplicates
        if validation_results.get('duplicates'):
            log += f"⚠️ Found {len(validation_results['duplicates'])} duplicate questions (removed)\n\n"

        # Use only valid examples
        valid_data = validation_results['valid_examples']

        if not valid_data:
            return log + "\n❌ No valid data after quality filtering. Try again with different settings.", ""

        log += f"📊 **Final Count:** {len(valid_data)} high-quality examples\n\n"

        # Calculate average quality score
        avg_score = sum(validator.get_quality_score(ex) for ex in valid_data) / len(valid_data)
        log += f"⭐ **Average Quality Score:** {avg_score:.1f}/100\n\n"

        # Load existing data
        data_path = "data/sample_financial_advisor_data.json"
        if Path(data_path).exists():
            with open(data_path, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
        else:
            existing_data = []

        # Combine and save
        combined_data = existing_data + valid_data
        Path(data_path).parent.mkdir(parents=True, exist_ok=True)
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(combined_data, f, indent=2, ensure_ascii=False)

        log += "💾 Saved to training data!\n"
        log += f"Total training examples: {len(combined_data)}\n\n"

        # Show preview
        log += "**Sample Generated Q&A:**\n\n"
        for i, example in enumerate(valid_data[:3], 1):
            quality_score = validator.get_quality_score(example)
            log += f"{i}. [Quality: {quality_score:.0f}/100]\n"
            log += f"   Q: {example['instruction']}\n"
            log += f"   A: {example['output'][:150]}...\n\n"

        return log, f"✅ Generated {len(valid_data)} high-quality examples! Total: {len(combined_data)}"

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"❌ Error generating data: {e}\n\n{error_details}", f"❌ Error: {e}"
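
# Illustrative only: every record in data/sample_financial_advisor_data.json
# follows the Alpaca-style schema used throughout this file. The values below
# are made up for documentation purposes.
_EXAMPLE_TRAINING_RECORD = {
    "instruction": "What is dollar-cost averaging?",  # the user's question
    "input": "",                                      # optional extra context
    "output": "Dollar-cost averaging means investing a fixed amount on a regular schedule...",
}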
def get_available_topics():
    """Get list of available financial topics"""
    topics = [
        "Retirement Planning", "Investment Strategies", "Tax Planning",
        "Debt Management", "Emergency Funds", "Budgeting", "Insurance",
        "Estate Planning", "College Savings", "Real Estate", "Stock Market",
        "Bonds and Fixed Income", "Mutual Funds and ETFs", "Cryptocurrency",
        "Financial Independence", "Side Hustles", "Credit Scores", "Mortgages",
        "Small Business Finance", "Risk Management"
    ]
    return "\n".join(f"• {topic}" for topic in topics)


# HuggingFace Dataset Loading Functions

def list_hf_datasets():
    """List available HuggingFace datasets"""
    loader = HuggingFaceDatasetLoader()
    datasets = loader.list_available_datasets()

    output = "📦 **AVAILABLE HUGGINGFACE DATASETS**\n\n"
    for ds in datasets:
        output += f"**{ds['name']}**\n"
        output += f"  Path: {ds['path']}\n"
        output += f"  Type: {ds['type']}\n"
        output += f"  Description: {ds['description']}\n\n"
    return output


def preview_hf_dataset(dataset_path):
    """Preview a HuggingFace dataset"""
    if not dataset_path or not dataset_path.strip():
        return "⚠️ Please enter a dataset path (e.g., mitulshah/transaction-categorization)"

    try:
        loader = HuggingFaceDatasetLoader()
        preview = loader.preview_dataset(dataset_path, num_examples=3)
        return preview
    except Exception as e:
        return (
            f"❌ Error previewing dataset: {e}\n\nMake sure:\n"
            "- Dataset path is correct\n"
            "- You're logged in to HuggingFace (run: huggingface-cli login)\n"
            "- Dataset is publicly accessible"
        )


def load_hf_dataset(dataset_path, dataset_name, max_examples, split):
    """Load a HuggingFace dataset and add to training data"""
    try:
        log = "📥 **LOADING HUGGINGFACE DATASET**\n\n"

        # Check if using known dataset name or custom path
        if dataset_name and dataset_name != "Custom Path":
            log += f"Loading known dataset: {dataset_name}\n\n"
            loader = HuggingFaceDatasetLoader()
            dataset_data = loader.load_dataset_by_name(
                dataset_name,
                split=split,
                max_examples=int(max_examples) if max_examples else None
            )
        elif dataset_path and dataset_path.strip():
            log += f"Loading custom dataset: {dataset_path}\n\n"
            loader = HuggingFaceDatasetLoader()
            dataset_data = loader.load_dataset_by_path(
                dataset_path,
                dataset_type="auto",
                split=split,
                max_examples=int(max_examples) if max_examples else None
            )
        else:
            return "❌ Please select a dataset or enter a custom path", ""

        if not dataset_data:
            return log + "\n❌ No data loaded. Check dataset path and try again.", ""

        log += f"✅ Loaded {len(dataset_data)} examples from HuggingFace\n\n"

        # Quality validation
        log += "🔍 **QUALITY VALIDATION**\n\n"
        validator = QualityValidator()
        validation_results = validator.validate_batch(dataset_data)

        log += f"Valid: {validation_results['valid']}/{validation_results['total']} "
        log += f"({validation_results['valid']/validation_results['total']*100:.1f}%)\n"

        if validation_results['invalid'] > 0:
            log += f"⚠️ Filtered out {validation_results['invalid']} low-quality examples\n"

        # Use only valid examples
        valid_data = validation_results['valid_examples']

        if not valid_data:
            return log + "\n❌ No valid data after quality filtering.", ""

        log += f"\n📊 **Final Count:** {len(valid_data)} high-quality examples\n\n"

        # Calculate average quality score
        avg_score = sum(validator.get_quality_score(ex) for ex in valid_data) / len(valid_data)
        log += f"⭐ **Average Quality Score:** {avg_score:.1f}/100\n\n"

        # Load existing data
        data_path = "data/sample_financial_advisor_data.json"
        if Path(data_path).exists():
            with open(data_path, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
        else:
            existing_data = []

        # Combine and save
        combined_data = existing_data + valid_data
        Path(data_path).parent.mkdir(parents=True, exist_ok=True)
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(combined_data, f, indent=2, ensure_ascii=False)

        log += "💾 Added to training data!\n"
        log += f"Total training examples: {len(combined_data)}\n\n"

        # Show preview
        log += "**Sample Loaded Q&A:**\n\n"
        for i, example in enumerate(valid_data[:3], 1):
            quality_score = validator.get_quality_score(example)
            log += f"{i}. [Quality: {quality_score:.0f}/100]\n"
            log += f"   Q: {example['instruction'][:100]}...\n"
            log += f"   A: {example['output'][:150]}...\n\n"

        return log, f"✅ Loaded {len(valid_data)} examples from HuggingFace! Total: {len(combined_data)}"

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"❌ Error loading dataset: {e}\n\n{error_details}", f"❌ Error: {e}"
def load_training_data():
    """Load and display current training data"""
    data_path = "data/sample_financial_advisor_data.json"

    if not Path(data_path).exists():
        return ("❌ No training data found!", 0, "",
                "❌ **Not Ready**: No training data found. Add examples above or use Synthetic Data/HuggingFace tabs.")

    try:
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Format preview
        preview = f"📊 **Total Q&A Pairs:** {len(data)}\n\n"
        preview += "**Sample Questions:**\n"
        for i, item in enumerate(data[:3]):
            preview += f"\n{i+1}. {item['instruction']}\n"

        # Training readiness status
        if len(data) < 20:
            status = f"⚠️ **Warning**: Only {len(data)} examples. Recommended minimum: 50-100 for good results."
        elif len(data) < 50:
            status = f"✅ **Ready**: {len(data)} examples loaded. Consider adding more for better results (recommended: 100+)."
        else:
            status = (f"✅ **Ready for Training**: {len(data)} examples loaded and ready!\n\n"
                      "**Next Step**: Go to **⚙️ Training** tab and click 'Start Training'")

        return preview, len(data), json.dumps(data, indent=2), status
    except Exception as e:
        return f"❌ Error loading data: {e}", 0, "", "❌ Error loading training data"
else: status = f"βœ… **Ready for Training**: {len(data)} examples loaded and ready!\n\n**Next Step**: Go to **βš™οΈ Training** tab and click 'Start Training'" return preview, len(data), json.dumps(data, indent=2), status except Exception as e: return f"❌ Error loading data: {e}", 0, "", "❌ Error loading training data" def add_training_example(question, answer, context=""): """Add a new training example""" data_path = "data/sample_financial_advisor_data.json" try: # Create new example new_example = { "instruction": question, "input": context, "output": answer } # Validate before adding validator = QualityValidator() is_valid, issues = validator.validate_example(new_example) if not is_valid: issues_text = "\n".join(f" - {issue}" for issue in issues) return f"⚠️ Quality issues found:\n{issues_text}\n\nExample was still added, but consider improving it.", 0 # Load existing data if Path(data_path).exists(): with open(data_path, 'r', encoding='utf-8') as f: data = json.load(f) else: data = [] data.append(new_example) # Save Path(data_path).parent.mkdir(parents=True, exist_ok=True) with open(data_path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) quality_score = validator.get_quality_score(new_example) return f"βœ… Added! Quality Score: {quality_score:.0f}/100\nTotal examples: {len(data)}", len(data) except Exception as e: return f"❌ Error: {e}", 0 def validate_training_data(): """Validate all training data and return report (with auto-deduplication)""" data_path = "data/sample_financial_advisor_data.json" if not Path(data_path).exists(): return "❌ No training data found!" try: with open(data_path, 'r', encoding='utf-8') as f: data = json.load(f) original_count = len(data) validator = QualityValidator() # First, check for duplicates and auto-remove deduplicated_data, num_duplicates = validator.remove_duplicates(data) # Save deduplicated data if duplicates were found if num_duplicates > 0: # Create backup backup_path = data_path.replace('.json', '_backup.json') with open(backup_path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) # Save deduplicated version with open(data_path, 'w', encoding='utf-8') as f: json.dump(deduplicated_data, f, indent=2, ensure_ascii=False) data = deduplicated_data # Now validate the deduplicated data validation_results = validator.validate_batch(data) # Generate report report = "=" * 60 + "\n" report += "QUALITY VALIDATION REPORT (WITH AUTO-DEDUPLICATION)\n" report += "=" * 60 + "\n\n" # Deduplication results if num_duplicates > 0: report += "🧹 AUTO-DEDUPLICATION COMPLETE!\n" report += "-" * 60 + "\n" report += f"Original Examples: {original_count}\n" report += f"Duplicates Removed: {num_duplicates}\n" report += f"Unique Examples: {len(data)}\n" report += f"Backup saved to: {backup_path}\n" report += "\n" + "=" * 60 + "\n\n" else: report += "βœ… NO DUPLICATES FOUND\n" report += "-" * 60 + "\n" report += f"All {len(data)} examples are unique!\n" report += "\n" + "=" * 60 + "\n\n" # Quality validation results report += f"Total Examples: {validation_results['total']}\n" report += f"Valid: {validation_results['valid']} ({validation_results['valid']/validation_results['total']*100:.1f}%)\n" report += f"Invalid: {validation_results['invalid']} ({validation_results['invalid']/validation_results['total']*100:.1f}%)\n\n" if validation_results['invalid'] > 0: report += "-" * 60 + "\n" report += "QUALITY ISSUES FOUND:\n" report += "-" * 60 + "\n" # Count issue types from collections import Counter issue_counter = 
def remove_duplicates_from_data():
    """Remove duplicate questions from training data"""
    data_path = "data/sample_financial_advisor_data.json"

    if not Path(data_path).exists():
        return "❌ No training data found!", 0, "", ""

    try:
        # Load data
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        original_count = len(data)

        # Remove duplicates
        validator = QualityValidator()
        deduplicated_data, num_removed = validator.remove_duplicates(data)

        if num_removed == 0:
            return (
                "✅ No duplicates found! Your data is already clean.",
                len(deduplicated_data),
                json.dumps(deduplicated_data, indent=2),
                f"✅ **Ready for Training**: {len(deduplicated_data)} unique examples!\n\n**Next Step**: Go to **⚙️ Training** tab"
            )

        # Save deduplicated data
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(deduplicated_data, f, indent=2, ensure_ascii=False)

        # Create backup of original
        backup_path = data_path.replace('.json', '_with_duplicates_backup.json')
        with open(backup_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        message = f"""✅ **Deduplication Complete!**

**Removed:** {num_removed} duplicate questions
**Kept:** {len(deduplicated_data)} unique examples
**Original:** {original_count} total examples

**Backup saved to:** `{backup_path}`

Your training data now contains only unique questions.
This will improve model quality and prevent overfitting.
"""

        # Prepare preview
        preview = f"📊 **Total Q&A Pairs:** {len(deduplicated_data)}\n\n"
        preview += "**Sample Questions:**\n"
        for i, item in enumerate(deduplicated_data[:3]):
            preview += f"\n{i+1}. {item['instruction']}\n"

        status = (f"✅ **Ready for Training**: {len(deduplicated_data)} unique examples loaded!\n\n"
                  "**Next Step**: Go to **⚙️ Training** tab")

        return message, len(deduplicated_data), json.dumps(deduplicated_data, indent=2), status
    except Exception as e:
        return f"❌ Error removing duplicates: {str(e)}", 0, "", ""
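
# Illustrative only: QualityValidator.remove_duplicates is assumed to behave
# roughly like this hedged sketch - keep the first occurrence of each question,
# compared case-insensitively. The real implementation may normalize further.
def _naive_remove_duplicates(examples):
    seen, unique = set(), []
    for ex in examples:
        key = ex["instruction"].strip().lower()
        if key not in seen:
            seen.add(key)
            unique.append(ex)
    return unique, len(examples) - len(unique)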
# Model Selection Functions

def get_model_info(model_name):
    """Get detailed model information"""
    global selected_model_id

    # Get model ID from name
    model_id = model_registry.get_model_id_from_name(model_name)
    if not model_id:
        return "❌ Model not found"

    selected_model_id = model_id
    model = model_registry.get_model(model_id)

    info = f"# {model.name}\n\n"
    info += f"**Type:** {model.type.upper()}\n"
    info += f"**Path:** `{model.path}`\n"
    info += f"**Size:** {model.size}\n"
    info += f"**VRAM Required:** {model.vram_required}\n"
    info += f"**Context Length:** {model.context_length:,} tokens\n"
    info += f"**Recommended Quantization:** {model.quantization}\n"
    info += f"**Recommended LoRA Rank:** {model.lora_rank}\n\n"
    info += f"**Description:**\n{model.description}\n\n"
    info += f"**Tags:** {', '.join(model.tags)}\n\n"

    # Validate availability
    is_valid, message = model_registry.validate_model_selection(model_id)
    info += f"\n**Status:** {message}\n"

    return info


def check_ollama_status():
    """Check Ollama status and list installed models"""
    client = OllamaClient()

    status = "# Ollama Status\n\n"

    if client.is_available():
        status += "✅ **Ollama is running**\n\n"

        models = client.list_models()
        if models:
            status += f"**Installed Models ({len(models)}):**\n\n"
            for model in models:
                name = model.get("name", "unknown")
                size = model.get("size", 0) / (1024**3)  # Convert to GB
                status += f"- `{name}` ({size:.1f}GB)\n"
        else:
            status += "⚠️ No models installed\n\n"
            status += "Install models with: `ollama pull <model-name>`\n"
    else:
        status += "❌ **Ollama is not running**\n\n"
        status += "Start Ollama with:\n"
        status += "```bash\n"
        status += "ollama serve\n"
        status += "```\n\n"
        status += "Or download from: https://ollama.com\n"

    return status
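
# Illustrative only: a minimal availability probe equivalent to what
# OllamaClient.is_available()/list_models() are assumed to do, using Ollama's
# documented local HTTP API (default port 11434).
def _probe_ollama_sketch():
    import requests  # assumed available in this environment
    try:
        resp = requests.get("http://localhost:11434/api/tags", timeout=2)
        return [m["name"] for m in resp.json().get("models", [])]
    except requests.RequestException:
        return None  # Ollama not running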
def start_cloud_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum):
    """Start cloud training on RunPod"""
    global training_status, runpod_manager, selected_model_id

    if training_status["is_training"]:
        return "⚠️ Training already in progress!", ""

    # Check RunPod API key
    runpod_key = secure_config.get_key("RUNPOD_API_KEY")
    if not runpod_key:
        return ("❌ RunPod API key required for cloud training! Add it in Settings tab.",
                "❌ Missing RunPod API key")

    # Check HF key
    keys_ok, keys_msg = check_required_keys()
    if not keys_ok:
        return keys_msg, "❌ Missing API keys"

    try:
        training_status["is_training"] = True

        log = "☁️ **CLOUD TRAINING ON RUNPOD**\n\n"

        # Get selected model
        model = model_registry.get_model(selected_model_id)
        if not model:
            training_status["is_training"] = False
            return "❌ No model selected!", ""

        log += "📋 Configuration:\n"
        log += f"- Model: {model.name}\n"
        log += f"- LoRA Rank: {lora_rank}\n"
        log += f"- Epochs: {num_epochs}\n"
        log += "- Mode: Cloud (RunPod)\n\n"

        # Load training data
        data_path = "data/sample_financial_advisor_data.json"
        if not Path(data_path).exists():
            training_status["is_training"] = False
            return "❌ No training data found!", ""

        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        log += f"✅ Loaded {len(data)} training examples\n\n"

        # Initialize RunPod manager
        if not runpod_manager:
            runpod_manager = RunPodManager(runpod_key)

        log += "🚀 **STEP 1: Creating RunPod GPU Instance**\n"
        log += "⏳ Finding available GPU (RTX 4090 recommended)...\n\n"

        # Create pod config (DeploymentConfig is imported at module level)
        config = DeploymentConfig(
            pod_name=f"aura-training-{datetime.now().strftime('%Y%m%d-%H%M')}",
            gpu_type="NVIDIA GeForce RTX 4090",
            storage_gb=50,
            sync_data=True,
            auto_setup=True
        )

        # Deploy pod
        pod_info = runpod_manager.one_click_deploy(config=config)

        log += f"✅ Pod created: {pod_info.id}\n"
        log += f"📍 GPU: {pod_info.gpu_type}\n"
        log += f"💰 Cost: ${pod_info.cost_per_hr:.2f}/hour\n\n"

        log += "🚀 **STEP 2: Setting Up Training Environment**\n"
        log += "⏳ Installing dependencies on cloud GPU...\n\n"
        # Environment is auto-setup by one_click_deploy
        log += "✅ Environment ready\n\n"

        log += "🚀 **STEP 3: Uploading Training Data**\n"
        log += f"⏳ Uploading {len(data)} examples to pod...\n\n"
        # Data already synced by one_click_deploy if sync_data=True
        log += "✅ Data uploaded\n\n"

        log += "🚀 **STEP 4: Starting Training Job**\n"
        log += f"⏳ Training {model.name} with LoRA...\n"
        log += f"⏱️ Estimated time: {num_epochs * 30}-{num_epochs * 60} minutes\n\n"

        # Create training config (TrainingConfig is imported at module level)
        train_config = TrainingConfig(
            model_name=model.path if model.type != "ollama" else None,
            dataset_path="/workspace/data/sample_financial_advisor_data.json",
            output_dir="/workspace/models/financial_advisor",
            lora_rank=int(lora_rank),
            learning_rate=float(learning_rate),
            num_epochs=int(num_epochs),
            batch_size=int(batch_size),
            gradient_accumulation_steps=int(grad_accum)
        )

        # Submit training job
        job_result = runpod_manager.submit_training_job(pod_info.id, train_config)

        log += "✅ Training started!\n\n"
        log += "📊 **MONITORING TRAINING**\n"
        log += "⏳ Training in progress... (this will take a while)\n\n"

        # Note: In a real implementation, we'd poll for completion (see the
        # hedged polling sketch after this function). For now, return success
        # and let the user check manually.
        log += "🔗 **POD ACCESS**\n"
        log += f"SSH: ssh root@{pod_info.ip} -p {pod_info.ssh_port}\n"
        log += f"GUI: https://{pod_info.id}-7860.proxy.runpod.net\n\n"

        log += "⚠️ **IMPORTANT:**\n"
        log += "- Training is running on cloud GPU\n"
        log += f"- Billed at ${pod_info.cost_per_hr:.2f}/hour while the pod runs\n"
        log += "- Go to RunPod tab to monitor or terminate\n"
        log += "- Model will be saved to pod storage\n"

        training_status["is_training"] = False
        return log, "✅ Cloud training started!"

    except Exception as e:
        training_status["is_training"] = False
        import traceback
        error_details = traceback.format_exc()
        return f"❌ Error: {str(e)}\n\n{error_details}", f"❌ Error: {e}"
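
# Illustrative only: a hedged sketch of the completion polling mentioned above.
# It assumes only RunPodManager.get_pod (used elsewhere in this file) and that
# a finished pod stops reporting the "RUNNING" status; the real signal for
# training completion may differ.
def _poll_pod_until_done_sketch(manager, pod_id, interval_s=60, max_checks=120):
    for _ in range(max_checks):
        pod = manager.get_pod(pod_id)
        if pod is None or pod.status != "RUNNING":
            return pod  # pod stopped, terminated, or finished
        time.sleep(interval_s)
    return None  # gave up waiting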
def start_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum, training_mode):
    """Start the training process (local or cloud)"""
    global training_status, trainer_instance, selected_model_id

    # Route to cloud or local training
    if training_mode == "Cloud GPU (RunPod)":
        return start_cloud_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum)

    # Local training below
    if training_status["is_training"]:
        return "⚠️ Training already in progress!", ""

    # Check API keys first
    keys_ok, keys_msg = check_required_keys()
    if not keys_ok:
        return keys_msg, "❌ Missing API keys. Go to Settings tab."

    # Get selected model
    model = model_registry.get_model(selected_model_id)
    if not model:
        return "❌ No model selected!", ""

    # Validate model
    is_valid, message = model_registry.validate_model_selection(selected_model_id)
    if not is_valid:
        return f"❌ Model validation failed: {message}", ""

    # Get model path (convert Ollama to HF if needed)
    if model.type == "ollama":
        from ollama_integration import get_hf_model_for_ollama
        model_path = get_hf_model_for_ollama(model.path)
        if not model_path:
            return f"❌ Cannot train with Ollama model {model.path}. No HuggingFace equivalent found.", ""
        log_model_name = f"{model.name} (using HF: {model_path})"
    else:
        model_path = model.path
        log_model_name = model.name

    try:
        training_status["is_training"] = True
        training_status["current_epoch"] = 0
        training_status["total_epochs"] = num_epochs
        training_status["logs"] = []

        log = "🚀 **STARTING TRAINING**\n\n"
        log += "⚙️ Configuration:\n"
        log += f"- Model: {log_model_name}\n"
        log += f"- LoRA Rank: {lora_rank}\n"
        log += f"- Learning Rate: {learning_rate}\n"
        log += f"- Epochs: {num_epochs}\n"
        log += f"- Batch Size: {batch_size}\n"
        log += f"- Gradient Accumulation: {grad_accum}\n\n"
        training_status["logs"].append(log)

        # Load data
        log += "📂 Loading training data...\n"
        data_path = "data/sample_financial_advisor_data.json"
        if not Path(data_path).exists():
            training_status["is_training"] = False
            return "❌ No training data found! Add some Q&A pairs first.", ""
        collector = JSONDataCollector()
        data = collector.collect(data_path)
        log += f"✅ Loaded {len(data)} examples\n\n"
        training_status["logs"].append(log)

        # Split data
        log += "📊 Splitting dataset...\n"
        builder = DatasetBuilder()
        train_data, val_data, test_data = builder.train_test_split(data)
        log += f"✅ Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}\n\n"
        training_status["logs"].append(log)

        # Configure LoRA
        log += "⚙️ Configuring LoRA...\n"
        lora_config = LoRAConfig(
            r=int(lora_rank),
            lora_alpha=int(lora_rank * 2),
            lora_dropout=0.05,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                            "gate_proj", "up_proj", "down_proj"],
            bias="none",
            task_type="CAUSAL_LM"
        )
        log += "✅ LoRA configured\n\n"
        training_status["logs"].append(log)

        # Initialize trainer
        log += "🤖 Initializing trainer...\n"
        trainer_instance = LoRATrainer(
            model_name=model_path,
            lora_config=lora_config,
            output_dir="models/financial_advisor"
        )
        log += "✅ Trainer ready\n\n"
        training_status["logs"].append(log)

        # Load model
        log += f"📥 Loading {model.name} (this will take a few minutes)...\n"
        log += f"⚠️ First time will download {model.size}\n\n"
        training_status["logs"].append(log)

        trainer_instance.load_model(use_4bit=True)
        log += "✅ Model loaded successfully!\n\n"
        training_status["logs"].append(log)

        # Show parameters
        params = trainer_instance.get_trainable_parameters()
        log += "📊 **Parameter Efficiency:**\n"
        log += f"- Total: {params['total']:,}\n"
        log += f"- Trainable: {params['trainable']:,}\n"
        log += f"- Percentage: {params['percentage']:.2f}%\n\n"
        training_status["logs"].append(log)

        # Prepare datasets
        log += "🔄 Preparing datasets...\n"
        train_dataset, val_dataset = trainer_instance.prepare_dataset(train_data, val_data)
        log += "✅ Datasets tokenized and ready\n\n"
        training_status["logs"].append(log)

        # Start training
        log += "🎯 **TRAINING STARTED**\n\n"
        log += f"This will take approximately {num_epochs * 30}-{num_epochs * 60} minutes\n"
        log += "You can monitor progress in TensorBoard:\n"
        log += "`tensorboard --logdir models/financial_advisor/logs`\n\n"
        training_status["logs"].append(log)

        # Train (this will take a while)
        history = trainer_instance.train(
            train_dataset=train_dataset,
            val_dataset=val_dataset,
            num_epochs=int(num_epochs),
            batch_size=int(batch_size),
            learning_rate=float(learning_rate),
            gradient_accumulation_steps=int(grad_accum)
        )

        log += "\n✅ **TRAINING COMPLETE!**\n\n"
        log += "💾 Model saved to: models/financial_advisor/final_model\n"
        log += "📊 Logs saved to: models/financial_advisor/logs\n\n"
        training_status["logs"].append(log)

        # Evaluate on test set
        if len(test_data) > 0:
            log += "🔍 **EVALUATING ON TEST SET**\n\n"
            log += f"Running evaluation on {len(test_data)} test examples...\n"
            training_status["logs"].append(log)

            try:
                # Run evaluation (limit to 50 samples for speed)
                eval_results = trainer_instance.evaluate_on_test_set(
                    test_data=test_data,
                    model_name=log_model_name,
                    dataset_info=f"Financial Advisor Training - {len(train_data)} train examples",
                    num_samples=min(50, len(test_data))
                )

                log += "\n✅ **EVALUATION COMPLETE**\n\n"
                log += "**Performance Metrics:**\n"
                metrics = eval_results['metrics']
                log += f"- Average Response Length: {metrics['avg_response_length']:.1f} words\n"
                log += f"- Average Generation Time: {metrics['avg_generation_time']:.2f}s\n"
                log += f"- Throughput: {metrics['examples_per_second']:.2f} examples/sec\n\n"
                log += "📊 Evaluation report saved to: models/financial_advisor/evaluation_results/\n"
training_status["logs"].append(log) except Exception as eval_error: log += f"\n⚠️ Evaluation error (training still succeeded): {eval_error}\n" training_status["logs"].append(log) training_status["is_training"] = False return "\n".join(training_status["logs"]), "βœ… Training & Evaluation Complete!" except Exception as e: training_status["is_training"] = False error_msg = f"\n\n❌ **ERROR:** {str(e)}\n\n" error_msg += "**Common fixes:**\n" error_msg += "- Out of memory: Reduce batch_size or increase grad_accum\n" error_msg += "- CUDA error: Make sure you have a GPU with 18GB+ VRAM\n" error_msg += "- Import error: Run `pip install bitsandbytes`\n" training_status["logs"].append(error_msg) return "\n".join(training_status["logs"]), f"❌ Error: {e}" def test_financial_advisor(question): """Test the trained model""" global trainer_instance if trainer_instance is None: return "❌ No model loaded. Train a model first or load an existing one." try: # Format as financial advisor prompt system_prompt = ( "You are an expert financial advisor with deep knowledge of personal finance, " "investments, retirement planning, tax strategies, and wealth management." ) prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n" prompt += f"<|im_start|>user\n{question}<|im_end|>\n" prompt += f"<|im_start|>assistant\n" # Tokenize inputs = trainer_instance.tokenizer(prompt, return_tensors="pt").to(trainer_instance.peft_model.device) # Generate outputs = trainer_instance.peft_model.generate( **inputs, max_new_tokens=512, temperature=0.7, top_p=0.9, do_sample=True, pad_token_id=trainer_instance.tokenizer.eos_token_id ) # Decode response = trainer_instance.tokenizer.decode(outputs[0], skip_special_tokens=False) # Extract assistant response if "<|im_start|>assistant" in response: response = response.split("<|im_start|>assistant")[-1].strip() response = response.replace("<|im_end|>", "").strip() return response except Exception as e: return f"❌ Error generating response: {e}" def load_existing_model(): """Load a previously trained model""" global trainer_instance, selected_model_id model_path = "models/financial_advisor/final_model" if not Path(model_path).exists(): return "❌ No trained model found at models/financial_advisor/final_model" try: # Get the model that was used for training model = model_registry.get_model(selected_model_id) if not model: return "❌ Model not found in registry" # Get actual model path if model.type == "ollama": from ollama_integration import get_hf_model_for_ollama base_model_path = get_hf_model_for_ollama(model.path) if not base_model_path: return f"❌ Cannot load Ollama model {model.path}" else: base_model_path = model.path # Initialize trainer lora_config = LoRAConfig(r=model.lora_rank, lora_alpha=model.lora_rank*2) trainer_instance = LoRATrainer( model_name=base_model_path, lora_config=lora_config, output_dir="models/financial_advisor" ) # Load base model trainer_instance.load_model(use_4bit=True) # Load adapter trainer_instance.load_adapter(model_path) return f"βœ… Model loaded successfully! ({model.name}) You can now test it." except Exception as e: return f"❌ Error loading model: {e}" # Model Comparison and Evaluation Functions def load_evaluation_history(): """Load evaluation history and display summary""" try: from evaluation.model_evaluator import ModelEvaluator evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results") if not evaluator.history: return "❌ No evaluation history found. Train a model first to see evaluations." 
# Model Comparison and Evaluation Functions

def load_evaluation_history():
    """Load evaluation history and display summary"""
    try:
        from evaluation.model_evaluator import ModelEvaluator

        evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results")

        if not evaluator.history:
            return "❌ No evaluation history found. Train a model first to see evaluations."

        # Generate summary
        summary = "📊 **EVALUATION HISTORY**\n\n"
        summary += f"Total Evaluations: {len(evaluator.history)}\n\n"
        summary += "=" * 70 + "\n\n"

        for i, eval_result in enumerate(reversed(evaluator.history[-10:]), 1):
            summary += f"**Evaluation {len(evaluator.history) - i + 1}:**\n"
            summary += f"- Model: {eval_result['model_name']}\n"
            summary += f"- Dataset: {eval_result['dataset_info']}\n"
            summary += f"- Timestamp: {eval_result['timestamp']}\n"
            summary += f"- Test Examples: {eval_result['num_test_examples']}\n"

            metrics = eval_result.get('metrics', {})
            summary += f"- Avg Response Length: {metrics.get('avg_response_length', 0):.1f} words\n"
            summary += f"- Avg Generation Time: {metrics.get('avg_generation_time', 0):.2f}s\n"
            summary += f"- Throughput: {metrics.get('examples_per_second', 0):.2f} ex/s\n"
            summary += "\n" + "-" * 70 + "\n\n"

        return summary
    except Exception as e:
        return f"❌ Error loading evaluation history: {e}"


def compare_models(num_models):
    """Compare recent model evaluations"""
    try:
        from evaluation.model_evaluator import ModelEvaluator

        evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results")

        if not evaluator.history:
            return "❌ No evaluation history found. Train models first to compare."

        # Get comparison
        comparison = evaluator.compare_models(num_recent=int(num_models))

        # Generate report
        report = evaluator.generate_comparison_report(comparison)
        return report
    except Exception as e:
        return f"❌ Error comparing models: {e}"


def view_latest_evaluation():
    """View the most recent evaluation in detail"""
    try:
        from evaluation.model_evaluator import ModelEvaluator

        evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results")

        latest = evaluator.get_latest_evaluation()
        if not latest:
            return "❌ No evaluations found. Train a model first."

        # Generate detailed report
        report = evaluator.generate_report(latest)
        return report
    except Exception as e:
        return f"❌ Error viewing evaluation: {e}"


# RunPod Functions

def init_runpod_manager():
    """Initialize RunPod manager with API key"""
    global runpod_manager

    if runpod_manager is not None:
        return "✅ RunPod manager already initialized"

    api_key = secure_config.get_key("RUNPOD_API_KEY")
    if not api_key:
        return "❌ RunPod API key not found! Please add it in the Settings tab."

    try:
        runpod_manager = RunPodManager(api_key)
        return "✅ RunPod manager initialized successfully!"
    except Exception as e:
        return f"❌ Error initializing RunPod manager: {e}"
output = f"πŸ“Š **YOUR RUNPOD PODS** ({len(pods)} total)\n\n" for pod in pods: output += f"**{pod.name}** (ID: {pod.id[:8]}...)\n" output += f" Status: {pod.status}\n" output += f" GPU: {pod.gpu_count}x {pod.gpu_type}\n" output += f" Cost: ${pod.cost_per_hr:.2f}/hr\n" if pod.status == "RUNNING": if pod.ip and pod.ssh_port: output += f" SSH: {pod.ip}:{pod.ssh_port}\n" if pod.port: output += f" GUI: https://{pod.id}-7860.proxy.runpod.net\n" output += "\n" return output except Exception as e: return f"❌ Error listing pods: {e}" def create_runpod_pod(pod_name, gpu_type, storage_gb, sync_data): """Create a new RunPod pod""" global runpod_manager, current_pod_info, deployment_in_progress if deployment_in_progress: return "⚠️ Deployment already in progress!", "" if not runpod_manager: init_result = init_runpod_manager() if "❌" in init_result: return init_result, "" deployment_in_progress = True log_output = [] def log_callback(msg): log_output.append(msg) try: config = DeploymentConfig( pod_name=pod_name or "aura-training-pod", gpu_type=gpu_type, storage_gb=int(storage_gb), sync_data=sync_data, auto_setup=True ) pod_info = runpod_manager.one_click_deploy( config=config, progress_callback=log_callback ) current_pod_info = pod_info deployment_in_progress = False final_log = "\n".join(log_output) status = f"βœ… Pod created successfully!\n" status += f"ID: {pod_info.id}\n" status += f"SSH: {pod_info.ip}:{pod_info.ssh_port}\n" status += f"GUI: https://{pod_info.id}-7860.proxy.runpod.net" return final_log, status except Exception as e: deployment_in_progress = False error_log = "\n".join(log_output) + f"\n\n❌ Error: {e}" return error_log, f"❌ Deployment failed: {e}" def stop_runpod_pod(pod_id): """Stop a running pod""" global runpod_manager if not runpod_manager: init_result = init_runpod_manager() if "❌" in init_result: return init_result try: success = runpod_manager.stop_pod(pod_id) if success: return f"βœ… Pod {pod_id} stopped successfully!" else: return f"❌ Failed to stop pod {pod_id}" except Exception as e: return f"❌ Error stopping pod: {e}" def terminate_runpod_pod(pod_id): """Terminate (delete) a pod""" global runpod_manager, current_pod_info if not runpod_manager: init_result = init_runpod_manager() if "❌" in init_result: return init_result try: success = runpod_manager.terminate_pod(pod_id) if success: if current_pod_info and current_pod_info.id == pod_id: current_pod_info = None return f"βœ… Pod {pod_id} terminated successfully!" 
else: return f"❌ Failed to terminate pod {pod_id}" except Exception as e: return f"❌ Error terminating pod: {e}" def get_pod_connection_info(pod_id): """Get connection info for a pod""" global runpod_manager if not runpod_manager: init_result = init_runpod_manager() if "❌" in init_result: return init_result try: pod = runpod_manager.get_pod(pod_id) if not pod: return f"❌ Pod {pod_id} not found" info = f"# Connection Info for {pod.name}\n\n" info += f"**Status:** {pod.status}\n\n" if pod.status == "RUNNING": info += "## SSH Connection\n" info += f"```bash\n" info += f"ssh root@{pod.ip} -p {pod.ssh_port}\n" info += f"```\n\n" info += "## GUI Access\n" info += f"Open in browser:\n" info += f"```\n" info += f"https://{pod.id}-7860.proxy.runpod.net\n" info += f"```\n\n" info += "## Details\n" info += f"- GPU: {pod.gpu_count}x {pod.gpu_type}\n" info += f"- Cost: ${pod.cost_per_hr:.2f}/hour\n" else: info += f"⚠️ Pod is not running (Status: {pod.status})\n" return info except Exception as e: return f"❌ Error getting pod info: {e}" # Create Gradio interface with custom CSS for scrolling custom_css = """ /* Make all containers properly scrollable */ .overflow-y-auto { overflow-y: auto !important; max-height: 600px !important; } /* Textbox scrolling */ .textbox-container textarea { max-height: 400px !important; overflow-y: auto !important; } /* Column scrolling for long content */ .gr-column { overflow-y: auto !important; max-height: 800px !important; } /* Markdown blocks in columns */ .gr-column .gr-markdown { overflow-y: auto !important; max-height: 500px !important; } /* Accordion-like sections */ .gr-box { overflow-y: auto !important; max-height: 600px !important; } /* Hide footer */ footer {visibility: hidden} """ with gr.Blocks(title="Financial Advisor AI Trainer", theme=gr.themes.Soft(), css=custom_css) as demo: gr.Markdown(""" # 🏦 Financial Advisor AI Training Studio Train your own expert financial advisor AI powered by Qwen 3 30B! No coding required - just configure, train, and test. """) with gr.Tabs(): # Tab 0: Settings & API Keys with gr.Tab("βš™οΈ Settings"): gr.Markdown(""" # API Key Management Store your API keys securely. Keys are encrypted and never stored in plain text. 
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 🔑 API Keys")

                    gr.Markdown("#### HuggingFace Token (Required)")
                    gr.Markdown("Get your token at: https://huggingface.co/settings/tokens")
                    hf_token_input = gr.Textbox(
                        label="HuggingFace Token",
                        placeholder="hf_...",
                        type="password",
                        info="Required for downloading Qwen 3 model"
                    )

                    gr.Markdown("---")
                    gr.Markdown("#### Optional API Keys")

                    openai_key_input = gr.Textbox(
                        label="OpenAI API Key (Optional)",
                        placeholder="sk-...",
                        type="password",
                        info="For testing against GPT models"
                    )

                    anthropic_key_input = gr.Textbox(
                        label="Anthropic API Key (Optional)",
                        placeholder="sk-ant-...",
                        type="password",
                        info="For testing against Claude models"
                    )

                    wandb_key_input = gr.Textbox(
                        label="Weights & Biases API Key (Optional)",
                        placeholder="...",
                        type="password",
                        info="For advanced experiment tracking"
                    )

                    runpod_key_input = gr.Textbox(
                        label="RunPod API Key (Optional)",
                        placeholder="...",
                        type="password",
                        info="For cloud GPU deployment"
                    )

                    gr.Markdown("---")
                    gr.Markdown("#### Custom API Keys (Advanced)")

                    custom_keys_input = gr.Code(
                        label="Custom Keys (JSON format)",
                        language="json",
                        value='{\n  "MY_API_KEY": "value",\n  "OTHER_KEY": "value"\n}',
                        lines=5
                    )

                    gr.Markdown("---")

                    with gr.Row():
                        save_keys_btn = gr.Button("💾 Save All Keys", variant="primary", size="lg")
                        load_keys_btn = gr.Button("🔄 Load Saved Keys", variant="secondary")
                        clear_keys_btn = gr.Button("🗑️ Clear All Keys", variant="stop")

                    keys_status = gr.Markdown()

                with gr.Column():
                    gr.Markdown("### 📋 Security & Status")

                    gr.Markdown("""
                    #### 🔒 Security Features
                    - ✅ Keys are encrypted using Fernet encryption
                    - ✅ Stored in `.secrets/` directory (auto-hidden)
                    - ✅ Never logged or displayed in full
                    - ✅ Loaded into environment variables automatically
                    - ✅ Keys can only be decrypted on your machine

                    #### 📁 Storage Location
                    Keys are saved in: `.secrets/config.enc`

                    ⚠️ **Backup Note**: If you reinstall or move the project,
                    you'll need to re-enter your keys.
                    """)

                    gr.Markdown("---")
                    gr.Markdown("### 🔍 Current Keys Status")
                    keys_display = gr.Markdown("No keys loaded. Click 'Load Saved Keys' to check.")

                    gr.Markdown("---")
                    gr.Markdown("""
                    ### 💡 Tips

                    **HuggingFace Token:**
                    1. Go to https://huggingface.co/settings/tokens
                    2. Create a new token (read access is enough)
                    3. Copy and paste it above
                    4. Click "Save All Keys"

                    **Other Keys:**
                    Only add if you plan to use those services.
                    Training works with just the HuggingFace token.
                    """)
""") # Wire up settings save_keys_btn.click( fn=save_api_keys, inputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input], outputs=keys_status ) load_keys_btn.click( fn=load_api_keys, outputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input, keys_display] ) clear_keys_btn.click( fn=clear_api_keys, outputs=[keys_status, hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input] ) # Load keys on startup demo.load( fn=load_api_keys, outputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input, keys_display] ) # Tab 1: Data Management with gr.Tab("πŸ“š Training Data"): gr.Markdown(""" ### Manage Your Training Data **Your training data is automatically used when you click "Start Training" in the Training tab.** """) with gr.Row(): with gr.Column(): gr.Markdown("#### Current Training Data") data_preview = gr.Markdown() data_count = gr.Number(label="Total Q&A Pairs", interactive=False) gr.Markdown("---") # Training readiness status training_ready_status = gr.Markdown("Click 'Refresh Data' to check training readiness") with gr.Row(): load_data_btn = gr.Button("πŸ”„ Refresh Data", variant="secondary") validate_data_btn = gr.Button("πŸ” Validate Quality", variant="secondary") remove_dupes_btn = gr.Button("🧹 Remove Duplicates", variant="secondary") gr.Markdown("---") gr.Markdown("#### Quality Report") validation_report = gr.Textbox( label="Data Quality Analysis", lines=10, max_lines=15, interactive=False, placeholder="Click 'Validate Quality' to analyze your training data...", show_copy_button=True ) gr.Markdown("---") gr.Markdown("#### Deduplication Status") dedup_status = gr.Markdown("Click 'Remove Duplicates' to clean your data") gr.Markdown("---") gr.Markdown("#### View Full Dataset") data_json = gr.Code(label="Full Dataset (JSON)", language="json", lines=15) with gr.Column(): gr.Markdown("#### Add New Training Example") new_question = gr.Textbox( label="Question", placeholder="What is the difference between a Roth IRA and Traditional IRA?", lines=3 ) new_context = gr.Textbox( label="Context (optional)", placeholder="Additional context for the question...", lines=2 ) new_answer = gr.Textbox( label="Expert Answer", placeholder="Provide a detailed, accurate financial answer...", lines=8 ) add_example_btn = gr.Button("βž• Add Example", variant="primary") add_status = gr.Markdown() # Wire up data management load_data_btn.click( fn=load_training_data, outputs=[data_preview, data_count, data_json, training_ready_status] ) validate_data_btn.click( fn=validate_training_data, outputs=validation_report ).then( fn=load_training_data, outputs=[data_preview, data_count, data_json, training_ready_status] ) remove_dupes_btn.click( fn=remove_duplicates_from_data, outputs=[dedup_status, data_count, data_json, training_ready_status] ).then( fn=load_training_data, outputs=[data_preview, data_count, data_json, training_ready_status] ) add_example_btn.click( fn=add_training_example, inputs=[new_question, new_answer, new_context], outputs=[add_status, data_count] ).then( fn=load_training_data, outputs=[data_preview, data_count, data_json, training_ready_status] ) # Load data on startup demo.load( fn=load_training_data, outputs=[data_preview, data_count, data_json, training_ready_status] ) # Tab 2: Synthetic Data Generation with gr.Tab("πŸ€– Synthetic Data"): gr.Markdown(""" # Generate Training 
        # Tab 2: Synthetic Data Generation
        with gr.Tab("🤖 Synthetic Data"):
            gr.Markdown("""
            # Generate Training Data Automatically

            Use GPT-4 or Claude to automatically generate high-quality financial Q&A pairs!
            **No manual writing required - just configure and generate.**
            """)

            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 🔧 Generation Settings")

                    syn_provider = gr.Radio(
                        choices=["openai", "anthropic"],
                        value="openai",
                        label="API Provider",
                        info="Choose which LLM to use for generation"
                    )

                    syn_num_examples = gr.Slider(
                        minimum=5,
                        maximum=100,
                        value=20,
                        step=5,
                        label="Number of Examples to Generate",
                        info="Start with 20, then increase"
                    )

                    syn_difficulty = gr.Radio(
                        choices=["beginner", "intermediate", "advanced", "mixed"],
                        value="mixed",
                        label="Difficulty Level",
                        info="Mixed creates diverse questions"
                    )

                    syn_use_scenarios = gr.Checkbox(
                        label="Use Realistic Scenarios",
                        value=False,
                        info="Generate questions with specific user contexts (age, income, etc.)"
                    )

                    gr.Markdown("---")
                    gr.Markdown("#### 📋 Available Topics")
                    syn_topics_display = gr.Textbox(
                        label="Default Topics (leave Custom Topics empty to use these)",
                        value=get_available_topics(),
                        lines=8,
                        max_lines=12,
                        interactive=False
                    )

                    syn_custom_topics = gr.Textbox(
                        label="Custom Topics (Optional)",
                        placeholder="401k Planning, Crypto Trading, Home Buying (comma-separated)",
                        lines=3,
                        info="Leave empty to use all default topics"
                    )

                    gr.Markdown("---")
                    syn_generate_btn = gr.Button("🚀 Generate Data", variant="primary", size="lg")
                    syn_status = gr.Markdown()

                with gr.Column():
                    gr.Markdown("#### 📊 Generation Log")
                    syn_log = gr.Textbox(
                        label="Progress & Preview",
                        lines=20,
                        max_lines=25,
                        interactive=False,
                        show_copy_button=True
                    )

                    gr.Markdown("---")
                    gr.Markdown("#### 💡 Tips & Info")
                    syn_tips = gr.Textbox(
                        label="Important Information",
                        value="""💡 Tips:
• Make sure you've added your OpenAI or Anthropic API key in Settings tab
• Generated data is automatically added to your training dataset
• Each example costs ~$0.01-0.02 in API credits
• Quality is very high - often better than manual examples
• Start with 20 examples to test, then generate more
• Mix difficulties for best results
• Scenario mode creates more realistic, personalized questions

⚠️ API Key Required:
This feature requires an OpenAI or Anthropic API key.
Set it in the Settings tab before generating.""",
                        lines=8,
                        max_lines=12,
                        interactive=False
                    )

            # Wire up synthetic data generation
            syn_generate_btn.click(
                fn=generate_synthetic_data,
                inputs=[syn_provider, syn_num_examples, syn_difficulty, syn_use_scenarios, syn_custom_topics],
                outputs=[syn_log, syn_status]
            )

        # Tab 3: HuggingFace Datasets
        with gr.Tab("📦 HuggingFace Datasets"):
            gr.Markdown("""
            # Load Datasets from HuggingFace

            Import high-quality financial datasets directly from HuggingFace!
            **Includes transaction categorization, financial Q&A, and more.**
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 📋 Known Datasets")
                    hf_datasets_list = gr.Textbox(
                        label="Available Financial Datasets",
                        value=list_hf_datasets(),
                        lines=10,
                        max_lines=15,
                        interactive=False
                    )

                    gr.Markdown("---")
                    gr.Markdown("#### 🔧 Load Settings")

                    hf_dataset_name = gr.Radio(
                        choices=["financial-alpaca", "fingpt-finred", "finance-qa-10k", "Custom Path"],
                        value="financial-alpaca",
                        label="Select Dataset",
                        info="Choose from publicly accessible datasets or use custom path"
                    )

                    hf_custom_path = gr.Textbox(
                        label="Custom Dataset Path (if 'Custom Path' selected)",
                        placeholder="username/dataset-name",
                        info="Full HuggingFace dataset path"
                    )

                    hf_split = gr.Radio(
                        choices=["train", "test", "validation"],
                        value="train",
                        label="Dataset Split",
                        info="Which split to load"
                    )

                    hf_max_examples = gr.Number(
                        label="Max Examples to Load (optional - leave empty for all)",
                        value=None,
                        info="Limit number of examples (helps with large datasets)"
                    )

                    gr.Markdown("---")
                    with gr.Row():
                        hf_preview_btn = gr.Button("👁️ Preview Dataset", variant="secondary")
                        hf_load_btn = gr.Button("📥 Load Dataset", variant="primary", size="lg")

                    hf_status = gr.Markdown()

                with gr.Column():
                    gr.Markdown("#### 📊 Dataset Info & Logs")
                    hf_preview = gr.Textbox(
                        label="Dataset Preview",
                        lines=10,
                        max_lines=15,
                        interactive=False,
                        placeholder="Click 'Preview Dataset' to see sample data...",
                        show_copy_button=True
                    )

                    gr.Markdown("---")
                    hf_log = gr.Textbox(
                        label="Loading Log",
                        lines=12,
                        max_lines=18,
                        interactive=False,
                        placeholder="Loading progress will appear here...",
                        show_copy_button=True
                    )

                    gr.Markdown("---")
                    gr.Markdown("#### 💡 Tips & Info")
                    hf_tips = gr.Textbox(
                        label="Important Information",
                        value="""💡 Available Datasets:

• financial-alpaca (52K examples)
  Pre-built financial Q&A in Alpaca format - publicly accessible

• fingpt-finred
  Financial relation extraction dataset - publicly accessible

• finance-qa-10k
  Q&A from 10-K SEC filings - publicly accessible

💡 Tips:
• Preview datasets before loading to understand structure
• Large datasets can be limited using Max Examples
• All data is automatically quality-validated before adding
• These datasets are PUBLIC and don't require special access

🔑 Authentication:
Your HuggingFace token is used automatically from Settings tab.
Some private/gated datasets may require accepting terms on HuggingFace.

📚 Finding More Datasets:
Browse: https://huggingface.co/datasets
Search: "finance", "financial", "investment", "trading\"""",
                        lines=12,
                        max_lines=18,
                        interactive=False
                    )

            # Wire up HuggingFace dataset loading
            hf_preview_btn.click(
                fn=preview_hf_dataset,
                inputs=hf_custom_path,
                outputs=hf_preview
            )

            hf_load_btn.click(
                fn=load_hf_dataset,
                inputs=[hf_custom_path, hf_dataset_name, hf_max_examples, hf_split],
                outputs=[hf_log, hf_status]
            )

        # Tab 4: Training Configuration
        with gr.Tab("⚙️ Training"):
            gr.Markdown("### Select Model and Configure Training")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 💻 Training Mode")
                    training_mode = gr.Radio(
                        choices=["Local GPU", "Cloud GPU (RunPod)"],
                        value="Cloud GPU (RunPod)",
                        label="Where to Train",
                        info="Local requires NVIDIA GPU. Cloud uses RunPod (pay per minute)."
                    )
                    cloud_cost_estimate = gr.Markdown("**Estimated Cost:** Select model to see pricing")

                    gr.Markdown("---")
                    gr.Markdown("#### 🤖 Model Selection")
                    model_choices = model_registry.get_model_choices_for_gui()
                    model_selector = gr.Dropdown(
                        choices=model_choices,
                        value=model_choices[0][1] if model_choices else None,  # Default to first model ID
                        label="Select Model",
                        info="Choose which model to train"
                    )

                    model_info_display = gr.Markdown()

                    with gr.Row():
                        check_ollama_btn = gr.Button("🔍 Check Ollama Status", variant="secondary", size="sm")

                    ollama_status_display = gr.Markdown()

                    gr.Markdown("---")
                    gr.Markdown("#### Training Configuration")

                    lora_rank = gr.Slider(
                        minimum=4,
                        maximum=64,
                        value=16,
                        step=4,
                        label="LoRA Rank (Higher = More capacity, more memory)",
                        info="Recommended: 16 for 30B model"
                    )

                    learning_rate = gr.Slider(
                        minimum=1e-5,
                        maximum=5e-4,
                        value=1e-4,
                        step=1e-5,
                        label="Learning Rate",
                        info="Recommended: 1e-4 for large models"
                    )

                    num_epochs = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=3,
                        step=1,
                        label="Number of Epochs",
                        info="Start with 1 epoch to test"
                    )

                    batch_size = gr.Slider(
                        minimum=1,
                        maximum=4,
                        value=1,
                        step=1,
                        label="Batch Size",
                        info="Keep at 1 for 30B model"
                    )

                    grad_accum = gr.Slider(
                        minimum=1,
                        maximum=32,
                        value=16,
                        step=1,
                        label="Gradient Accumulation Steps",
                        info="Effective batch = batch_size × grad_accum"
                    )

                    gr.Markdown("---")
                    start_train_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
                    training_status_text = gr.Markdown()

                with gr.Column():
                    gr.Markdown("#### Training Progress & Logs")
                    training_log = gr.Textbox(
                        label="Training Log",
                        lines=20,
                        max_lines=25,
                        interactive=False,
                        show_copy_button=True
                    )

                    gr.Markdown("""
                    **💡 Tips:**
                    - First training will download ~16GB model
                    - Monitor with TensorBoard: `tensorboard --logdir models/financial_advisor/logs`
                    - Training 30B model takes 30-60 min per epoch
                    - GPU needs ~18GB VRAM minimum
                    """)

            # Wire up model selection
            model_selector.change(
                fn=get_model_info,
                inputs=model_selector,
                outputs=model_info_display
            )

            check_ollama_btn.click(
                fn=check_ollama_status,
                outputs=ollama_status_display
            )

            # Load default model info on startup
            demo.load(
                fn=get_model_info,
                inputs=model_selector,
                outputs=model_info_display
            )

            # Wire up training
            start_train_btn.click(
                fn=start_training,
                inputs=[lora_rank, learning_rate, num_epochs, batch_size, grad_accum, training_mode],
                outputs=[training_log, training_status_text]
            )

        # Tab 5: Testing
        with gr.Tab("🧪 Test Model"):
            gr.Markdown("### Test Your Trained Financial Advisor")

            with gr.Row():
                with gr.Column():
                    load_model_btn = gr.Button("📥 Load Trained Model", variant="secondary")
                    load_status = gr.Markdown()

                    gr.Markdown("---")

                    test_question = gr.Textbox(
                        label="Ask Your Financial Advisor",
                        placeholder="Should I pay off my student loans or invest in my 401k?",
                        lines=4
                    )

                    test_btn = gr.Button("💬 Get Advice", variant="primary", size="lg")

                    gr.Markdown("#### Example Questions:")
                    gr.Markdown("""
                    - What's the difference between a Roth IRA and Traditional IRA?
                    - How much should I have in my emergency fund?
                    - Should I invest in index funds or individual stocks?
                    - What is dollar-cost averaging?
                    - How do I start investing with only $100 per month?
                    """)
""") with gr.Column(): gr.Markdown("#### Financial Advisor Response") test_response = gr.Textbox( label="Response", lines=15, max_lines=20, interactive=False, show_copy_button=True ) # Wire up testing load_model_btn.click( fn=load_existing_model, outputs=load_status ) test_btn.click( fn=test_financial_advisor, inputs=test_question, outputs=test_response ) # Tab 6: Evaluation & Comparison with gr.Tab("πŸ“Š Evaluation"): gr.Markdown("### Model Evaluation & Comparison") with gr.Row(): with gr.Column(): gr.Markdown("#### πŸ“‹ Evaluation History") history_refresh_btn = gr.Button("πŸ”„ Refresh History", variant="secondary") eval_history_display = gr.Textbox( label="Recent Evaluations", lines=15, max_lines=20, interactive=False, show_copy_button=True, placeholder="Click 'Refresh History' to see evaluation history..." ) gr.Markdown("---") gr.Markdown("#### πŸ” Latest Evaluation Details") latest_eval_btn = gr.Button("πŸ“„ View Latest Evaluation", variant="secondary") latest_eval_display = gr.Textbox( label="Latest Evaluation Report", lines=15, max_lines=20, interactive=False, show_copy_button=True, placeholder="Click to view detailed evaluation report..." ) with gr.Column(): gr.Markdown("#### πŸ“ˆ Model Comparison") num_models_compare = gr.Slider( minimum=2, maximum=10, value=3, step=1, label="Number of Models to Compare", info="Compare recent model evaluations" ) compare_btn = gr.Button("βš–οΈ Compare Models", variant="primary", size="lg") comparison_display = gr.Textbox( label="Model Comparison Report", lines=20, max_lines=25, interactive=False, show_copy_button=True, placeholder="Click 'Compare Models' to see side-by-side comparison..." ) gr.Markdown("---") gr.Markdown(""" **πŸ’‘ Tips:** - Evaluations are run automatically after training - Compare metrics across different training runs - Use comparison to find the best model - Detailed reports saved in `models/financial_advisor/evaluation_results/` """) # Wire up evaluation functions history_refresh_btn.click( fn=load_evaluation_history, outputs=eval_history_display ) latest_eval_btn.click( fn=view_latest_evaluation, outputs=latest_eval_display ) compare_btn.click( fn=compare_models, inputs=num_models_compare, outputs=comparison_display ) # Load history on startup demo.load( fn=load_evaluation_history, outputs=eval_history_display ) # Tab 7: RunPod Cloud Deployment with gr.Tab("☁️ RunPod"): gr.Markdown(""" # Cloud GPU Deployment with RunPod Deploy and train on powerful cloud GPUs without any manual setup! **One-click deployment to RunPod cloud GPUs - fully automated.** ## 🎯 What This Does This tab lets you: 1. **Create cloud GPU pods** - Get a powerful GPU in the cloud 2. **Auto-deploy your code** - Your AURA app runs on the cloud GPU 3. **Train remotely** - Use the same GUI, but on cloud hardware ## ⚑ Quick Start **Before you start**, make sure you have: - βœ… Added RunPod API key in **βš™οΈ Settings** tab - βœ… Prepared training data in **πŸ“š Training Data** tab Then: 1. Configure pod settings below (defaults are good) 2. Click "πŸš€ Create & Deploy Pod" 3. Wait ~5 minutes for setup 4. Access the cloud GUI via the provided URL 5. Train using the cloud GPU! 
""") with gr.Row(): with gr.Column(): gr.Markdown("#### πŸ”§ Pod Configuration") runpod_pod_name = gr.Textbox( label="Pod Name", value="aura-training-pod", placeholder="my-training-pod", info="Name for your RunPod instance" ) runpod_gpu_type = gr.Dropdown( choices=[ "NVIDIA GeForce RTX 4090", "NVIDIA GeForce RTX 3090", "NVIDIA A100 40GB PCIe", "NVIDIA A100 80GB PCIe", "NVIDIA H100 80GB HBM3", ], value="NVIDIA GeForce RTX 4090", label="GPU Type", info="RTX 4090 recommended for best value" ) runpod_storage = gr.Slider( minimum=20, maximum=200, value=50, step=10, label="Storage (GB)", info="Disk space for models and data" ) runpod_sync_data = gr.Checkbox( label="Sync Training Data", value=True, info="Upload your local training data to the pod" ) gr.Markdown("---") with gr.Row(): runpod_create_btn = gr.Button("πŸš€ Create & Deploy Pod", variant="primary", size="lg") runpod_refresh_btn = gr.Button("πŸ”„ Refresh Pods", variant="secondary") runpod_status = gr.Markdown() gr.Markdown("---") gr.Markdown("#### πŸ“‹ Your Pods") runpod_pods_list = gr.Textbox( label="Active Pods", lines=10, max_lines=15, interactive=False, placeholder="Click 'Refresh Pods' to see your RunPod instances...", show_copy_button=True ) gr.Markdown("---") gr.Markdown("#### πŸ”§ Pod Management") runpod_pod_id = gr.Textbox( label="Pod ID", placeholder="Enter Pod ID for management operations", info="Get from 'Your Pods' list above" ) with gr.Row(): runpod_stop_btn = gr.Button("⏸️ Stop Pod", variant="secondary", size="sm") runpod_terminate_btn = gr.Button("πŸ—‘οΈ Terminate Pod", variant="stop", size="sm") runpod_mgmt_status = gr.Markdown() with gr.Column(): gr.Markdown("#### πŸ“Š Deployment Log") runpod_log = gr.Textbox( label="Progress & Status", lines=15, max_lines=20, interactive=False, show_copy_button=True, placeholder="Deployment progress will appear here..." ) gr.Markdown("---") gr.Markdown("#### πŸ”— Connection Info") runpod_connection_info = gr.Markdown("Select a pod and click 'Get Connection Info'") runpod_connect_btn = gr.Button("πŸ”— Get Connection Info", variant="secondary") gr.Markdown("---") gr.Markdown(""" **πŸ’° Cost Estimates:** - RTX 4090: ~$0.69/hour (Best Value) - RTX 3090: ~$0.44/hour (Budget) - A100 40GB: ~$1.39/hour (Production) **πŸ“ What Gets Deployed:** - βœ… All your code files - βœ… Your training data (if "Sync Data" checked) - βœ… Python dependencies (auto-installed) - βœ… Same GUI you're using now **After Deployment:** - Access GUI in browser (URL provided below) - Train on the cloud GPU (same interface) - Stop pod when done to save money - Your trained models stay on the pod **πŸ”‘ Get RunPod API Key:** 1. Go to https://www.runpod.io/console/user/settings 2. Click "API Keys" β†’ "Create API Key" 3. Copy the key 4. Add to **βš™οΈ Settings** tab above 5. Come back here and create a pod! """) # Wire up RunPod functions runpod_create_btn.click( fn=create_runpod_pod, inputs=[runpod_pod_name, runpod_gpu_type, runpod_storage, runpod_sync_data], outputs=[runpod_log, runpod_status] ) runpod_refresh_btn.click( fn=list_runpod_pods, outputs=runpod_pods_list ) runpod_stop_btn.click( fn=stop_runpod_pod, inputs=runpod_pod_id, outputs=runpod_mgmt_status ) runpod_terminate_btn.click( fn=terminate_runpod_pod, inputs=runpod_pod_id, outputs=runpod_mgmt_status ) runpod_connect_btn.click( fn=get_pod_connection_info, inputs=runpod_pod_id, outputs=runpod_connection_info ) # Tab 4: Help with gr.Tab("❓ Help"): gr.Markdown(""" # Getting Started Guide ## Step 1: Prepare Training Data 1. Go to **Training Data** tab 2. 
    # Tab 8: Help
    with gr.Tab("❓ Help"):
        gr.Markdown("""
        # Getting Started Guide

        ## Step 1: Prepare Training Data
        1. Go to the **Training Data** tab
        2. Review the sample financial Q&A pairs
        3. Add your own examples using the form
        4. Aim for at least 50-100 high-quality examples

        ## Step 2: Configure Training
        1. Go to the **Training** tab
        2. Adjust settings (the defaults are good to start):
           - LoRA Rank: 16 (higher = more capacity)
           - Learning Rate: 1e-4 (standard for large models)
           - Epochs: 1-3 (start with 1 to test)
        3. Click **Start Training**
        4. Wait 30-60 minutes per epoch

        ## Step 3: Test Your Model
        1. Go to the **Test Model** tab
        2. Click **Load Trained Model**
        3. Ask financial questions
        4. Get expert advice!

        ## Requirements
        - **GPU**: NVIDIA GPU with 18GB+ VRAM (RTX 3090, 4090, A100, etc.)
        - **RAM**: 32GB+ system RAM recommended
        - **Storage**: 30GB of free space for the model
        - **Internet**: Fast connection for the first-time model download
        - **HuggingFace Token**: Required for model access (set it in the Settings tab)

        ## Troubleshooting

        ### Out of Memory (OOM)
        - Reduce batch_size to 1
        - Increase gradient_accumulation_steps
        - Close other applications

        ### Slow Training
        - This is normal for a 30B model
        - Each epoch takes 30-60 minutes
        - Be patient!

        ### Import Errors
        ```bash
        pip install torch transformers peft accelerate bitsandbytes gradio cryptography
        ```

        ### HuggingFace Authentication Error
        - Go to the **Settings** tab
        - Add your HuggingFace token
        - Click **Save All Keys**
        - Get a token at: https://huggingface.co/settings/tokens

        ### Model Not Learning
        - Check the learning rate (try 1e-4 to 5e-4)
        - Verify training data quality
        - Train for more epochs

        ## Advanced Tips
        1. **More Data = Better Results**: Aim for 500+ examples
        2. **Diverse Topics**: Cover various financial areas
        3. **Quality > Quantity**: Accurate answers matter
        4. **Monitor TensorBoard**: Track loss curves
        5. **Test Regularly**: Evaluate after each epoch

        ## Support
        - Check the logs in the Training tab
        - Review error messages carefully
        - Verify your GPU has enough memory
        - Ensure all dependencies are installed
        """)

    gr.Markdown("""
    ---
    💡 **Pro Tip**: Start with the sample data and 1 epoch to verify everything works, then add more data and train for 3+ epochs.
    """)

if __name__ == "__main__":
    import argparse  # os is already imported at module level

    # Parse command-line arguments for cloud deployment compatibility
    parser = argparse.ArgumentParser(description="Launch AURA AI Training Studio")
    parser.add_argument("--server_name", type=str, default="0.0.0.0", help="Server host")
    parser.add_argument("--server_port", type=int, default=int(os.getenv("PORT", 7860)), help="Server port")
    parser.add_argument("--share", action="store_true", help="Create public share link")
    args = parser.parse_args()

    print("=" * 70)
    print("Financial Advisor AI Training Studio")
    print("=" * 70)
    print("\nStarting GUI server...")
    print(f"Server: http://{args.server_name}:{args.server_port}")
    if args.share:
        print("Share link will be generated...")
    print("\nPress Ctrl+C to stop the server")
    print("=" * 70)

    demo.launch(
        server_name=args.server_name,
        server_port=args.server_port,
        share=args.share,
        show_error=True
    )
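# Example invocations (using the argparse flags defined above; the file name
# "gui_app.py" is illustrative - substitute this script's actual name):
#   python gui_app.py                      # serve on 0.0.0.0:7860 (or $PORT)
#   python gui_app.py --server_port 8080   # custom port
#   python gui_app.py --share              # also create a public Gradio link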