| """ | |
| Financial Advisor Training GUI | |
| A simple, user-friendly interface for training and testing your financial advisor AI. | |
| No coding required - just click buttons and see results! | |
| """ | |
| import gradio as gr | |
| import json | |
| import threading | |
| from pathlib import Path | |
| from datetime import datetime | |
| import time | |
| import os | |
| # Import training components | |
| from fine_tuning import LoRATrainer, LoRAConfig | |
| from data_aggregation import DatasetBuilder, JSONDataCollector | |
| from data_aggregation.synthetic_generator import SyntheticDataGenerator | |
| from data_aggregation.quality_validator import QualityValidator | |
| from data_aggregation.hf_dataset_loader import HuggingFaceDatasetLoader | |
| from secure_config import SecureConfig | |
| from model_registry import get_registry | |
| from ollama_integration import OllamaClient, test_financial_advisor_ollama | |
| from runpod_manager import RunPodManager, DeploymentConfig, TrainingConfig | |
| from runpod_client import PodInfo | |
| # Global variables to track training state | |
| training_status = { | |
| "is_training": False, | |
| "current_epoch": 0, | |
| "total_epochs": 0, | |
| "loss": 0.0, | |
| "progress": 0.0, | |
| "logs": [] | |
| } | |
| trainer_instance = None | |
| secure_config = SecureConfig() | |
| model_registry = get_registry() | |
| selected_model_id = "qwen2.5-32b" # Default model | |
| # RunPod state | |
| runpod_manager = None | |
| current_pod_info = None | |
| deployment_in_progress = False | |
| # API Key Management Functions | |
| def save_api_keys(hf_token, openai_key, anthropic_key, wandb_key, runpod_key, custom_keys_json): | |
| """Save all API keys securely""" | |
| try: | |
| api_keys = { | |
| "HUGGINGFACE_TOKEN": hf_token or "", | |
| "HF_TOKEN": hf_token or "", # Alternative name | |
| "OPENAI_API_KEY": openai_key or "", | |
| "ANTHROPIC_API_KEY": anthropic_key or "", | |
| "WANDB_API_KEY": wandb_key or "", | |
| "RUNPOD_API_KEY": runpod_key or "", | |
| } | |
| # Parse custom keys if provided | |
| if custom_keys_json and custom_keys_json.strip(): | |
| try: | |
| custom_keys = json.loads(custom_keys_json) | |
| api_keys.update(custom_keys) | |
| except json.JSONDecodeError: | |
| # Save the standard keys anyway so a bad custom-keys blob does not lose them | |
| secure_config.save_keys(api_keys) | |
| return "⚠️ Invalid JSON in custom keys (skipped). Other keys saved successfully." | |
| # Save securely | |
| secure_config.save_keys(api_keys) | |
| # Count non-empty keys | |
| saved_count = sum(1 for v in api_keys.values() if v and v.strip()) | |
| return f"β Successfully saved {saved_count} API keys securely!\n\nKeys are encrypted and stored in .secrets/ directory." | |
| except Exception as e: | |
| return f"β Error saving keys: {e}" | |
| def load_api_keys(): | |
| """Load API keys and return masked versions for display""" | |
| try: | |
| keys = secure_config.load_keys() | |
| hf_masked = secure_config.get_masked_key("HUGGINGFACE_TOKEN") or "" | |
| openai_masked = secure_config.get_masked_key("OPENAI_API_KEY") or "" | |
| anthropic_masked = secure_config.get_masked_key("ANTHROPIC_API_KEY") or "" | |
| wandb_masked = secure_config.get_masked_key("WANDB_API_KEY") or "" | |
| runpod_masked = secure_config.get_masked_key("RUNPOD_API_KEY") or "" | |
| # Show custom keys | |
| standard_keys = {"HUGGINGFACE_TOKEN", "HF_TOKEN", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "WANDB_API_KEY", "RUNPOD_API_KEY"} | |
| custom_keys = {k: secure_config.get_masked_key(k) for k in keys.keys() if k not in standard_keys} | |
| custom_json = json.dumps(custom_keys, indent=2) if custom_keys else "" | |
| status = f"π **Loaded {len(keys)} API keys**\n\n" | |
| if keys: | |
| status += "Keys are encrypted and loaded into environment.\n" | |
| status += "Masked keys shown for security." | |
| else: | |
| status += "β οΈ No API keys found. Please add your keys below." | |
| return hf_masked, openai_masked, anthropic_masked, wandb_masked, runpod_masked, custom_json, status | |
| except Exception as e: | |
| return "", "", "", "", "", "", f"β Error loading keys: {e}" | |
| def clear_api_keys(): | |
| """Clear all stored API keys""" | |
| try: | |
| secure_config.delete_keys() | |
| return "β All API keys cleared successfully!", "", "", "", "", "", "" | |
| except Exception as e: | |
| return f"β Error clearing keys: {e}", "", "", "", "", "", "" | |
| def check_required_keys(): | |
| """Check if required API keys are set""" | |
| hf_key = secure_config.get_key("HUGGINGFACE_TOKEN") | |
| if not hf_key: | |
| return False, "β οΈ HuggingFace token required! Set it in the Settings tab." | |
| return True, "β Required API keys are set" | |
| # Synthetic Data Generation Functions | |
| def generate_synthetic_data(api_provider, num_examples, difficulty, use_scenarios, selected_topics): | |
| """Generate synthetic training data""" | |
| try: | |
| # Check if API key exists | |
| if api_provider == "openai": | |
| api_key = secure_config.get_key("OPENAI_API_KEY") | |
| if not api_key: | |
| return "β OpenAI API key not found! Add it in the Settings tab.", "" | |
| elif api_provider == "anthropic": | |
| api_key = secure_config.get_key("ANTHROPIC_API_KEY") | |
| if not api_key: | |
| return "β Anthropic API key not found! Add it in the Settings tab.", "" | |
| else: | |
| return "β Invalid API provider", "" | |
| log = f"π€ **SYNTHETIC DATA GENERATION**\n\n" | |
| log += f"Provider: {api_provider.upper()}\n" | |
| log += f"Examples: {num_examples}\n" | |
| log += f"Difficulty: {difficulty}\n" | |
| log += f"Scenarios: {use_scenarios}\n\n" | |
| # Initialize generator | |
| generator = SyntheticDataGenerator(api_provider=api_provider) | |
| # Parse selected topics | |
| topics_list = None | |
| if selected_topics and selected_topics.strip(): | |
| topics_list = [t.strip() for t in selected_topics.split(",")] | |
| log += f"Custom topics: {topics_list}\n\n" | |
| log += "π Starting generation...\n\n" | |
| # Generate data | |
| if use_scenarios: | |
| generated_data = generator.generate_with_scenarios(num_examples=num_examples) | |
| else: | |
| generated_data = generator.generate_examples( | |
| num_examples=num_examples, | |
| topics=topics_list, | |
| difficulty=difficulty | |
| ) | |
| if not generated_data: | |
| return log + "\nβ No data generated. Check API keys and try again.", "" | |
| log += f"\nβ Generated {len(generated_data)} raw examples!\n\n" | |
| # Quality validation | |
| log += "π **QUALITY VALIDATION**\n\n" | |
| validator = QualityValidator() | |
| validation_results = validator.validate_batch(generated_data) | |
| log += f"Valid: {validation_results['valid']}/{validation_results['total']} " | |
| log += f"({validation_results['valid']/validation_results['total']*100:.1f}%)\n" | |
| if validation_results['invalid'] > 0: | |
| log += f"β οΈ Filtered out {validation_results['invalid']} low-quality examples\n\n" | |
| log += "**Common Issues:**\n" | |
| from collections import Counter | |
| issue_counter = Counter(validation_results['issues']) | |
| for issue, count in issue_counter.most_common(3): | |
| log += f" - {issue}: {count}x\n" | |
| log += "\n" | |
| else: | |
| log += "β All examples passed quality checks!\n\n" | |
| # Check for duplicates | |
| if validation_results.get('duplicates'): | |
| log += f"β οΈ Found {len(validation_results['duplicates'])} duplicate questions (removed)\n\n" | |
| # Use only valid examples | |
| valid_data = validation_results['valid_examples'] | |
| if not valid_data: | |
| return log + "\nβ No valid data after quality filtering. Try again with different settings.", "" | |
| log += f"π **Final Count:** {len(valid_data)} high-quality examples\n\n" | |
| # Calculate average quality score | |
| avg_score = sum(validator.get_quality_score(ex) for ex in valid_data) / len(valid_data) | |
| log += f"β **Average Quality Score:** {avg_score:.1f}/100\n\n" | |
| # Load existing data | |
| data_path = "data/sample_financial_advisor_data.json" | |
| if Path(data_path).exists(): | |
| with open(data_path, 'r', encoding='utf-8') as f: | |
| existing_data = json.load(f) | |
| else: | |
| existing_data = [] | |
| # Combine and save | |
| combined_data = existing_data + valid_data | |
| Path(data_path).parent.mkdir(parents=True, exist_ok=True) | |
| with open(data_path, 'w', encoding='utf-8') as f: | |
| json.dump(combined_data, f, indent=2, ensure_ascii=False) | |
| log += f"πΎ Saved to training data!\n" | |
| log += f"Total training examples: {len(combined_data)}\n\n" | |
| # Show preview | |
| log += "**Sample Generated Q&A:**\n\n" | |
| for i, example in enumerate(valid_data[:3], 1): | |
| quality_score = validator.get_quality_score(example) | |
| log += f"{i}. [Quality: {quality_score:.0f}/100]\n" | |
| log += f" Q: {example['instruction']}\n" | |
| log += f" A: {example['output'][:150]}...\n\n" | |
| return log, f"β Generated {len(valid_data)} high-quality examples! Total: {len(combined_data)}" | |
| except Exception as e: | |
| import traceback | |
| error_details = traceback.format_exc() | |
| return f"β Error generating data: {e}\n\n{error_details}", f"β Error: {e}" | |
| def get_available_topics(): | |
| """Get list of available financial topics""" | |
| topics = [ | |
| "Retirement Planning", | |
| "Investment Strategies", | |
| "Tax Planning", | |
| "Debt Management", | |
| "Emergency Funds", | |
| "Budgeting", | |
| "Insurance", | |
| "Estate Planning", | |
| "College Savings", | |
| "Real Estate", | |
| "Stock Market", | |
| "Bonds and Fixed Income", | |
| "Mutual Funds and ETFs", | |
| "Cryptocurrency", | |
| "Financial Independence", | |
| "Side Hustles", | |
| "Credit Scores", | |
| "Mortgages", | |
| "Small Business Finance", | |
| "Risk Management" | |
| ] | |
| return "\n".join(f"β’ {topic}" for topic in topics) | |
| # HuggingFace Dataset Loading Functions | |
| def list_hf_datasets(): | |
| """List available HuggingFace datasets""" | |
| loader = HuggingFaceDatasetLoader() | |
| datasets = loader.list_available_datasets() | |
| output = "π¦ **AVAILABLE HUGGINGFACE DATASETS**\n\n" | |
| for ds in datasets: | |
| output += f"**{ds['name']}**\n" | |
| output += f" Path: {ds['path']}\n" | |
| output += f" Type: {ds['type']}\n" | |
| output += f" Description: {ds['description']}\n\n" | |
| return output | |
| def preview_hf_dataset(dataset_path): | |
| """Preview a HuggingFace dataset""" | |
| if not dataset_path or not dataset_path.strip(): | |
| return "β οΈ Please enter a dataset path (e.g., mitulshah/transaction-categorization)" | |
| try: | |
| loader = HuggingFaceDatasetLoader() | |
| preview = loader.preview_dataset(dataset_path, num_examples=3) | |
| return preview | |
| except Exception as e: | |
| return f"β Error previewing dataset: {e}\n\nMake sure:\n- Dataset path is correct\n- You're logged in to HuggingFace (run: huggingface-cli login)\n- Dataset is publicly accessible" | |
| def load_hf_dataset(dataset_path, dataset_name, max_examples, split): | |
| """Load a HuggingFace dataset and add to training data""" | |
| try: | |
| log = "π₯ **LOADING HUGGINGFACE DATASET**\n\n" | |
| # Check if using known dataset name or custom path | |
| if dataset_name and dataset_name != "Custom Path": | |
| log += f"Loading known dataset: {dataset_name}\n\n" | |
| loader = HuggingFaceDatasetLoader() | |
| dataset_data = loader.load_dataset_by_name( | |
| dataset_name, | |
| split=split, | |
| max_examples=int(max_examples) if max_examples else None | |
| ) | |
| elif dataset_path and dataset_path.strip(): | |
| log += f"Loading custom dataset: {dataset_path}\n\n" | |
| loader = HuggingFaceDatasetLoader() | |
| dataset_data = loader.load_dataset_by_path( | |
| dataset_path, | |
| dataset_type="auto", | |
| split=split, | |
| max_examples=int(max_examples) if max_examples else None | |
| ) | |
| else: | |
| return "β Please select a dataset or enter a custom path", "" | |
| if not dataset_data: | |
| return log + "\nβ No data loaded. Check dataset path and try again.", "" | |
| log += f"β Loaded {len(dataset_data)} examples from HuggingFace\n\n" | |
| # Quality validation | |
| log += "π **QUALITY VALIDATION**\n\n" | |
| validator = QualityValidator() | |
| validation_results = validator.validate_batch(dataset_data) | |
| log += f"Valid: {validation_results['valid']}/{validation_results['total']} " | |
| log += f"({validation_results['valid']/validation_results['total']*100:.1f}%)\n" | |
| if validation_results['invalid'] > 0: | |
| log += f"β οΈ Filtered out {validation_results['invalid']} low-quality examples\n" | |
| # Use only valid examples | |
| valid_data = validation_results['valid_examples'] | |
| if not valid_data: | |
| return log + "\nβ No valid data after quality filtering.", "" | |
| log += f"\nπ **Final Count:** {len(valid_data)} high-quality examples\n\n" | |
| # Calculate average quality score | |
| avg_score = sum(validator.get_quality_score(ex) for ex in valid_data) / len(valid_data) | |
| log += f"β **Average Quality Score:** {avg_score:.1f}/100\n\n" | |
| # Load existing data | |
| data_path = "data/sample_financial_advisor_data.json" | |
| if Path(data_path).exists(): | |
| with open(data_path, 'r', encoding='utf-8') as f: | |
| existing_data = json.load(f) | |
| else: | |
| existing_data = [] | |
| # Combine and save | |
| combined_data = existing_data + valid_data | |
| Path(data_path).parent.mkdir(parents=True, exist_ok=True) | |
| with open(data_path, 'w', encoding='utf-8') as f: | |
| json.dump(combined_data, f, indent=2, ensure_ascii=False) | |
| log += f"πΎ Added to training data!\n" | |
| log += f"Total training examples: {len(combined_data)}\n\n" | |
| # Show preview | |
| log += "**Sample Loaded Q&A:**\n\n" | |
| for i, example in enumerate(valid_data[:3], 1): | |
| quality_score = validator.get_quality_score(example) | |
| log += f"{i}. [Quality: {quality_score:.0f}/100]\n" | |
| log += f" Q: {example['instruction'][:100]}...\n" | |
| log += f" A: {example['output'][:150]}...\n\n" | |
| return log, f"β Loaded {len(valid_data)} examples from HuggingFace! Total: {len(combined_data)}" | |
| except Exception as e: | |
| import traceback | |
| error_details = traceback.format_exc() | |
| return f"β Error loading dataset: {e}\n\n{error_details}", f"β Error: {e}" | |
| def load_training_data(): | |
| """Load and display current training data""" | |
| data_path = "data/sample_financial_advisor_data.json" | |
| if not Path(data_path).exists(): | |
| return "β No training data found!", 0, "", "β **Not Ready**: No training data found. Add examples above or use Synthetic Data/HuggingFace tabs." | |
| try: | |
| with open(data_path, 'r', encoding='utf-8') as f: | |
| data = json.load(f) | |
| # Format preview | |
| preview = f"π **Total Q&A Pairs:** {len(data)}\n\n" | |
| preview += "**Sample Questions:**\n" | |
| for i, item in enumerate(data[:3]): | |
| preview += f"\n{i+1}. {item['instruction']}\n" | |
| # Training readiness status | |
| if len(data) < 20: | |
| status = f"β οΈ **Warning**: Only {len(data)} examples. Recommended minimum: 50-100 for good results." | |
| elif len(data) < 50: | |
| status = f"β **Ready**: {len(data)} examples loaded. Consider adding more for better results (recommended: 100+)." | |
| else: | |
| status = f"β **Ready for Training**: {len(data)} examples loaded and ready!\n\n**Next Step**: Go to **βοΈ Training** tab and click 'Start Training'" | |
| return preview, len(data), json.dumps(data, indent=2), status | |
| except Exception as e: | |
| return f"β Error loading data: {e}", 0, "", "β Error loading training data" | |
| def add_training_example(question, answer, context=""): | |
| """Add a new training example""" | |
| data_path = "data/sample_financial_advisor_data.json" | |
| try: | |
| # Create new example | |
| new_example = { | |
| "instruction": question, | |
| "input": context, | |
| "output": answer | |
| } | |
| # Validate before adding | |
| validator = QualityValidator() | |
| is_valid, issues = validator.validate_example(new_example) | |
| if not is_valid: | |
| issues_text = "\n".join(f" - {issue}" for issue in issues) | |
| return f"β οΈ Quality issues found:\n{issues_text}\n\nExample was still added, but consider improving it.", 0 | |
| # Load existing data | |
| if Path(data_path).exists(): | |
| with open(data_path, 'r', encoding='utf-8') as f: | |
| data = json.load(f) | |
| else: | |
| data = [] | |
| data.append(new_example) | |
| # Save | |
| Path(data_path).parent.mkdir(parents=True, exist_ok=True) | |
| with open(data_path, 'w', encoding='utf-8') as f: | |
| json.dump(data, f, indent=2, ensure_ascii=False) | |
| quality_score = validator.get_quality_score(new_example) | |
| return f"β Added! Quality Score: {quality_score:.0f}/100\nTotal examples: {len(data)}", len(data) | |
| except Exception as e: | |
| return f"β Error: {e}", 0 | |
| def validate_training_data(): | |
| """Validate all training data and return report (with auto-deduplication)""" | |
| data_path = "data/sample_financial_advisor_data.json" | |
| if not Path(data_path).exists(): | |
| return "β No training data found!" | |
| try: | |
| with open(data_path, 'r', encoding='utf-8') as f: | |
| data = json.load(f) | |
| if not data: | |
| return "❌ Training data file is empty!" | |
| original_count = len(data) | |
| validator = QualityValidator() | |
| # First, check for duplicates and auto-remove | |
| deduplicated_data, num_duplicates = validator.remove_duplicates(data) | |
| # Save deduplicated data if duplicates were found | |
| if num_duplicates > 0: | |
| # Create backup | |
| backup_path = data_path.replace('.json', '_backup.json') | |
| with open(backup_path, 'w', encoding='utf-8') as f: | |
| json.dump(data, f, indent=2, ensure_ascii=False) | |
| # Save deduplicated version | |
| with open(data_path, 'w', encoding='utf-8') as f: | |
| json.dump(deduplicated_data, f, indent=2, ensure_ascii=False) | |
| data = deduplicated_data | |
| # Now validate the deduplicated data | |
| validation_results = validator.validate_batch(data) | |
| # Generate report | |
| report = "=" * 60 + "\n" | |
| report += "QUALITY VALIDATION REPORT (WITH AUTO-DEDUPLICATION)\n" | |
| report += "=" * 60 + "\n\n" | |
| # Deduplication results | |
| if num_duplicates > 0: | |
| report += "π§Ή AUTO-DEDUPLICATION COMPLETE!\n" | |
| report += "-" * 60 + "\n" | |
| report += f"Original Examples: {original_count}\n" | |
| report += f"Duplicates Removed: {num_duplicates}\n" | |
| report += f"Unique Examples: {len(data)}\n" | |
| report += f"Backup saved to: {backup_path}\n" | |
| report += "\n" + "=" * 60 + "\n\n" | |
| else: | |
| report += "β NO DUPLICATES FOUND\n" | |
| report += "-" * 60 + "\n" | |
| report += f"All {len(data)} examples are unique!\n" | |
| report += "\n" + "=" * 60 + "\n\n" | |
| # Quality validation results | |
| report += f"Total Examples: {validation_results['total']}\n" | |
| report += f"Valid: {validation_results['valid']} ({validation_results['valid']/validation_results['total']*100:.1f}%)\n" | |
| report += f"Invalid: {validation_results['invalid']} ({validation_results['invalid']/validation_results['total']*100:.1f}%)\n\n" | |
| if validation_results['invalid'] > 0: | |
| report += "-" * 60 + "\n" | |
| report += "QUALITY ISSUES FOUND:\n" | |
| report += "-" * 60 + "\n" | |
| # Count issue types | |
| from collections import Counter | |
| issue_counter = Counter(validation_results['issues']) | |
| for issue, count in issue_counter.most_common(): | |
| report += f" - {issue}: {count} occurrences\n" | |
| report += "\n" | |
| # Add quality scores | |
| report += "\nπ QUALITY SCORE DISTRIBUTION:\n" | |
| report += "-" * 60 + "\n" | |
| scores = [validator.get_quality_score(ex) for ex in data] | |
| avg_score = sum(scores) / len(scores) | |
| min_score = min(scores) | |
| max_score = max(scores) | |
| report += f"Average Score: {avg_score:.1f}/100\n" | |
| report += f"Range: {min_score:.0f} - {max_score:.0f}\n\n" | |
| # Score distribution | |
| excellent = sum(1 for s in scores if s >= 90) | |
| good = sum(1 for s in scores if 75 <= s < 90) | |
| fair = sum(1 for s in scores if 60 <= s < 75) | |
| poor = sum(1 for s in scores if s < 60) | |
| report += f"Excellent (90+): {excellent}\n" | |
| report += f"Good (75-89): {good}\n" | |
| report += f"Fair (60-74): {fair}\n" | |
| report += f"Poor (<60): {poor}\n\n" | |
| # Final status | |
| report += "=" * 60 + "\n" | |
| if validation_results['valid'] == validation_results['total'] and num_duplicates == 0: | |
| report += "β PERFECT! All data is unique and high quality!\n" | |
| elif validation_results['valid'] == validation_results['total']: | |
| report += f"β GOOD! All unique data passed quality checks!\n" | |
| elif validation_results['valid'] / validation_results['total'] >= 0.9: | |
| report += "β GOOD QUALITY (90%+ valid)\n" | |
| elif validation_results['valid'] / validation_results['total'] >= 0.7: | |
| report += "β οΈ ACCEPTABLE QUALITY (70-90% valid)\n" | |
| else: | |
| report += "β POOR QUALITY (<70% valid)\n" | |
| report += "=" * 60 + "\n" | |
| return report | |
| except Exception as e: | |
| return f"β Error validating data: {e}" | |
| def remove_duplicates_from_data(): | |
| """Remove duplicate questions from training data""" | |
| data_path = "data/sample_financial_advisor_data.json" | |
| if not Path(data_path).exists(): | |
| return "β No training data found!", 0, "", "" | |
| try: | |
| # Load data | |
| with open(data_path, 'r', encoding='utf-8') as f: | |
| data = json.load(f) | |
| original_count = len(data) | |
| # Remove duplicates | |
| validator = QualityValidator() | |
| deduplicated_data, num_removed = validator.remove_duplicates(data) | |
| if num_removed == 0: | |
| return ( | |
| "β No duplicates found! Your data is already clean.", | |
| len(deduplicated_data), | |
| json.dumps(deduplicated_data, indent=2), | |
| f"β **Ready for Training**: {len(deduplicated_data)} unique examples!\n\n**Next Step**: Go to **βοΈ Training** tab" | |
| ) | |
| # Save deduplicated data | |
| with open(data_path, 'w', encoding='utf-8') as f: | |
| json.dump(deduplicated_data, f, indent=2, ensure_ascii=False) | |
| # Create backup of original | |
| backup_path = data_path.replace('.json', '_with_duplicates_backup.json') | |
| with open(backup_path, 'w', encoding='utf-8') as f: | |
| json.dump(data, f, indent=2, ensure_ascii=False) | |
| message = f"""β **Deduplication Complete!** | |
| **Removed:** {num_removed} duplicate questions | |
| **Kept:** {len(deduplicated_data)} unique examples | |
| **Original:** {original_count} total examples | |
| **Backup saved to:** `{backup_path}` | |
| Your training data now contains only unique questions. This will improve model quality and prevent overfitting. | |
| """ | |
| # Prepare preview | |
| preview = f"π **Total Q&A Pairs:** {len(deduplicated_data)}\n\n" | |
| preview += "**Sample Questions:**\n" | |
| for i, item in enumerate(deduplicated_data[:3]): | |
| preview += f"\n{i+1}. {item['instruction']}\n" | |
| status = f"β **Ready for Training**: {len(deduplicated_data)} unique examples loaded!\n\n**Next Step**: Go to **βοΈ Training** tab" | |
| return message, len(deduplicated_data), json.dumps(deduplicated_data, indent=2), status | |
| except Exception as e: | |
| return f"β Error removing duplicates: {str(e)}", 0, "", "" | |
| # Model Selection Functions | |
| def get_model_info(model_name): | |
| """Get detailed model information""" | |
| global selected_model_id | |
| # Get model ID from name | |
| model_id = model_registry.get_model_id_from_name(model_name) | |
| if not model_id: | |
| return "β Model not found" | |
| selected_model_id = model_id | |
| model = model_registry.get_model(model_id) | |
| info = f"# {model.name}\n\n" | |
| info += f"**Type:** {model.type.upper()}\n" | |
| info += f"**Path:** `{model.path}`\n" | |
| info += f"**Size:** {model.size}\n" | |
| info += f"**VRAM Required:** {model.vram_required}\n" | |
| info += f"**Context Length:** {model.context_length:,} tokens\n" | |
| info += f"**Recommended Quantization:** {model.quantization}\n" | |
| info += f"**Recommended LoRA Rank:** {model.lora_rank}\n\n" | |
| info += f"**Description:**\n{model.description}\n\n" | |
| info += f"**Tags:** {', '.join(model.tags)}\n\n" | |
| # Validate availability | |
| is_valid, message = model_registry.validate_model_selection(model_id) | |
| info += f"\n**Status:** {message}\n" | |
| return info | |
| def check_ollama_status(): | |
| """Check Ollama status and list installed models""" | |
| client = OllamaClient() | |
| status = "# Ollama Status\n\n" | |
| if client.is_available(): | |
| status += "β **Ollama is running**\n\n" | |
| models = client.list_models() | |
| if models: | |
| status += f"**Installed Models ({len(models)}):**\n\n" | |
| for model in models: | |
| name = model.get("name", "unknown") | |
| size = model.get("size", 0) / (1024**3) # Convert to GB | |
| status += f"- `{name}` ({size:.1f}GB)\n" | |
| else: | |
| status += "β οΈ No models installed\n\n" | |
| status += "Install models with: `ollama pull <model>`\n" | |
| else: | |
| status += "β **Ollama is not running**\n\n" | |
| status += "Start Ollama with:\n" | |
| status += "```bash\n" | |
| status += "ollama serve\n" | |
| status += "```\n\n" | |
| status += "Or download from: https://ollama.com\n" | |
| return status | |
| def start_cloud_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum): | |
| """Start cloud training on RunPod""" | |
| global training_status, runpod_manager, selected_model_id | |
| if training_status["is_training"]: | |
| return "β οΈ Training already in progress!", "" | |
| # Check RunPod API key | |
| runpod_key = secure_config.get_key("RUNPOD_API_KEY") | |
| if not runpod_key: | |
| return "β RunPod API key required for cloud training! Add it in Settings tab.", "β Missing RunPod API key" | |
| # Check HF key | |
| keys_ok, keys_msg = check_required_keys() | |
| if not keys_ok: | |
| return keys_msg, "β Missing API keys" | |
| try: | |
| training_status["is_training"] = True | |
| log = "βοΈ **CLOUD TRAINING ON RUNPOD**\n\n" | |
| # Get selected model | |
| model = model_registry.get_model(selected_model_id) | |
| if not model: | |
| training_status["is_training"] = False | |
| return "β No model selected!", "" | |
| log += f"π Configuration:\n" | |
| log += f"- Model: {model.name}\n" | |
| log += f"- LoRA Rank: {lora_rank}\n" | |
| log += f"- Epochs: {num_epochs}\n" | |
| log += f"- Mode: Cloud (RunPod)\n\n" | |
| # Load training data | |
| data_path = "data/sample_financial_advisor_data.json" | |
| if not Path(data_path).exists(): | |
| training_status["is_training"] = False | |
| return "β No training data found!", "" | |
| with open(data_path, 'r', encoding='utf-8') as f: | |
| data = json.load(f) | |
| log += f"β Loaded {len(data)} training examples\n\n" | |
| # Initialize RunPod manager | |
| if not runpod_manager: | |
| runpod_manager = RunPodManager(runpod_key) | |
| log += "π **STEP 1: Creating RunPod GPU Instance**\n" | |
| log += "β³ Finding available GPU (RTX 4090 recommended)...\n\n" | |
| # Create pod config | |
| from runpod_manager import DeploymentConfig | |
| config = DeploymentConfig( | |
| pod_name=f"aura-training-{datetime.now().strftime('%Y%m%d-%H%M')}", | |
| gpu_type="NVIDIA GeForce RTX 4090", | |
| storage_gb=50, | |
| sync_data=True, | |
| auto_setup=True | |
| ) | |
| # Deploy pod | |
| pod_info = runpod_manager.one_click_deploy(config=config) | |
| log += f"β Pod created: {pod_info.id}\n" | |
| log += f"π GPU: {pod_info.gpu_type}\n" | |
| log += f"π° Cost: ${pod_info.cost_per_hr:.2f}/hour\n\n" | |
| log += "π **STEP 2: Setting Up Training Environment**\n" | |
| log += "β³ Installing dependencies on cloud GPU...\n\n" | |
| # Environment is auto-setup by one_click_deploy | |
| log += "β Environment ready\n\n" | |
| log += "π **STEP 3: Uploading Training Data**\n" | |
| log += f"β³ Uploading {len(data)} examples to pod...\n\n" | |
| # Data already synced by one_click_deploy if sync_data=True | |
| log += "β Data uploaded\n\n" | |
| log += "π **STEP 4: Starting Training Job**\n" | |
| log += f"β³ Training {model.name} with LoRA...\n" | |
| log += f"β±οΈ Estimated time: {num_epochs * 30}-{num_epochs * 60} minutes\n\n" | |
| # Create training config | |
| from runpod_manager import TrainingConfig | |
| train_config = TrainingConfig( | |
| model_name=model.path if model.type != "ollama" else None, | |
| dataset_path="/workspace/data/sample_financial_advisor_data.json", | |
| output_dir="/workspace/models/financial_advisor", | |
| lora_rank=int(lora_rank), | |
| learning_rate=float(learning_rate), | |
| num_epochs=int(num_epochs), | |
| batch_size=int(batch_size), | |
| gradient_accumulation_steps=int(grad_accum) | |
| ) | |
| # Submit training job | |
| job_result = runpod_manager.submit_training_job(pod_info.id, train_config) | |
| log += "β Training started!\n\n" | |
| log += "π **MONITORING TRAINING**\n" | |
| log += "β³ Training in progress... (this will take a while)\n\n" | |
| # Note: In real implementation, we'd poll for completion | |
| # For now, return success and let user check manually | |
| log += f"π **POD ACCESS**\n" | |
| log += f"SSH: ssh root@{pod_info.ip} -p {pod_info.ssh_port}\n" | |
| log += f"GUI: https://{pod_info.id}-7860.proxy.runpod.net\n\n" | |
| log += "β οΈ **IMPORTANT:**\n" | |
| log += "- Training is running on cloud GPU\n" | |
| log += f"- Costing ${pod_info.cost_per_hr:.2f}/hour\n" | |
| log += "- Go to RunPod tab to monitor or terminate\n" | |
| log += "- Model will be saved to pod storage\n" | |
| training_status["is_training"] = False | |
| return log, "β Cloud training started!" | |
| except Exception as e: | |
| training_status["is_training"] = False | |
| import traceback | |
| error_details = traceback.format_exc() | |
| return f"β Error: {str(e)}\n\n{error_details}", f"β Error: {e}" | |
| def start_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum, training_mode): | |
| """Start the training process (local or cloud)""" | |
| global training_status, trainer_instance, selected_model_id | |
| # Route to cloud or local training | |
| if training_mode == "Cloud GPU (RunPod)": | |
| return start_cloud_training(lora_rank, learning_rate, num_epochs, batch_size, grad_accum) | |
| # Local training below | |
| if training_status["is_training"]: | |
| return "β οΈ Training already in progress!", "" | |
| # Check API keys first | |
| keys_ok, keys_msg = check_required_keys() | |
| if not keys_ok: | |
| return keys_msg, "β Missing API keys. Go to Settings tab." | |
| # Get selected model | |
| model = model_registry.get_model(selected_model_id) | |
| if not model: | |
| return "β No model selected!", "" | |
| # Validate model | |
| is_valid, message = model_registry.validate_model_selection(selected_model_id) | |
| if not is_valid: | |
| return f"β Model validation failed: {message}", "" | |
| # Get model path (convert Ollama to HF if needed) | |
| if model.type == "ollama": | |
| from ollama_integration import get_hf_model_for_ollama | |
| model_path = get_hf_model_for_ollama(model.path) | |
| if not model_path: | |
| return f"β Cannot train with Ollama model {model.path}. No HuggingFace equivalent found.", "" | |
| log_model_name = f"{model.name} (using HF: {model_path})" | |
| else: | |
| model_path = model.path | |
| log_model_name = model.name | |
| try: | |
| training_status["is_training"] = True | |
| training_status["current_epoch"] = 0 | |
| training_status["total_epochs"] = num_epochs | |
| training_status["logs"] = [] | |
| log = "π **STARTING TRAINING**\n\n" | |
| log += f"βοΈ Configuration:\n" | |
| log += f"- Model: {log_model_name}\n" | |
| log += f"- LoRA Rank: {lora_rank}\n" | |
| log += f"- Learning Rate: {learning_rate}\n" | |
| log += f"- Epochs: {num_epochs}\n" | |
| log += f"- Batch Size: {batch_size}\n" | |
| log += f"- Gradient Accumulation: {grad_accum}\n\n" | |
| training_status["logs"].append(log) | |
| # Load data | |
| log += "π Loading training data...\n" | |
| data_path = "data/sample_financial_advisor_data.json" | |
| if not Path(data_path).exists(): | |
| training_status["is_training"] = False | |
| return "β No training data found! Add some Q&A pairs first.", "" | |
| collector = JSONDataCollector() | |
| data = collector.collect(data_path) | |
| log += f"β Loaded {len(data)} examples\n\n" | |
| training_status["logs"].append(log) | |
| # Split data | |
| log += "π Splitting dataset...\n" | |
| builder = DatasetBuilder() | |
| train_data, val_data, test_data = builder.train_test_split(data) | |
| log += f"β Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}\n\n" | |
| training_status["logs"].append(log) | |
| # Configure LoRA | |
| log += "βοΈ Configuring LoRA...\n" | |
| lora_config = LoRAConfig( | |
| r=int(lora_rank), | |
| lora_alpha=int(lora_rank * 2), | |
| lora_dropout=0.05, | |
| target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], | |
| bias="none", | |
| task_type="CAUSAL_LM" | |
| ) | |
| log += f"β LoRA configured\n\n" | |
| training_status["logs"].append(log) | |
| # Initialize trainer | |
| log += "π€ Initializing trainer...\n" | |
| trainer_instance = LoRATrainer( | |
| model_name=model_path, | |
| lora_config=lora_config, | |
| output_dir="models/financial_advisor" | |
| ) | |
| log += "β Trainer ready\n\n" | |
| training_status["logs"].append(log) | |
| # Load model | |
| log += f"π₯ Loading {model.name} (this will take a few minutes)...\n" | |
| log += f"β οΈ First time will download {model.size}\n\n" | |
| training_status["logs"].append(log) | |
| trainer_instance.load_model(use_4bit=True) | |
| log += "β Model loaded successfully!\n\n" | |
| training_status["logs"].append(log) | |
| # Show parameters | |
| params = trainer_instance.get_trainable_parameters() | |
| log += f"π **Parameter Efficiency:**\n" | |
| log += f"- Total: {params['total']:,}\n" | |
| log += f"- Trainable: {params['trainable']:,}\n" | |
| log += f"- Percentage: {params['percentage']:.2f}%\n\n" | |
| training_status["logs"].append(log) | |
| # Prepare datasets | |
| log += "π Preparing datasets...\n" | |
| train_dataset, val_dataset = trainer_instance.prepare_dataset(train_data, val_data) | |
| log += f"β Datasets tokenized and ready\n\n" | |
| training_status["logs"].append(log) | |
| # Start training | |
| log += "π― **TRAINING STARTED**\n\n" | |
| log += f"This will take approximately {num_epochs * 30}-{num_epochs * 60} minutes\n" | |
| log += "You can monitor progress in TensorBoard:\n" | |
| log += "`tensorboard --logdir models/financial_advisor/logs`\n\n" | |
| training_status["logs"].append(log) | |
| # Train (this will take a while) | |
| history = trainer_instance.train( | |
| train_dataset=train_dataset, | |
| val_dataset=val_dataset, | |
| num_epochs=int(num_epochs), | |
| batch_size=int(batch_size), | |
| learning_rate=float(learning_rate), | |
| gradient_accumulation_steps=int(grad_accum) | |
| ) | |
| log += "\nβ **TRAINING COMPLETE!**\n\n" | |
| log += f"πΎ Model saved to: models/financial_advisor/final_model\n" | |
| log += f"π Logs saved to: models/financial_advisor/logs\n\n" | |
| training_status["logs"].append(log) | |
| # Evaluate on test set | |
| if len(test_data) > 0: | |
| log += "π **EVALUATING ON TEST SET**\n\n" | |
| log += f"Running evaluation on {len(test_data)} test examples...\n" | |
| training_status["logs"].append(log) | |
| try: | |
| # Run evaluation (limit to 50 samples for speed) | |
| eval_results = trainer_instance.evaluate_on_test_set( | |
| test_data=test_data, | |
| model_name=log_model_name, | |
| dataset_info=f"Financial Advisor Training - {len(train_data)} train examples", | |
| num_samples=min(50, len(test_data)) | |
| ) | |
| log += "\nβ **EVALUATION COMPLETE**\n\n" | |
| log += "**Performance Metrics:**\n" | |
| metrics = eval_results['metrics'] | |
| log += f"- Average Response Length: {metrics['avg_response_length']:.1f} words\n" | |
| log += f"- Average Generation Time: {metrics['avg_generation_time']:.2f}s\n" | |
| log += f"- Throughput: {metrics['examples_per_second']:.2f} examples/sec\n\n" | |
| log += "π Evaluation report saved to: models/financial_advisor/evaluation_results/\n" | |
| training_status["logs"].append(log) | |
| except Exception as eval_error: | |
| log += f"\nβ οΈ Evaluation error (training still succeeded): {eval_error}\n" | |
| training_status["logs"].append(log) | |
| training_status["is_training"] = False | |
| return "\n".join(training_status["logs"]), "β Training & Evaluation Complete!" | |
| except Exception as e: | |
| training_status["is_training"] = False | |
| error_msg = f"\n\nβ **ERROR:** {str(e)}\n\n" | |
| error_msg += "**Common fixes:**\n" | |
| error_msg += "- Out of memory: Reduce batch_size or increase grad_accum\n" | |
| error_msg += "- CUDA error: Make sure you have a GPU with 18GB+ VRAM\n" | |
| error_msg += "- Import error: Run `pip install bitsandbytes`\n" | |
| training_status["logs"].append(error_msg) | |
| return "\n".join(training_status["logs"]), f"β Error: {e}" | |
| def test_financial_advisor(question): | |
| """Test the trained model""" | |
| global trainer_instance | |
| if trainer_instance is None: | |
| return "β No model loaded. Train a model first or load an existing one." | |
| try: | |
| # Format as financial advisor prompt | |
| system_prompt = ( | |
| "You are an expert financial advisor with deep knowledge of personal finance, " | |
| "investments, retirement planning, tax strategies, and wealth management." | |
| ) | |
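| # Build a ChatML-style prompt (<|im_start|>/<|im_end|> markers), the chat format Qwen instruct models expect | |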
| prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n" | |
| prompt += f"<|im_start|>user\n{question}<|im_end|>\n" | |
| prompt += f"<|im_start|>assistant\n" | |
| # Tokenize | |
| inputs = trainer_instance.tokenizer(prompt, return_tensors="pt").to(trainer_instance.peft_model.device) | |
| # Generate | |
| outputs = trainer_instance.peft_model.generate( | |
| **inputs, | |
| max_new_tokens=512, | |
| temperature=0.7, | |
| top_p=0.9, | |
| do_sample=True, | |
| pad_token_id=trainer_instance.tokenizer.eos_token_id | |
| ) | |
| # Decode | |
| response = trainer_instance.tokenizer.decode(outputs[0], skip_special_tokens=False) | |
| # Extract assistant response | |
| if "<|im_start|>assistant" in response: | |
| response = response.split("<|im_start|>assistant")[-1].strip() | |
| response = response.replace("<|im_end|>", "").strip() | |
| return response | |
| except Exception as e: | |
| return f"β Error generating response: {e}" | |
| def load_existing_model(): | |
| """Load a previously trained model""" | |
| global trainer_instance, selected_model_id | |
| model_path = "models/financial_advisor/final_model" | |
| if not Path(model_path).exists(): | |
| return "β No trained model found at models/financial_advisor/final_model" | |
| try: | |
| # Get the model that was used for training | |
| model = model_registry.get_model(selected_model_id) | |
| if not model: | |
| return "β Model not found in registry" | |
| # Get actual model path | |
| if model.type == "ollama": | |
| from ollama_integration import get_hf_model_for_ollama | |
| base_model_path = get_hf_model_for_ollama(model.path) | |
| if not base_model_path: | |
| return f"β Cannot load Ollama model {model.path}" | |
| else: | |
| base_model_path = model.path | |
| # Initialize trainer | |
| lora_config = LoRAConfig(r=model.lora_rank, lora_alpha=model.lora_rank*2) | |
| trainer_instance = LoRATrainer( | |
| model_name=base_model_path, | |
| lora_config=lora_config, | |
| output_dir="models/financial_advisor" | |
| ) | |
| # Load base model | |
| trainer_instance.load_model(use_4bit=True) | |
| # Load adapter | |
| trainer_instance.load_adapter(model_path) | |
| return f"β Model loaded successfully! ({model.name}) You can now test it." | |
| except Exception as e: | |
| return f"β Error loading model: {e}" | |
| # Model Comparison and Evaluation Functions | |
| def load_evaluation_history(): | |
| """Load evaluation history and display summary""" | |
| try: | |
| from evaluation.model_evaluator import ModelEvaluator | |
| evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results") | |
| if not evaluator.history: | |
| return "β No evaluation history found. Train a model first to see evaluations." | |
| # Generate summary | |
| summary = f"π **EVALUATION HISTORY**\n\n" | |
| summary += f"Total Evaluations: {len(evaluator.history)}\n\n" | |
| summary += "=" * 70 + "\n\n" | |
| for i, eval_result in enumerate(reversed(evaluator.history[-10:]), 1): | |
| summary += f"**Evaluation {len(evaluator.history) - i + 1}:**\n" | |
| summary += f"- Model: {eval_result['model_name']}\n" | |
| summary += f"- Dataset: {eval_result['dataset_info']}\n" | |
| summary += f"- Timestamp: {eval_result['timestamp']}\n" | |
| summary += f"- Test Examples: {eval_result['num_test_examples']}\n" | |
| metrics = eval_result.get('metrics', {}) | |
| summary += f"- Avg Response Length: {metrics.get('avg_response_length', 0):.1f} words\n" | |
| summary += f"- Avg Generation Time: {metrics.get('avg_generation_time', 0):.2f}s\n" | |
| summary += f"- Throughput: {metrics.get('examples_per_second', 0):.2f} ex/s\n" | |
| summary += "\n" + "-" * 70 + "\n\n" | |
| return summary | |
| except Exception as e: | |
| return f"β Error loading evaluation history: {e}" | |
| def compare_models(num_models): | |
| """Compare recent model evaluations""" | |
| try: | |
| from evaluation.model_evaluator import ModelEvaluator | |
| evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results") | |
| if not evaluator.history: | |
| return "β No evaluation history found. Train models first to compare." | |
| # Get comparison | |
| comparison = evaluator.compare_models(num_recent=int(num_models)) | |
| # Generate report | |
| report = evaluator.generate_comparison_report(comparison) | |
| return report | |
| except Exception as e: | |
| return f"β Error comparing models: {e}" | |
| def view_latest_evaluation(): | |
| """View the most recent evaluation in detail""" | |
| try: | |
| from evaluation.model_evaluator import ModelEvaluator | |
| evaluator = ModelEvaluator(output_dir="models/financial_advisor/evaluation_results") | |
| latest = evaluator.get_latest_evaluation() | |
| if not latest: | |
| return "β No evaluations found. Train a model first." | |
| # Generate detailed report | |
| report = evaluator.generate_report(latest) | |
| return report | |
| except Exception as e: | |
| return f"β Error viewing evaluation: {e}" | |
| # RunPod Functions | |
| def init_runpod_manager(): | |
| """Initialize RunPod manager with API key""" | |
| global runpod_manager | |
| if runpod_manager is not None: | |
| return "β RunPod manager already initialized" | |
| api_key = secure_config.get_key("RUNPOD_API_KEY") | |
| if not api_key: | |
| return "β RunPod API key not found! Please add it in the Settings tab." | |
| try: | |
| runpod_manager = RunPodManager(api_key) | |
| return "β RunPod manager initialized successfully!" | |
| except Exception as e: | |
| return f"β Error initializing RunPod manager: {e}" | |
| def list_runpod_pods(): | |
| """List all RunPod pods""" | |
| global runpod_manager | |
| if not runpod_manager: | |
| init_result = init_runpod_manager() | |
| if "β" in init_result: | |
| return init_result | |
| try: | |
| pods = runpod_manager.list_pods() | |
| if not pods: | |
| return "No pods found. Create a new pod to get started!" | |
| output = f"π **YOUR RUNPOD PODS** ({len(pods)} total)\n\n" | |
| for pod in pods: | |
| output += f"**{pod.name}** (ID: {pod.id[:8]}...)\n" | |
| output += f" Status: {pod.status}\n" | |
| output += f" GPU: {pod.gpu_count}x {pod.gpu_type}\n" | |
| output += f" Cost: ${pod.cost_per_hr:.2f}/hr\n" | |
| if pod.status == "RUNNING": | |
| if pod.ip and pod.ssh_port: | |
| output += f" SSH: {pod.ip}:{pod.ssh_port}\n" | |
| if pod.port: | |
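| # RunPod exposes HTTP ports at https://<pod-id>-<port>.proxy.runpod.net; 7860 is the default Gradio port | |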
| output += f" GUI: https://{pod.id}-7860.proxy.runpod.net\n" | |
| output += "\n" | |
| return output | |
| except Exception as e: | |
| return f"β Error listing pods: {e}" | |
| def create_runpod_pod(pod_name, gpu_type, storage_gb, sync_data): | |
| """Create a new RunPod pod""" | |
| global runpod_manager, current_pod_info, deployment_in_progress | |
| if deployment_in_progress: | |
| return "β οΈ Deployment already in progress!", "" | |
| if not runpod_manager: | |
| init_result = init_runpod_manager() | |
| if "β" in init_result: | |
| return init_result, "" | |
| deployment_in_progress = True | |
| log_output = [] | |
| def log_callback(msg): | |
| log_output.append(msg) | |
| try: | |
| config = DeploymentConfig( | |
| pod_name=pod_name or "aura-training-pod", | |
| gpu_type=gpu_type, | |
| storage_gb=int(storage_gb), | |
| sync_data=sync_data, | |
| auto_setup=True | |
| ) | |
| pod_info = runpod_manager.one_click_deploy( | |
| config=config, | |
| progress_callback=log_callback | |
| ) | |
| current_pod_info = pod_info | |
| deployment_in_progress = False | |
| final_log = "\n".join(log_output) | |
| status = f"β Pod created successfully!\n" | |
| status += f"ID: {pod_info.id}\n" | |
| status += f"SSH: {pod_info.ip}:{pod_info.ssh_port}\n" | |
| status += f"GUI: https://{pod_info.id}-7860.proxy.runpod.net" | |
| return final_log, status | |
| except Exception as e: | |
| deployment_in_progress = False | |
| error_log = "\n".join(log_output) + f"\n\nβ Error: {e}" | |
| return error_log, f"β Deployment failed: {e}" | |
| def stop_runpod_pod(pod_id): | |
| """Stop a running pod""" | |
| global runpod_manager | |
| if not runpod_manager: | |
| init_result = init_runpod_manager() | |
| if "β" in init_result: | |
| return init_result | |
| try: | |
| success = runpod_manager.stop_pod(pod_id) | |
| if success: | |
| return f"β Pod {pod_id} stopped successfully!" | |
| else: | |
| return f"β Failed to stop pod {pod_id}" | |
| except Exception as e: | |
| return f"β Error stopping pod: {e}" | |
| def terminate_runpod_pod(pod_id): | |
| """Terminate (delete) a pod""" | |
| global runpod_manager, current_pod_info | |
| if not runpod_manager: | |
| init_result = init_runpod_manager() | |
| if "β" in init_result: | |
| return init_result | |
| try: | |
| success = runpod_manager.terminate_pod(pod_id) | |
| if success: | |
| if current_pod_info and current_pod_info.id == pod_id: | |
| current_pod_info = None | |
| return f"β Pod {pod_id} terminated successfully!" | |
| else: | |
| return f"β Failed to terminate pod {pod_id}" | |
| except Exception as e: | |
| return f"β Error terminating pod: {e}" | |
| def get_pod_connection_info(pod_id): | |
| """Get connection info for a pod""" | |
| global runpod_manager | |
| if not runpod_manager: | |
| init_result = init_runpod_manager() | |
| if "β" in init_result: | |
| return init_result | |
| try: | |
| pod = runpod_manager.get_pod(pod_id) | |
| if not pod: | |
| return f"β Pod {pod_id} not found" | |
| info = f"# Connection Info for {pod.name}\n\n" | |
| info += f"**Status:** {pod.status}\n\n" | |
| if pod.status == "RUNNING": | |
| info += "## SSH Connection\n" | |
| info += f"```bash\n" | |
| info += f"ssh root@{pod.ip} -p {pod.ssh_port}\n" | |
| info += f"```\n\n" | |
| info += "## GUI Access\n" | |
| info += f"Open in browser:\n" | |
| info += f"```\n" | |
| info += f"https://{pod.id}-7860.proxy.runpod.net\n" | |
| info += f"```\n\n" | |
| info += "## Details\n" | |
| info += f"- GPU: {pod.gpu_count}x {pod.gpu_type}\n" | |
| info += f"- Cost: ${pod.cost_per_hr:.2f}/hour\n" | |
| else: | |
| info += f"β οΈ Pod is not running (Status: {pod.status})\n" | |
| return info | |
| except Exception as e: | |
| return f"β Error getting pod info: {e}" | |
| # Create Gradio interface with custom CSS for scrolling | |
| custom_css = """ | |
| /* Make all containers properly scrollable */ | |
| .overflow-y-auto { | |
| overflow-y: auto !important; | |
| max-height: 600px !important; | |
| } | |
| /* Textbox scrolling */ | |
| .textbox-container textarea { | |
| max-height: 400px !important; | |
| overflow-y: auto !important; | |
| } | |
| /* Column scrolling for long content */ | |
| .gr-column { | |
| overflow-y: auto !important; | |
| max-height: 800px !important; | |
| } | |
| /* Markdown blocks in columns */ | |
| .gr-column .gr-markdown { | |
| overflow-y: auto !important; | |
| max-height: 500px !important; | |
| } | |
| /* Accordion-like sections */ | |
| .gr-box { | |
| overflow-y: auto !important; | |
| max-height: 600px !important; | |
| } | |
| /* Hide footer */ | |
| footer {visibility: hidden} | |
| """ | |
| with gr.Blocks(title="Financial Advisor AI Trainer", theme=gr.themes.Soft(), css=custom_css) as demo: | |
| gr.Markdown(""" | |
| # π¦ Financial Advisor AI Training Studio | |
| Train your own expert financial advisor AI powered by Qwen and other open models! | |
| No coding required - just configure, train, and test. | |
| """) | |
| with gr.Tabs(): | |
| # Tab 0: Settings & API Keys | |
| with gr.Tab("βοΈ Settings"): | |
| gr.Markdown(""" | |
| # API Key Management | |
| Store your API keys securely. Keys are encrypted and never stored in plain text. | |
| **You only need to enter these once!** | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("### π API Keys") | |
| gr.Markdown("#### HuggingFace Token (Required)") | |
| gr.Markdown("Get your token at: https://huggingface.co/settings/tokens") | |
| hf_token_input = gr.Textbox( | |
| label="HuggingFace Token", | |
| placeholder="hf_...", | |
| type="password", | |
| info="Required for downloading Qwen 3 model" | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### Optional API Keys") | |
| openai_key_input = gr.Textbox( | |
| label="OpenAI API Key (Optional)", | |
| placeholder="sk-...", | |
| type="password", | |
| info="For testing against GPT models" | |
| ) | |
| anthropic_key_input = gr.Textbox( | |
| label="Anthropic API Key (Optional)", | |
| placeholder="sk-ant-...", | |
| type="password", | |
| info="For testing against Claude models" | |
| ) | |
| wandb_key_input = gr.Textbox( | |
| label="Weights & Biases API Key (Optional)", | |
| placeholder="...", | |
| type="password", | |
| info="For advanced experiment tracking" | |
| ) | |
| runpod_key_input = gr.Textbox( | |
| label="RunPod API Key (Optional)", | |
| placeholder="...", | |
| type="password", | |
| info="For cloud GPU deployment" | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### Custom API Keys (Advanced)") | |
| custom_keys_input = gr.Code( | |
| label="Custom Keys (JSON format)", | |
| language="json", | |
| value='{\n "MY_API_KEY": "value",\n "OTHER_KEY": "value"\n}', | |
| lines=5 | |
| ) | |
| gr.Markdown("---") | |
| with gr.Row(): | |
| save_keys_btn = gr.Button("πΎ Save All Keys", variant="primary", size="lg") | |
| load_keys_btn = gr.Button("π Load Saved Keys", variant="secondary") | |
| clear_keys_btn = gr.Button("ποΈ Clear All Keys", variant="stop") | |
| keys_status = gr.Markdown() | |
| with gr.Column(): | |
| gr.Markdown("### π Security & Status") | |
| gr.Markdown(""" | |
| #### π Security Features | |
| - β Keys are encrypted using Fernet encryption | |
| - β Stored in `.secrets/` directory (auto-hidden) | |
| - β Never logged or displayed in full | |
| - β Loaded into environment variables automatically | |
| - β Only you can decrypt with your machine | |
| #### π Storage Location | |
| Keys are saved in: `.secrets/config.enc` | |
| β οΈ **Backup Note**: If you reinstall or move the project, | |
| you'll need to re-enter your keys. | |
| """) | |
| gr.Markdown("---") | |
| gr.Markdown("### π Current Keys Status") | |
| keys_display = gr.Markdown("No keys loaded. Click 'Load Saved Keys' to check.") | |
| gr.Markdown("---") | |
| gr.Markdown(""" | |
| ### π‘ Tips | |
| **HuggingFace Token:** | |
| 1. Go to https://huggingface.co/settings/tokens | |
| 2. Create a new token (read access is enough) | |
| 3. Copy and paste it above | |
| 4. Click "Save All Keys" | |
| **Other Keys:** | |
| Only add if you plan to use those services. | |
| Training works with just the HuggingFace token. | |
| """) | |
| # Wire up settings | |
| save_keys_btn.click( | |
| fn=save_api_keys, | |
| inputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input], | |
| outputs=keys_status | |
| ) | |
| load_keys_btn.click( | |
| fn=load_api_keys, | |
| outputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input, keys_display] | |
| ) | |
| clear_keys_btn.click( | |
| fn=clear_api_keys, | |
| outputs=[keys_status, hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input] | |
| ) | |
| # Load keys on startup | |
| demo.load( | |
| fn=load_api_keys, | |
| outputs=[hf_token_input, openai_key_input, anthropic_key_input, wandb_key_input, runpod_key_input, custom_keys_input, keys_display] | |
| ) | |
| # Tab 1: Data Management | |
| with gr.Tab("π Training Data"): | |
| gr.Markdown(""" | |
| ### Manage Your Training Data | |
| **Your training data is automatically used when you click "Start Training" in the Training tab.** | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("#### Current Training Data") | |
| data_preview = gr.Markdown() | |
| data_count = gr.Number(label="Total Q&A Pairs", interactive=False) | |
| gr.Markdown("---") | |
| # Training readiness status | |
| training_ready_status = gr.Markdown("Click 'Refresh Data' to check training readiness") | |
| with gr.Row(): | |
| load_data_btn = gr.Button("π Refresh Data", variant="secondary") | |
| validate_data_btn = gr.Button("π Validate Quality", variant="secondary") | |
| remove_dupes_btn = gr.Button("π§Ή Remove Duplicates", variant="secondary") | |
| gr.Markdown("---") | |
| gr.Markdown("#### Quality Report") | |
| validation_report = gr.Textbox( | |
| label="Data Quality Analysis", | |
| lines=10, | |
| max_lines=15, | |
| interactive=False, | |
| placeholder="Click 'Validate Quality' to analyze your training data...", | |
| show_copy_button=True | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### Deduplication Status") | |
| dedup_status = gr.Markdown("Click 'Remove Duplicates' to clean your data") | |
| gr.Markdown("---") | |
| gr.Markdown("#### View Full Dataset") | |
| data_json = gr.Code(label="Full Dataset (JSON)", language="json", lines=15) | |
| with gr.Column(): | |
| gr.Markdown("#### Add New Training Example") | |
| new_question = gr.Textbox( | |
| label="Question", | |
| placeholder="What is the difference between a Roth IRA and Traditional IRA?", | |
| lines=3 | |
| ) | |
| new_context = gr.Textbox( | |
| label="Context (optional)", | |
| placeholder="Additional context for the question...", | |
| lines=2 | |
| ) | |
| new_answer = gr.Textbox( | |
| label="Expert Answer", | |
| placeholder="Provide a detailed, accurate financial answer...", | |
| lines=8 | |
| ) | |
| add_example_btn = gr.Button("β Add Example", variant="primary") | |
| add_status = gr.Markdown() | |
| # Wire up data management | |
| load_data_btn.click( | |
| fn=load_training_data, | |
| outputs=[data_preview, data_count, data_json, training_ready_status] | |
| ) | |
| validate_data_btn.click( | |
| fn=validate_training_data, | |
| outputs=validation_report | |
| ).then( | |
| fn=load_training_data, | |
| outputs=[data_preview, data_count, data_json, training_ready_status] | |
| ) | |
| remove_dupes_btn.click( | |
| fn=remove_duplicates_from_data, | |
| outputs=[dedup_status, data_count, data_json, training_ready_status] | |
| ).then( | |
| fn=load_training_data, | |
| outputs=[data_preview, data_count, data_json, training_ready_status] | |
| ) | |
| add_example_btn.click( | |
| fn=add_training_example, | |
| inputs=[new_question, new_answer, new_context], | |
| outputs=[add_status, data_count] | |
| ).then( | |
| fn=load_training_data, | |
| outputs=[data_preview, data_count, data_json, training_ready_status] | |
| ) | |
| # Load data on startup | |
| demo.load( | |
| fn=load_training_data, | |
| outputs=[data_preview, data_count, data_json, training_ready_status] | |
| ) | |
| # Tab 2: Synthetic Data Generation | |
| with gr.Tab("π€ Synthetic Data"): | |
| gr.Markdown(""" | |
| # Generate Training Data Automatically | |
| Use GPT-4 or Claude to automatically generate high-quality financial Q&A pairs! | |
| **No manual writing required - just configure and generate.** | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("#### π§ Generation Settings") | |
| syn_provider = gr.Radio( | |
| choices=["openai", "anthropic"], | |
| value="openai", | |
| label="API Provider", | |
| info="Choose which LLM to use for generation" | |
| ) | |
| syn_num_examples = gr.Slider( | |
| minimum=5, maximum=100, value=20, step=5, | |
| label="Number of Examples to Generate", | |
| info="Start with 20, then increase" | |
| ) | |
| syn_difficulty = gr.Radio( | |
| choices=["beginner", "intermediate", "advanced", "mixed"], | |
| value="mixed", | |
| label="Difficulty Level", | |
| info="Mixed creates diverse questions" | |
| ) | |
| syn_use_scenarios = gr.Checkbox( | |
| label="Use Realistic Scenarios", | |
| value=False, | |
| info="Generate questions with specific user contexts (age, income, etc.)" | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### π Available Topics") | |
| syn_topics_display = gr.Textbox( | |
| label="Default Topics (leave Custom Topics empty to use these)", | |
| value=get_available_topics(), | |
| lines=8, | |
| max_lines=12, | |
| interactive=False | |
| ) | |
| syn_custom_topics = gr.Textbox( | |
| label="Custom Topics (Optional)", | |
| placeholder="401k Planning, Crypto Trading, Home Buying (comma-separated)", | |
| lines=3, | |
| info="Leave empty to use all default topics" | |
| ) | |
| gr.Markdown("---") | |
| syn_generate_btn = gr.Button("π Generate Data", variant="primary", size="lg") | |
| syn_status = gr.Markdown() | |
| with gr.Column(): | |
| gr.Markdown("#### π Generation Log") | |
| syn_log = gr.Textbox( | |
| label="Progress & Preview", | |
| lines=20, | |
| max_lines=25, | |
| interactive=False, | |
| show_copy_button=True | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### π‘ Tips & Info") | |
| syn_tips = gr.Textbox( | |
| label="Important Information", | |
| value="""π‘ Tips: | |
| β’ Make sure you've added your OpenAI or Anthropic API key in Settings tab | |
| β’ Generated data is automatically added to your training dataset | |
| β’ Each example costs ~$0.01-0.02 in API credits | |
| β’ Generated quality is usually high, but spot-check examples before training | |
| β’ Start with 20 examples to test, then generate more | |
| β’ Mix difficulties for best results | |
| β’ Scenario mode creates more realistic, personalized questions | |
| β οΈ API Key Required: | |
| This feature requires an OpenAI or Anthropic API key. | |
| Set it in the Settings tab before generating.""", | |
| lines=8, | |
| max_lines=12, | |
| interactive=False | |
| ) | |
| # Wire up synthetic data generation | |
| syn_generate_btn.click( | |
| fn=generate_synthetic_data, | |
| inputs=[syn_provider, syn_num_examples, syn_difficulty, syn_use_scenarios, syn_custom_topics], | |
| outputs=[syn_log, syn_status] | |
| ) | |
| # Tab 3: HuggingFace Datasets | |
| with gr.Tab("π¦ HuggingFace Datasets"): | |
| gr.Markdown(""" | |
| # Load Datasets from HuggingFace | |
| Import high-quality financial datasets directly from HuggingFace! | |
| **Includes transaction categorization, financial Q&A, and more.** | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("#### π Known Datasets") | |
| hf_datasets_list = gr.Textbox( | |
| label="Available Financial Datasets", | |
| value=list_hf_datasets(), | |
| lines=10, | |
| max_lines=15, | |
| interactive=False | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### π§ Load Settings") | |
| hf_dataset_name = gr.Radio( | |
| choices=["financial-alpaca", "fingpt-finred", "finance-qa-10k", "Custom Path"], | |
| value="financial-alpaca", | |
| label="Select Dataset", | |
| info="Choose from publicly accessible datasets or use custom path" | |
| ) | |
| hf_custom_path = gr.Textbox( | |
| label="Custom Dataset Path (if 'Custom Path' selected)", | |
| placeholder="username/dataset-name", | |
| info="Full HuggingFace dataset path" | |
| ) | |
| hf_split = gr.Radio( | |
| choices=["train", "test", "validation"], | |
| value="train", | |
| label="Dataset Split", | |
| info="Which split to load" | |
| ) | |
| hf_max_examples = gr.Number( | |
| label="Max Examples to Load (optional - leave empty for all)", | |
| value=None, | |
| info="Limit number of examples (helps with large datasets)" | |
| ) | |
| gr.Markdown("---") | |
| with gr.Row(): | |
| hf_preview_btn = gr.Button("ποΈ Preview Dataset", variant="secondary") | |
| hf_load_btn = gr.Button("π₯ Load Dataset", variant="primary", size="lg") | |
| hf_status = gr.Markdown() | |
| with gr.Column(): | |
| gr.Markdown("#### π Dataset Info & Logs") | |
| hf_preview = gr.Textbox( | |
| label="Dataset Preview", | |
| lines=10, | |
| max_lines=15, | |
| interactive=False, | |
| placeholder="Click 'Preview Dataset' to see sample data...", | |
| show_copy_button=True | |
| ) | |
| gr.Markdown("---") | |
| hf_log = gr.Textbox( | |
| label="Loading Log", | |
| lines=12, | |
| max_lines=18, | |
| interactive=False, | |
| placeholder="Loading progress will appear here...", | |
| show_copy_button=True | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### π‘ Tips & Info") | |
| hf_tips = gr.Textbox( | |
| label="Important Information", | |
| value="""π‘ Available Datasets: | |
| β’ financial-alpaca (52K examples) | |
| Pre-built financial Q&A in Alpaca format - publicly accessible | |
| β’ fingpt-finred | |
| Financial relation extraction dataset - publicly accessible | |
| β’ finance-qa-10k | |
| Q&A from 10-K SEC filings - publicly accessible | |
| π‘ Tips: | |
| β’ Preview datasets before loading to understand structure | |
| β’ Large datasets can be limited using Max Examples | |
| β’ All data is automatically quality-validated before adding | |
| β’ These datasets are PUBLIC and don't require special access | |
| π Authentication: | |
| Your HuggingFace token is used automatically from Settings tab. | |
| Some private/gated datasets may require accepting terms on HuggingFace. | |
| π Finding More Datasets: | |
| Browse: https://huggingface.co/datasets | |
| Search: "finance", "financial", "investment", "trading\"""", | |
| lines=12, | |
| max_lines=18, | |
| interactive=False | |
| ) | |
| # Wire up HuggingFace dataset loading | |
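| # Note: the preview handler is wired to the Custom Path textbox only; the dataset radio | |
| # selection is passed to load_hf_dataset when the dataset is actually loaded. | |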
| hf_preview_btn.click( | |
| fn=preview_hf_dataset, | |
| inputs=hf_custom_path, | |
| outputs=hf_preview | |
| ) | |
| hf_load_btn.click( | |
| fn=load_hf_dataset, | |
| inputs=[hf_custom_path, hf_dataset_name, hf_max_examples, hf_split], | |
| outputs=[hf_log, hf_status] | |
| ) | |
| # Tab 4: Training Configuration | |
| with gr.Tab("βοΈ Training"): | |
| gr.Markdown("### Select Model and Configure Training") | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("#### π» Training Mode") | |
| training_mode = gr.Radio( | |
| choices=["Local GPU", "Cloud GPU (RunPod)"], | |
| value="Cloud GPU (RunPod)", | |
| label="Where to Train", | |
| info="Local requires NVIDIA GPU. Cloud uses RunPod (pay per minute)." | |
| ) | |
| cloud_cost_estimate = gr.Markdown("**Estimated Cost:** Select model to see pricing") | |
| gr.Markdown("---") | |
| gr.Markdown("#### π€ Model Selection") | |
| model_choices = model_registry.get_model_choices_for_gui() | |
| model_selector = gr.Dropdown( | |
| choices=model_choices, | |
| value=model_choices[0][1] if model_choices else None, # Default to first model ID | |
| label="Select Model", | |
| info="Choose which model to train" | |
| ) | |
| model_info_display = gr.Markdown() | |
| with gr.Row(): | |
| check_ollama_btn = gr.Button("π Check Ollama Status", variant="secondary", size="sm") | |
| ollama_status_display = gr.Markdown() | |
| gr.Markdown("---") | |
| gr.Markdown("#### Training Configuration") | |
| lora_rank = gr.Slider( | |
| minimum=4, maximum=64, value=16, step=4, | |
| label="LoRA Rank (Higher = More capacity, more memory)", | |
| info="Recommended: 16 for 30B model" | |
| ) | |
| learning_rate = gr.Slider( | |
| minimum=1e-5, maximum=5e-4, value=1e-4, step=1e-5, | |
| label="Learning Rate", | |
| info="Recommended: 1e-4 for large models" | |
| ) | |
| num_epochs = gr.Slider( | |
| minimum=1, maximum=10, value=3, step=1, | |
| label="Number of Epochs", | |
| info="Start with 1 epoch to test" | |
| ) | |
| batch_size = gr.Slider( | |
| minimum=1, maximum=4, value=1, step=1, | |
| label="Batch Size", | |
| info="Keep at 1 for 30B model" | |
| ) | |
| grad_accum = gr.Slider( | |
| minimum=1, maximum=32, value=16, step=1, | |
| label="Gradient Accumulation Steps", | |
| info="Effective batch = batch_size Γ grad_accum" | |
| ) | |
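| # With the defaults (batch_size=1, grad_accum=16) the effective batch is 16 examples per | |
| # optimizer step; raise grad_accum rather than batch_size when VRAM is tight. | |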
| gr.Markdown("---") | |
| start_train_btn = gr.Button("π Start Training", variant="primary", size="lg") | |
| training_status_text = gr.Markdown() | |
| with gr.Column(): | |
| gr.Markdown("#### Training Progress & Logs") | |
| training_log = gr.Textbox( | |
| label="Training Log", | |
| lines=20, | |
| max_lines=25, | |
| interactive=False, | |
| show_copy_button=True | |
| ) | |
| gr.Markdown(""" | |
| **π‘ Tips:** | |
| - First training will download a ~16GB model | |
| - Monitor with TensorBoard: `tensorboard --logdir models/financial_advisor/logs` | |
| - Training a 30B model takes 30-60 min per epoch | |
| - GPU needs ~18GB VRAM minimum | |
| """) | |
| # Wire up model selection | |
| model_selector.change( | |
| fn=get_model_info, | |
| inputs=model_selector, | |
| outputs=model_info_display | |
| ) | |
| check_ollama_btn.click( | |
| fn=check_ollama_status, | |
| outputs=ollama_status_display | |
| ) | |
| # Load default model info on startup | |
| demo.load( | |
| fn=get_model_info, | |
| inputs=model_selector, | |
| outputs=model_info_display | |
| ) | |
| # Wire up training | |
| start_train_btn.click( | |
| fn=start_training, | |
| inputs=[lora_rank, learning_rate, num_epochs, batch_size, grad_accum, training_mode], | |
| outputs=[training_log, training_status_text] | |
| ) | |
| # Tab 5: Testing | |
| with gr.Tab("π§ͺ Test Model"): | |
| gr.Markdown("### Test Your Trained Financial Advisor") | |
| with gr.Row(): | |
| with gr.Column(): | |
| load_model_btn = gr.Button("π₯ Load Trained Model", variant="secondary") | |
| load_status = gr.Markdown() | |
| gr.Markdown("---") | |
| test_question = gr.Textbox( | |
| label="Ask Your Financial Advisor", | |
| placeholder="Should I pay off my student loans or invest in my 401k?", | |
| lines=4 | |
| ) | |
| test_btn = gr.Button("π¬ Get Advice", variant="primary", size="lg") | |
| gr.Markdown("#### Example Questions:") | |
| gr.Markdown(""" | |
| - What's the difference between a Roth IRA and Traditional IRA? | |
| - How much should I have in my emergency fund? | |
| - Should I invest in index funds or individual stocks? | |
| - What is dollar-cost averaging? | |
| - How do I start investing with only $100 per month? | |
| """) | |
| with gr.Column(): | |
| gr.Markdown("#### Financial Advisor Response") | |
| test_response = gr.Textbox( | |
| label="Response", | |
| lines=15, | |
| max_lines=20, | |
| interactive=False, | |
| show_copy_button=True | |
| ) | |
| # Wire up testing | |
| load_model_btn.click( | |
| fn=load_existing_model, | |
| outputs=load_status | |
| ) | |
| test_btn.click( | |
| fn=test_financial_advisor, | |
| inputs=test_question, | |
| outputs=test_response | |
| ) | |
| # Tab 6: Evaluation & Comparison | |
| with gr.Tab("π Evaluation"): | |
| gr.Markdown("### Model Evaluation & Comparison") | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("#### π Evaluation History") | |
| history_refresh_btn = gr.Button("π Refresh History", variant="secondary") | |
| eval_history_display = gr.Textbox( | |
| label="Recent Evaluations", | |
| lines=15, | |
| max_lines=20, | |
| interactive=False, | |
| show_copy_button=True, | |
| placeholder="Click 'Refresh History' to see evaluation history..." | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### π Latest Evaluation Details") | |
| latest_eval_btn = gr.Button("π View Latest Evaluation", variant="secondary") | |
| latest_eval_display = gr.Textbox( | |
| label="Latest Evaluation Report", | |
| lines=15, | |
| max_lines=20, | |
| interactive=False, | |
| show_copy_button=True, | |
| placeholder="Click to view detailed evaluation report..." | |
| ) | |
| with gr.Column(): | |
| gr.Markdown("#### π Model Comparison") | |
| num_models_compare = gr.Slider( | |
| minimum=2, | |
| maximum=10, | |
| value=3, | |
| step=1, | |
| label="Number of Models to Compare", | |
| info="Compare recent model evaluations" | |
| ) | |
| compare_btn = gr.Button("βοΈ Compare Models", variant="primary", size="lg") | |
| comparison_display = gr.Textbox( | |
| label="Model Comparison Report", | |
| lines=20, | |
| max_lines=25, | |
| interactive=False, | |
| show_copy_button=True, | |
| placeholder="Click 'Compare Models' to see side-by-side comparison..." | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown(""" | |
| **π‘ Tips:** | |
| - Evaluations are run automatically after training | |
| - Compare metrics across different training runs | |
| - Use comparison to find the best model | |
| - Detailed reports saved in `models/financial_advisor/evaluation_results/` | |
| """) | |
| # Wire up evaluation functions | |
| history_refresh_btn.click( | |
| fn=load_evaluation_history, | |
| outputs=eval_history_display | |
| ) | |
| latest_eval_btn.click( | |
| fn=view_latest_evaluation, | |
| outputs=latest_eval_display | |
| ) | |
| compare_btn.click( | |
| fn=compare_models, | |
| inputs=num_models_compare, | |
| outputs=comparison_display | |
| ) | |
| # Load history on startup | |
| demo.load( | |
| fn=load_evaluation_history, | |
| outputs=eval_history_display | |
| ) | |
| # Tab 7: RunPod Cloud Deployment | |
| with gr.Tab("βοΈ RunPod"): | |
| gr.Markdown(""" | |
| # Cloud GPU Deployment with RunPod | |
| Deploy and train on powerful cloud GPUs without any manual setup! | |
| **One-click deployment to RunPod cloud GPUs - fully automated.** | |
| ## π― What This Does | |
| This tab lets you: | |
| 1. **Create cloud GPU pods** - Get a powerful GPU in the cloud | |
| 2. **Auto-deploy your code** - Your AURA app runs on the cloud GPU | |
| 3. **Train remotely** - Use the same GUI, but on cloud hardware | |
| ## β‘ Quick Start | |
| **Before you start**, make sure you have: | |
| - β Added RunPod API key in **βοΈ Settings** tab | |
| - β Prepared training data in **π Training Data** tab | |
| Then: | |
| 1. Configure pod settings below (defaults are good) | |
| 2. Click "π Create & Deploy Pod" | |
| 3. Wait ~5 minutes for setup | |
| 4. Access the cloud GUI via the provided URL | |
| 5. Train using the cloud GPU! | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("#### π§ Pod Configuration") | |
| runpod_pod_name = gr.Textbox( | |
| label="Pod Name", | |
| value="aura-training-pod", | |
| placeholder="my-training-pod", | |
| info="Name for your RunPod instance" | |
| ) | |
| runpod_gpu_type = gr.Dropdown( | |
| choices=[ | |
| "NVIDIA GeForce RTX 4090", | |
| "NVIDIA GeForce RTX 3090", | |
| "NVIDIA A100 40GB PCIe", | |
| "NVIDIA A100 80GB PCIe", | |
| "NVIDIA H100 80GB HBM3", | |
| ], | |
| value="NVIDIA GeForce RTX 4090", | |
| label="GPU Type", | |
| info="RTX 4090 recommended for best value" | |
| ) | |
| runpod_storage = gr.Slider( | |
| minimum=20, | |
| maximum=200, | |
| value=50, | |
| step=10, | |
| label="Storage (GB)", | |
| info="Disk space for models and data" | |
| ) | |
| runpod_sync_data = gr.Checkbox( | |
| label="Sync Training Data", | |
| value=True, | |
| info="Upload your local training data to the pod" | |
| ) | |
| gr.Markdown("---") | |
| with gr.Row(): | |
| runpod_create_btn = gr.Button("π Create & Deploy Pod", variant="primary", size="lg") | |
| runpod_refresh_btn = gr.Button("π Refresh Pods", variant="secondary") | |
| runpod_status = gr.Markdown() | |
| gr.Markdown("---") | |
| gr.Markdown("#### π Your Pods") | |
| runpod_pods_list = gr.Textbox( | |
| label="Active Pods", | |
| lines=10, | |
| max_lines=15, | |
| interactive=False, | |
| placeholder="Click 'Refresh Pods' to see your RunPod instances...", | |
| show_copy_button=True | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### π§ Pod Management") | |
| runpod_pod_id = gr.Textbox( | |
| label="Pod ID", | |
| placeholder="Enter Pod ID for management operations", | |
| info="Get from 'Your Pods' list above" | |
| ) | |
| with gr.Row(): | |
| runpod_stop_btn = gr.Button("βΈοΈ Stop Pod", variant="secondary", size="sm") | |
| runpod_terminate_btn = gr.Button("ποΈ Terminate Pod", variant="stop", size="sm") | |
| runpod_mgmt_status = gr.Markdown() | |
| with gr.Column(): | |
| gr.Markdown("#### π Deployment Log") | |
| runpod_log = gr.Textbox( | |
| label="Progress & Status", | |
| lines=15, | |
| max_lines=20, | |
| interactive=False, | |
| show_copy_button=True, | |
| placeholder="Deployment progress will appear here..." | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("#### π Connection Info") | |
| runpod_connection_info = gr.Markdown("Select a pod and click 'Get Connection Info'") | |
| runpod_connect_btn = gr.Button("π Get Connection Info", variant="secondary") | |
| gr.Markdown("---") | |
| gr.Markdown(""" | |
| **π° Cost Estimates:** | |
| - RTX 4090: ~$0.69/hour (Best Value) | |
| - RTX 3090: ~$0.44/hour (Budget) | |
| - A100 40GB: ~$1.39/hour (Production) | |
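| - Example: a 3-epoch run at ~45 min/epoch on an RTX 4090 is about 2.25 GPU-hours, or roughly $1.55 | |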
| **π What Gets Deployed:** | |
| - β All your code files | |
| - β Your training data (if "Sync Data" checked) | |
| - β Python dependencies (auto-installed) | |
| - β Same GUI you're using now | |
| **After Deployment:** | |
| - Access GUI in browser (URL provided below) | |
| - Train on the cloud GPU (same interface) | |
| - Stop pod when done to save money | |
| - Your trained models stay on the pod | |
| **π Get RunPod API Key:** | |
| 1. Go to https://www.runpod.io/console/user/settings | |
| 2. Click "API Keys" β "Create API Key" | |
| 3. Copy the key | |
| 4. Add to **βοΈ Settings** tab above | |
| 5. Come back here and create a pod! | |
| """) | |
| # Wire up RunPod functions | |
| runpod_create_btn.click( | |
| fn=create_runpod_pod, | |
| inputs=[runpod_pod_name, runpod_gpu_type, runpod_storage, runpod_sync_data], | |
| outputs=[runpod_log, runpod_status] | |
| ) | |
| runpod_refresh_btn.click( | |
| fn=list_runpod_pods, | |
| outputs=runpod_pods_list | |
| ) | |
| runpod_stop_btn.click( | |
| fn=stop_runpod_pod, | |
| inputs=runpod_pod_id, | |
| outputs=runpod_mgmt_status | |
| ) | |
| runpod_terminate_btn.click( | |
| fn=terminate_runpod_pod, | |
| inputs=runpod_pod_id, | |
| outputs=runpod_mgmt_status | |
| ) | |
| runpod_connect_btn.click( | |
| fn=get_pod_connection_info, | |
| inputs=runpod_pod_id, | |
| outputs=runpod_connection_info | |
| ) | |
| # Tab 8: Help | |
| with gr.Tab("β Help"): | |
| gr.Markdown(""" | |
| # Getting Started Guide | |
| ## Step 1: Prepare Training Data | |
| 1. Go to **Training Data** tab | |
| 2. Review the sample financial Q&A pairs | |
| 3. Add your own examples using the form | |
| 4. Aim for at least 50-100 high-quality examples (one possible entry format is sketched below) | |
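| A minimal sketch of what one entry might look like once added through the form (field names are assumptions, not the exact dataset schema): | |
| ```python | |
| # Hypothetical training entry; the actual dataset builder may use different field names. | |
| example = { | |
| "question": "Should I pay off high-interest debt before investing?", | |
| "context": "",  # optional extra context from the form; may be empty | |
| "answer": "Paying down debt that charges more interest than you expect to earn is usually the safer first step.", | |
| } | |
| ``` | |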
| ## Step 2: Configure Training | |
| 1. Go to **Training** tab | |
| 2. Adjust settings (defaults are good to start; a rough config sketch follows this list): | |
| - LoRA Rank: 16 (higher = more capacity) | |
| - Learning Rate: 1e-4 (standard for large models) | |
| - Epochs: 1-3 (start with 1 to test) | |
| 3. Click **Start Training** | |
| 4. Wait 30-60 minutes per epoch | |
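| For reference, the sliders roughly map onto a PEFT LoRA configuration like the sketch below (this assumes the bundled trainer wraps `peft.LoraConfig`; the exact wrapper API may differ): | |
| ```python | |
| from peft import LoraConfig | |
| # Rough equivalent of the GUI defaults; an illustration, not the app's exact internals. | |
| lora_config = LoraConfig( | |
| r=16,  # "LoRA Rank" slider | |
| lora_alpha=32,  # commonly set to about 2x the rank | |
| lora_dropout=0.05, | |
| task_type="CAUSAL_LM", | |
| ) | |
| ``` | |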
| ## Step 3: Test Your Model | |
| 1. Go to **Test Model** tab | |
| 2. Click **Load Trained Model** | |
| 3. Ask financial questions | |
| 4. Get expert advice! | |
| ## Requirements | |
| - **GPU**: NVIDIA GPU with 18GB+ VRAM (RTX 3090, 4090, A100, etc.) | |
| - **RAM**: 32GB+ system RAM recommended | |
| - **Storage**: 30GB free space for model | |
| - **Internet**: Fast connection for first-time model download | |
| - **HuggingFace Token**: Required for model access (set in Settings tab) | |
| ## Troubleshooting | |
| ### Out of Memory (OOM) | |
| - Reduce batch_size to 1 | |
| - Increase gradient_accumulation_steps | |
| - Close other applications | |
| ### Slow Training | |
| - This is normal for a 30B model | |
| - Each epoch takes 30-60 minutes | |
| - Be patient! | |
| ### Import Errors | |
| ```bash | |
| pip install torch transformers peft accelerate bitsandbytes gradio cryptography | |
| ``` | |
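| If imports succeed but training still will not start, it can help to confirm that PyTorch actually sees your GPU (plain PyTorch calls, nothing app-specific): | |
| ```python | |
| import torch | |
| # Expect True and at least one visible device (e.g. an RTX 4090) on a working CUDA setup. | |
| print(torch.cuda.is_available()) | |
| print(torch.cuda.device_count(), "CUDA device(s) visible") | |
| ``` | |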
| ### HuggingFace Authentication Error | |
| - Go to **Settings** tab | |
| - Add your HuggingFace token | |
| - Click **Save All Keys** | |
| - Get token at: https://huggingface.co/settings/tokens | |
| ### Model Not Learning | |
| - Check learning rate (try 1e-4 to 5e-4) | |
| - Verify training data quality | |
| - Train for more epochs | |
| ## Advanced Tips | |
| 1. **More Data = Better Results**: Aim for 500+ examples | |
| 2. **Diverse Topics**: Cover various financial areas | |
| 3. **Quality > Quantity**: Accurate answers matter | |
| 4. **Monitor TensorBoard**: Track loss curves | |
| 5. **Test Regularly**: Evaluate after each epoch | |
| ## Support | |
| - Check logs in Training tab | |
| - Review error messages carefully | |
| - Verify GPU has enough memory | |
| - Ensure all dependencies are installed | |
| """) | |
| gr.Markdown(""" | |
| --- | |
| π‘ **Pro Tip**: Start with the sample data and 1 epoch to test everything works, then add more data and train for 3+ epochs. | |
| """) | |
| if __name__ == "__main__": | |
| import argparse | |
| import os | |
| # Parse command-line arguments for cloud deployment compatibility | |
| parser = argparse.ArgumentParser(description="Launch AURA AI Training Studio") | |
| parser.add_argument("--server_name", type=str, default="0.0.0.0", help="Server host") | |
| parser.add_argument("--server_port", type=int, default=int(os.getenv("PORT", 7860)), help="Server port") | |
| parser.add_argument("--share", action="store_true", help="Create public share link") | |
| args = parser.parse_args() | |
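| # Typical invocations (the file name is whatever this script is saved as, e.g. app.py): | |
| #   python app.py                            -> serves on http://0.0.0.0:7860 | |
| #   python app.py --server_port 7861 --share -> custom port plus a public Gradio share link | |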
| print("=" * 70) | |
| print("Financial Advisor AI Training Studio") | |
| print("=" * 70) | |
| print("\nStarting GUI server...") | |
| print(f"Server: http://{args.server_name}:{args.server_port}") | |
| if args.share: | |
| print("Share link will be generated...") | |
| print("\nPress Ctrl+C to stop the server") | |
| print("=" * 70) | |
| demo.launch( | |
| server_name=args.server_name, | |
| server_port=args.server_port, | |
| share=args.share, | |
| show_error=True | |
| ) | |