"""批量分析多个角色的工具脚本""" |
|
|
|
|
|
import sys |
|
|
import os |
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
|
|
|
from core import TextProcessor, CharacterExtractor, CharacterAnalyzer |
|
|
from utils import CacheManager |
|
|
import json |
|
|
from tqdm import tqdm |
|
|
|
|
|
def batch_analyze(novel_path: str, output_dir: str = "character_profiles",
                  max_characters: int = 10):
    """Batch-analyze all major characters in a novel.

    Args:
        novel_path: Path to the novel text file.
        output_dir: Directory to write results into.
        max_characters: Maximum number of characters to analyze.
    """
print("="*70) |
|
|
print("📚 批量角色分析工具") |
|
|
print("="*70) |
|
|
|
|
|
|
|
|
print(f"\n📖 加载小说: {novel_path}") |
|
|
try: |
|
|
with open(novel_path, 'r', encoding='utf-8') as f: |
|
|
novel = f.read() |
|
|
except: |
|
|
print(f"❌ 无法加载文件: {novel_path}") |
|
|
return |
|
|
|
|
|
print(f"✓ 已加载 {len(novel):,} 个字符") |
|
|
|
|
|
|
|
|
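
    # Split the novel into chunks and collect basic statistics,
    # including the detected language.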
print("\n📄 处理文本...") |
|
|
processor = TextProcessor() |
|
|
chunks = processor.chunk_text(novel) |
|
|
stats = processor.get_statistics(novel) |
|
|
|
|
|
print(f"✓ 文本已分为 {len(chunks)} 个块") |
|
|
print(f"✓ 检测语言: {stats['language']}") |
|
|
|
|
|
|
|
|
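
    # Identify the main characters from the chunks; a sample of the
    # opening text and the detected language are passed to the extractor.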
print("\n👥 提取角色...") |
|
|
extractor = CharacterExtractor() |
|
|
characters = extractor.extract_main_characters( |
|
|
chunks, |
|
|
text_sample=novel[:3000], |
|
|
language=stats['language'] |
|
|
) |
|
|
|
|
|
if not characters: |
|
|
print("❌ 未找到角色") |
|
|
return |
|
|
|
|
|
print(f"✓ 找到 {len(characters)} 个主要角色") |
|
|
|
|
|
|
|
|
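
    # Make sure the output directory exists before any files are written.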
    os.makedirs(output_dir, exist_ok=True)

    print(f"\n🧠 Analyzing characters (up to {max_characters})...")
    analyzer = CharacterAnalyzer()

    all_profiles = []
    analyze_count = min(max_characters, len(characters))
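
    # Analyze each character in turn, writing its profile to disk as soon
    # as the analysis succeeds so a late failure cannot lose earlier results.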
    for char in tqdm(characters[:analyze_count], desc="Analyzing"):
        char_name = char['name']

        try:
            # Pick the chunks that best represent this character.
            representative_chunks = analyzer.select_representative_chunks(
                chunks,
                char['info']['chunks']
            )

            # Build the base personality profile from those chunks.
            profile = analyzer.analyze_character_batch(
                char_name,
                representative_chunks
            )

            # Enrich the profile with concrete examples from the text.
            profile = analyzer.enhance_profile_with_examples(
                profile,
                chunks,
                char['info']['chunks']
            )

            all_profiles.append(profile)

            # Write one JSON file per character, sanitizing the filename.
            char_filename = f"{profile['name'].replace(' ', '_').replace('/', '_')}.json"
            char_file = os.path.join(output_dir, char_filename)

            with open(char_file, 'w', encoding='utf-8') as f:
                json.dump(profile, f, ensure_ascii=False, indent=2)

        except Exception as e:
            print(f"\n❌ Failed to analyze {char_name}: {e}")
            continue
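
    # Aggregate every successful profile into a single JSON file.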
    all_file = os.path.join(output_dir, "all_characters.json")
    with open(all_file, 'w', encoding='utf-8') as f:
        json.dump(all_profiles, f, ensure_ascii=False, indent=2)
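
    # Also write a human-readable plain-text summary report.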
    report_file = os.path.join(output_dir, "analysis_report.txt")
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write("=" * 70 + "\n")
        f.write("Character Analysis Report\n")
        f.write("=" * 70 + "\n\n")
        f.write(f"Novel file: {novel_path}\n")
        f.write(f"Text length: {len(novel):,} characters\n")
        f.write(f"Characters analyzed: {len(all_profiles)}\n\n")
        f.write("-" * 70 + "\n\n")

        for i, profile in enumerate(all_profiles, 1):
            f.write(f"{i}. {profile['name']}\n")
            f.write(f"   Core traits: {', '.join(profile.get('core_traits', []))}\n")
            f.write(f"   Personality summary: {profile.get('personality_summary', 'N/A')}\n")
            f.write("\n")
print("\n" + "="*70) |
|
|
print("✅ 分析完成!") |
|
|
print("="*70) |
|
|
print(f"📁 输出目录: {output_dir}") |
|
|
print(f"📊 分析角色数: {len(all_profiles)}") |
|
|
print(f"📄 汇总文件: {all_file}") |
|
|
print(f"📋 报告文件: {report_file}") |
|
|
print("="*70) |
|
|
|
|
|
def main():
    import argparse

    parser = argparse.ArgumentParser(
        description="Batch-analyze the characters in a novel",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python batch_analyze.py novel.txt
  python batch_analyze.py novel.txt -o my_characters -n 15
"""
    )

    parser.add_argument("novel_path", help="path to the novel text file")
    parser.add_argument("-o", "--output", default="character_profiles",
                        help="output directory (default: character_profiles)")
    parser.add_argument("-n", "--num", type=int, default=10,
                        help="maximum number of characters to analyze (default: 10)")

    args = parser.parse_args()

    batch_analyze(args.novel_path, args.output, args.num)

if __name__ == "__main__":
    main()