| | import os |
| | import google.generativeai as genai |
| | from pathlib import Path |
| | import logging |
| |
|
# Module-wide logging: INFO level, logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
| |
|
class AnalysisPostProcessor:
    """Cleans up a chunk-generated screenplay analysis with Gemini.

    The input file is divided into sections delimited by "### Title ###"
    header lines.  Each section is sent to the model to remove redundancy
    and improve flow, then the sections are re-assembled and written out
    in the same header format.
    """

    def __init__(self):
        """Configure the Gemini client from the GOOGLE_API_KEY env var.

        Raises:
            ValueError: if GOOGLE_API_KEY is not set.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')

    def read_sections(self, filepath: str) -> dict:
        """Read and separate the analysis into sections.

        Args:
            filepath: Path to the analysis text file.

        Returns:
            Mapping of section title -> section body.  Header lines are
            excluded from bodies; any text before the first header is
            discarded (matching the format emitted by process_analysis).
        """
        # Explicit encoding avoids platform-dependent default codecs.
        content = Path(filepath).read_text(encoding="utf-8")

        sections = {}
        current_section = None
        current_content = []

        for line in content.split('\n'):
            if line.startswith('### ') and line.endswith(' ###'):
                # New header: flush the section accumulated so far.
                if current_section:
                    sections[current_section] = '\n'.join(current_content)
                current_section = line.strip('#').strip()
                current_content = []
            else:
                current_content.append(line)

        # Flush the trailing section.
        if current_section:
            sections[current_section] = '\n'.join(current_content)

        return sections

    def clean_section(self, title: str, content: str) -> str:
        """Clean an individual section using Gemini.

        On any model/API failure the original content is returned
        unchanged, so a transient error never loses analysis text.
        """
        prompt = f"""You are processing a section of screenplay analysis titled "{title}".
The original analysis was generated by analyzing chunks of the screenplay,
which may have led to some redundancy and discontinuity.

Your task:
1. Remove any redundant observations
2. Stitch together related insights that may be separated
3. Ensure the analysis flows naturally from beginning to end
4. Preserve ALL unique insights and specific examples
5. Maintain the analytical depth while making it more coherent

Original {title} section:
{content}

Provide the cleaned and coherent version maintaining the same analytical depth."""

        try:
            response = self.model.generate_content(prompt)
            return response.text
        except Exception:
            # Best-effort fallback: keep the original text; logger.exception
            # records the full traceback, unlike error(f"...{e}").
            logger.exception("Error cleaning %s", title)
            return content

    def process_analysis(self, input_path: str, output_path: str) -> bool:
        """Clean every section of the analysis file and write the result.

        Args:
            input_path: Source analysis file.
            output_path: Destination for the cleaned analysis.

        Returns:
            True on success, False if any step failed (error is logged).
        """
        try:
            sections = self.read_sections(input_path)

            cleaned_sections = {}
            for title, content in sections.items():
                # Lazy %-args: the message is only formatted if emitted.
                logger.info("Processing %s", title)
                cleaned_sections[title] = self.clean_section(title, content)

            # Assemble with join() instead of repeated += (avoids the
            # quadratic string-concatenation pattern).
            parts = ["SCREENPLAY CREATIVE ANALYSIS\n\n"]
            for title, content in cleaned_sections.items():
                parts.append(f"### {title} ###\n\n{content}\n\n")
            final_analysis = "".join(parts)

            Path(output_path).write_text(final_analysis, encoding="utf-8")

            logger.info("Cleaned analysis saved to: %s", output_path)
            return True

        except Exception:
            logger.exception("Error in post-processing")
            return False
| |
|
def main():
    """Entry point: clean one analysis file and save the result.

    NOTE(review): the paths below are placeholders — replace with real
    locations (or CLI arguments) before use.
    """
    input_file = "path/to/creative_analysis.txt"
    output_file = "path/to/cleaned_creative_analysis.txt"
    AnalysisPostProcessor().process_analysis(input_file, output_file)
| |
|
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()