Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Files Community

ApsidalSolid4 committed on Mar 21

Commit

1419b33

verified ·

1 Parent(s): bc0ac29

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -117

app.py CHANGED Viewed

@@ -77,12 +77,6 @@ class OCRProcessor:
     def process_file(self, file_path: str) -> Dict:
         """
         Process a file using OCR.space API
-        Args:
-            file_path: Path to the file to be processed
-        Returns:
-            Dictionary containing the OCR results and status
         """
         start_time = time.time()
         ocr_logger.info(f"Starting OCR processing for file: {os.path.basename(file_path)}")
@@ -101,11 +95,6 @@ class OCRProcessor:
         file_type = self._get_file_type(file_path)
         ocr_logger.info(f"Detected file type: {file_type}")
-        # Special handling for Word documents - convert to PDF if needed
-        if file_type.startswith('application/vnd.openxmlformats-officedocument') or file_type == 'application/msword':
-            ocr_logger.info("Word document detected, processing directly")
-            # Note: OCR.space may handle Word directly, but if not, conversion would be needed here
         # Prepare the API request
         with open(file_path, 'rb') as f:
             file_data = f.read()
@@ -176,12 +165,6 @@ class OCRProcessor:
     def _extract_text_from_result(self, result: Dict) -> str:
         """
         Extract all text from the OCR API result
-        Args:
-            result: The OCR API response JSON
-        Returns:
-            Extracted text as a single string
         """
         extracted_text = ""
@@ -195,12 +178,6 @@ class OCRProcessor:
     def _get_file_type(self, file_path: str) -> str:
         """
         Determine MIME type of a file
-        Args:
-            file_path: Path to the file
-        Returns:
-            MIME type as string
         """
         mime_type, _ = mimetypes.guess_type(file_path)
         if mime_type is None:
@@ -208,11 +185,9 @@ class OCRProcessor:
             return 'application/octet-stream'
         return mime_type
 def is_admin_password(input_text: str) -> bool:
     """
     Check if the input text matches the admin password using secure hash comparison.
-    This prevents the password from being visible in the source code.
     """
     # Hash the input text
     input_hash = hashlib.sha256(input_text.strip().encode()).hexdigest()
@@ -220,7 +195,6 @@ def is_admin_password(input_text: str) -> bool:
     # Compare hashes (constant-time comparison to prevent timing attacks)
     return input_hash == ADMIN_PASSWORD_HASH
 class TextWindowProcessor:
     def __init__(self):
         try:
@@ -272,10 +246,8 @@ class TextWindowProcessor:
         return windows, window_sentence_indices
 class TextClassifier:
     def __init__(self):
-        # FIXED: Removed the thread configuration here, as it's now at the module level
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model_name = MODEL_NAME
         self.tokenizer = None
@@ -310,6 +282,7 @@ class TextClassifier:
         self.model.eval()
     def quick_scan(self, text: str) -> Dict:
         """Perform a quick scan using simple window analysis."""
         if not text.strip():
@@ -520,19 +493,10 @@ class TextClassifier:
             'num_sentences': num_sentences
         }
 # Function to handle file upload, OCR processing, and text analysis
 def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
     """
     Handle file upload, OCR processing, and text analysis
-    Args:
-        file_obj: Uploaded file object from Gradio (bytes when using type="binary")
-        mode: Analysis mode (quick or detailed)
-        classifier: The TextClassifier instance
-    Returns:
-        Analysis results as a tuple (same format as original analyze_text function)
     """
     if file_obj is None:
         return (
@@ -542,10 +506,6 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
         )
     # Create a temporary file with an appropriate extension based on content
-    # Since we don't have the original filename when using binary mode,
-    # we'll use a generic extension based on simple content detection
-    # Simple content type detection
     content_start = file_obj[:20]  # Look at the first few bytes
     # Default to .bin extension
@@ -561,7 +521,6 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
         file_ext = ".png"
     elif content_start.startswith(b'GIF'):      # GIF
         file_ext = ".gif"
-    # Add more content type detection as needed
     # Create a temporary file with the detected extension
     with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file:
@@ -600,7 +559,6 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
         if os.path.exists(temp_file_path):
             os.remove(temp_file_path)
 def initialize_excel_log():
     """Initialize the Excel log file if it doesn't exist."""
     if not os.path.exists(EXCEL_LOG_PATH):
@@ -810,20 +768,11 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
             overall_result
         )
-# Modified Gradio interface setup function to include file upload
-def setup_gradio_interface(classifier):
-    """
-    Set up Gradio interface with a more aligned and compact file upload
-    Args:
-        classifier: The TextClassifier instance
-    Returns:
-        Gradio Interface object
-    """
-    import gradio as gr
     # Create analyzer functions that capture the classifier
     def analyze_text_wrapper(text, mode):
         return analyze_text(text, mode, classifier)
@@ -833,115 +782,109 @@ def setup_gradio_interface(classifier):
             return analyze_text_wrapper("", mode)  # Return empty analysis
         return handle_file_upload_and_analyze(file_obj, mode, classifier)
     with gr.Blocks(title="AI Text Detector") as demo:
         gr.Markdown("# AI Text Detector")
         with gr.Row():
-            # Left column - Input
             with gr.Column():
                 text_input = gr.Textbox(
-                    lines=8,
                     placeholder="Enter text to analyze...",
                     label="Input Text"
                 )
                 with gr.Row():
-                    # Left side: Analysis mode radio buttons
-                    with gr.Column(scale=4):
-                        gr.Markdown("Analysis Mode")
-                        gr.Markdown("Quick mode for faster analysis. Detailed mode for sentence-level analysis.", elem_classes=["description-text"])
-                        mode_selection = gr.Radio(
-                            choices=["quick", "detailed"],
-                            value="quick",
-                            label=""
-                        )
-                    # Right side: File upload (compact and aligned)
-                    with gr.Column(scale=1, elem_classes=["file-upload-container"]):
-                        file_upload = gr.File(
-                            label="File",
-                            file_types=["image", "pdf", "doc", "docx"],
-                            type="binary",
-                            elem_classes=["compact-file-upload"]
-                        )
-                # Analyze button
                 analyze_button = gr.Button("Analyze Text")
-            # Right column - Results
             with gr.Column():
                 output_html = gr.HTML(label="Highlighted Analysis")
                 output_sentences = gr.Textbox(label="Sentence-by-Sentence Analysis", lines=10)
                 output_result = gr.Textbox(label="Overall Result", lines=4)
-        # Connect buttons to functions
         analyze_button.click(
             analyze_text_wrapper,
             inputs=[text_input, mode_selection],
             outputs=[output_html, output_sentences, output_result]
         )
-        # Connect file upload to automatically process when changed
         file_upload.change(
             handle_file_upload_wrapper,
             inputs=[file_upload, mode_selection],
             outputs=[output_html, output_sentences, output_result]
         )
-        # Add custom CSS for alignment and styling
         gr.HTML("""
         <style>
-        /* Make file upload more compact */
-        .compact-file-upload {
-            max-width: 100%;
         }
-        .compact-file-upload > .wrap {
-            margin: 0;
-            padding: 0;
         }
-        .compact-file-upload .file-preview {
-            min-height: 0;
         }
-        /* Align file upload with radio buttons */
-        .file-upload-container {
-            display: flex;
-            align-items: flex-end;
-            justify-content: center;
-            padding-bottom: 10px;
         }
-        /* Make description text smaller */
-        .description-text {
-            font-size: 0.85em;
-            color: #666;
-            margin-top: -5px;
-            margin-bottom: 5px;
         }
         </style>
         """)
     return demo
-# This function is a replacement for the original main app setup
-def setup_app_with_ocr():
-    """
-    Setup the application with OCR capabilities
-    """
-    # Initialize the classifier (uses the fixed class)
-    classifier = TextClassifier()
-    # Create the Gradio interface with file upload functionality
-    demo = setup_gradio_interface(classifier)
     # Get the FastAPI app from Gradio
     app = demo.app
-    # Add CORS middleware (same as original code)
-    from fastapi.middleware.cors import CORSMiddleware
     app.add_middleware(
         CORSMiddleware,
         allow_origins=["*"],  # For development
@@ -950,14 +893,11 @@ def setup_app_with_ocr():
         allow_headers=["*"],
     )
-    # Return the demo for launching
     return demo
 # Initialize the application
 if __name__ == "__main__":
-    # Create the app with OCR functionality
-    demo = setup_app_with_ocr()
     # Start the server
     demo.queue()

     def process_file(self, file_path: str) -> Dict:
         """
         Process a file using OCR.space API
         """
         start_time = time.time()
         ocr_logger.info(f"Starting OCR processing for file: {os.path.basename(file_path)}")
         file_type = self._get_file_type(file_path)
         ocr_logger.info(f"Detected file type: {file_type}")
         # Prepare the API request
         with open(file_path, 'rb') as f:
             file_data = f.read()
     def _extract_text_from_result(self, result: Dict) -> str:
         """
         Extract all text from the OCR API result
         """
         extracted_text = ""
     def _get_file_type(self, file_path: str) -> str:
         """
         Determine MIME type of a file
         """
         mime_type, _ = mimetypes.guess_type(file_path)
         if mime_type is None:
             return 'application/octet-stream'
         return mime_type
 def is_admin_password(input_text: str) -> bool:
     """
     Check whether the input text matches the admin password.

     The input is stripped of surrounding whitespace, hashed with SHA-256 and
     compared against the module-level ADMIN_PASSWORD_HASH.

     Args:
         input_text: Candidate password text.

     Returns:
         True if the SHA-256 hex digest of the stripped input equals
         ADMIN_PASSWORD_HASH, otherwise False.
     """
     # Local import so this block does not depend on the file's import header.
     import hmac

     # Hash the input text
     input_hash = hashlib.sha256(input_text.strip().encode()).hexdigest()

     expected_hash = ADMIN_PASSWORD_HASH
     # A plain `==` against a non-string simply yields False; keep that
     # behavior instead of letting compare_digest raise TypeError.
     if not isinstance(expected_hash, str):
         return False

     # Compare hashes in constant time. `==` short-circuits on the first
     # differing character, which is what a timing attack exploits.
     # Both sides are encoded to bytes so a non-ASCII stored value cannot
     # make compare_digest raise.
     return hmac.compare_digest(input_hash.encode(), expected_hash.encode())
 class TextWindowProcessor:
     def __init__(self):
         try:
         return windows, window_sentence_indices
 class TextClassifier:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model_name = MODEL_NAME
         self.tokenizer = None
         self.model.eval()
+    # [Other TextClassifier methods remain the same as in paste.txt]
     def quick_scan(self, text: str) -> Dict:
         """Perform a quick scan using simple window analysis."""
         if not text.strip():
             'num_sentences': num_sentences
         }
 # Function to handle file upload, OCR processing, and text analysis
 def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
     """
     Handle file upload, OCR processing, and text analysis
     """
     if file_obj is None:
         return (
         )
     # Create a temporary file with an appropriate extension based on content
     content_start = file_obj[:20]  # Look at the first few bytes
     # Default to .bin extension
         file_ext = ".png"
     elif content_start.startswith(b'GIF'):      # GIF
         file_ext = ".gif"
     # Create a temporary file with the detected extension
     with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file:
         if os.path.exists(temp_file_path):
             os.remove(temp_file_path)
 def initialize_excel_log():
     """Initialize the Excel log file if it doesn't exist."""
     if not os.path.exists(EXCEL_LOG_PATH):
             overall_result
         )
+# Initialize the classifier globally
+classifier = TextClassifier()
+# Create Gradio interface with a small file upload button next to the radio buttons
+def setup_interface():
     # Create analyzer functions that capture the classifier
     def analyze_text_wrapper(text, mode):
         return analyze_text(text, mode, classifier)
             return analyze_text_wrapper("", mode)  # Return empty analysis
         return handle_file_upload_and_analyze(file_obj, mode, classifier)
+    # Create the interface similar to the original but with a small file upload button
     with gr.Blocks(title="AI Text Detector") as demo:
         gr.Markdown("# AI Text Detector")
         with gr.Row():
+            # Left column for input
             with gr.Column():
                 text_input = gr.Textbox(
+                    lines=8,
                     placeholder="Enter text to analyze...",
                     label="Input Text"
                 )
                 with gr.Row():
+                    # Mode selection (same as original)
+                    mode_selection = gr.Radio(
+                        choices=["quick", "detailed"],
+                        value="quick",
+                        label="Analysis Mode",
+                        info="Quick mode for faster analysis. Detailed mode for sentence-level analysis."
+                    )
+                    # Small file upload button (like the Claude paperclip)
+                    file_upload = gr.File(
+                        label="",
+                        file_types=["image", "pdf", "doc", "docx"],
+                        type="binary",
+                        elem_classes=["small-file-upload"]
+                    )
                 analyze_button = gr.Button("Analyze Text")
+            # Right column for output
             with gr.Column():
                 output_html = gr.HTML(label="Highlighted Analysis")
                 output_sentences = gr.Textbox(label="Sentence-by-Sentence Analysis", lines=10)
                 output_result = gr.Textbox(label="Overall Result", lines=4)
+        # Connect the components
         analyze_button.click(
             analyze_text_wrapper,
             inputs=[text_input, mode_selection],
             outputs=[output_html, output_sentences, output_result]
         )
         file_upload.change(
             handle_file_upload_wrapper,
             inputs=[file_upload, mode_selection],
             outputs=[output_html, output_sentences, output_result]
         )
+        # Custom CSS to style the file upload button like a small paperclip
         gr.HTML("""
         <style>
+        /* Make the file upload small and positioned correctly */
+        .small-file-upload {
+            width: 40px !important;
+            margin-left: 10px !important;
+            margin-top: 15px !important;
         }
+        .small-file-upload > .wrap {
+            padding: 0 !important;
+            margin: 0 !important;
         }
+        .small-file-upload .file-preview {
+            min-height: 0 !important;
+            padding: 0 !important;
         }
+        /* Make file upload look like a paperclip icon */
+        .small-file-upload .icon {
+            font-size: 1.2em !important;
+            opacity: 0.7 !important;
         }
+        .small-file-upload .upload-button {
+            border-radius: 50% !important;
+            padding: 5px !important;
+            width: 30px !important;
+            height: 30px !important;
+            display: flex !important;
+            align-items: center !important;
+            justify-content: center !important;
+        }
+        .small-file-upload .upload-button:hover {
+            background-color: #f0f0f0 !important;
         }
         </style>
         """)
     return demo
+# Setup the app with CORS middleware
+def setup_app():
+    demo = setup_interface()
     # Get the FastAPI app from Gradio
     app = demo.app
+    # Add CORS middleware
     app.add_middleware(
         CORSMiddleware,
         allow_origins=["*"],  # For development
         allow_headers=["*"],
     )
     return demo
 # Initialize the application
 if __name__ == "__main__":
+    demo = setup_app()
     # Start the server
     demo.queue()