Update app.py
Browse files
app.py
CHANGED
|
@@ -12,12 +12,6 @@ from concurrent.futures import ThreadPoolExecutor
|
|
| 12 |
from functools import partial
|
| 13 |
import time
|
| 14 |
from datetime import datetime
|
| 15 |
-
import openpyxl
|
| 16 |
-
from openpyxl import Workbook
|
| 17 |
-
from openpyxl.utils import get_column_letter
|
| 18 |
-
from io import BytesIO
|
| 19 |
-
import base64
|
| 20 |
-
import hashlib
|
| 21 |
|
| 22 |
# Configure logging
|
| 23 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -32,26 +26,6 @@ CONFIDENCE_THRESHOLD = 0.65
|
|
| 32 |
BATCH_SIZE = 8 # Reduced batch size for CPU
|
| 33 |
MAX_WORKERS = 4 # Number of worker threads for processing
|
| 34 |
|
| 35 |
-
# Get password hash from environment variable (more secure)
|
| 36 |
-
ADMIN_PASSWORD_HASH = os.environ.get('ADMIN_PASSWORD_HASH')
|
| 37 |
-
|
| 38 |
-
if not ADMIN_PASSWORD_HASH:
|
| 39 |
-
ADMIN_PASSWORD_HASH = "5e22d1ed71b273b1b2b5331f2d3e0f6cf34595236f201c6924d6bc81de27cdcb"
|
| 40 |
-
|
| 41 |
-
# Excel file path for logs
|
| 42 |
-
EXCEL_LOG_PATH = "/tmp/prediction_logs.xlsx"
|
| 43 |
-
|
| 44 |
-
def is_admin_password(input_text: str) -> bool:
|
| 45 |
-
"""
|
| 46 |
-
Check if the input text matches the admin password using secure hash comparison.
|
| 47 |
-
This prevents the password from being visible in the source code.
|
| 48 |
-
"""
|
| 49 |
-
# Hash the input text
|
| 50 |
-
input_hash = hashlib.sha256(input_text.strip().encode()).hexdigest()
|
| 51 |
-
|
| 52 |
-
# Compare hashes (constant-time comparison to prevent timing attacks)
|
| 53 |
-
return input_hash == ADMIN_PASSWORD_HASH
|
| 54 |
-
|
| 55 |
class TextWindowProcessor:
|
| 56 |
def __init__(self):
|
| 57 |
try:
|
|
@@ -354,133 +328,8 @@ class TextClassifier:
|
|
| 354 |
'num_sentences': num_sentences
|
| 355 |
}
|
| 356 |
|
| 357 |
-
def initialize_excel_log():
|
| 358 |
-
"""Initialize the Excel log file if it doesn't exist."""
|
| 359 |
-
if not os.path.exists(EXCEL_LOG_PATH):
|
| 360 |
-
wb = Workbook()
|
| 361 |
-
ws = wb.active
|
| 362 |
-
ws.title = "Prediction Logs"
|
| 363 |
-
|
| 364 |
-
# Set column headers
|
| 365 |
-
headers = ["timestamp", "word_count", "prediction", "confidence",
|
| 366 |
-
"execution_time_ms", "analysis_mode", "full_text"]
|
| 367 |
-
|
| 368 |
-
for col_num, header in enumerate(headers, 1):
|
| 369 |
-
ws.cell(row=1, column=col_num, value=header)
|
| 370 |
-
|
| 371 |
-
# Adjust column widths for better readability
|
| 372 |
-
ws.column_dimensions[get_column_letter(1)].width = 20 # timestamp
|
| 373 |
-
ws.column_dimensions[get_column_letter(2)].width = 10 # word_count
|
| 374 |
-
ws.column_dimensions[get_column_letter(3)].width = 10 # prediction
|
| 375 |
-
ws.column_dimensions[get_column_letter(4)].width = 10 # confidence
|
| 376 |
-
ws.column_dimensions[get_column_letter(5)].width = 15 # execution_time_ms
|
| 377 |
-
ws.column_dimensions[get_column_letter(6)].width = 15 # analysis_mode
|
| 378 |
-
ws.column_dimensions[get_column_letter(7)].width = 100 # full_text
|
| 379 |
-
|
| 380 |
-
# Save the workbook
|
| 381 |
-
wb.save(EXCEL_LOG_PATH)
|
| 382 |
-
logger.info(f"Initialized Excel log file at {EXCEL_LOG_PATH}")
|
| 383 |
-
|
| 384 |
-
def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
|
| 385 |
-
"""Log prediction data to an Excel file in the /tmp directory."""
|
| 386 |
-
# Initialize the Excel file if it doesn't exist
|
| 387 |
-
if not os.path.exists(EXCEL_LOG_PATH):
|
| 388 |
-
initialize_excel_log()
|
| 389 |
-
|
| 390 |
-
try:
|
| 391 |
-
# Load the existing workbook
|
| 392 |
-
wb = openpyxl.load_workbook(EXCEL_LOG_PATH)
|
| 393 |
-
ws = wb.active
|
| 394 |
-
|
| 395 |
-
# Get the next row number
|
| 396 |
-
next_row = ws.max_row + 1
|
| 397 |
-
|
| 398 |
-
# Clean up the input text for Excel storage (replace problematic characters)
|
| 399 |
-
cleaned_text = input_text.replace("\n", " ")
|
| 400 |
-
|
| 401 |
-
# Prepare row data
|
| 402 |
-
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 403 |
-
row_data = [
|
| 404 |
-
timestamp,
|
| 405 |
-
word_count,
|
| 406 |
-
prediction,
|
| 407 |
-
f"{confidence:.2f}",
|
| 408 |
-
f"{execution_time:.2f}",
|
| 409 |
-
mode,
|
| 410 |
-
cleaned_text
|
| 411 |
-
]
|
| 412 |
-
|
| 413 |
-
# Add the data to the worksheet
|
| 414 |
-
for col_num, value in enumerate(row_data, 1):
|
| 415 |
-
ws.cell(row=next_row, column=col_num, value=value)
|
| 416 |
-
|
| 417 |
-
# Save the workbook
|
| 418 |
-
wb.save(EXCEL_LOG_PATH)
|
| 419 |
-
logger.info(f"Successfully logged prediction data to {EXCEL_LOG_PATH}")
|
| 420 |
-
return True
|
| 421 |
-
|
| 422 |
-
except Exception as e:
|
| 423 |
-
logger.error(f"Error logging prediction data to Excel: {str(e)}")
|
| 424 |
-
return False
|
| 425 |
-
|
| 426 |
-
def get_logs_as_base64():
|
| 427 |
-
"""Read the Excel logs file and return as base64 for downloading."""
|
| 428 |
-
if not os.path.exists(EXCEL_LOG_PATH):
|
| 429 |
-
return None
|
| 430 |
-
|
| 431 |
-
try:
|
| 432 |
-
# Read the Excel file into memory
|
| 433 |
-
with open(EXCEL_LOG_PATH, "rb") as f:
|
| 434 |
-
file_data = f.read()
|
| 435 |
-
|
| 436 |
-
# Encode the file as base64
|
| 437 |
-
base64_data = base64.b64encode(file_data).decode('utf-8')
|
| 438 |
-
return base64_data
|
| 439 |
-
|
| 440 |
-
except Exception as e:
|
| 441 |
-
logger.error(f"Error reading Excel logs: {str(e)}")
|
| 442 |
-
return None
|
| 443 |
-
|
| 444 |
def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
|
| 445 |
"""Analyze text using specified mode and return formatted results."""
|
| 446 |
-
# Check if the input text matches the admin password using secure comparison
|
| 447 |
-
if is_admin_password(text.strip()):
|
| 448 |
-
# Return logs instead of analysis
|
| 449 |
-
base64_data = get_logs_as_base64()
|
| 450 |
-
logs_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 451 |
-
logs_filename = f"prediction_logs_{logs_timestamp}.xlsx"
|
| 452 |
-
|
| 453 |
-
if base64_data:
|
| 454 |
-
# Create downloadable HTML with the logs
|
| 455 |
-
html_content = f"""
|
| 456 |
-
<div style="background-color: #e6f7ff; padding: 15px; border-radius: 5px;">
|
| 457 |
-
<h3>Admin Access Granted - Prediction Logs</h3>
|
| 458 |
-
<p>Logs retrieved at: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>
|
| 459 |
-
<p>Excel file contains all prediction data with full text of all submissions.</p>
|
| 460 |
-
<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{base64_data}"
|
| 461 |
-
download="{logs_filename}"
|
| 462 |
-
style="display: inline-block; margin-top: 10px; padding: 10px 15px;
|
| 463 |
-
background-color: #4CAF50; color: white; text-decoration: none;
|
| 464 |
-
border-radius: 4px;">
|
| 465 |
-
Download Excel Logs
|
| 466 |
-
</a>
|
| 467 |
-
</div>
|
| 468 |
-
"""
|
| 469 |
-
else:
|
| 470 |
-
html_content = """
|
| 471 |
-
<div style="background-color: #ffe6e6; padding: 15px; border-radius: 5px;">
|
| 472 |
-
<h3>Admin Access Granted - No Logs Found</h3>
|
| 473 |
-
<p>No prediction logs were found or there was an error reading the logs file.</p>
|
| 474 |
-
</div>
|
| 475 |
-
"""
|
| 476 |
-
|
| 477 |
-
# Return special admin output instead of normal analysis
|
| 478 |
-
return (
|
| 479 |
-
html_content,
|
| 480 |
-
f"Admin access granted. Logs retrieved at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
| 481 |
-
f"ADMIN MODE\nLogs available for download\nFile: {EXCEL_LOG_PATH}"
|
| 482 |
-
)
|
| 483 |
-
|
| 484 |
# Start timing for normal analysis
|
| 485 |
start_time = time.time()
|
| 486 |
|
|
@@ -508,16 +357,6 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
|
|
| 508 |
# Calculate execution time in milliseconds
|
| 509 |
execution_time = (time.time() - start_time) * 1000
|
| 510 |
|
| 511 |
-
# Log the prediction data
|
| 512 |
-
log_prediction_data(
|
| 513 |
-
input_text=text,
|
| 514 |
-
word_count=word_count,
|
| 515 |
-
prediction=result['prediction'],
|
| 516 |
-
confidence=result['confidence'],
|
| 517 |
-
execution_time=execution_time,
|
| 518 |
-
mode=original_mode
|
| 519 |
-
)
|
| 520 |
-
|
| 521 |
return (
|
| 522 |
text, # No highlighting in quick mode
|
| 523 |
"Quick scan mode - no sentence-level analysis available",
|
|
@@ -544,16 +383,6 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
|
|
| 544 |
# Calculate execution time in milliseconds
|
| 545 |
execution_time = (time.time() - start_time) * 1000
|
| 546 |
|
| 547 |
-
# Log the prediction data
|
| 548 |
-
log_prediction_data(
|
| 549 |
-
input_text=text,
|
| 550 |
-
word_count=word_count,
|
| 551 |
-
prediction=final_pred['prediction'],
|
| 552 |
-
confidence=final_pred['confidence'],
|
| 553 |
-
execution_time=execution_time,
|
| 554 |
-
mode=original_mode
|
| 555 |
-
)
|
| 556 |
-
|
| 557 |
return (
|
| 558 |
analysis['highlighted_text'],
|
| 559 |
"\n".join(detailed_analysis),
|
|
@@ -609,5 +438,4 @@ if __name__ == "__main__":
|
|
| 609 |
server_name="0.0.0.0",
|
| 610 |
server_port=7860,
|
| 611 |
share=True
|
| 612 |
-
)
|
| 613 |
-
|
|
|
|
| 12 |
from functools import partial
|
| 13 |
import time
|
| 14 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Configure logging
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 26 |
BATCH_SIZE = 8 # Reduced batch size for CPU
|
| 27 |
MAX_WORKERS = 4 # Number of worker threads for processing
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
class TextWindowProcessor:
|
| 30 |
def __init__(self):
|
| 31 |
try:
|
|
|
|
| 328 |
'num_sentences': num_sentences
|
| 329 |
}
|
| 330 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
|
| 332 |
"""Analyze text using specified mode and return formatted results."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
# Start timing for normal analysis
|
| 334 |
start_time = time.time()
|
| 335 |
|
|
|
|
| 357 |
# Calculate execution time in milliseconds
|
| 358 |
execution_time = (time.time() - start_time) * 1000
|
| 359 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
return (
|
| 361 |
text, # No highlighting in quick mode
|
| 362 |
"Quick scan mode - no sentence-level analysis available",
|
|
|
|
| 383 |
# Calculate execution time in milliseconds
|
| 384 |
execution_time = (time.time() - start_time) * 1000
|
| 385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
return (
|
| 387 |
analysis['highlighted_text'],
|
| 388 |
"\n".join(detailed_analysis),
|
|
|
|
| 438 |
server_name="0.0.0.0",
|
| 439 |
server_port=7860,
|
| 440 |
share=True
|
| 441 |
+
)
|
|
|