Spaces:
Running
Running
File size: 5,827 Bytes
4e6c165 65cb2b8 4e6c165 583638c 4e6c165 a89b867 4e6c165 a89b867 4e6c165 583638c 4e6c165 583638c 4e6c165 65cb2b8 4e6c165 583638c 4e6c165 65cb2b8 4e6c165 583638c 65cb2b8 4e6c165 583638c 65cb2b8 583638c 65cb2b8 4e6c165 65cb2b8 4e6c165 65cb2b8 4e6c165 65cb2b8 4e6c165 65cb2b8 4e6c165 65cb2b8 a89b867 583638c 4e6c165 65cb2b8 4e6c165 65cb2b8 4e6c165 65cb2b8 4e6c165 a89b867 4e6c165 a89b867 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import streamlit as st
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.auth.transport.requests import Request
import pandas as pd
from datetime import datetime
import os
import json
import hashlib
import re
from io import BytesIO
SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file']
TOKEN_FILE = 'token.json'
def get_redirect_uri():
return st.get_option("server.baseUrlPath") or "http://localhost:8501"
def initiate_google_auth():
if not os.path.exists('client_secret.json'):
st.error("Google Sheets integration is not set up correctly.")
st.markdown(
"Please follow the setup guide for Google Sheets integration in the "
"[CyberScraper-2077 README](https://github.com/itsOwen/CyberScraper-2077/blob/main/README.md#setup-google-sheets-authentication)."
)
st.info("Once you've completed the setup, restart the application.")
return
flow = Flow.from_client_secrets_file(
'client_secret.json',
scopes=SCOPES,
redirect_uri=get_redirect_uri()
)
authorization_url, state = flow.authorization_url(prompt='consent')
st.session_state['oauth_state'] = state
st.markdown(f"Please visit this URL to authorize the application: [Auth URL]({authorization_url})")
st.info("After authorizing, you'll be redirected back to this app. Then you can proceed with uploading.")
def get_google_sheets_credentials():
if not os.path.exists('client_secret.json'):
return None
creds = None
if os.path.exists(TOKEN_FILE):
try:
creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
except Exception as e:
print(f"Error loading credentials from file: {str(e)}")
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
try:
creds.refresh(Request())
save_credentials(creds)
except Exception as e:
print(f"Error refreshing credentials: {str(e)}")
creds = None
else:
creds = None
if not creds:
if 'google_auth_token' in st.session_state:
try:
creds = Credentials.from_authorized_user_info(json.loads(st.session_state['google_auth_token']), SCOPES)
save_credentials(creds)
except Exception as e:
print(f"Error creating credentials from session state: {str(e)}")
return creds
def save_credentials(creds):
try:
with open(TOKEN_FILE, 'w') as token:
token.write(creds.to_json())
except Exception as e:
print(f"Error saving credentials: {str(e)}")
def clean_data_for_sheets(df):
def clean_value(val):
if pd.isna(val):
return ""
if isinstance(val, (int, float)):
return str(val)
return str(val).replace('\n', ' ').replace('\r', '')
for col in df.columns:
df[col] = df[col].map(clean_value)
if 'comments' in df.columns:
df['comments'] = df['comments'].astype(str)
return df
def upload_to_google_sheets(data):
creds = get_google_sheets_credentials()
if not creds:
return None
try:
service = build('sheets', 'v4', credentials=creds)
spreadsheet = {
'properties': {
'title': f"CyberScraper Data {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
}
}
spreadsheet = service.spreadsheets().create(body=spreadsheet, fields='spreadsheetId').execute()
spreadsheet_id = spreadsheet.get('spreadsheetId')
if isinstance(data, pd.DataFrame):
df = clean_data_for_sheets(data)
else:
return None
values = [df.columns.tolist()] + df.values.tolist()
body = {'values': values}
result = service.spreadsheets().values().update(
spreadsheetId=spreadsheet_id, range='Sheet1',
valueInputOption='RAW', body=body).execute()
return spreadsheet_id
except HttpError as error:
print(f"An HTTP error occurred: {error}")
return None
except Exception as e:
print(f"An error occurred: {str(e)}")
return None
def display_google_sheets_button(data, unique_key):
if not os.path.exists('client_secret.json'):
st.warning("Google Sheets integration is not set up.")
st.markdown(
"To enable Google Sheets integration, please follow the setup guide in the "
"[CyberScraper-2077 README](https://github.com/itsOwen/CyberScraper-2077/blob/main/README.md#setup-google-sheets-authentication)."
)
return
creds = get_google_sheets_credentials()
if not creds:
auth_button = '🔑 Authorize Google Sheets'
if st.button(auth_button, key=f"auth_sheets_{unique_key}", help="Authorize access to Google Sheets"):
initiate_google_auth()
else:
upload_button = '✅ Upload to Google Sheets'
if st.button(upload_button, key=f"upload_{unique_key}", help="Upload data to Google Sheets"):
with st.spinner("Uploading to Google Sheets..."):
spreadsheet_id = upload_to_google_sheets(data)
if spreadsheet_id:
st.success(f"Hey Choom! Data uploaded successfully. Spreadsheet ID: {spreadsheet_id}")
st.markdown(f"[Open Spreadsheet](https://docs.google.com/spreadsheets/d/{spreadsheet_id})")
else:
st.error("Failed to upload data to Google Sheets. Check the console for error details.")
|