File size: 5,827 Bytes
4e6c165
 
 
 
 
 
 
 
 
 
 
65cb2b8
 
4e6c165
 
583638c
4e6c165
 
 
 
 
a89b867
 
 
 
 
 
 
 
 
4e6c165
 
 
 
 
 
 
 
 
 
 
 
 
a89b867
 
 
4e6c165
583638c
4e6c165
583638c
4e6c165
65cb2b8
4e6c165
 
 
 
 
583638c
4e6c165
65cb2b8
4e6c165
583638c
 
 
 
 
 
 
 
 
65cb2b8
4e6c165
 
583638c
 
 
 
 
65cb2b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583638c
65cb2b8
 
 
4e6c165
 
 
 
 
65cb2b8
4e6c165
 
 
 
 
 
 
65cb2b8
 
 
 
 
4e6c165
 
 
 
 
 
 
 
65cb2b8
4e6c165
 
65cb2b8
4e6c165
 
65cb2b8
a89b867
 
 
 
 
 
 
 
583638c
 
4e6c165
65cb2b8
4e6c165
 
 
65cb2b8
4e6c165
65cb2b8
4e6c165
a89b867
4e6c165
 
a89b867
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import streamlit as st
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.auth.transport.requests import Request
import pandas as pd
from datetime import datetime
import os
import json
import hashlib
import re
from io import BytesIO

SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file']
TOKEN_FILE = 'token.json'

def get_redirect_uri():
    return st.get_option("server.baseUrlPath") or "http://localhost:8501"

def initiate_google_auth():
    if not os.path.exists('client_secret.json'):
        st.error("Google Sheets integration is not set up correctly.")
        st.markdown(
            "Please follow the setup guide for Google Sheets integration in the "
            "[CyberScraper-2077 README](https://github.com/itsOwen/CyberScraper-2077/blob/main/README.md#setup-google-sheets-authentication)."
        )
        st.info("Once you've completed the setup, restart the application.")
        return

    flow = Flow.from_client_secrets_file(
        'client_secret.json',
        scopes=SCOPES,
        redirect_uri=get_redirect_uri()
    )
    
    authorization_url, state = flow.authorization_url(prompt='consent')
    
    st.session_state['oauth_state'] = state
    st.markdown(f"Please visit this URL to authorize the application: [Auth URL]({authorization_url})")
    st.info("After authorizing, you'll be redirected back to this app. Then you can proceed with uploading.")

def get_google_sheets_credentials():
    if not os.path.exists('client_secret.json'):
        return None

    creds = None
    if os.path.exists(TOKEN_FILE):
        try:
            creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
        except Exception as e:
            print(f"Error loading credentials from file: {str(e)}")
    
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                save_credentials(creds)
            except Exception as e:
                print(f"Error refreshing credentials: {str(e)}")
                creds = None
        else:
            creds = None
    
    if not creds:
        if 'google_auth_token' in st.session_state:
            try:
                creds = Credentials.from_authorized_user_info(json.loads(st.session_state['google_auth_token']), SCOPES)
                save_credentials(creds)
            except Exception as e:
                print(f"Error creating credentials from session state: {str(e)}")
    return creds

def save_credentials(creds):
    try:
        with open(TOKEN_FILE, 'w') as token:
            token.write(creds.to_json())
    except Exception as e:
        print(f"Error saving credentials: {str(e)}")

def clean_data_for_sheets(df):
    def clean_value(val):
        if pd.isna(val):
            return ""
        if isinstance(val, (int, float)):
            return str(val)
        return str(val).replace('\n', ' ').replace('\r', '')

    for col in df.columns:
        df[col] = df[col].map(clean_value)

    if 'comments' in df.columns:
        df['comments'] = df['comments'].astype(str)

    return df

def upload_to_google_sheets(data):
    creds = get_google_sheets_credentials()
    if not creds:
        return None

    try:
        service = build('sheets', 'v4', credentials=creds)        
        spreadsheet = {
            'properties': {
                'title': f"CyberScraper Data {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
            }
        }
        spreadsheet = service.spreadsheets().create(body=spreadsheet, fields='spreadsheetId').execute()
        spreadsheet_id = spreadsheet.get('spreadsheetId')

        if isinstance(data, pd.DataFrame):
            df = clean_data_for_sheets(data)
        else:
            return None

        values = [df.columns.tolist()] + df.values.tolist()
        body = {'values': values}
        result = service.spreadsheets().values().update(
            spreadsheetId=spreadsheet_id, range='Sheet1',
            valueInputOption='RAW', body=body).execute()
        return spreadsheet_id
    except HttpError as error:
        print(f"An HTTP error occurred: {error}")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

def display_google_sheets_button(data, unique_key):
    if not os.path.exists('client_secret.json'):
        st.warning("Google Sheets integration is not set up.")
        st.markdown(
            "To enable Google Sheets integration, please follow the setup guide in the "
            "[CyberScraper-2077 README](https://github.com/itsOwen/CyberScraper-2077/blob/main/README.md#setup-google-sheets-authentication)."
        )
        return

    creds = get_google_sheets_credentials()
    if not creds:
        auth_button = '🔑 Authorize Google Sheets'
        if st.button(auth_button, key=f"auth_sheets_{unique_key}", help="Authorize access to Google Sheets"):
            initiate_google_auth()
    else:
        upload_button = '✅ Upload to Google Sheets'
        if st.button(upload_button, key=f"upload_{unique_key}", help="Upload data to Google Sheets"):
            with st.spinner("Uploading to Google Sheets..."):
                spreadsheet_id = upload_to_google_sheets(data)
                if spreadsheet_id:
                    st.success(f"Hey Choom! Data uploaded successfully. Spreadsheet ID: {spreadsheet_id}")
                    st.markdown(f"[Open Spreadsheet](https://docs.google.com/spreadsheets/d/{spreadsheet_id})")
                else:
                    st.error("Failed to upload data to Google Sheets. Check the console for error details.")