Spaces:

HackathonCRA
/

ChatBot

Runtime error

App Files Files Community

Eddyhzd committed on Sep 18

Commit

f344af7

1 Parent(s): 2d000cd

test

Browse files

Files changed (4) hide show

app.py +63 -10
data_loader.py +269 -0
mcp_server.py +296 -0
serveur_mcp.py +0 -45

app.py CHANGED Viewed

@@ -1,13 +1,66 @@
-import asyncio
-from mcp.client import MCPClient
-MCP_SERVER_URL = "https://hackathoncra-gradio-mcp.hf.space/gradio_api/mcp/"
-async def list_tools():
-    client = await MCPClient.connect(MCP_SERVER_URL)
-    tools = await client.list_tools()
-    print("Tools disponibles sur le serveur MCP :")
-    for tool in tools:
-        print(f"- {tool['name']} : {tool.get('description', '')}")
-asyncio.run(list_tools())

+import gradio as gr
+from openai import OpenAI
+import os
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+# Mistral API key, read from the CLE_API_MISTRAL environment variable (None when unset).
+cle_api = os.environ.get("CLE_API_MISTRAL")
+# Initialise the Mistral client (OpenAI-compatible API)
+client = OpenAI(api_key=cle_api, base_url="https://api.mistral.ai/v1")
+# Chatbot callback backed by the Mistral chat-completions API
+def chatbot(message, history):
+    """Send the conversation to Mistral and append the new exchange to the history.
+
+    Args:
+        message: Text the user just submitted.
+        history: List of ``(user_text, bot_text)`` pairs shown by the chat
+            widget. Entries stored by earlier calls carry the "Vous: " and
+            "Bot: " display labels. ``None`` is treated as an empty history.
+
+    Returns:
+        The updated history twice, one value per output wired to this callback.
+    """
+    user_label, bot_label = "Vous: ", "Bot: "
+    def unlabeled(text, label):
+        # The labels are display-only: strip them so the model sees the raw
+        # turns instead of "Vous: ..." / "Bot: ..." on every later request.
+        text = "" if text is None else str(text)
+        return text[len(label):] if text.startswith(label) else text
+    if history is None:
+        history = []
+    # Rebuild the conversation in the chat-completions message format.
+    messages = []
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": unlabeled(user_msg, user_label)})
+        messages.append({"role": "assistant", "content": unlabeled(bot_msg, bot_label)})
+    messages.append({"role": "user", "content": message})
+    # Mistral API call
+    response = client.chat.completions.create(
+        model="mistral-small-latest",
+        messages=messages
+    )
+    # `content` may be None; fall back to an empty reply instead of crashing on .strip().
+    bot_reply = (response.choices[0].message.content or "").strip()
+    history.append((user_label + message, bot_label + bot_reply))
+    return history, history
+def connect(self=None) -> str:
+    """Start the MCP server over stdio, list its tools and report them.
+
+    The stdio transport and the client session are async context managers and
+    every session call is a coroutine, so the exchange runs inside a private
+    event loop. The session is closed before this function returns; only the
+    tool descriptions are kept.
+
+    Args:
+        self: Optional object that receives the tool list as ``self.tools``.
+            It defaults to ``None`` so the function can be called bare, as the
+            module-level script does.
+
+    Returns:
+        A human-readable status line naming the available tools.
+
+    Raises:
+        RuntimeError: If called while an event loop is already running
+            (raised by ``asyncio.run``).
+    """
+    import asyncio  # local import: this file's header does not import asyncio
+    server_params = StdioServerParameters(
+        command='python',
+        args=['mcp_server.py'],
+        env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
+    )
+    # NOTE(review): mcp_server.py launches a Gradio web app; confirm it really
+    # speaks the MCP stdio protocol, otherwise initialize() cannot complete.
+    async def fetch_tools():
+        async with stdio_client(server_params) as (read_stream, write_stream):
+            async with ClientSession(read_stream, write_stream) as session:
+                await session.initialize()
+                response = await session.list_tools()
+                return [{
+                    "name": tool.name,
+                    "description": tool.description,
+                    "input_schema": tool.inputSchema
+                } for tool in response.tools]
+    tools = asyncio.run(fetch_tools())
+    if self is not None:
+        self.tools = tools
+    tool_names = [tool["name"] for tool in tools]
+    return f"Connected to MCP server. Available tools: {', '.join(tool_names)}"
+# Try the MCP connection first, but never let a failure there stop the chat UI
+# from starting: report the problem and carry on without MCP tools.
+try:
+    print(connect())
+except Exception as exc:  # startup boundary: log and continue
+    print(f"MCP connection failed: {exc}")
+# Chat UI: submitting the textbox calls chatbot(message, history); both return
+# values are routed to the same chatbot component.
+with gr.Blocks() as demo:
+    chatbot_ui = gr.Chatbot(label="ChatBot")
+    msg = gr.Textbox(placeholder="Écrivez un message...")
+    msg.submit(chatbot, [msg, chatbot_ui], [chatbot_ui, chatbot_ui])
+demo.launch(mcp_server=True)

data_loader.py ADDED Viewed

	@@ -0,0 +1,269 @@

+"""
+Data loader for agricultural intervention data.
+Loads data exclusively from Hugging Face datasets.
+"""
+import pandas as pd
+import numpy as np
+from typing import List, Optional
+import os
+from datasets import Dataset, load_dataset
+from huggingface_hub import HfApi, hf_hub_download
+class AgriculturalDataLoader:
+    """Loads and preprocesses agricultural intervention data from Hugging Face datasets.
+
+    The preprocessed frame is cached in ``self.data_cache['combined_data']``
+    after the first successful load; ``clear_cache()`` forces a reload.
+    """
+    def __init__(self, hf_token: Optional[str] = None, dataset_id: Optional[str] = None):
+        """Store the access token and dataset id.
+
+        Args:
+            hf_token: Hugging Face token; falls back to the ``HF_TOKEN``
+                environment variable.
+            dataset_id: Dataset repository id; defaults to "HackathonCRA/2024".
+        """
+        self.hf_token = hf_token or os.environ.get("HF_TOKEN")
+        self.dataset_id = dataset_id or "HackathonCRA/2024"
+        self.data_cache = {}
+    def load_all_files(self) -> pd.DataFrame:
+        """Return the preprocessed dataset, loading it on first access."""
+        if 'combined_data' in self.data_cache:
+            return self.data_cache['combined_data']
+        # Load from Hugging Face only
+        df = self.load_from_huggingface()
+        self.data_cache['combined_data'] = df
+        return df
+    def load_from_huggingface(self) -> pd.DataFrame:
+        """Download the dataset and preprocess it.
+
+        Raises:
+            ValueError: If every loading strategy fails or yields no rows. The
+                original exception is chained as ``__cause__``.
+        """
+        print(f"🤗 Loading dataset from Hugging Face: {self.dataset_id}")
+        try:
+            df = self._load_raw_frame()
+            if df is None or len(df) == 0:
+                raise ValueError("No data loaded from any strategy")
+            # Apply preprocessing
+            df = self._preprocess_data(df)
+            print(f"✅ Successfully processed {len(df)} records from Hugging Face")
+            return df
+        except Exception as e:
+            raise ValueError(f"Failed to load dataset from Hugging Face: {e}") from e
+    def _load_raw_frame(self) -> pd.DataFrame:
+        """Fetch the raw frame: ``load_dataset`` first, individual CSV files as fallback."""
+        # Strategy 1: direct dataset loading
+        try:
+            dataset = load_dataset(
+                self.dataset_id,
+                token=self.hf_token,
+                streaming=False
+            )
+            df = dataset["train"].to_pandas()
+            print(f"✅ Loaded via load_dataset: {len(df)} records")
+            return df
+        except Exception as e1:
+            print(f"⚠️  load_dataset failed: {e1}")
+            # Strategy 2: individual CSV files from the HF Hub. Kept inside
+            # this handler because `e1` is unbound once the handler exits.
+            try:
+                df = self._load_csv_files_from_hub()
+                print(f"✅ Loaded via individual CSV files: {len(df)} records")
+                return df
+            except Exception as e2:
+                print(f"⚠️  CSV loading failed: {e2}")
+                raise ValueError(f"All loading strategies failed. Dataset: {e1}, CSV: {e2}") from e2
+    @staticmethod
+    def _read_intervention_csv(local_path: str) -> pd.DataFrame:
+        """Read one export file, skipping a leading "Interventions ..." title row.
+
+        When the first line is a title such as "Interventions (sortie sous
+        excel)" rather than the column header, the file is read again with
+        that line skipped so the real header is used.
+        """
+        df = pd.read_csv(local_path)
+        if len(df.columns) > 0 and str(df.columns[0]).startswith('Interventions'):
+            df = pd.read_csv(local_path, skiprows=1)
+        return df
+    def _load_csv_files_from_hub(self) -> pd.DataFrame:
+        """Download every ``.csv`` file of the dataset repository and concatenate them.
+
+        Files that cannot be downloaded or parsed are reported and skipped.
+
+        Raises:
+            ValueError: If the repository cannot be inspected, contains no CSV
+                file, or none of its CSV files could be loaded.
+        """
+        print("📂 Loading individual CSV files from HF Hub...")
+        # Get list of CSV files
+        api = HfApi()
+        try:
+            repo_info = api.repo_info(repo_id=self.dataset_id, repo_type="dataset", token=self.hf_token)
+            csv_files = [f.rfilename for f in repo_info.siblings if f.rfilename.endswith('.csv')]
+        except Exception as e:
+            raise ValueError(f"Failed to get repo info: {e}") from e
+        if not csv_files:
+            raise ValueError("No CSV files found in the dataset repository")
+        print(f"📋 Found {len(csv_files)} CSV files")
+        all_dataframes = []
+        for csv_file in csv_files:
+            try:
+                # Download the file and get its local path
+                local_path = hf_hub_download(
+                    repo_id=self.dataset_id,
+                    filename=csv_file,
+                    repo_type="dataset",
+                    token=self.hf_token
+                )
+                df = self._read_intervention_csv(local_path)
+                all_dataframes.append(df)
+                print(f"  ✅ {csv_file}: {len(df)} rows")
+            except Exception as e:
+                # Best effort: one broken file must not discard the others.
+                print(f"  ⚠️  Failed to load {csv_file}: {e}")
+                continue
+        if not all_dataframes:
+            raise ValueError("No CSV files could be loaded successfully")
+        # Combine all dataframes
+        return pd.concat(all_dataframes, ignore_index=True)
+    @staticmethod
+    def _alias(df: pd.DataFrame, target: str, source: str, fallback) -> None:
+        """Expose raw column ``source`` as ``target``, or fill ``target`` with ``fallback``."""
+        df[target] = df[source] if source in df.columns else fallback
+    def _preprocess_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Convert types and add the derived columns used by the analyses.
+
+        The frame is modified in place and also returned. Derived columns are
+        ``year``, ``crop_type``, ``intervention_type``, ``product_family``,
+        ``is_herbicide``, ``is_fungicide``, ``is_insecticide``, ``plot_name``,
+        ``plot_number`` and ``plot_surface``; each gets a default value when
+        its raw source column is missing.
+        """
+        print(f"🔧 Preprocessing {len(df)} records...")
+        print(f"📋 Available columns: {list(df.columns)}")
+        # Convert date columns (values that do not match dd/mm/yy become NaT)
+        for col in ('datedebut', 'datefin'):
+            if col in df.columns:
+                df[col] = pd.to_datetime(df[col], format='%d/%m/%y', errors='coerce')
+        # Convert numeric columns (unparseable values become NaN)
+        numeric_columns = ['surfparc', 'quantitetot', 'neffqte', 'peffqte', 'kqte',
+                          'teneurn', 'teneurp', 'teneurk', 'keq', 'volumebo']
+        for col in numeric_columns:
+            if col in df.columns:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+        # Year: prefer the explicit vintage column, then the start date, then a default
+        if 'millesime' in df.columns:
+            df['year'] = df['millesime']
+        else:
+            print("⚠️  Column 'millesime' not found, trying to infer year from filename or date")
+            if 'datedebut' in df.columns:
+                df['year'] = pd.to_datetime(df['datedebut'], errors='coerce').dt.year
+            else:
+                print("❌ Cannot determine year - setting to 2024 as default")
+                df['year'] = 2024
+        self._alias(df, 'crop_type', 'libelleusag', 'unknown')
+        self._alias(df, 'intervention_type', 'libevenem', 'unknown')
+        self._alias(df, 'product_family', 'familleprod', 'unknown')
+        # Boolean product-family flags, matched on the family label
+        has_family = 'familleprod' in df.columns
+        for flag, keyword in (('is_herbicide', 'Herbicides'),
+                              ('is_fungicide', 'Fongicides'),
+                              ('is_insecticide', 'Insecticides')):
+            df[flag] = df['familleprod'].str.contains(keyword, na=False) if has_family else False
+        self._alias(df, 'plot_name', 'nomparc', 'unknown')
+        self._alias(df, 'plot_number', 'numparcell', 0)
+        self._alias(df, 'plot_surface', 'surfparc', 1.0)
+        print(f"✅ Preprocessing completed: {len(df)} records with {len(df.columns)} columns")
+        return df
+    def get_years_available(self) -> List[int]:
+        """Get the sorted list of years present in the data."""
+        df = self.load_all_files()
+        return sorted(df['year'].dropna().unique().astype(int).tolist())
+    def get_plots_available(self) -> List[str]:
+        """Get the sorted list of plot names present in the data."""
+        df = self.load_all_files()
+        return sorted(df['plot_name'].dropna().unique().tolist())
+    def get_crops_available(self) -> List[str]:
+        """Get the sorted list of crop types present in the data."""
+        df = self.load_all_files()
+        return sorted(df['crop_type'].dropna().unique().tolist())
+    def filter_data(self,
+                   years: Optional[List[int]] = None,
+                   plots: Optional[List[str]] = None,
+                   crops: Optional[List[str]] = None,
+                   intervention_types: Optional[List[str]] = None) -> pd.DataFrame:
+        """Return the rows matching every provided criterion.
+
+        A criterion that is ``None`` or empty is not applied.
+        """
+        df = self.load_all_files()
+        criteria = (
+            ('year', years),
+            ('plot_name', plots),
+            ('crop_type', crops),
+            ('intervention_type', intervention_types),
+        )
+        for column, allowed in criteria:
+            if allowed:
+                df = df[df[column].isin(allowed)]
+        return df
+    def get_herbicide_usage(self, years: Optional[List[int]] = None) -> pd.DataFrame:
+        """Summarise herbicide applications per plot, year and crop.
+
+        Returns:
+            One row per (plot_name, year, crop_type) with ``total_quantity``,
+            ``num_applications``, ``plot_surface`` and ``ift_herbicide``
+            (applications divided by plot surface; NaN when the surface is 0).
+        """
+        df = self.filter_data(years=years)
+        herbicide_data = df[df['is_herbicide'].eq(True)].copy()
+        # Use the derived `plot_surface` column: preprocessing always creates
+        # it, whereas the raw `surfparc` column may be absent. Named
+        # aggregation avoids renaming the result columns by position.
+        usage_summary = herbicide_data.groupby(['plot_name', 'year', 'crop_type']).agg(
+            total_quantity=('quantitetot', 'sum'),
+            num_applications=('produit', 'count'),  # Number of herbicide applications
+            plot_surface=('plot_surface', 'first'),
+        ).reset_index()
+        # A zero surface would produce an infinite index; report it as missing instead.
+        surface = usage_summary['plot_surface'].replace(0, np.nan)
+        usage_summary['ift_herbicide'] = usage_summary['num_applications'] / surface
+        return usage_summary
+    def upload_to_huggingface(self) -> str:
+        """Push the preprocessed data to the Hugging Face dataset repository.
+
+        Raises:
+            ValueError: If no token is available.
+        """
+        if not self.hf_token:
+            raise ValueError("HF_TOKEN not provided")
+        df = self.load_all_files()
+        dataset = Dataset.from_pandas(df)
+        # Upload to Hugging Face
+        dataset.push_to_hub(
+            repo_id=self.dataset_id,
+            token=self.hf_token,
+            private=False
+        )
+        return f"Data uploaded to {self.dataset_id}"
+    def clear_cache(self):
+        """Clear cached data to force reload from Hugging Face."""
+        self.data_cache.clear()
+        print("📋 Cache cleared - will reload from Hugging Face on next access")

mcp_server.py ADDED Viewed

	@@ -0,0 +1,296 @@

+"""MCP Server for Agricultural Weed Pressure Analysis"""
+import gradio as gr
+import pandas as pd
+import numpy as np
+import plotly.express as px
+from data_loader import AgriculturalDataLoader
+import warnings
+warnings.filterwarnings('ignore')
+class WeedPressureAnalyzer:
+    """Analyze weed pressure and recommend plots for sensitive crops."""
+    def __init__(self):
+        """Create the data loader; the data itself is fetched on first use."""
+        self.data_loader = AgriculturalDataLoader()
+        self.data_cache = None
+    def load_data(self):
+        """Return the intervention data, loading it once and caching it."""
+        if self.data_cache is None:
+            self.data_cache = self.data_loader.load_all_files()
+        return self.data_cache
+    def calculate_herbicide_ift(self, years=None):
+        """Calculate the herbicide index by plot, year and crop.
+
+        Args:
+            years: Optional list of years to keep; ``None`` or empty keeps all.
+
+        Returns:
+            One row per (plot_name, year, crop_type) with the application
+            count (``produit``), ``plot_surface``, summed ``quantitetot`` and
+            ``ift_herbicide`` (count divided by surface, NaN when the surface
+            is 0). An empty DataFrame when there is no herbicide row.
+        """
+        df = self.load_data()
+        if years:
+            df = df[df['year'].isin(years)]
+        herbicide_df = df[df['is_herbicide'].eq(True)].copy()
+        if len(herbicide_df) == 0:
+            return pd.DataFrame()
+        ift_summary = herbicide_df.groupby(['plot_name', 'year', 'crop_type']).agg({
+            'produit': 'count',
+            'plot_surface': 'first',
+            'quantitetot': 'sum'
+        }).reset_index()
+        # A zero surface would give an infinite index; mark it as missing instead.
+        surface = ift_summary['plot_surface'].replace(0, np.nan)
+        ift_summary['ift_herbicide'] = ift_summary['produit'] / surface
+        return ift_summary
+    @staticmethod
+    def _risk_level(ift):
+        """Map a predicted index to its risk label (< 1.0, < 2.0, otherwise)."""
+        if ift < 1.0:
+            return "Faible"
+        if ift < 2.0:
+            return "Modéré"
+        return "Élevé"
+    def predict_weed_pressure(self, target_years=(2025, 2026, 2027)):
+        """Extrapolate each plot's herbicide index with a straight-line fit.
+
+        Args:
+            target_years: Years to predict. The default is an immutable tuple
+                so it cannot be altered between calls.
+
+        Returns:
+            One row per (plot, target year) with ``predicted_ift`` (clamped at
+            0), ``risk_level``, ``recent_crops`` and ``historical_avg_ift``.
+            Plots with fewer than two distinct years of finite data are
+            skipped, because a line cannot be fitted through a single year.
+            An empty DataFrame when nothing can be predicted.
+        """
+        ift_data = self.calculate_herbicide_ift()
+        if len(ift_data) == 0:
+            return pd.DataFrame()
+        predictions = []
+        for plot in ift_data['plot_name'].unique():
+            plot_data = ift_data[ift_data['plot_name'] == plot].sort_values('year')
+            # Keep only finite observations: NaN/inf would poison the fit.
+            usable = plot_data[np.isfinite(plot_data['ift_herbicide'].astype(float))]
+            years = usable['year'].to_numpy(dtype=float)
+            ift_values = usable['ift_herbicide'].to_numpy(dtype=float)
+            if np.unique(years).size < 2:
+                continue
+            slope, intercept = np.polyfit(years, ift_values, 1)
+            recent_crops = ', '.join(str(crop) for crop in plot_data['crop_type'].tail(3).unique())
+            historical_avg = usable['ift_herbicide'].mean()
+            for target_year in target_years:
+                predicted_ift = max(0.0, float(slope * target_year + intercept))
+                predictions.append({
+                    'plot_name': plot,
+                    'year': target_year,
+                    'predicted_ift': predicted_ift,
+                    'risk_level': self._risk_level(predicted_ift),
+                    'recent_crops': recent_crops,
+                    'historical_avg_ift': historical_avg
+                })
+        return pd.DataFrame(predictions)
+# Initialize analyzer
+# Module-level instance used by the callback functions below.
+analyzer = WeedPressureAnalyzer()
+def _expand_years(years_range):
+    """Turn the period input into an explicit list of years."""
+    try:
+        bounds = [int(y) for y in years_range]
+    except TypeError:
+        # A single number (e.g. a plain slider value) selects that one year.
+        return [int(years_range)]
+    if len(bounds) == 2:
+        # Accept the bounds in either order: a reversed pair would otherwise
+        # expand to an empty list, which downstream means "no year filter".
+        low, high = sorted(bounds)
+        return list(range(low, high + 1))
+    return bounds
+def analyze_herbicide_trends(years_range, plot_filter):
+    """Analyze herbicide usage trends over time.
+
+    Args:
+        years_range: Either ``[start, end]`` (inclusive range), a single year,
+            or any other list of explicit years.
+        plot_filter: Plot name to keep, or "Toutes" for every plot.
+
+    Returns:
+        ``(figure, markdown_summary)``. The figure is ``None`` when there is
+        no matching data or when an error occurred; the text then explains why.
+    """
+    try:
+        years = _expand_years(years_range)
+        ift_data = analyzer.calculate_herbicide_ift(years=years)
+        if len(ift_data) == 0:
+            return None, "Aucune donnée d'herbicides trouvée."
+        if plot_filter != "Toutes":
+            ift_data = ift_data[ift_data['plot_name'] == plot_filter]
+            # The selected plot may have no herbicide rows in the period;
+            # avoid drawing an empty chart with NaN statistics.
+            if len(ift_data) == 0:
+                return None, "Aucune donnée d'herbicides trouvée."
+        fig = px.line(ift_data,
+                     x='year',
+                     y='ift_herbicide',
+                     color='plot_name',
+                     title="Évolution de l'IFT Herbicides",
+                     labels={'ift_herbicide': 'IFT Herbicides', 'year': 'Année'})
+        summary = f"""
+📊 **Analyse de l'IFT Herbicides**
+**Statistiques:**
+- IFT moyen: {ift_data['ift_herbicide'].mean():.2f}
+- IFT maximum: {ift_data['ift_herbicide'].max():.2f}
+- Nombre de parcelles: {ift_data['plot_name'].nunique()}
+**Interprétation:**
+- IFT < 1.0: Pression faible ✅
+- IFT 1.0-2.0: Pression modérée ⚠️
+- IFT > 2.0: Pression élevée ❌
+        """
+        return fig, summary
+    except Exception as e:
+        return None, f"Erreur: {str(e)}"
+def predict_future_weed_pressure():
+    """Chart and summarise the analyzer's weed-pressure forecast.
+
+    Returns:
+        ``(figure, markdown_summary)``. The figure is ``None`` when no
+        forecast could be produced or when an exception was raised; the text
+        then carries the reason.
+    """
+    try:
+        forecast = analyzer.predict_weed_pressure()
+        if not len(forecast):
+            return None, "Impossible de générer des prédictions."
+        # Count the predictions falling in each risk class.
+        risk_column = forecast['risk_level']
+        low_risk, moderate_risk, high_risk = (
+            int((risk_column == level).sum()) for level in ('Faible', 'Modéré', 'Élevé')
+        )
+        palette = {'Faible': 'green', 'Modéré': 'orange', 'Élevé': 'red'}
+        chart = px.bar(
+            forecast,
+            x='plot_name',
+            y='predicted_ift',
+            color='risk_level',
+            facet_col='year',
+            title='Prédiction Pression Adventices (2025-2027)',
+            color_discrete_map=palette,
+        )
+        report = f"""
+🔮 **Prédictions 2025-2027**
+**Répartition des risques:**
+- ✅ Risque faible: {low_risk} prédictions
+- ⚠️ Risque modéré: {moderate_risk} prédictions
+- ❌ Risque élevé: {high_risk} prédictions
+        """
+        return chart, report
+    except Exception as err:
+        return None, f"Erreur: {str(err)}"
+def recommend_sensitive_crop_plots():
+    """Rank the plots predicted to be low-risk for sensitive crops.
+
+    Returns:
+        ``(figure, markdown_summary)``. The figure is ``None`` when there is
+        no prediction, no low-risk plot, or an exception was raised; the text
+        then carries the reason.
+    """
+    try:
+        forecast = analyzer.predict_weed_pressure()
+        if not len(forecast):
+            return None, "Aucune recommandation disponible."
+        low_risk = forecast[forecast['risk_level'] == "Faible"]
+        if not len(low_risk):
+            return None, "Aucune parcelle à faible risque identifiée."
+        # Score decreases linearly with the predicted index; best plots first.
+        ranked = low_risk.assign(
+            recommendation_score=lambda frame: 100 - (frame['predicted_ift'] * 30)
+        ).sort_values('recommendation_score', ascending=False)
+        shown_columns = ['plot_name', 'year', 'predicted_ift', 'recommendation_score']
+        top_recommendations = ranked.head(10)[shown_columns]
+        report = f"""
+🌱 **Recommandations Cultures Sensibles**
+**Top parcelles recommandées:**
+{top_recommendations.to_string(index=False)}
+**Critères:** IFT prédit < 1.0 (faible pression adventices)
+            """
+        chart = px.scatter(
+            ranked,
+            x='predicted_ift',
+            y='recommendation_score',
+            color='year',
+            hover_data=['plot_name'],
+            title='Parcelles Recommandées pour Cultures Sensibles',
+        )
+        return chart, report
+    except Exception as err:
+        return None, f"Erreur: {str(err)}"
+def generate_technical_alternatives(herbicide_family):
+    """Return a fixed markdown list of alternatives, titled with the given product family."""
+    # Only the heading depends on the argument; the rest is static text.
+    return f"""
+🔄 **Alternatives aux {herbicide_family}**
+**🚜 Alternatives Mécaniques:**
+• Faux-semis répétés avant implantation
+• Binage mécanique en inter-rang
+• Herse étrille en post-levée précoce
+**🌾 Alternatives Culturales:**
+• Rotation longue avec prairie temporaire
+• Cultures intermédiaires piège à nitrates
+• Densité de semis optimisée
+**🧪 Alternatives Biologiques:**
+• Stimulateurs de défenses naturelles
+• Extraits végétaux (huiles essentielles)
+• Bioherbicides à base de champignons
+**📋 Plan d'Action:**
+1. Tester sur petites surfaces
+2. Former les équipes
+3. Suivre l'efficacité
+4. Documenter les résultats
+    """
+def get_available_plots():
+    """Return the dropdown choices: "Toutes" followed by the known plot names.
+
+    Falls back to ``["Toutes"]`` alone when the plot list cannot be obtained,
+    so the interface can still be built without data.
+    """
+    try:
+        plots = analyzer.data_loader.get_plots_available()
+        return ["Toutes"] + plots
+    except Exception as exc:
+        # Catch Exception, not a bare except, so Ctrl-C and interpreter exit
+        # still propagate; report the reason rather than hiding it.
+        print(f"⚠️  Could not load plot names: {exc}")
+        return ["Toutes"]
+# Create Gradio Interface
+def create_mcp_interface():
+    """Build the four-tab Gradio app (trends, predictions, recommendations, alternatives).
+
+    Returns:
+        The ``gr.Blocks`` application, not yet launched.
+    """
+    def analyze_period(start_year, end_year, plot_filter):
+        """Analyze herbicide trends between two years (inclusive) for one plot or all plots."""
+        # gr.Slider holds a single number, so the period is picked with two
+        # sliders and recombined here into the [start, end] pair expected by
+        # analyze_herbicide_trends. Bounds are accepted in either order.
+        low, high = sorted((start_year, end_year))
+        return analyze_herbicide_trends([low, high], plot_filter)
+    with gr.Blocks(title="🚜 Analyse Pression Adventices", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
+        # 🚜 Analyse Pression Adventices - CRA Bretagne
+        Anticiper et réduire la pression des adventices pour optimiser les cultures sensibles (pois, haricot).
+        """)
+        with gr.Tabs():
+            with gr.Tab("📈 Analyse Tendances"):
+                with gr.Row():
+                    start_year_slider = gr.Slider(2014, 2024, value=2020, step=1, label="Période - début")
+                    end_year_slider = gr.Slider(2014, 2024, value=2024, step=1, label="Période - fin")
+                    plot_dropdown = gr.Dropdown(choices=get_available_plots(), value="Toutes", label="Parcelle")
+                analyze_btn = gr.Button("🔍 Analyser", variant="primary")
+                with gr.Row():
+                    trends_plot = gr.Plot()
+                    trends_summary = gr.Markdown()
+                analyze_btn.click(analyze_period, [start_year_slider, end_year_slider, plot_dropdown], [trends_plot, trends_summary])
+            with gr.Tab("🔮 Prédictions"):
+                predict_btn = gr.Button("🎯 Prédire 2025-2027", variant="primary")
+                with gr.Row():
+                    predictions_plot = gr.Plot()
+                    predictions_summary = gr.Markdown()
+                predict_btn.click(predict_future_weed_pressure, outputs=[predictions_plot, predictions_summary])
+            with gr.Tab("🌱 Recommandations"):
+                recommend_btn = gr.Button("🎯 Recommander Parcelles", variant="primary")
+                with gr.Row():
+                    recommendations_plot = gr.Plot()
+                    recommendations_summary = gr.Markdown()
+                recommend_btn.click(recommend_sensitive_crop_plots, outputs=[recommendations_plot, recommendations_summary])
+            with gr.Tab("🔄 Alternatives"):
+                herbicide_type = gr.Dropdown(["Herbicides", "Fongicides"], value="Herbicides", label="Type")
+                alternatives_btn = gr.Button("💡 Générer Alternatives", variant="primary")
+                alternatives_output = gr.Markdown()
+                alternatives_btn.click(generate_technical_alternatives, [herbicide_type], [alternatives_output])
+    return demo
+if __name__ == "__main__":
+    # Script entry point: build the interface and serve it on every network
+    # interface, port 7860.
+    demo = create_mcp_interface()
+    # NOTE(review): share=True also requests a public share link; confirm it is wanted for this deployment.
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)

serveur_mcp.py DELETED Viewed

@@ -1,45 +0,0 @@
-import pandas as pd
-from datasets import load_dataset
-from transformers import pipeline
-from mcp.server.fastmcp import FastMCPServer
-# Charger dataset Hugging Face privé
-dataset = load_dataset("HackathonCRA/2024", split="train")
-df = dataset.to_pandas()
-# Charger Mistral
-mistral = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2", device_map="auto")
-# Créer serveur MCP
-server = FastMCPServer("csv_analyzer")
-@server.tool()
-def list_columns() -> list[str]:
-    """Retourne la liste des colonnes disponibles dans le CSV."""
-    return df.columns.tolist()
-@server.tool()
-def filter_rows(column: str, value: str, limit: int = 5) -> list[dict]:
-    """Retourne des lignes où column == value."""
-    if column not in df.columns:
-        return [{"error": f"Colonne {column} inexistante"}]
-    subset = df[df[column] == value].head(limit)
-    return subset.to_dict(orient="records")
-@server.tool()
-def analyze_data(question: str) -> str:
-    """Interprète les données CSV avec Mistral."""
-    # On résume rapidement le dataframe
-    sample = df.head(20).to_string()
-    prompt = f"""
-    Voici un extrait de données tabulaires :
-    {sample}
-    Question: {question}
-    Réponds de manière concise et claire.
-    """
-    output = mistral(prompt, max_new_tokens=256)[0]["generated_text"]
-    return output
-if __name__ == "__main__":
-    server.run()