Spaces:
Runtime error
Runtime error
Eddyhzd
committed on
Commit
·
f344af7
1
Parent(s):
2d000cd
test
Browse files- app.py +63 -10
- data_loader.py +269 -0
- mcp_server.py +296 -0
- serveur_mcp.py +0 -45
app.py
CHANGED
|
@@ -1,13 +1,66 @@
|
|
| 1 |
-
import
|
| 2 |
-
from
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
tools = await client.list_tools()
|
| 9 |
-
print("Tools disponibles sur le serveur MCP :")
|
| 10 |
-
for tool in tools:
|
| 11 |
-
print(f"- {tool['name']} : {tool.get('description', '')}")
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
import os
|
| 4 |
+
from mcp import ClientSession, StdioServerParameters
|
| 5 |
+
from mcp.client.stdio import stdio_client
|
| 6 |
|
| 7 |
+
cle_api = os.environ.get("CLE_API_MISTRAL")
|
| 8 |
|
| 9 |
+
# Initialisation du client Mistral (API compatible OpenAI)
|
| 10 |
+
client = OpenAI(api_key=cle_api, base_url="https://api.mistral.ai/v1")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# Fonction chatbot reliée à Mistral
|
| 13 |
+
def chatbot(message, history):
|
| 14 |
+
# Préparer l’historique dans le format de Mistral
|
| 15 |
+
messages = []
|
| 16 |
+
for user_msg, bot_msg in history:
|
| 17 |
+
messages.append({"role": "user", "content": user_msg})
|
| 18 |
+
messages.append({"role": "assistant", "content": bot_msg})
|
| 19 |
+
|
| 20 |
+
messages.append({"role": "user", "content": message})
|
| 21 |
+
|
| 22 |
+
# Appel API Mistral
|
| 23 |
+
response = client.chat.completions.create(
|
| 24 |
+
model="mistral-small-latest",
|
| 25 |
+
messages=messages
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
bot_reply = response.choices[0].message.content.strip()
|
| 29 |
+
history.append(("Vous: " + message, "Bot: " + bot_reply))
|
| 30 |
+
return history, history
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def connect(self) -> str:
|
| 34 |
+
|
| 35 |
+
server_params = StdioServerParameters(
|
| 36 |
+
command='python',
|
| 37 |
+
args=['mcp_server.py'],
|
| 38 |
+
env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
stdio_transport = self.exit_stack.enter_async_context(stdio_client(server_params))
|
| 42 |
+
self.stdio, self.write = stdio_transport
|
| 43 |
+
|
| 44 |
+
self.session = self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))
|
| 45 |
+
self.session.initialize()
|
| 46 |
+
|
| 47 |
+
response = self.session.list_tools()
|
| 48 |
+
self.tools = [{
|
| 49 |
+
"name": tool.name,
|
| 50 |
+
"description": tool.description,
|
| 51 |
+
"input_schema": tool.inputSchema
|
| 52 |
+
} for tool in response.tools]
|
| 53 |
+
|
| 54 |
+
tool_names = [tool["name"] for tool in self.tools]
|
| 55 |
+
return f"Connected to MCP server. Available tools: {', '.join(tool_names)}"
|
| 56 |
+
|
| 57 |
+
with gr.Blocks() as demo:
|
| 58 |
+
|
| 59 |
+
print(connect())
|
| 60 |
+
|
| 61 |
+
chatbot_ui = gr.Chatbot(label="ChatBot")
|
| 62 |
+
msg = gr.Textbox(placeholder="Écrivez un message...")
|
| 63 |
+
|
| 64 |
+
msg.submit(chatbot, [msg, chatbot_ui], [chatbot_ui, chatbot_ui])
|
| 65 |
+
|
| 66 |
+
demo.launch(mcp_server=True)
|
data_loader.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data loader for agricultural intervention data.
|
| 3 |
+
Loads data exclusively from Hugging Face datasets.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import List, Optional
|
| 9 |
+
import os
|
| 10 |
+
from datasets import Dataset, load_dataset
|
| 11 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class AgriculturalDataLoader:
|
| 15 |
+
"""Loads and preprocesses agricultural intervention data from Hugging Face datasets."""
|
| 16 |
+
|
| 17 |
+
def __init__(self, hf_token: str = None, dataset_id: str = None):
|
| 18 |
+
self.hf_token = hf_token or os.environ.get("HF_TOKEN")
|
| 19 |
+
self.dataset_id = dataset_id or "HackathonCRA/2024"
|
| 20 |
+
self.data_cache = {}
|
| 21 |
+
|
| 22 |
+
def load_all_files(self) -> pd.DataFrame:
|
| 23 |
+
"""Load data from Hugging Face dataset."""
|
| 24 |
+
if 'combined_data' in self.data_cache:
|
| 25 |
+
return self.data_cache['combined_data']
|
| 26 |
+
|
| 27 |
+
# Load from Hugging Face only
|
| 28 |
+
df = self.load_from_huggingface()
|
| 29 |
+
self.data_cache['combined_data'] = df
|
| 30 |
+
return df
|
| 31 |
+
|
| 32 |
+
def load_from_huggingface(self) -> pd.DataFrame:
|
| 33 |
+
"""Load data from Hugging Face dataset."""
|
| 34 |
+
print(f"🤗 Loading dataset from Hugging Face: {self.dataset_id}")
|
| 35 |
+
|
| 36 |
+
try:
|
| 37 |
+
# Try multiple loading strategies
|
| 38 |
+
df = None
|
| 39 |
+
|
| 40 |
+
# Strategy 1: Try direct dataset loading
|
| 41 |
+
try:
|
| 42 |
+
dataset = load_dataset(
|
| 43 |
+
self.dataset_id,
|
| 44 |
+
token=self.hf_token,
|
| 45 |
+
streaming=False
|
| 46 |
+
)
|
| 47 |
+
df = dataset["train"].to_pandas()
|
| 48 |
+
print(f"✅ Loaded via load_dataset: {len(df)} records")
|
| 49 |
+
|
| 50 |
+
except Exception as e1:
|
| 51 |
+
print(f"⚠️ load_dataset failed: {e1}")
|
| 52 |
+
|
| 53 |
+
# Strategy 2: Load individual CSV files from HF Hub
|
| 54 |
+
try:
|
| 55 |
+
df = self._load_csv_files_from_hub()
|
| 56 |
+
print(f"✅ Loaded via individual CSV files: {len(df)} records")
|
| 57 |
+
|
| 58 |
+
except Exception as e2:
|
| 59 |
+
print(f"⚠️ CSV loading failed: {e2}")
|
| 60 |
+
raise ValueError(f"All loading strategies failed. Dataset: {e1}, CSV: {e2}")
|
| 61 |
+
|
| 62 |
+
if df is None or len(df) == 0:
|
| 63 |
+
raise ValueError("No data loaded from any strategy")
|
| 64 |
+
|
| 65 |
+
# Apply preprocessing
|
| 66 |
+
df = self._preprocess_data(df)
|
| 67 |
+
print(f"✅ Successfully processed {len(df)} records from Hugging Face")
|
| 68 |
+
|
| 69 |
+
return df
|
| 70 |
+
|
| 71 |
+
except Exception as e:
|
| 72 |
+
raise ValueError(f"Failed to load dataset from Hugging Face: {e}")
|
| 73 |
+
|
| 74 |
+
def _load_csv_files_from_hub(self) -> pd.DataFrame:
|
| 75 |
+
"""Load individual CSV files from Hugging Face Hub."""
|
| 76 |
+
from huggingface_hub import hf_hub_download
|
| 77 |
+
import tempfile
|
| 78 |
+
|
| 79 |
+
print("📂 Loading individual CSV files from HF Hub...")
|
| 80 |
+
|
| 81 |
+
# Get list of CSV files
|
| 82 |
+
api = HfApi()
|
| 83 |
+
try:
|
| 84 |
+
repo_info = api.repo_info(repo_id=self.dataset_id, repo_type="dataset", token=self.hf_token)
|
| 85 |
+
csv_files = [f.rfilename for f in repo_info.siblings if f.rfilename.endswith('.csv')]
|
| 86 |
+
except Exception as e:
|
| 87 |
+
raise ValueError(f"Failed to get repo info: {e}")
|
| 88 |
+
|
| 89 |
+
if not csv_files:
|
| 90 |
+
raise ValueError("No CSV files found in the dataset repository")
|
| 91 |
+
|
| 92 |
+
print(f"📋 Found {len(csv_files)} CSV files")
|
| 93 |
+
|
| 94 |
+
all_dataframes = []
|
| 95 |
+
|
| 96 |
+
for csv_file in csv_files:
|
| 97 |
+
try:
|
| 98 |
+
# Download CSV file to temporary location
|
| 99 |
+
local_path = hf_hub_download(
|
| 100 |
+
repo_id=self.dataset_id,
|
| 101 |
+
filename=csv_file,
|
| 102 |
+
repo_type="dataset",
|
| 103 |
+
token=self.hf_token
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
# Read CSV with appropriate settings
|
| 107 |
+
# First, let's check if we need to skip the first row
|
| 108 |
+
df = pd.read_csv(local_path)
|
| 109 |
+
|
| 110 |
+
# If the first row contains "Interventions (sortie sous excel)", skip it
|
| 111 |
+
if df.columns[0].startswith('Interventions'):
|
| 112 |
+
df = pd.read_csv(local_path)
|
| 113 |
+
all_dataframes.append(df)
|
| 114 |
+
print(f" ✅ {csv_file}: {len(df)} rows")
|
| 115 |
+
|
| 116 |
+
except Exception as e:
|
| 117 |
+
print(f" ⚠️ Failed to load {csv_file}: {e}")
|
| 118 |
+
continue
|
| 119 |
+
|
| 120 |
+
if not all_dataframes:
|
| 121 |
+
raise ValueError("No CSV files could be loaded successfully")
|
| 122 |
+
|
| 123 |
+
# Combine all dataframes
|
| 124 |
+
combined_df = pd.concat(all_dataframes, ignore_index=True)
|
| 125 |
+
return combined_df
|
| 126 |
+
|
| 127 |
+
def _preprocess_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
| 128 |
+
"""Preprocess the agricultural data."""
|
| 129 |
+
print(f"🔧 Preprocessing {len(df)} records...")
|
| 130 |
+
print(f"📋 Available columns: {list(df.columns)}")
|
| 131 |
+
|
| 132 |
+
# Convert date columns
|
| 133 |
+
date_columns = ['datedebut', 'datefin']
|
| 134 |
+
for col in date_columns:
|
| 135 |
+
if col in df.columns:
|
| 136 |
+
df[col] = pd.to_datetime(df[col], format='%d/%m/%y', errors='coerce')
|
| 137 |
+
|
| 138 |
+
# Convert numeric columns
|
| 139 |
+
numeric_columns = ['surfparc', 'quantitetot', 'neffqte', 'peffqte', 'kqte',
|
| 140 |
+
'teneurn', 'teneurp', 'teneurk', 'keq', 'volumebo']
|
| 141 |
+
for col in numeric_columns:
|
| 142 |
+
if col in df.columns:
|
| 143 |
+
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 144 |
+
|
| 145 |
+
# Add derived columns (with error checking)
|
| 146 |
+
if 'millesime' in df.columns:
|
| 147 |
+
df['year'] = df['millesime']
|
| 148 |
+
else:
|
| 149 |
+
print("⚠️ Column 'millesime' not found, trying to infer year from filename or date")
|
| 150 |
+
# Try to extract year from date if available
|
| 151 |
+
if 'datedebut' in df.columns:
|
| 152 |
+
df['year'] = pd.to_datetime(df['datedebut'], errors='coerce').dt.year
|
| 153 |
+
else:
|
| 154 |
+
# Set a default year or raise error
|
| 155 |
+
print("❌ Cannot determine year - setting to 2024 as default")
|
| 156 |
+
df['year'] = 2024
|
| 157 |
+
|
| 158 |
+
if 'libelleusag' in df.columns:
|
| 159 |
+
df['crop_type'] = df['libelleusag']
|
| 160 |
+
else:
|
| 161 |
+
df['crop_type'] = 'unknown'
|
| 162 |
+
|
| 163 |
+
if 'libevenem' in df.columns:
|
| 164 |
+
df['intervention_type'] = df['libevenem']
|
| 165 |
+
else:
|
| 166 |
+
df['intervention_type'] = 'unknown'
|
| 167 |
+
|
| 168 |
+
if 'familleprod' in df.columns:
|
| 169 |
+
df['product_family'] = df['familleprod']
|
| 170 |
+
# Calculate IFT (Treatment Frequency Index) for herbicides
|
| 171 |
+
df['is_herbicide'] = df['familleprod'].str.contains('Herbicides', na=False)
|
| 172 |
+
df['is_fungicide'] = df['familleprod'].str.contains('Fongicides', na=False)
|
| 173 |
+
df['is_insecticide'] = df['familleprod'].str.contains('Insecticides', na=False)
|
| 174 |
+
else:
|
| 175 |
+
df['product_family'] = 'unknown'
|
| 176 |
+
df['is_herbicide'] = False
|
| 177 |
+
df['is_fungicide'] = False
|
| 178 |
+
df['is_insecticide'] = False
|
| 179 |
+
|
| 180 |
+
if 'nomparc' in df.columns:
|
| 181 |
+
df['plot_name'] = df['nomparc']
|
| 182 |
+
else:
|
| 183 |
+
df['plot_name'] = 'unknown'
|
| 184 |
+
|
| 185 |
+
if 'numparcell' in df.columns:
|
| 186 |
+
df['plot_number'] = df['numparcell']
|
| 187 |
+
else:
|
| 188 |
+
df['plot_number'] = 0
|
| 189 |
+
|
| 190 |
+
if 'surfparc' in df.columns:
|
| 191 |
+
df['plot_surface'] = df['surfparc']
|
| 192 |
+
else:
|
| 193 |
+
df['plot_surface'] = 1.0
|
| 194 |
+
|
| 195 |
+
print(f"✅ Preprocessing completed: {len(df)} records with {len(df.columns)} columns")
|
| 196 |
+
return df
|
| 197 |
+
|
| 198 |
+
def get_years_available(self) -> List[int]:
|
| 199 |
+
"""Get list of available years in the data."""
|
| 200 |
+
df = self.load_all_files()
|
| 201 |
+
return sorted(df['year'].dropna().unique().astype(int).tolist())
|
| 202 |
+
|
| 203 |
+
def get_plots_available(self) -> List[str]:
|
| 204 |
+
"""Get list of available plots."""
|
| 205 |
+
df = self.load_all_files()
|
| 206 |
+
return sorted(df['plot_name'].dropna().unique().tolist())
|
| 207 |
+
|
| 208 |
+
def get_crops_available(self) -> List[str]:
|
| 209 |
+
"""Get list of available crop types."""
|
| 210 |
+
df = self.load_all_files()
|
| 211 |
+
return sorted(df['crop_type'].dropna().unique().tolist())
|
| 212 |
+
|
| 213 |
+
def filter_data(self,
|
| 214 |
+
years: Optional[List[int]] = None,
|
| 215 |
+
plots: Optional[List[str]] = None,
|
| 216 |
+
crops: Optional[List[str]] = None,
|
| 217 |
+
intervention_types: Optional[List[str]] = None) -> pd.DataFrame:
|
| 218 |
+
"""Filter the data based on criteria."""
|
| 219 |
+
df = self.load_all_files()
|
| 220 |
+
|
| 221 |
+
if years:
|
| 222 |
+
df = df[df['year'].isin(years)]
|
| 223 |
+
if plots:
|
| 224 |
+
df = df[df['plot_name'].isin(plots)]
|
| 225 |
+
if crops:
|
| 226 |
+
df = df[df['crop_type'].isin(crops)]
|
| 227 |
+
if intervention_types:
|
| 228 |
+
df = df[df['intervention_type'].isin(intervention_types)]
|
| 229 |
+
|
| 230 |
+
return df
|
| 231 |
+
|
| 232 |
+
def get_herbicide_usage(self, years: Optional[List[int]] = None) -> pd.DataFrame:
|
| 233 |
+
"""Get herbicide usage data for weed pressure analysis."""
|
| 234 |
+
df = self.filter_data(years=years)
|
| 235 |
+
herbicide_data = df[df['is_herbicide'] == True].copy()
|
| 236 |
+
|
| 237 |
+
# Group by plot, year, and crop
|
| 238 |
+
usage_summary = herbicide_data.groupby(['plot_name', 'year', 'crop_type']).agg({
|
| 239 |
+
'quantitetot': 'sum',
|
| 240 |
+
'produit': 'count', # Number of herbicide applications
|
| 241 |
+
'surfparc': 'first'
|
| 242 |
+
}).reset_index()
|
| 243 |
+
|
| 244 |
+
usage_summary.columns = ['plot_name', 'year', 'crop_type', 'total_quantity', 'num_applications', 'plot_surface']
|
| 245 |
+
usage_summary['ift_herbicide'] = usage_summary['num_applications'] / usage_summary['plot_surface']
|
| 246 |
+
|
| 247 |
+
return usage_summary
|
| 248 |
+
|
| 249 |
+
def upload_to_huggingface(self) -> str:
|
| 250 |
+
"""Upload data to Hugging Face dataset."""
|
| 251 |
+
if not self.hf_token:
|
| 252 |
+
raise ValueError("HF_TOKEN not provided")
|
| 253 |
+
|
| 254 |
+
df = self.load_all_files()
|
| 255 |
+
dataset = Dataset.from_pandas(df)
|
| 256 |
+
|
| 257 |
+
# Upload to Hugging Face
|
| 258 |
+
dataset.push_to_hub(
|
| 259 |
+
repo_id=self.dataset_id,
|
| 260 |
+
token=self.hf_token,
|
| 261 |
+
private=False
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
return f"Data uploaded to {self.dataset_id}"
|
| 265 |
+
|
| 266 |
+
def clear_cache(self):
|
| 267 |
+
"""Clear cached data to force reload from Hugging Face."""
|
| 268 |
+
self.data_cache.clear()
|
| 269 |
+
print("📋 Cache cleared - will reload from Hugging Face on next access")
|
mcp_server.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""MCP Server for Agricultural Weed Pressure Analysis"""
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import plotly.express as px
|
| 7 |
+
from data_loader import AgriculturalDataLoader
|
| 8 |
+
import warnings
|
| 9 |
+
warnings.filterwarnings('ignore')
|
| 10 |
+
|
| 11 |
+
class WeedPressureAnalyzer:
|
| 12 |
+
"""Analyze weed pressure and recommend plots for sensitive crops."""
|
| 13 |
+
|
| 14 |
+
def __init__(self):
|
| 15 |
+
self.data_loader = AgriculturalDataLoader()
|
| 16 |
+
self.data_cache = None
|
| 17 |
+
|
| 18 |
+
def load_data(self):
|
| 19 |
+
if self.data_cache is None:
|
| 20 |
+
self.data_cache = self.data_loader.load_all_files()
|
| 21 |
+
return self.data_cache
|
| 22 |
+
|
| 23 |
+
def calculate_herbicide_ift(self, years=None):
|
| 24 |
+
"""Calculate IFT for herbicides by plot and year."""
|
| 25 |
+
df = self.load_data()
|
| 26 |
+
|
| 27 |
+
if years:
|
| 28 |
+
df = df[df['year'].isin(years)]
|
| 29 |
+
|
| 30 |
+
herbicide_df = df[df['is_herbicide'] == True].copy()
|
| 31 |
+
|
| 32 |
+
if len(herbicide_df) == 0:
|
| 33 |
+
return pd.DataFrame()
|
| 34 |
+
|
| 35 |
+
ift_summary = herbicide_df.groupby(['plot_name', 'year', 'crop_type']).agg({
|
| 36 |
+
'produit': 'count',
|
| 37 |
+
'plot_surface': 'first',
|
| 38 |
+
'quantitetot': 'sum'
|
| 39 |
+
}).reset_index()
|
| 40 |
+
|
| 41 |
+
ift_summary['ift_herbicide'] = ift_summary['produit'] / ift_summary['plot_surface']
|
| 42 |
+
|
| 43 |
+
return ift_summary
|
| 44 |
+
|
| 45 |
+
def predict_weed_pressure(self, target_years=[2025, 2026, 2027]):
|
| 46 |
+
"""Predict weed pressure for future years."""
|
| 47 |
+
ift_data = self.calculate_herbicide_ift()
|
| 48 |
+
|
| 49 |
+
if len(ift_data) == 0:
|
| 50 |
+
return pd.DataFrame()
|
| 51 |
+
|
| 52 |
+
predictions = []
|
| 53 |
+
|
| 54 |
+
for plot in ift_data['plot_name'].unique():
|
| 55 |
+
plot_data = ift_data[ift_data['plot_name'] == plot].sort_values('year')
|
| 56 |
+
|
| 57 |
+
if len(plot_data) < 2:
|
| 58 |
+
continue
|
| 59 |
+
|
| 60 |
+
years = plot_data['year'].values
|
| 61 |
+
ift_values = plot_data['ift_herbicide'].values
|
| 62 |
+
|
| 63 |
+
if len(years) > 1:
|
| 64 |
+
slope = np.polyfit(years, ift_values, 1)[0]
|
| 65 |
+
intercept = np.polyfit(years, ift_values, 1)[1]
|
| 66 |
+
|
| 67 |
+
for target_year in target_years:
|
| 68 |
+
predicted_ift = slope * target_year + intercept
|
| 69 |
+
predicted_ift = max(0, predicted_ift)
|
| 70 |
+
|
| 71 |
+
if predicted_ift < 1.0:
|
| 72 |
+
risk_level = "Faible"
|
| 73 |
+
elif predicted_ift < 2.0:
|
| 74 |
+
risk_level = "Modéré"
|
| 75 |
+
else:
|
| 76 |
+
risk_level = "Élevé"
|
| 77 |
+
|
| 78 |
+
predictions.append({
|
| 79 |
+
'plot_name': plot,
|
| 80 |
+
'year': target_year,
|
| 81 |
+
'predicted_ift': predicted_ift,
|
| 82 |
+
'risk_level': risk_level,
|
| 83 |
+
'recent_crops': ', '.join(plot_data['crop_type'].tail(3).unique()),
|
| 84 |
+
'historical_avg_ift': plot_data['ift_herbicide'].mean()
|
| 85 |
+
})
|
| 86 |
+
|
| 87 |
+
return pd.DataFrame(predictions)
|
| 88 |
+
|
| 89 |
+
# Initialize analyzer
|
| 90 |
+
analyzer = WeedPressureAnalyzer()
|
| 91 |
+
|
| 92 |
+
def analyze_herbicide_trends(years_range, plot_filter):
|
| 93 |
+
"""Analyze herbicide usage trends over time."""
|
| 94 |
+
try:
|
| 95 |
+
if len(years_range) == 2:
|
| 96 |
+
years = list(range(int(years_range[0]), int(years_range[1]) + 1))
|
| 97 |
+
else:
|
| 98 |
+
years = [int(y) for y in years_range]
|
| 99 |
+
|
| 100 |
+
ift_data = analyzer.calculate_herbicide_ift(years=years)
|
| 101 |
+
|
| 102 |
+
if len(ift_data) == 0:
|
| 103 |
+
return None, "Aucune donnée d'herbicides trouvée."
|
| 104 |
+
|
| 105 |
+
if plot_filter != "Toutes":
|
| 106 |
+
ift_data = ift_data[ift_data['plot_name'] == plot_filter]
|
| 107 |
+
|
| 108 |
+
fig = px.line(ift_data,
|
| 109 |
+
x='year',
|
| 110 |
+
y='ift_herbicide',
|
| 111 |
+
color='plot_name',
|
| 112 |
+
title=f'Évolution de l\'IFT Herbicides',
|
| 113 |
+
labels={'ift_herbicide': 'IFT Herbicides', 'year': 'Année'})
|
| 114 |
+
|
| 115 |
+
summary = f"""
|
| 116 |
+
📊 **Analyse de l'IFT Herbicides**
|
| 117 |
+
|
| 118 |
+
**Statistiques:**
|
| 119 |
+
- IFT moyen: {ift_data['ift_herbicide'].mean():.2f}
|
| 120 |
+
- IFT maximum: {ift_data['ift_herbicide'].max():.2f}
|
| 121 |
+
- Nombre de parcelles: {ift_data['plot_name'].nunique()}
|
| 122 |
+
|
| 123 |
+
**Interprétation:**
|
| 124 |
+
- IFT < 1.0: Pression faible ✅
|
| 125 |
+
- IFT 1.0-2.0: Pression modérée ⚠️
|
| 126 |
+
- IFT > 2.0: Pression élevée ❌
|
| 127 |
+
"""
|
| 128 |
+
|
| 129 |
+
return fig, summary
|
| 130 |
+
|
| 131 |
+
except Exception as e:
|
| 132 |
+
return None, f"Erreur: {str(e)}"
|
| 133 |
+
|
| 134 |
+
def predict_future_weed_pressure():
|
| 135 |
+
"""Predict weed pressure for the next 3 years."""
|
| 136 |
+
try:
|
| 137 |
+
predictions = analyzer.predict_weed_pressure()
|
| 138 |
+
|
| 139 |
+
if len(predictions) == 0:
|
| 140 |
+
return None, "Impossible de générer des prédictions."
|
| 141 |
+
|
| 142 |
+
fig = px.bar(predictions,
|
| 143 |
+
x='plot_name',
|
| 144 |
+
y='predicted_ift',
|
| 145 |
+
color='risk_level',
|
| 146 |
+
facet_col='year',
|
| 147 |
+
title='Prédiction Pression Adventices (2025-2027)',
|
| 148 |
+
color_discrete_map={'Faible': 'green', 'Modéré': 'orange', 'Élevé': 'red'})
|
| 149 |
+
|
| 150 |
+
low_risk = len(predictions[predictions['risk_level'] == 'Faible'])
|
| 151 |
+
moderate_risk = len(predictions[predictions['risk_level'] == 'Modéré'])
|
| 152 |
+
high_risk = len(predictions[predictions['risk_level'] == 'Élevé'])
|
| 153 |
+
|
| 154 |
+
summary = f"""
|
| 155 |
+
🔮 **Prédictions 2025-2027**
|
| 156 |
+
|
| 157 |
+
**Répartition des risques:**
|
| 158 |
+
- ✅ Risque faible: {low_risk} prédictions
|
| 159 |
+
- ⚠️ Risque modéré: {moderate_risk} prédictions
|
| 160 |
+
- ❌ Risque élevé: {high_risk} prédictions
|
| 161 |
+
"""
|
| 162 |
+
|
| 163 |
+
return fig, summary
|
| 164 |
+
|
| 165 |
+
except Exception as e:
|
| 166 |
+
return None, f"Erreur: {str(e)}"
|
| 167 |
+
|
| 168 |
+
def recommend_sensitive_crop_plots():
|
| 169 |
+
"""Recommend plots for sensitive crops."""
|
| 170 |
+
try:
|
| 171 |
+
predictions = analyzer.predict_weed_pressure()
|
| 172 |
+
|
| 173 |
+
if len(predictions) == 0:
|
| 174 |
+
return None, "Aucune recommandation disponible."
|
| 175 |
+
|
| 176 |
+
suitable_plots = predictions[predictions['risk_level'] == "Faible"].copy()
|
| 177 |
+
|
| 178 |
+
if len(suitable_plots) > 0:
|
| 179 |
+
suitable_plots['recommendation_score'] = 100 - (suitable_plots['predicted_ift'] * 30)
|
| 180 |
+
suitable_plots = suitable_plots.sort_values('recommendation_score', ascending=False)
|
| 181 |
+
|
| 182 |
+
top_recommendations = suitable_plots.head(10)[['plot_name', 'year', 'predicted_ift', 'recommendation_score']]
|
| 183 |
+
|
| 184 |
+
summary = f"""
|
| 185 |
+
🌱 **Recommandations Cultures Sensibles**
|
| 186 |
+
|
| 187 |
+
**Top parcelles recommandées:**
|
| 188 |
+
{top_recommendations.to_string(index=False)}
|
| 189 |
+
|
| 190 |
+
**Critères:** IFT prédit < 1.0 (faible pression adventices)
|
| 191 |
+
"""
|
| 192 |
+
|
| 193 |
+
fig = px.scatter(suitable_plots,
|
| 194 |
+
x='predicted_ift',
|
| 195 |
+
y='recommendation_score',
|
| 196 |
+
color='year',
|
| 197 |
+
hover_data=['plot_name'],
|
| 198 |
+
title='Parcelles Recommandées pour Cultures Sensibles')
|
| 199 |
+
|
| 200 |
+
return fig, summary
|
| 201 |
+
else:
|
| 202 |
+
return None, "Aucune parcelle à faible risque identifiée."
|
| 203 |
+
|
| 204 |
+
except Exception as e:
|
| 205 |
+
return None, f"Erreur: {str(e)}"
|
| 206 |
+
|
| 207 |
+
def generate_technical_alternatives(herbicide_family):
|
| 208 |
+
"""Generate technical alternatives."""
|
| 209 |
+
summary = f"""
|
| 210 |
+
🔄 **Alternatives aux {herbicide_family}**
|
| 211 |
+
|
| 212 |
+
**🚜 Alternatives Mécaniques:**
|
| 213 |
+
• Faux-semis répétés avant implantation
|
| 214 |
+
• Binage mécanique en inter-rang
|
| 215 |
+
• Herse étrille en post-levée précoce
|
| 216 |
+
|
| 217 |
+
**🌾 Alternatives Culturales:**
|
| 218 |
+
• Rotation longue avec prairie temporaire
|
| 219 |
+
• Cultures intermédiaires piège à nitrates
|
| 220 |
+
• Densité de semis optimisée
|
| 221 |
+
|
| 222 |
+
**🧪 Alternatives Biologiques:**
|
| 223 |
+
• Stimulateurs de défenses naturelles
|
| 224 |
+
• Extraits végétaux (huiles essentielles)
|
| 225 |
+
• Bioherbicides à base de champignons
|
| 226 |
+
|
| 227 |
+
**📋 Plan d'Action:**
|
| 228 |
+
1. Tester sur petites surfaces
|
| 229 |
+
2. Former les équipes
|
| 230 |
+
3. Suivre l'efficacité
|
| 231 |
+
4. Documenter les résultats
|
| 232 |
+
"""
|
| 233 |
+
|
| 234 |
+
return summary
|
| 235 |
+
|
| 236 |
+
def get_available_plots():
|
| 237 |
+
"""Get available plots."""
|
| 238 |
+
try:
|
| 239 |
+
plots = analyzer.data_loader.get_plots_available()
|
| 240 |
+
return ["Toutes"] + plots
|
| 241 |
+
except:
|
| 242 |
+
return ["Toutes"]
|
| 243 |
+
|
| 244 |
+
# Create Gradio Interface
|
| 245 |
+
def create_mcp_interface():
|
| 246 |
+
with gr.Blocks(title="🚜 Analyse Pression Adventices", theme=gr.themes.Soft()) as demo:
|
| 247 |
+
gr.Markdown("""
|
| 248 |
+
# 🚜 Analyse Pression Adventices - CRA Bretagne
|
| 249 |
+
|
| 250 |
+
Anticiper et réduire la pression des adventices pour optimiser les cultures sensibles (pois, haricot).
|
| 251 |
+
""")
|
| 252 |
+
|
| 253 |
+
with gr.Tabs():
|
| 254 |
+
with gr.Tab("📈 Analyse Tendances"):
|
| 255 |
+
with gr.Row():
|
| 256 |
+
years_slider = gr.Slider(2014, 2024, value=[2020, 2024], step=1, label="Période")
|
| 257 |
+
plot_dropdown = gr.Dropdown(choices=get_available_plots(), value="Toutes", label="Parcelle")
|
| 258 |
+
|
| 259 |
+
analyze_btn = gr.Button("🔍 Analyser", variant="primary")
|
| 260 |
+
|
| 261 |
+
with gr.Row():
|
| 262 |
+
trends_plot = gr.Plot()
|
| 263 |
+
trends_summary = gr.Markdown()
|
| 264 |
+
|
| 265 |
+
analyze_btn.click(analyze_herbicide_trends, [years_slider, plot_dropdown], [trends_plot, trends_summary])
|
| 266 |
+
|
| 267 |
+
with gr.Tab("🔮 Prédictions"):
|
| 268 |
+
predict_btn = gr.Button("🎯 Prédire 2025-2027", variant="primary")
|
| 269 |
+
|
| 270 |
+
with gr.Row():
|
| 271 |
+
predictions_plot = gr.Plot()
|
| 272 |
+
predictions_summary = gr.Markdown()
|
| 273 |
+
|
| 274 |
+
predict_btn.click(predict_future_weed_pressure, outputs=[predictions_plot, predictions_summary])
|
| 275 |
+
|
| 276 |
+
with gr.Tab("🌱 Recommandations"):
|
| 277 |
+
recommend_btn = gr.Button("🎯 Recommander Parcelles", variant="primary")
|
| 278 |
+
|
| 279 |
+
with gr.Row():
|
| 280 |
+
recommendations_plot = gr.Plot()
|
| 281 |
+
recommendations_summary = gr.Markdown()
|
| 282 |
+
|
| 283 |
+
recommend_btn.click(recommend_sensitive_crop_plots, outputs=[recommendations_plot, recommendations_summary])
|
| 284 |
+
|
| 285 |
+
with gr.Tab("🔄 Alternatives"):
|
| 286 |
+
herbicide_type = gr.Dropdown(["Herbicides", "Fongicides"], value="Herbicides", label="Type")
|
| 287 |
+
alternatives_btn = gr.Button("💡 Générer Alternatives", variant="primary")
|
| 288 |
+
alternatives_output = gr.Markdown()
|
| 289 |
+
|
| 290 |
+
alternatives_btn.click(generate_technical_alternatives, [herbicide_type], [alternatives_output])
|
| 291 |
+
|
| 292 |
+
return demo
|
| 293 |
+
|
| 294 |
+
if __name__ == "__main__":
|
| 295 |
+
demo = create_mcp_interface()
|
| 296 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
serveur_mcp.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
from datasets import load_dataset
|
| 3 |
-
from transformers import pipeline
|
| 4 |
-
from mcp.server.fastmcp import FastMCPServer
|
| 5 |
-
|
| 6 |
-
# Charger dataset Hugging Face privé
|
| 7 |
-
dataset = load_dataset("HackathonCRA/2024", split="train")
|
| 8 |
-
df = dataset.to_pandas()
|
| 9 |
-
|
| 10 |
-
# Charger Mistral
|
| 11 |
-
mistral = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2", device_map="auto")
|
| 12 |
-
|
| 13 |
-
# Créer serveur MCP
|
| 14 |
-
server = FastMCPServer("csv_analyzer")
|
| 15 |
-
|
| 16 |
-
@server.tool()
|
| 17 |
-
def list_columns() -> list[str]:
|
| 18 |
-
"""Retourne la liste des colonnes disponibles dans le CSV."""
|
| 19 |
-
return df.columns.tolist()
|
| 20 |
-
|
| 21 |
-
@server.tool()
|
| 22 |
-
def filter_rows(column: str, value: str, limit: int = 5) -> list[dict]:
|
| 23 |
-
"""Retourne des lignes où column == value."""
|
| 24 |
-
if column not in df.columns:
|
| 25 |
-
return [{"error": f"Colonne {column} inexistante"}]
|
| 26 |
-
subset = df[df[column] == value].head(limit)
|
| 27 |
-
return subset.to_dict(orient="records")
|
| 28 |
-
|
| 29 |
-
@server.tool()
|
| 30 |
-
def analyze_data(question: str) -> str:
|
| 31 |
-
"""Interprète les données CSV avec Mistral."""
|
| 32 |
-
# On résume rapidement le dataframe
|
| 33 |
-
sample = df.head(20).to_string()
|
| 34 |
-
prompt = f"""
|
| 35 |
-
Voici un extrait de données tabulaires :
|
| 36 |
-
{sample}
|
| 37 |
-
|
| 38 |
-
Question: {question}
|
| 39 |
-
Réponds de manière concise et claire.
|
| 40 |
-
"""
|
| 41 |
-
output = mistral(prompt, max_new_tokens=256)[0]["generated_text"]
|
| 42 |
-
return output
|
| 43 |
-
|
| 44 |
-
if __name__ == "__main__":
|
| 45 |
-
server.run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|