Spaces:

BrightData
/

brightdata-ai-agent

Running

App Files Community

meirk-brd committed 11 days ago

Commit

8ef04af

1 Parent(s): 702cacf

change dataset inputs

Browse files

Files changed (1) hide show

brightdata_datasets.py +70 -10

brightdata_datasets.py CHANGED Viewed

@@ -496,17 +496,49 @@ class BrightDataDatasetTool(Tool):
     name = "brightdata_dataset_fetch"
     description = (
         "Trigger a Bright Data dataset collection and poll until the snapshot is ready. "
-        "Choose a dataset key (e.g., amazon_product, linkedin_company_profile, google_maps_reviews) "
-        "and pass the required parameters as JSON."
     )
     inputs = {
         "dataset": {
             "type": "string",
             "description": f"Dataset key. Options: {', '.join(sorted(DATASETS.keys()))}",
         },
-        "params_json": {
             "type": "string",
-            "description": "JSON string with the required inputs for the chosen dataset",
         },
     }
     output_type = "string"
@@ -531,13 +563,29 @@ class BrightDataDatasetTool(Tool):
         payload.update(fixed_values)
         return payload
-    def forward(self, dataset: str, params_json: str) -> str:
         """
         Trigger a dataset run and poll until results are ready.
         Args:
             dataset: The dataset key from DATASETS.
-            params_json: JSON string containing required inputs for the dataset.
         Returns:
             JSON string of the snapshot data once ready.
@@ -549,10 +597,22 @@ class BrightDataDatasetTool(Tool):
         if dataset not in DATASETS:
             raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(DATASETS.keys()))}")
-        try:
-            params = json.loads(params_json) if params_json else {}
-        except json.JSONDecodeError as exc:
-            raise ValueError(f"params_json is not valid JSON: {exc}") from exc
         payload = self._prepare_payload(dataset, params)
         dataset_id = DATASETS[dataset]["dataset_id"]

     name = "brightdata_dataset_fetch"
     description = (
         "Trigger a Bright Data dataset collection and poll until the snapshot is ready. "
+        "Choose a dataset key (e.g., amazon_product, linkedin_company_profile, google_maps_reviews). "
+        "For most datasets, you only need to provide the URL parameter. "
+        "For example: brightdata_dataset_fetch(dataset='linkedin_person_profile', url='https://linkedin.com/in/...')"
     )
     inputs = {
         "dataset": {
             "type": "string",
             "description": f"Dataset key. Options: {', '.join(sorted(DATASETS.keys()))}",
         },
+        "url": {
             "type": "string",
+            "description": "URL for the dataset (required for most datasets)",
+            "nullable": True,
+        },
+        "keyword": {
+            "type": "string",
+            "description": "Search keyword (for search datasets like amazon_product_search)",
+            "nullable": True,
+        },
+        "first_name": {
+            "type": "string",
+            "description": "First name (for datasets like linkedin_people_search)",
+            "nullable": True,
+        },
+        "last_name": {
+            "type": "string",
+            "description": "Last name (for datasets like linkedin_people_search)",
+            "nullable": True,
+        },
+        "days_limit": {
+            "type": "string",
+            "description": "Days limit (for datasets like google_maps_reviews, default: 3)",
+            "nullable": True,
+        },
+        "num_of_reviews": {
+            "type": "string",
+            "description": "Number of reviews (for datasets like facebook_company_reviews)",
+            "nullable": True,
+        },
+        "num_of_comments": {
+            "type": "string",
+            "description": "Number of comments (for datasets like youtube_comments, default: 10)",
+            "nullable": True,
         },
     }
     output_type = "string"
         payload.update(fixed_values)
         return payload
+    def forward(
+        self,
+        dataset: str,
+        url: str = None,
+        keyword: str = None,
+        first_name: str = None,
+        last_name: str = None,
+        days_limit: str = None,
+        num_of_reviews: str = None,
+        num_of_comments: str = None,
+    ) -> str:
         """
         Trigger a dataset run and poll until results are ready.
         Args:
             dataset: The dataset key from DATASETS.
+            url: URL for the dataset (required for most datasets).
+            keyword: Search keyword (for search datasets).
+            first_name: First name (for people search datasets).
+            last_name: Last name (for people search datasets).
+            days_limit: Days limit (for time-based datasets).
+            num_of_reviews: Number of reviews to fetch.
+            num_of_comments: Number of comments to fetch.
         Returns:
             JSON string of the snapshot data once ready.
         if dataset not in DATASETS:
             raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(DATASETS.keys()))}")
+        # Build params dict from provided arguments
+        params = {}
+        if url is not None:
+            params["url"] = url
+        if keyword is not None:
+            params["keyword"] = keyword
+        if first_name is not None:
+            params["first_name"] = first_name
+        if last_name is not None:
+            params["last_name"] = last_name
+        if days_limit is not None:
+            params["days_limit"] = days_limit
+        if num_of_reviews is not None:
+            params["num_of_reviews"] = num_of_reviews
+        if num_of_comments is not None:
+            params["num_of_comments"] = num_of_comments
         payload = self._prepare_payload(dataset, params)
         dataset_id = DATASETS[dataset]["dataset_id"]