meirk-brd committed on
Commit
8ef04af
·
1 Parent(s): 702cacf

change dataset inputs

Browse files
Files changed (1) hide show
  1. brightdata_datasets.py +70 -10
brightdata_datasets.py CHANGED
@@ -496,17 +496,49 @@ class BrightDataDatasetTool(Tool):
496
  name = "brightdata_dataset_fetch"
497
  description = (
498
  "Trigger a Bright Data dataset collection and poll until the snapshot is ready. "
499
- "Choose a dataset key (e.g., amazon_product, linkedin_company_profile, google_maps_reviews) "
500
- "and pass the required parameters as JSON."
 
501
  )
502
  inputs = {
503
  "dataset": {
504
  "type": "string",
505
  "description": f"Dataset key. Options: {', '.join(sorted(DATASETS.keys()))}",
506
  },
507
- "params_json": {
508
  "type": "string",
509
- "description": "JSON string with the required inputs for the chosen dataset",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
  },
511
  }
512
  output_type = "string"
@@ -531,13 +563,29 @@ class BrightDataDatasetTool(Tool):
531
  payload.update(fixed_values)
532
  return payload
533
 
534
- def forward(self, dataset: str, params_json: str) -> str:
 
 
 
 
 
 
 
 
 
 
535
  """
536
  Trigger a dataset run and poll until results are ready.
537
 
538
  Args:
539
  dataset: The dataset key from DATASETS.
540
- params_json: JSON string containing required inputs for the dataset.
 
 
 
 
 
 
541
 
542
  Returns:
543
  JSON string of the snapshot data once ready.
@@ -549,10 +597,22 @@ class BrightDataDatasetTool(Tool):
549
  if dataset not in DATASETS:
550
  raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(DATASETS.keys()))}")
551
 
552
- try:
553
- params = json.loads(params_json) if params_json else {}
554
- except json.JSONDecodeError as exc:
555
- raise ValueError(f"params_json is not valid JSON: {exc}") from exc
 
 
 
 
 
 
 
 
 
 
 
 
556
 
557
  payload = self._prepare_payload(dataset, params)
558
  dataset_id = DATASETS[dataset]["dataset_id"]
 
496
  name = "brightdata_dataset_fetch"
497
  description = (
498
  "Trigger a Bright Data dataset collection and poll until the snapshot is ready. "
499
+ "Choose a dataset key (e.g., amazon_product, linkedin_company_profile, google_maps_reviews). "
500
+ "For most datasets, you only need to provide the URL parameter. "
501
+ "For example: brightdata_dataset_fetch(dataset='linkedin_person_profile', url='https://linkedin.com/in/...')"
502
  )
503
  inputs = {
504
  "dataset": {
505
  "type": "string",
506
  "description": f"Dataset key. Options: {', '.join(sorted(DATASETS.keys()))}",
507
  },
508
+ "url": {
509
  "type": "string",
510
+ "description": "URL for the dataset (required for most datasets)",
511
+ "nullable": True,
512
+ },
513
+ "keyword": {
514
+ "type": "string",
515
+ "description": "Search keyword (for search datasets like amazon_product_search)",
516
+ "nullable": True,
517
+ },
518
+ "first_name": {
519
+ "type": "string",
520
+ "description": "First name (for datasets like linkedin_people_search)",
521
+ "nullable": True,
522
+ },
523
+ "last_name": {
524
+ "type": "string",
525
+ "description": "Last name (for datasets like linkedin_people_search)",
526
+ "nullable": True,
527
+ },
528
+ "days_limit": {
529
+ "type": "string",
530
+ "description": "Days limit (for datasets like google_maps_reviews, default: 3)",
531
+ "nullable": True,
532
+ },
533
+ "num_of_reviews": {
534
+ "type": "string",
535
+ "description": "Number of reviews (for datasets like facebook_company_reviews)",
536
+ "nullable": True,
537
+ },
538
+ "num_of_comments": {
539
+ "type": "string",
540
+ "description": "Number of comments (for datasets like youtube_comments, default: 10)",
541
+ "nullable": True,
542
  },
543
  }
544
  output_type = "string"
 
563
  payload.update(fixed_values)
564
  return payload
565
 
566
+ def forward(
567
+ self,
568
+ dataset: str,
569
+ url: str = None,
570
+ keyword: str = None,
571
+ first_name: str = None,
572
+ last_name: str = None,
573
+ days_limit: str = None,
574
+ num_of_reviews: str = None,
575
+ num_of_comments: str = None,
576
+ ) -> str:
577
  """
578
  Trigger a dataset run and poll until results are ready.
579
 
580
  Args:
581
  dataset: The dataset key from DATASETS.
582
+ url: URL for the dataset (required for most datasets).
583
+ keyword: Search keyword (for search datasets).
584
+ first_name: First name (for people search datasets).
585
+ last_name: Last name (for people search datasets).
586
+ days_limit: Days limit (for time-based datasets).
587
+ num_of_reviews: Number of reviews to fetch.
588
+ num_of_comments: Number of comments to fetch.
589
 
590
  Returns:
591
  JSON string of the snapshot data once ready.
 
597
  if dataset not in DATASETS:
598
  raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(DATASETS.keys()))}")
599
 
600
+ # Build params dict from provided arguments
601
+ params = {}
602
+ if url is not None:
603
+ params["url"] = url
604
+ if keyword is not None:
605
+ params["keyword"] = keyword
606
+ if first_name is not None:
607
+ params["first_name"] = first_name
608
+ if last_name is not None:
609
+ params["last_name"] = last_name
610
+ if days_limit is not None:
611
+ params["days_limit"] = days_limit
612
+ if num_of_reviews is not None:
613
+ params["num_of_reviews"] = num_of_reviews
614
+ if num_of_comments is not None:
615
+ params["num_of_comments"] = num_of_comments
616
 
617
  payload = self._prepare_payload(dataset, params)
618
  dataset_id = DATASETS[dataset]["dataset_id"]