Spaces:
Running
Running
meirk-brd
committed on
Commit
·
8ef04af
1
Parent(s):
702cacf
change dataset inputs
Browse files: brightdata_datasets.py +70 -10
brightdata_datasets.py
CHANGED
|
@@ -496,17 +496,49 @@ class BrightDataDatasetTool(Tool):
|
|
| 496 |
name = "brightdata_dataset_fetch"
|
| 497 |
description = (
|
| 498 |
"Trigger a Bright Data dataset collection and poll until the snapshot is ready. "
|
| 499 |
-
"Choose a dataset key (e.g., amazon_product, linkedin_company_profile, google_maps_reviews) "
|
| 500 |
-
"
|
|
|
|
| 501 |
)
|
| 502 |
inputs = {
|
| 503 |
"dataset": {
|
| 504 |
"type": "string",
|
| 505 |
"description": f"Dataset key. Options: {', '.join(sorted(DATASETS.keys()))}",
|
| 506 |
},
|
| 507 |
-
"
|
| 508 |
"type": "string",
|
| 509 |
-
"description": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
},
|
| 511 |
}
|
| 512 |
output_type = "string"
|
|
@@ -531,13 +563,29 @@ class BrightDataDatasetTool(Tool):
|
|
| 531 |
payload.update(fixed_values)
|
| 532 |
return payload
|
| 533 |
|
| 534 |
-
def forward(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
"""
|
| 536 |
Trigger a dataset run and poll until results are ready.
|
| 537 |
|
| 538 |
Args:
|
| 539 |
dataset: The dataset key from DATASETS.
|
| 540 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
|
| 542 |
Returns:
|
| 543 |
JSON string of the snapshot data once ready.
|
|
@@ -549,10 +597,22 @@ class BrightDataDatasetTool(Tool):
|
|
| 549 |
if dataset not in DATASETS:
|
| 550 |
raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(DATASETS.keys()))}")
|
| 551 |
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
|
| 557 |
payload = self._prepare_payload(dataset, params)
|
| 558 |
dataset_id = DATASETS[dataset]["dataset_id"]
|
|
|
|
| 496 |
name = "brightdata_dataset_fetch"
|
| 497 |
description = (
|
| 498 |
"Trigger a Bright Data dataset collection and poll until the snapshot is ready. "
|
| 499 |
+
"Choose a dataset key (e.g., amazon_product, linkedin_company_profile, google_maps_reviews). "
|
| 500 |
+
"For most datasets, you only need to provide the URL parameter. "
|
| 501 |
+
"For example: brightdata_dataset_fetch(dataset='linkedin_person_profile', url='https://linkedin.com/in/...')"
|
| 502 |
)
|
| 503 |
inputs = {
|
| 504 |
"dataset": {
|
| 505 |
"type": "string",
|
| 506 |
"description": f"Dataset key. Options: {', '.join(sorted(DATASETS.keys()))}",
|
| 507 |
},
|
| 508 |
+
"url": {
|
| 509 |
"type": "string",
|
| 510 |
+
"description": "URL for the dataset (required for most datasets)",
|
| 511 |
+
"nullable": True,
|
| 512 |
+
},
|
| 513 |
+
"keyword": {
|
| 514 |
+
"type": "string",
|
| 515 |
+
"description": "Search keyword (for search datasets like amazon_product_search)",
|
| 516 |
+
"nullable": True,
|
| 517 |
+
},
|
| 518 |
+
"first_name": {
|
| 519 |
+
"type": "string",
|
| 520 |
+
"description": "First name (for datasets like linkedin_people_search)",
|
| 521 |
+
"nullable": True,
|
| 522 |
+
},
|
| 523 |
+
"last_name": {
|
| 524 |
+
"type": "string",
|
| 525 |
+
"description": "Last name (for datasets like linkedin_people_search)",
|
| 526 |
+
"nullable": True,
|
| 527 |
+
},
|
| 528 |
+
"days_limit": {
|
| 529 |
+
"type": "string",
|
| 530 |
+
"description": "Days limit (for datasets like google_maps_reviews, default: 3)",
|
| 531 |
+
"nullable": True,
|
| 532 |
+
},
|
| 533 |
+
"num_of_reviews": {
|
| 534 |
+
"type": "string",
|
| 535 |
+
"description": "Number of reviews (for datasets like facebook_company_reviews)",
|
| 536 |
+
"nullable": True,
|
| 537 |
+
},
|
| 538 |
+
"num_of_comments": {
|
| 539 |
+
"type": "string",
|
| 540 |
+
"description": "Number of comments (for datasets like youtube_comments, default: 10)",
|
| 541 |
+
"nullable": True,
|
| 542 |
},
|
| 543 |
}
|
| 544 |
output_type = "string"
|
|
|
|
| 563 |
payload.update(fixed_values)
|
| 564 |
return payload
|
| 565 |
|
| 566 |
+
def forward(
|
| 567 |
+
self,
|
| 568 |
+
dataset: str,
|
| 569 |
+
url: str = None,
|
| 570 |
+
keyword: str = None,
|
| 571 |
+
first_name: str = None,
|
| 572 |
+
last_name: str = None,
|
| 573 |
+
days_limit: str = None,
|
| 574 |
+
num_of_reviews: str = None,
|
| 575 |
+
num_of_comments: str = None,
|
| 576 |
+
) -> str:
|
| 577 |
"""
|
| 578 |
Trigger a dataset run and poll until results are ready.
|
| 579 |
|
| 580 |
Args:
|
| 581 |
dataset: The dataset key from DATASETS.
|
| 582 |
+
url: URL for the dataset (required for most datasets).
|
| 583 |
+
keyword: Search keyword (for search datasets).
|
| 584 |
+
first_name: First name (for people search datasets).
|
| 585 |
+
last_name: Last name (for people search datasets).
|
| 586 |
+
days_limit: Days limit (for time-based datasets).
|
| 587 |
+
num_of_reviews: Number of reviews to fetch.
|
| 588 |
+
num_of_comments: Number of comments to fetch.
|
| 589 |
|
| 590 |
Returns:
|
| 591 |
JSON string of the snapshot data once ready.
|
|
|
|
| 597 |
if dataset not in DATASETS:
|
| 598 |
raise ValueError(f"Unknown dataset '{dataset}'. Valid options: {', '.join(sorted(DATASETS.keys()))}")
|
| 599 |
|
| 600 |
+
# Build params dict from provided arguments
|
| 601 |
+
params = {}
|
| 602 |
+
if url is not None:
|
| 603 |
+
params["url"] = url
|
| 604 |
+
if keyword is not None:
|
| 605 |
+
params["keyword"] = keyword
|
| 606 |
+
if first_name is not None:
|
| 607 |
+
params["first_name"] = first_name
|
| 608 |
+
if last_name is not None:
|
| 609 |
+
params["last_name"] = last_name
|
| 610 |
+
if days_limit is not None:
|
| 611 |
+
params["days_limit"] = days_limit
|
| 612 |
+
if num_of_reviews is not None:
|
| 613 |
+
params["num_of_reviews"] = num_of_reviews
|
| 614 |
+
if num_of_comments is not None:
|
| 615 |
+
params["num_of_comments"] = num_of_comments
|
| 616 |
|
| 617 |
payload = self._prepare_payload(dataset, params)
|
| 618 |
dataset_id = DATASETS[dataset]["dataset_id"]
|