Spaces:

Neurolingua
/

AgriChatbot

Sleeping

App Files Files Community

Neurolingua committed on Aug 21, 2024

Commit

28bae9c

verified ·

1 Parent(s): cc3fbd0

Update other_function.py

Browse files

Files changed (1) hide show

other_function.py +26 -32

other_function.py CHANGED Viewed

@@ -114,43 +114,37 @@ def get_weather(city):
 import scrapy
 from scrapy.crawler import CrawlerProcess
 import pandas as pd
-class RatesSpider(scrapy.Spider):
-    name = "rates_spider"
     start_urls = ['https://www.kisandeals.com/mandiprices/ALL/TAMIL-NADU/ALL']
     def parse(self, response):
-        # Extract the table data
-        table_rows = response.xpath('//table/tbody/tr')
-        # Initialize a list to hold the data
-        data = []
-        for row in table_rows:
-            # Extract the commodity name and price per kg
-            commodity_name = row.xpath('td[1]//text()').get().strip()
-            price_per_kg = row.xpath('td[2]//text()').get().strip()
-            # Append the data to the list
-            data.append((commodity_name, price_per_kg))
-        # Convert the data to a Pandas DataFrame
-        df = pd.DataFrame(data, columns=['Commodity', 'Price per kg'])
-        # Convert the DataFrame to a dictionary
-        rate_dict = df.set_index('Commodity')['Price per kg'].to_dict()
-        # Return the scraped rates
-        return rate_dict+' This is prices for 1 kg'
 def get_rates():
-    process = CrawlerProcess({
-        'USER_AGENT': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
-        'LOG_LEVEL': 'ERROR',  # Suppress logging to keep the output clean
-    })
-    spider = RatesSpider()
-    process.crawl(spider)
-    process.start()  # This will block until the spider is done
-    # Get the scraped data from the spider
-    return spider.parse(None)

 import scrapy
 from scrapy.crawler import CrawlerProcess
 import pandas as pd
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+import scrapy
class RateSpider(scrapy.Spider):
    """Spider that scrapes the kisandeals.com mandi-price table.

    The start URL is fetched once and the rows of its table are folded
    into a single ``{commodity: price}`` dict, which is returned as the
    spider's only item.
    """

    name = 'rates'
    start_urls = ['https://www.kisandeals.com/mandiprices/ALL/TAMIL-NADU/ALL']

    def parse(self, response):
        """Build a commodity -> price mapping from the table rows.

        Args:
            response: The Scrapy response for the start URL.

        Returns:
            dict: Text of the first cell of each ``//table/tbody/tr`` row
            mapped to the text of the second cell. Rows whose first
            cell has no text are skipped.
        """
        data = {}
        for row in response.xpath('//table/tbody/tr'):
            # normalize-space() returns the cell's whole text (including
            # text nested inside child elements) with surrounding
            # whitespace trimmed, and an empty string instead of None when
            # the cell is missing. This avoids rows collapsing onto a
            # shared ``None`` key.
            commodity = row.xpath('normalize-space(td[1])').get()
            price = row.xpath('normalize-space(td[2])').get()
            if not commodity:
                continue
            data[commodity] = price
        return data
 def get_rates():
+    # Set up a Scrapy process
+    process = CrawlerProcess(get_project_settings())
+    # Set up a dictionary to store the scraped data
+    data = {}
+    # Run the spider
+    def crawler_finished(signal, sender, item, response, spider):
+        data.update(item)
+    process.signals.connect(crawler_finished, signal=scrapy.signals.item_scraped)
+    process.crawl(RateSpider)
+    process.start()  # This will block until the crawling is finished
+    # Return the scraped data as a string (or format as needed)
+    return str(data) + ' These prices are for 1 kg'