Is there an Amazon

As mentioned by others above, Amazon has discontinued offering reviews in its API. However, I found this good tutorial that does the same thing with Python. Here is the code he provides; it works for me! He uses Python 2.7.

#!/usr/bin/env python # -*- coding: utf-8 -*- # Written as a part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/ from lxml import html import json import requests import json,re from dateutil import parser as dateparser from time import sleep def ParseReviews(asin): #This script has solely been examined with Amazon.com amazon_url = ‘http://www.amazon.com/dp/’+asin # Add some current consumer agent to stop amazon from blocking the request # Discover some chrome consumer agent strings right here https://udger.com/sources/ua-list/browser-detail?browser=Chrome headers = {‘Consumer-Agent’: ‘Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36’} web page = requests.get(amazon_url,headers = headers).textual content parser = html.fromstring(web page) XPATH_AGGREGATE = ‘//span[@id=”acrCustomerReviewText”]’ XPATH_REVIEW_SECTION = ‘//div[@id=”revMHRL”]/div’ XPATH_AGGREGATE_RATING = ‘//desk[@id=”histogramTable”]//tr’ XPATH_PRODUCT_NAME = ‘//h1//span[@id=”productTitle”]//textual content()’ XPATH_PRODUCT_PRICE = ‘//span[@id=”priceblock_ourprice”]/textual content()’ raw_product_price = parser.xpath(XPATH_PRODUCT_PRICE) product_price = ”.be a part of(raw_product_price).substitute(‘,’,”) raw_product_name = parser.xpath(XPATH_PRODUCT_NAME) product_name = ”.be a part of(raw_product_name).strip() total_ratings = parser.xpath(XPATH_AGGREGATE_RATING) evaluations = parser.xpath(XPATH_REVIEW_SECTION) ratings_dict = {} reviews_list = [] #grabing the ranking part in product web page for scores in total_ratings: extracted_rating = scores.xpath(‘./td//a//textual content()’) if extracted_rating: rating_key = extracted_rating[0] raw_raing_value = extracted_rating[1] rating_value = raw_raing_value if rating_key: ratings_dict.replace({rating_key:rating_value}) #Parsing particular person evaluations for evaluate in evaluations: XPATH_RATING =’./div//div//i//textual content()’ XPATH_REVIEW_HEADER = 
‘./div//div//span[contains(@class,”text-bold”)]//textual content()’ XPATH_REVIEW_POSTED_DATE = ‘.//a[contains(@href,”/profile/”)]/guardian::span/following-sibling::span/textual content()’ XPATH_REVIEW_TEXT_1 = ‘.//div//span[@class=”MHRHead”]//textual content()’ XPATH_REVIEW_TEXT_2 = ‘.//div//span[@data-action=”columnbalancing-showfullreview”]/@data-columnbalancing-showfullreview’ XPATH_REVIEW_COMMENTS = ‘.//a[contains(@class,”commentStripe”)]/textual content()’ XPATH_AUTHOR = ‘.//a[contains(@href,”/profile/”)]/guardian::span//textual content()’ XPATH_REVIEW_TEXT_3 = ‘.//div[contains(@id,”dpReviews”)]/div/textual content()’ raw_review_author = evaluate.xpath(XPATH_AUTHOR) raw_review_rating = evaluate.xpath(XPATH_RATING) raw_review_header = evaluate.xpath(XPATH_REVIEW_HEADER) raw_review_posted_date = evaluate.xpath(XPATH_REVIEW_POSTED_DATE) raw_review_text1 = evaluate.xpath(XPATH_REVIEW_TEXT_1) raw_review_text2 = evaluate.xpath(XPATH_REVIEW_TEXT_2) raw_review_text3 = evaluate.xpath(XPATH_REVIEW_TEXT_3) creator = ‘ ‘.be a part of(‘ ‘.be a part of(raw_review_author).break up()).strip(‘By’) #cleansing knowledge review_rating = ”.be a part of(raw_review_rating).substitute(‘out of 5 stars’,”) review_header = ‘ ‘.be a part of(‘ ‘.be a part of(raw_review_header).break up()) review_posted_date = dateparser.parse(”.be a part of(raw_review_posted_date)).strftime(‘%d %b %Y’) review_text = ‘ ‘.be a part of(‘ ‘.be a part of(raw_review_text1).break up()) #grabbing hidden feedback if current if raw_review_text2: json_loaded_review_data = json.masses(raw_review_text2[0]) json_loaded_review_data_text = json_loaded_review_data[‘rest’] cleaned_json_loaded_review_data_text = re.sub(‘<.*?>‘,”,json_loaded_review_data_text) full_review_text = review_text+cleaned_json_loaded_review_data_text else: full_review_text = review_text if not raw_review_text1: full_review_text = ‘ ‘.be a part of(‘ ‘.be a part of(raw_review_text3).break up()) raw_review_comments = 
evaluate.xpath(XPATH_REVIEW_COMMENTS) review_comments = ”.be a part of(raw_review_comments) review_comments = re.sub(‘[A-Za-z]’,”,review_comments).strip() review_dict = { ‘review_comment_count’:review_comments, ‘review_text’:full_review_text, ‘review_posted_date’:review_posted_date, ‘review_header’:review_header, ‘review_rating’:review_rating, ‘review_author’:creator } reviews_list.append(review_dict) knowledge = { ‘scores’:ratings_dict, ‘evaluations’:reviews_list, ‘url’:amazon_url, ‘worth’:product_price, ‘title’:product_name } return knowledge def ReadAsin(): #Add your individual ASINs right here AsinList = [‘B01ETPUQ6E’,’B017HW9DEW’] extracted_data = [] for asin in AsinList: print “Downloading and processing web page http://www.amazon.com/dp/”+asin extracted_data.append(ParseReviews(asin)) sleep(5) f=open(‘knowledge.json’,’w’) json.dump(extracted_data,f,indent=4) if __name__ == ‘__main__’: ReadAsin()

Here is the link to his website: review scraping with Python 2.7.

Leave a Reply

Your email address will not be published. Required fields are marked *