爬取京东商品评论 - f-尧图网站建设

📅 发布时间：2026/6/20 17:01:10

代码：

# First version of the JD comment scraper: opens the product page in a
# browser, captures the comment API response and prints one dict per comment.
import json
import time

from DrissionPage import ChromiumPage

# Open the browser (instantiate the browser object)
dp = ChromiumPage()
# Visit the product page
dp.get('https://item.jd.com/10167235203199.html')
# Give the page time to load
time.sleep(2)
# Start listening for the comment data packet before triggering the request
dp.listen.start('getLegoWareDetailComment')
# Scroll to the bottom of the page
dp.scroll.to_bottom()
# Click the element that opens the comment panel (located by CSS selector)
dp.ele('css:#comment-root > div.all-btn').click()
# Wait for the data packet to arrive
resp = dp.listen.wait()
# Get the response body
json_data = resp.response.body
print(json_data)
# NOTE(review): the listing iterated over `comments` without ever assigning
# it; 'comments' is presumed to be the key of the list in the response that
# carries the fields read below - confirm against the actual payload.
comments = json_data['comments']
# Walk the list and pull the fields of each comment into a dict
for index in comments:
    dit = {
        '昵称': index['nickname'],
        '评分': index['score'],
        '评论内容': index['content'],
        '点赞数': index['likeCount'],
        '回复数': index['replyCount'],
        '产品型号': index['productName'],
        '产品颜色': index['productColor'],
        '发布地区': index['area'],
        '评论时间': index['createTime'],
    }
    print(dit)

上述代码中有四处需要修改：访问网站、监听数据包、打印 dit 字典内容，以及自动点击打开评论页面。
另外，还需要添加将爬取的数据转为 JSON 进行存储的代码。
修改之后的代码：

# Revised JD comment scraper: opens the product page in a browser, captures
# the comment API response and prints one dict per comment found under the
# 'commentInfoList' key.
import json
import time

from DrissionPage import ChromiumPage

# Open the browser (instantiate the browser object)
dp = ChromiumPage()
# Visit the product page
dp.get('https://item.jd.com/10167235203199.html')
# Give the page time to load
time.sleep(2)
# Start listening for the comment data packet before triggering the request
dp.listen.start('getLegoWareDetailComment')
# Scroll to the bottom of the page
dp.scroll.to_bottom()
# Click the element that opens the comment panel (located by CSS selector)
dp.ele('css:#comment-root > div.all-btn').click()
# Wait for the data packet to arrive
resp = dp.listen.wait()
# Get the response body
json_data = resp.response.body
print("获取到API响应数据")

# Extract the comment list. The isinstance check guards against a body that
# did not parse as a JSON object (e.g. plain text), where `in` would be a
# substring test and `.keys()` would not exist.
if isinstance(json_data, dict) and 'commentInfoList' in json_data:
    comments = json_data['commentInfoList']
    print(f"找到 {len(comments)} 条评论")
    # Walk the list and pull the fields of each comment into a dict;
    # .get() supplies a default when a field is absent.
    for index in comments:
        dit = {
            '昵称': index.get('userNickName', '未知用户'),
            '评分': index.get('commentScore', 0),
            '评论内容': index.get('commentData', ''),
            '点赞数': index.get('praiseCnt', 0),
            '回复数': index.get('replyCnt', 0),
            # Model and colour are not read from the response; left blank.
            '产品型号': '',
            '产品颜色': '',
            '发布地区': index.get('publishArea', ''),
            '评论时间': index.get('commentDate', ''),
        }
        print("\n提取到的评论:")
        print(dit)
else:
    print("没有找到评论列表")
    # Show the top-level keys to help locate the list when the layout differs.
    if isinstance(json_data, dict) and json_data:
        print("响应数据的键:", list(json_data.keys()))
    else:
        print("响应数据的键:", "无数据")

以 JSON 形式存储：

# JD comment scraper with persistence: opens the product page in a browser,
# captures the comment API response, prints one dict per comment and writes
# the collected dicts to a JSON file.
import json
import os
import time

from DrissionPage import ChromiumPage

# Make sure the output directory exists
output_dir = './dashuju/output'
os.makedirs(output_dir, exist_ok=True)
# Path of the JSON output file
output_file = os.path.join(output_dir, 'jd_phone_comments.json')

# Open the browser (instantiate the browser object)
dp = ChromiumPage()
# Visit the product page
dp.get('https://item.jd.com/10167235203199.html')
# Give the page time to load
time.sleep(2)
# Start listening for the comment data packet before triggering the request
dp.listen.start('getLegoWareDetailComment')
# Scroll to the bottom of the page
dp.scroll.to_bottom()
# Click the element that opens the comment panel (located by CSS selector)
dp.ele('css:#comment-root > div.all-btn').click()
# Wait for the data packet to arrive
resp = dp.listen.wait()
# Get the response body
json_data = resp.response.body
print("获取到API响应数据")

# Extract the comment list. The isinstance check guards against a body that
# did not parse as a JSON object (e.g. plain text), where `in` would be a
# substring test and `.keys()` would not exist.
processed_comments = []
if isinstance(json_data, dict) and 'commentInfoList' in json_data:
    comments = json_data['commentInfoList']
    print(f"找到 {len(comments)} 条评论")
    # Walk the list and pull the fields of each comment into a dict;
    # .get() supplies a default when a field is absent.
    for index in comments:
        dit = {
            '昵称': index.get('userNickName', '未知用户'),
            '评分': index.get('commentScore', 0),
            '评论内容': index.get('commentData', ''),
            '点赞数': index.get('praiseCnt', 0),
            '回复数': index.get('replyCnt', 0),
            # Model and colour are not read from the response; left blank.
            '产品型号': '',
            '产品颜色': '',
            '发布地区': index.get('publishArea', ''),
            '评论时间': index.get('commentDate', ''),
        }
        processed_comments.append(dit)
        print("\n提取到的评论:")
        print(dit)

    # Save the collected comments to the JSON file. ensure_ascii=False keeps
    # the Chinese text readable instead of \u escapes.
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(processed_comments, f, ensure_ascii=False, indent=2)
    except (OSError, TypeError, ValueError) as e:
        # OSError: the file could not be opened or written;
        # TypeError/ValueError: a value could not be serialised.
        print(f"保存JSON文件失败: {e}")
    else:
        print(f"\n评论数据已成功保存到: {output_file}")
        print(f"共保存 {len(processed_comments)} 条评论")
else:
    print("没有找到评论列表")
    # Show the top-level keys to help locate the list when the layout differs.
    if isinstance(json_data, dict) and json_data:
        print("响应数据的键:", list(json_data.keys()))
    else:
        print("响应数据的键:", "无数据")