public_sentiment/scrawl/scrawl/spiders/weibo_spider.py
2024-09-18 13:38:24 +08:00

19 lines
506 B
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import scrapy
# Append the relative directory "scrawl" to the module search path before the
# project import below; the order matters, so keep this line above the import.
# A relative sys.path entry is resolved against the current working directory,
# so this depends on where the spider is launched from.
# NOTE(review): presumably this lets `scrawl.items` resolve when the spider is
# run from the outer project directory -- confirm against the run command.
sys.path.append(r"scrawl")
from scrawl.items import ScrawlItem
class WeiboSpiderSpider(scrapy.Spider):
name = "weibo_spider"
allowed_domains = ["s.weibo.com"]
start_urls = ["https://s.weibo.com/weibo?q=%E5%8C%97%E4%BA%AC%E5%B7%A5%E5%95%86%E5%A4%A7%E5%AD%A6&nodup=1&page=5"]
def parse(self, response):
for con in response.xpath('//*[@id="pl_feedlist_index"]/div/div'):
scraw_item = ScrawlItem()