#!/usr/bin/env python
# -*- coding: utf-8 -*-

import json
import sys

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from web.handler.html_parser_handler import HtmlParserHandler

# Make the local "collector" directory importable so the Scrapy project
# settings can be loaded as a package below. NOTE(review): this must run
# before the `collector.settings` import — keep the ordering as-is.
sys.path.append(r"collector")

from collector.settings import ITEM_PIPELINES
|
class BaseController:
    """Base class for the controller layer."""

    def __init__(self):
        # Shared HTML parser available to every concrete controller.
        self.html_parser_handler = HtmlParserHandler()

    def to_vo(self, request, clazz):
        """
        Convert the JSON request body into a value object.

        The body is decoded as UTF-8, parsed as JSON, and the resulting
        key/value pairs are passed to ``clazz`` as keyword arguments.
        """
        body_text = request.body.decode("utf-8")
        payload = json.loads(body_text)
        return clazz(**payload)

    def start_scrawl(self, spider):
        """
        Run the given spider and block until the crawl finishes.
        """
        # get_project_settings() does not pick up the pipeline configuration
        # from settings.py in this setup, so ITEM_PIPELINES is wired in
        # explicitly from the hard-coded import above.
        crawl_settings = get_project_settings()
        crawl_settings['ITEM_PIPELINES'] = ITEM_PIPELINES
        runner = CrawlerProcess(crawl_settings)
        runner.crawl(spider)
        runner.start()