46 lines
1.2 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
|
||
import json
|
||
import sys
|
||
from scrapy.crawler import CrawlerProcess
|
||
from scrapy.utils.project import get_project_settings
|
||
|
||
from web.handler.apscheduler_handler import ApschedulerHandler
|
||
from web.handler.html_parser_handler import HtmlParserHandler
|
||
|
||
sys.path.append(r"collector")
|
||
|
||
from collector.settings import ITEM_PIPELINES
|
||
|
||
|
||
class BaseController:
    """Base class for the controller layer.

    Holds the handlers shared by concrete controllers and provides the
    common request-deserialization and crawl-launching helpers.
    """

    def __init__(self):
        # Shared handler instances, one per controller.
        self.html_parser_handler = HtmlParserHandler()
        self.apscheduler_handler = ApschedulerHandler()

    def to_vo(self, request, clazz):
        """Deserialize the JSON request body into an instance of *clazz*.

        The raw body is decoded as UTF-8, parsed as JSON, and the
        resulting top-level keys are forwarded to ``clazz`` as keyword
        arguments. Raises ``json.JSONDecodeError`` on malformed input
        and ``TypeError`` if the keys do not match ``clazz``'s signature.
        """
        payload = json.loads(request.body.decode("utf-8"))
        return clazz(**payload)

    def start_scrawl(self, spider):
        """Run *spider* in a fresh CrawlerProcess; blocks until it finishes."""
        # get_project_settings() does not pick up the settings.py config
        # here, so the pipeline configuration is injected by hand.
        crawler_settings = get_project_settings()
        crawler_settings['ITEM_PIPELINES'] = ITEM_PIPELINES
        crawler = CrawlerProcess(crawler_settings)
        crawler.crawl(spider)
        crawler.start()
|