#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import sys

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from web.handler.html_parser_handler import HtmlParserHandler

# The "collector" directory must be on sys.path before its settings module
# is imported below, so this statement intentionally sits between imports.
sys.path.append(r"collector")
from collector.settings import ITEM_PIPELINES


class BaseController:
    """Base class for the controller layer.

    Provides the helpers shared by controllers: converting a JSON request
    body into a value object, and launching a Scrapy spider.
    """

    def __init__(self):
        # Parser handler made available to every controller instance.
        self.html_parser_handler = HtmlParserHandler()

    def to_vo(self, request, clazz):
        """Convert the JSON body of *request* into an instance of *clazz*.

        Args:
            request: Object exposing a ``body`` attribute holding the raw
                request payload as UTF-8 encoded bytes.
            clazz: Callable (typically a VO class) invoked with the decoded
                JSON members as keyword arguments.

        Returns:
            The object produced by ``clazz(**fields)``.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
            json.JSONDecodeError: If the body is not valid JSON.
            TypeError: If the decoded JSON is not a mapping, or its keys do
                not match the parameters accepted by *clazz*.
        """
        raw_data = request.body.decode("utf-8")
        json_data_dict = json.loads(raw_data)
        return clazz(**json_data_dict)

    def start_scrawl(self, spider):
        """Run *spider* in a Scrapy ``CrawlerProcess``.

        Args:
            spider: The spider (class or name) handed to
                ``CrawlerProcess.crawl``.
        """
        # get_project_settings() does not pick up the configuration from
        # settings.py here, so ITEM_PIPELINES is injected explicitly.
        settings = get_project_settings()
        settings['ITEM_PIPELINES'] = ITEM_PIPELINES

        process = CrawlerProcess(settings)
        process.crawl(spider)
        # NOTE(review): per the Scrapy docs, start() runs the Twisted reactor
        # and blocks until crawling finishes; a reactor generally cannot be
        # restarted, so a second call in the same process is expected to
        # fail — confirm how callers invoke this.
        process.start()