public_sentiment/web/controller/base_controller.py

43 lines
1.1 KiB
Python
Raw Normal View History

2024-09-18 13:38:24 +08:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import sys
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from web.handler.html_parser_handler import HtmlParserHandler
sys.path.append(r"collector")
from collector.settings import ITEM_PIPELINES
class BaseController:
    """
    Base class for the controller layer.

    Bundles the helpers used by the controllers: an HTML parser handler,
    conversion of a JSON request body into a value object, and a
    blocking launcher for a Scrapy spider.
    """

    def __init__(self):
        # Handler instance kept on the controller as ``html_parser_handler``.
        self.html_parser_handler = HtmlParserHandler()

    def to_vo(self, request, clazz):
        """
        Convert the JSON parameters of a request into a VO object.

        ``request.body`` is decoded as UTF-8 and parsed as JSON; the
        resulting top-level keys are passed to ``clazz`` as keyword
        arguments, so the payload is expected to be a JSON object whose
        keys ``clazz`` accepts.

        :param request: object exposing the raw payload as bytes in ``body``
        :param clazz: callable (typically a VO class) invoked with the
            parsed keys as keyword arguments
        :return: whatever ``clazz(**payload)`` returns
        """
        payload = json.loads(request.body.decode("utf-8"))
        return clazz(**payload)

    def start_scrawl(self, spider):
        """
        Start running the given spider.

        The spider is scheduled on a fresh ``CrawlerProcess`` and the
        process is started right away.

        NOTE(review): ``CrawlerProcess.start()`` runs the Twisted reactor,
        which presumably cannot be started a second time in the same
        Python process -- confirm how callers invoke this repeatedly.

        :param spider: spider handed to ``CrawlerProcess.crawl``
        """
        project_settings = get_project_settings()
        # Per the original author, get_project_settings() does not pick up
        # the configuration from settings.py here, so the item pipelines are
        # imported explicitly and injected by hand.
        project_settings['ITEM_PIPELINES'] = ITEM_PIPELINES

        crawler_process = CrawlerProcess(project_settings)
        crawler_process.crawl(spider)
        crawler_process.start()