public_sentiment/web/controller/base_controller.py

46 lines
1.2 KiB
Python
Raw Normal View History

2024-09-18 13:41:28 +08:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import sys
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
2024-09-19 16:58:49 +08:00
from web.handler.apscheduler_handler import ApschedulerHandler
2024-09-18 13:41:28 +08:00
from web.handler.html_parser_handler import HtmlParserHandler
sys.path.append(r"collector")
from collector.settings import ITEM_PIPELINES
class BaseController:
    """
    Base class for the controller layer.

    Every instance creates its own ``HtmlParserHandler`` and
    ``ApschedulerHandler`` and exposes helpers for turning a JSON request
    body into a VO object and for running a spider.
    """

    def __init__(self):
        self.html_parser_handler = HtmlParserHandler()
        self.apscheduler_handler = ApschedulerHandler()

    def to_vo(self, request, clazz):
        """
        Convert the JSON body of a request into a VO object.

        :param request: object whose ``body`` attribute holds the JSON
            payload as UTF-8 encoded bytes
        :param clazz: callable that is invoked with the payload's members
            as keyword arguments
        :return: the object produced by ``clazz(**payload)``
        :raises ValueError: if the body is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` / ``json.JSONDecodeError``)
        :raises TypeError: if the payload is not a JSON object, or if
            ``clazz`` rejects the supplied keyword arguments
        """
        raw_data = request.body.decode("utf-8")
        json_data_dict = json.loads(raw_data)
        # Only a JSON object can be expanded into keyword arguments. Fail
        # with an explicit message instead of the opaque "argument after **
        # must be a mapping" error; the exception type stays TypeError.
        if not isinstance(json_data_dict, dict):
            raise TypeError(
                "request body must be a JSON object, got "
                f"{type(json_data_dict).__name__}"
            )
        return clazz(**json_data_dict)

    def start_scrawl(self, spider):
        """
        Start running a spider.

        :param spider: the spider handed to ``CrawlerProcess.crawl``
        """
        # get_project_settings() does not pick up the configuration from
        # settings.py here, so ITEM_PIPELINES still has to be injected
        # explicitly.
        settings = get_project_settings()
        settings['ITEM_PIPELINES'] = ITEM_PIPELINES
        process = CrawlerProcess(settings)
        process.crawl(spider)
        # NOTE(review): per the Scrapy docs CrawlerProcess.start() blocks
        # until crawling finishes and the Twisted reactor cannot be restarted
        # within the same process - confirm this is called at most once per
        # process.
        process.start()