diff --git a/manage.py b/manage.py index 7f75b1e..9b2913a 100644 --- a/manage.py +++ b/manage.py @@ -16,7 +16,7 @@ Logger = LogManager.get_logger(__name__) if __name__ == '__main__': - LogManager.get_logger("启动服务器") + Logger.info("启动服务器") try: from django.core.management import execute_from_command_line diff --git a/public_sentiment/settings.py b/public_sentiment/settings.py index 913d67f..a630da9 100644 --- a/public_sentiment/settings.py +++ b/public_sentiment/settings.py @@ -35,9 +35,41 @@ INSTALLED_APPS = [ 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', + 'rest_framework', + 'rest_framework_swagger', 'web', ] +# swagger 配置项 +# SWAGGER_SETTINGS = { +# # 基础样式 +# # 'SECURITY_DEFINITIONS': { +# # "basic": { +# # 'type': 'basic' +# # } +# # }, +# # 如果需要登录才能够查看接口文档, 登录的链接使用restframework自带的. +# # 'LOGIN_URL': 'rest_framework:login', +# # 'LOGOUT_URL': 'rest_framework:logout', +# # 'DOC_EXPANSION': None, +# # 'SHOW_REQUEST_HEADERS':True, +# # 'USE_SESSION_AUTH': True, +# # 'DOC_EXPANSION': 'list', +# # 接口文档中方法列表以首字母升序排列 +# 'APIS_SORTER': 'alpha', +# # 如果支持json提交, 则接口文档中包含json输入框 +# 'JSON_EDITOR': True, +# # 方法列表字母排序 +# 'OPERATIONS_SORTER': 'alpha', +# 'VALIDATOR_URL': None, +# } + +REST_FRAMEWORK = { + 'DEFAULT_PERMISSION_CLASSES': [ + 'rest_framework.permissions.IsAuthenticated', + ] +} + MIDDLEWARE = [ 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', diff --git a/public_sentiment/urls.py b/public_sentiment/urls.py index d5694b0..cab8a85 100644 --- a/public_sentiment/urls.py +++ b/public_sentiment/urls.py @@ -14,12 +14,57 @@ Including another URLconf 1. Import the include() function: from django.urls import include, path 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ -from django.contrib import admin +from django.db import router from django.urls import path +from drf_yasg import openapi +from drf_yasg.views import get_schema_view +from rest_framework import permissions, routers +from django.contrib import admin +from rest_framework import routers +from django.conf.urls import include +from rest_framework.routers import DefaultRouter +from rest_framework.schemas.views import SchemaView +from tornado.web import url -from web.controller.html_parser_controller import parse_html +from web import views +from web.controller.html_parser_controller import HtmlParserController +from web.controller.schedule_controller import SchedulerController + +# Swagger documentation setup +# schema_view = get_schema_view( +# openapi.Info( +# title="舆情系统API", +# default_version='v1', +# description="舆情系统API", +# terms_of_service="https://www.google.com/policies/terms/", +# contact=openapi.Contact(email="contact@snippets.local"), +# license=openapi.License(name="BSD License"), +# ), +# public=True, +# permission_classes=(permissions.AllowAny,), +# ) + + +# 重要的是如下三行 +from rest_framework.schemas import get_schema_view +from rest_framework_swagger.renderers import SwaggerUIRenderer, OpenAPIRenderer + +schema_view = get_schema_view(title='Users API', renderer_classes=[OpenAPIRenderer, SwaggerUIRenderer]) + +router = routers.DefaultRouter() +# router.register('users', views.UserViewSet, base_name='user') +# router.register('groups', views.GroupViewSet, base_name='group') urlpatterns = [ path('admin/', admin.site.urls), - path('api/v1/htmlParser/parseHtml', parse_html), + path('', include(router.urls)), + path('api-auth/', include('rest_framework.urls', namespace='rest_framework')), + path('docs/', schema_view, name='docs'), + + path('admin/', admin.site.urls), + path('api/v1/htmlParser/parseHtml', HtmlParserController().parse_html), + path('api/v1/scheduler/create', SchedulerController().create), + path('api/v1/scheduler/delete', SchedulerController().delete), + path('api/v1/scheduler/update', SchedulerController().update), + path('api/v1/scheduler/search', SchedulerController().search), ] diff --git a/script/runserver.bat b/script/runserver.bat index 01b5c27..430bca3 100644 --- a/script/runserver.bat +++ b/script/runserver.bat @@ -1 +1 @@ -C:\mywork\dev-env\python\Python38\python.exe C:\mywork\workspace\public_sentiment\manage.py runserver 9000 +C:\mywork\dev-env\python\Python38\python.exe C:\mywork\workspace\public_sentiment\manage.py runserver 5079 --noreload diff --git a/web/admin.py b/web/admin.py index 8c38f3f..c6fe108 100644 --- a/web/admin.py +++ b/web/admin.py @@ -1,3 +1,2 @@ from django.contrib import admin -# Register your models here. diff --git a/web/constants/__init__.py b/web/config/__init__.py similarity index 100% rename from web/constants/__init__.py rename to web/config/__init__.py diff --git a/web/config/apscheduler_config.py b/web/config/apscheduler_config.py new file mode 100644 index 0000000..515ba67 --- /dev/null +++ b/web/config/apscheduler_config.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor + +from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore + + +# apscheduler的配置 +class ApschedulerConfig: + url = f"mysql+pymysql://root:123456@127.0.0.1:3306/base_platform?charset=utf8" # 使用pymysql连接数据库,字符集为UTF8 + + job_stores = { + 'default': SQLAlchemyJobStore(url=url, tablename='apscheduler_task') # 定时任务表名为my_tasks + } + executors = { + 'default': ThreadPoolExecutor(20), + 'processpool': ProcessPoolExecutor(5) + } + job_defaults = { + 'coalesce': True, # 堆积后只执行最后一个 + 'max_instances': 1, # 最大的实例只能存在一个 + + } diff --git a/web/constants/startup_parameter.py b/web/constants/startup_parameter.py deleted file mode 100644 index 15d2c67..0000000 --- a/web/constants/startup_parameter.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - - -""" -启动系统时的参数 -""" - - -class StartupParameter: - # 采集数据 - Crawl_Data = 'crawl_data' diff --git a/web/controller/base_controller.py b/web/controller/base_controller.py index 8a381f7..4accbfc 100644 --- a/web/controller/base_controller.py +++ b/web/controller/base_controller.py @@ -5,6 +5,8 @@ import json import sys from scrapy.crawler import CrawlerProcess from scrapy.utils.project import get_project_settings + +from web.handler.apscheduler_handler import ApschedulerHandler from web.handler.html_parser_handler import HtmlParserHandler sys.path.append(r"collector") @@ -19,6 +21,7 @@ class BaseController: def __init__(self): self.html_parser_handler = HtmlParserHandler() + self.apscheduler_handler = ApschedulerHandler() def to_vo(self, request, clazz): """ diff --git a/web/controller/html_parser_controller.py b/web/controller/html_parser_controller.py index ddd9529..3f17c5f 100644 --- a/web/controller/html_parser_controller.py +++ b/web/controller/html_parser_controller.py @@ -5,6 +5,7 @@ import json from collections import namedtuple from django.http import JsonResponse from rest_framework.decorators import api_view +from rest_framework.views import APIView from twisted.protocols.amp import Box from collector.spiders.collector_spider import CollectorSpider from web.controller.base_controller import BaseController @@ -16,23 +17,23 @@ from web.vo.parse_html_vo import ParseHtmlVo Logger = LogManager.get_logger(__name__) -base_controller = BaseController() +class HtmlParserController(APIView, BaseController): -@api_view(['POST']) -def parse_html(request): - """ - 解析html - """ + @api_view(['POST']) + def parse_html(self, request): + """ + 解析html + """ - Logger.info("开始解析html") + Logger.info("开始解析html") - parse_html_vo = base_controller.to_vo(request, ParseHtmlVo) - service_result = base_controller.html_parser_handler.parse_html(parse_html_vo.url) + parse_html_vo = self.to_vo(request, ParseHtmlVo) + service_result = self.html_parser_handler.parse_html(parse_html_vo.url) - # grid_graph_manager = GridGraphManager() - # list = grid_graph_manager.query_vertex(label='person') + # grid_graph_manager = GridGraphManager() + # list = grid_graph_manager.query_vertex(label='person') - # base_controller.start_scrawl(CollectorSpider) + # base_controller.start_scrawl(CollectorSpider) - return JsonResponse(DtoUtil.service_result_to_api_result(service_result), safe=False) + return JsonResponse(DtoUtil.service_result_to_api_result(service_result), safe=False) diff --git a/web/controller/schedule_controller.py b/web/controller/schedule_controller.py new file mode 100644 index 0000000..8793911 --- /dev/null +++ b/web/controller/schedule_controller.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from django.http import JsonResponse +from drf_yasg import openapi +from drf_yasg.utils import swagger_auto_schema +from rest_framework import permissions +from rest_framework.decorators import api_view, action +from rest_framework.views import APIView + +from web.controller.base_controller import BaseController +from web.handler.html_parser_handler import HtmlParserHandler +from web.manager.log_manager import LogManager +from web.util.dto_util import DtoUtil +from web.vo.scheduler_vo import SchedulerVo + +Logger = LogManager.get_logger(__name__) + + +class SchedulerController(APIView, BaseController): + """ + 创建定时任务 + """ + @api_view(['POST']) + def create(self, request): + Logger.info('创建定时任务') + + scheduler_vo = self.to_vo(request, SchedulerVo) + service_result = self.apscheduler_handler.create(HtmlParserHandler().parse_html, scheduler_vo.job_id, + scheduler_vo.seconds, {'url': scheduler_vo.url}) + return JsonResponse(DtoUtil.service_result_to_api_result(service_result), safe=False) + + @api_view(['GET']) + def delete(self, request): + """ + 删除定时任务 + """ + + Logger.info('删除定时任务') + scheduler_vo = self.to_vo(request, SchedulerVo) + self.apscheduler_handler.delete(scheduler_vo.job_id) + + @api_view(['POST']) + def update(self, request): + """ + 修改定时任务 + """ + + Logger.info('修改定时任务') + + scheduler_vo = self.to_vo(request, SchedulerVo) + + @api_view(['POST']) + def search(self, request): + """ + 查询定时任务 + """ + + Logger.info('查询定时任务') + + scheduler_vo = self.to_vo(request, SchedulerVo) diff --git a/web/enum/scheduler_operation_type_enum.py b/web/enum/scheduler_operation_type_enum.py new file mode 100644 index 0000000..5d686a1 --- /dev/null +++ b/web/enum/scheduler_operation_type_enum.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from enum import Enum + + +class SchedulerOperationTypeEnum(Enum): + """ + scheduler的操作类型 + """ + + # 创建 + CREATE = 1 + + # 删除 + DELETE = 2 + + # 关闭 + SHUTDOWN = 3 + + # 查询 + SEARCH = 4 diff --git a/web/handler/apscheduler_handler.py b/web/handler/apscheduler_handler.py new file mode 100644 index 0000000..ba9860f --- /dev/null +++ b/web/handler/apscheduler_handler.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from web.dto.service_result import ServiceResult +from web.enum.service_result_enum import ServiceResultEnum +from web.handler.base_handler import BaseHandler +from web.manager.apschedule_manager import ApschedulerManager +from web.manager.log_manager import LogManager + +Logger = LogManager.get_logger(__name__) + + +class ApschedulerHandler(BaseHandler): + """ + 调度器 + """ + + def __init__(self): + super().__init__() + + self.apscheduler_manager = ApschedulerManager() + + def create(self, _func, job_id, _seconds, _kwargs): + """ + 创建定时任务 + """ + + Logger.info('创建定时任务') + + job = self.apscheduler_manager.create(func_=_func, job_id=job_id, seconds_=_seconds, kwargs_=_kwargs) + return ServiceResult.ok(ServiceResultEnum.SAVE_SUCCESS.value, job.id, ServiceResultEnum.SAVE_SUCCESS_DESCRIPTION.value) + + def delete(self, job_id): + """ + 删除定时任务 + """ + + Logger.info('删除定时任务') + + self.apscheduler_manager.remove_job(job_id) + + def pause(self, job_id): + """ + 暂停定时任务 + """ + + Logger.info('暂停定时任务') + + self.apscheduler_manager.pause_job(job_id) + + def resume(self, job_id): + """ + 恢复定时任务 + """ + + Logger.info('恢复定时任务') + + self.apscheduler_manager.resume_job(job_id) + + def shutdown_apscheduler(self): + """ + 关闭调度器 + """ + + Logger.info('关闭调度器') + + self.apscheduler_manager.shutdown() + + def pause_apscheduler(self): + """ + 暂停调度器 + """ + + Logger.info('暂停调度器') + + self.apscheduler_manager.pause() + + def resume_apscheduler(self): + """ + 恢复调度器 + """ + + Logger.info('恢复调度器') + + self.apscheduler_manager.resume() diff --git a/web/handler/html_parser_handler.py b/web/handler/html_parser_handler.py index 81b3a16..d6382ff 100644 --- a/web/handler/html_parser_handler.py +++ b/web/handler/html_parser_handler.py @@ -23,6 +23,8 @@ class HtmlParserHandler(BaseHandler): 解析html网页 """ + Logger.info('解析html网页') + response = requests.get(url) text = response.text diff --git a/web/manager/apschedule_manager.py b/web/manager/apschedule_manager.py new file mode 100644 index 0000000..1833db4 --- /dev/null +++ b/web/manager/apschedule_manager.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from apscheduler.schedulers.background import BackgroundScheduler +from web.config.apscheduler_config import ApschedulerConfig + + +class ApschedulerManager: + """ + apscheduler管理器 + """ + + def __init__(self): + super().__init__() + + self.apscheduler = BackgroundScheduler(executors=ApschedulerConfig.executors, + job_defaults=ApschedulerConfig.job_defaults, + jobstores=ApschedulerConfig.job_stores, timezone='Asia/Shanghai') + + def create(self, func_, job_id, seconds_, kwargs_): + """ + 创建定时任务 + """ + job = self.apscheduler.add_job(func_, 'interval', id=job_id, seconds=seconds_, kwargs=kwargs_, replace_existing=True) + self.apscheduler.start() + return job + + def delete(self, job_id): + """ + 删除定时任务 + """ + self.apscheduler.remove_job(job_id) + + def pause(self, job_id): + """ + 暂停定时任务 + """ + self.apscheduler.pause_job(job_id) + + def resume(self, job_id): + """ + 恢复定时任务 + """ + self.apscheduler.resume_job(job_id) + + def shutdown_apscheduler(self): + """ + 关闭调度器 + """ + self.apscheduler.shutdown() + + def pause_apscheduler(self): + """ + 暂停调度器 + """ + self.apscheduler.pause() + + def resume_apscheduler(self): + """ + 恢复调度器 + """ + self.apscheduler.resume() diff --git a/web/manager/log_manager.py b/web/manager/log_manager.py index 27e298d..12d6b9b 100644 --- a/web/manager/log_manager.py +++ b/web/manager/log_manager.py @@ -29,8 +29,10 @@ class LogManager: LogManager.Logger = logging.getLogger(param_name) LogManager.Logger.setLevel(level=level) + # formatter = logging.Formatter( + # '%(asctime)s [%(threadName)s-%(thread)d] [%(levelname)s] %(name)s.%(funcName)s[%(lineno)d] %(message)s') formatter = logging.Formatter( - '%(asctime)s [%(threadName)s-%(thread)d] [%(levelname)s] %(name)s.%(funcName)s[%(lineno)d] %(message)s') + '%(asctime)s [%(threadName)s-%(thread)d] [%(levelname)s] %(filename)s[%(lineno)d] %(message)s') file_handler = logging.FileHandler(log_file, encoding="utf-8") file_handler.setLevel(level=level) diff --git a/web/models/public_sentiment_comment.py b/web/models/public_sentiment_comment.py index 0acf841..60b3000 100644 --- a/web/models/public_sentiment_comment.py +++ b/web/models/public_sentiment_comment.py @@ -11,20 +11,20 @@ class PublicSentimentComment(models.Model): """ # 主键 - id = models.AutoField(primary_key=True) + id = models.AutoField(primary_key=True, verbose_name="主键") # 内容 - content = models.CharField(max_length=2550, null=True, blank=True) + content = models.CharField(max_length=2550, null=True, blank=True, verbose_name="内容") # 来源id source_id = models.BigIntegerField(validators=[MaxValueValidator(9223372036854775807)], db_index=True, null=False, - blank=False) + blank=False, verbose_name="来源id") # 创建时间 - create_time = models.DateTimeField(null=False, blank=False) + create_time = models.DateTimeField(null=False, blank=False, verbose_name="创建时间") class Meta: managed = True db_table = 'ps_comment' verbose_name = '评论表' - verbose_name_plural = verbose_name \ No newline at end of file + verbose_name_plural = verbose_name diff --git a/web/models/public_sentiment_source.py b/web/models/public_sentiment_source.py index 95b5edc..032590d 100644 --- a/web/models/public_sentiment_source.py +++ b/web/models/public_sentiment_source.py @@ -10,13 +10,13 @@ class PublicSentimentSource(models.Model): """ # 主键 - id = models.AutoField(primary_key=True) + id = models.AutoField(primary_key=True, verbose_name="主键") # 域名 - domain_name = models.CharField(max_length=255, null=True, blank=True) + domain_name = models.CharField(max_length=255, null=True, blank=True, verbose_name="域名") # 名称 - name = models.CharField(max_length=255, null=True, blank=True) + name = models.CharField(max_length=255, null=True, blank=True, verbose_name="名称") class Meta: managed = True diff --git a/web/models/training_sensitive_word.py b/web/models/training_sensitive_word.py index 36a8070..d3b64e5 100644 --- a/web/models/training_sensitive_word.py +++ b/web/models/training_sensitive_word.py @@ -10,13 +10,13 @@ class TrainingSensitiveWord(models.Model): """ # 主键 - id = models.AutoField(primary_key=True) + id = models.AutoField(primary_key=True, verbose_name="主键") # 类型 - type = models.CharField(max_length=255, null=True, blank=True) + type = models.CharField(max_length=255, null=True, blank=True, verbose_name="类型") # 敏感词 - word = models.CharField(max_length=255, null=True, blank=True) + word = models.CharField(max_length=255, null=True, blank=True, verbose_name="敏感词") class Meta: managed = True diff --git a/web/scheduler/__init__.py b/web/scheduler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/serializers.py b/web/serializers.py new file mode 100644 index 0000000..81c3a8c --- /dev/null +++ b/web/serializers.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from rest_framework import serializers +from web.models.training_sensitive_word import TrainingSensitiveWord + + +# 序列化模型为其他格式 + +class TrainingSensitiveWordSerializer(serializers.ModelSerializer): + class Meta: + model = TrainingSensitiveWord + + # 序列化所有的字段 + fields = '__all__' + + # 序列化部分字段 + # fields = ('id','song','singer','last_modify_date','created') diff --git a/web/views.py b/web/views.py index faa18be..aaeb9c3 100644 --- a/web/views.py +++ b/web/views.py @@ -1,2 +1,42 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +from rest_framework import viewsets +from rest_framework.parsers import MultiPartParser, FormParser, JSONParser + +from web.models import TrainingSensitiveWord +from web.serializers import TrainingSensitiveWordSerializer + + +class TrainingSensitiveWordSerializerViewSet(viewsets.ModelViewSet): + """ + CRUD 功能 + """ + authentication_classes = [] + permission_classes = [] + + # 解析方式 + parser_classes = (MultiPartParser, FormParser, JSONParser) + + queryset = TrainingSensitiveWord.objects.all() + serializer_class = TrainingSensitiveWordSerializer + + def create(self, request, *args, **kwargs): + """新建一条音乐""" + pass + + def list(self, request, *args, **kwargs): + """全部音乐数据""" + pass + + def retrieve(self, request, *args, **kwargs): + """查询一条数据""" + pass + + def update(self, request, *args, **kwargs): + """更新一条音乐数据""" + pass + + def destroy(self, request, *args, **kwargs): + """删除一条数据""" + pass \ No newline at end of file diff --git a/web/vo/scheduler_vo.py b/web/vo/scheduler_vo.py new file mode 100644 index 0000000..537d384 --- /dev/null +++ b/web/vo/scheduler_vo.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Optional, Any + +from pydantic import BaseModel + + +class SchedulerVo(BaseModel): + """ + scheduler的vo类 + """ + + # 每隔几秒执行一次 + seconds: Optional[int] = None + + # 扫描的url + url: Optional[str] = None + + # job的id + job_id: Optional[str] = None