public_sentiment/web/controller/html_parser_controller.py

34 lines
1007 B
Python
Raw Normal View History

2024-09-18 13:41:28 +08:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.http import JsonResponse
from rest_framework.decorators import api_view
2024-09-19 16:58:49 +08:00
from rest_framework.views import APIView
2024-09-18 13:41:28 +08:00
from web.controller.base_controller import BaseController
from web.manager.log_manager import LogManager
from web.util.dto_util import DtoUtil
from web.vo.parse_html_vo import ParseHtmlVo
# Module-level logger, obtained from the project's LogManager and keyed by
# this module's import name.
Logger = LogManager.get_logger(__name__)
2024-09-19 16:58:49 +08:00
class HtmlParserController(APIView, BaseController):
    """Class-based REST view that exposes the HTML parsing endpoint.

    POST requests are dispatched by ``APIView`` to :meth:`post`, which
    delegates to :meth:`parse_html`.
    """

    def post(self, request):
        """Handle an HTTP POST by delegating to :meth:`parse_html`.

        ``APIView`` dispatches on handler methods named after the HTTP verb,
        so this thin wrapper is what makes ``parse_html`` reachable through
        ``HtmlParserController.as_view()``.
        """
        return self.parse_html(request)

    # NOTE: ``@api_view`` is intentionally NOT applied here. That decorator is
    # meant for function-based views; on a method it replaces the method with
    # a generated view that calls the function without ``self``.
    def parse_html(self, request):
        """Parse the HTML page identified by the incoming request.

        The request is converted into a ``ParseHtmlVo`` via ``self.to_vo``
        and its ``url`` attribute is passed to
        ``self.html_parser_handler.parse_html``.
        NOTE(review): ``to_vo`` and ``html_parser_handler`` are not defined in
        this class; presumably they come from ``BaseController`` - confirm.

        :param request: the incoming request object handed over by the view
            dispatcher.
        :return: a ``JsonResponse`` wrapping the service result after it has
            been converted by ``DtoUtil.service_result_to_api_result``.
        """
        Logger.info("开始解析html")

        parse_html_vo = self.to_vo(request, ParseHtmlVo)
        service_result = self.html_parser_handler.parse_html(parse_html_vo.url)

        # safe=False allows JsonResponse to serialize a payload that is not a
        # plain dict.
        return JsonResponse(
            DtoUtil.service_result_to_api_result(service_result),
            safe=False,
        )