minor fixes, added result processing

This commit is contained in:
2023-09-22 00:17:24 +03:00
committed by nikili0n
parent 0e32392948
commit 67ae781660
12 changed files with 549 additions and 182 deletions

View File

@ -1,16 +1,16 @@
import asyncio
import concurrent.futures as pool
import subprocess
import traceback
from functools import partial
from fastapi import HTTPException
from urllib.parse import urlparse
from src.core.async_queue import AsyncQueue
from src.core.rabbitmq import get_messages
from src.core.rabbitmq import get_messages, publish_message_with_task_done
from src.core.redis_client import RedisClient
from src.core.result import Result, ResultTypeEnum
from src.exceptions.download_exceptions import SiteNotImplementedException
from src.exceptions.download_exceptions import FileAlreadyExistException, SiteNotImplementedException
from src.parsers.MyMail.my_mail_parser import MyMailParser
from src.parsers.Yappy.yappy_parser import YappyParser
from src.parsers.base_parser import BaseParser
@ -39,55 +39,82 @@ class MasterService:
async def create_workers(self):
    """Consume download tasks from the inner queue forever and report each outcome.

    For every task taken from ``self.queue`` the coroutine:

    1. moves the task from the Redis "queue" set to the "tasks" set;
    2. registers it in ``self.currently_underway`` under its link;
    3. runs ``MasterService.video_processing_executor`` in ``self.executor``;
    4. stores the outcome in the Redis "task done" set and publishes it to
       the RabbitMQ queue of finished tasks. ``DONE`` / ``EXIST`` results are
       forwarded as produced by the executor; any other result is wrapped
       into a message with ``"status": "error"``.

    The task is always removed from ``self.currently_underway`` and always
    acknowledged with ``self.queue.task_done()``, whether it succeeded,
    failed, or the reporting itself raised.
    """
    while True:
        video_params = await self.queue.get()
        link = video_params["link"]
        # TODO: later, write a function that picks the parser automatically
        redis = RedisClient()
        try:
            # TODO: check in Redis whether the task is already done (then return
            #  the link right away) or previously finished with an error, and
            #  also check self.currently_underway, to be safe
            await redis.del_task_from_queue_and_add_to_tasks(task=video_params)
            self.currently_underway[link] = video_params
            result: Result = await self.loop.run_in_executor(
                self.executor,
                partial(MasterService.video_processing_executor, video_params=video_params),
            )
            if result.result_type in (ResultTypeEnum.DONE, ResultTypeEnum.EXIST):
                # The executor already produced the full message for these outcomes.
                finished_task = result.value
            else:
                finished_task = {
                    "link": link,
                    "result": result.value,
                    "status": "error",
                }
            # Result.Done / Result.Exist: record the finished task and announce it
            # on the second RabbitMQ queue.
            # Result.Error: record it as an error in the Redis set of finished
            # tasks and announce the error on the same queue.
            await redis.del_task_from_tasks_and_add_to_task_done(task=finished_task)
            await publish_message_with_task_done(task=finished_task)
        finally:
            # Every get() must be matched by exactly one task_done(), including
            # for failed tasks; otherwise a join() on the queue would never return.
            self.currently_underway.pop(link, None)
            self.queue.task_done()
@staticmethod
def video_download(video_params: dict):
    """Download the video described by ``video_params`` and return the parser's result.

    The parser is chosen by ``MasterService.get_parser``. Exceptions raised
    while choosing the parser or while downloading are deliberately not
    translated here (in particular not into ``HTTPException``): this code is
    called from ``video_processing_executor``, which converts them into
    ``Result`` objects.

    Args:
        video_params: task parameters; must contain the ``"link"`` key used
            to select the parser.

    Returns:
        Whatever the selected parser's ``video_download()`` returns.
    """
    downloader: BaseParser = MasterService.get_parser(video_params)
    return downloader.video_download()
@staticmethod
def get_parser(params: dict):
    """Return a parser instance for the site that ``params["link"]`` points to.

    The parser class is looked up by the network location of the link.
    Only the selected class is instantiated, so constructing a parser for
    one site never runs the constructors of the others.

    Args:
        params: task parameters; ``params["link"]`` is the video URL and the
            whole dict is handed to the parser constructor.

    Returns:
        An instance of the parser class registered for the link's domain.

    Raises:
        SiteNotImplementedException: if the domain has no registered parser
            (or ``params`` has no ``"link"`` key).
    """
    parser_classes = {
        "my.mail.ru": MyMailParser,
        "www.youtube.com": BaseParser,
        "vk.com": BaseParser,
        "ok.ru": BaseParser,
        "likee.video": BaseParser,
        "dzen.ru": BaseParser,
        "yappy.media": YappyParser,
    }
    try:
        domain = urlparse(params["link"]).netloc
        parser_class = parser_classes[domain]
    except KeyError:
        raise SiteNotImplementedException from None
    # Instantiate outside the try so a KeyError raised by a parser constructor
    # is not misreported as an unsupported site.
    return parser_class(params)
@staticmethod
def video_processing_executor(video_params: dict):
    """Run one download and describe its outcome as a ``Result``.

    This is the callable submitted to the executor by ``create_workers``.
    It never raises for ordinary errors: every outcome is returned as a
    ``Result`` so the caller can branch on ``result_type``.

    Args:
        video_params: task parameters; must contain the ``"link"`` key.

    Returns:
        * ``DONE`` with ``{"link", "result", "status": "done"}`` where
          ``result`` is the value returned by ``video_download``;
        * ``EXIST`` with ``{"link", "result", "status": "exist"}`` where
          ``result`` is the message of ``FileAlreadyExistException``;
        * ``EXCEPTION`` with the default message of
          ``SiteNotImplementedException`` for unsupported sites;
        * ``EXCEPTION`` with the formatted traceback for any other error.
    """
    try:
        result = MasterService.video_download(video_params=video_params)
        return Result(result_type=ResultTypeEnum.DONE, value={
            "link": video_params["link"],
            "result": result,
            "status": "done"
        })
    except FileAlreadyExistException as ex:
        return Result(result_type=ResultTypeEnum.EXIST, value={
            "link": video_params["link"],
            "result": ex.message,
            "status": "exist"
        })
    except SiteNotImplementedException as ex:
        return Result(result_type=ResultTypeEnum.EXCEPTION, value=ex.default_message)
    except Exception:
        # Return the traceback text rather than the exception object: a plain
        # string can always be stored and published by the caller.
        return Result(result_type=ResultTypeEnum.EXCEPTION, value=traceback.format_exc())
    # TODO upload to server
@ -95,5 +122,3 @@ def executor_initializer():
import setproctitle
setproctitle.setproctitle(f'video_downloader_executor_process')
return True

View File

@ -2,7 +2,7 @@ import asyncio
import json
from functools import partial
from aio_pika import connect
from aio_pika import connect, Message, DeliveryMode
from aio_pika.abc import AbstractIncomingMessage
@ -26,3 +26,24 @@ async def get_messages(inner_queue) -> None:
print(" [*] Waiting for messages. To exit press CTRL+C")
await asyncio.Future()
async def publish_message_with_task_done(task: dict) -> None:
    """Publish a finished-task report to the ``tasks_done`` RabbitMQ queue.

    The task is serialized to UTF-8 JSON first, so an unserializable task
    fails before any connection is opened. A connection to the local broker
    is then opened for this single message and closed when the ``async with``
    block exits.

    Args:
        task: JSON-serializable description of the finished task.

    Raises:
        TypeError: if ``task`` contains a value ``json.dumps`` cannot encode.
    """
    queue_name = "tasks_done"
    body = json.dumps(task, indent=4).encode("utf-8")

    async with await connect("amqp://guest:guest@localhost/") as connection:
        # Creating channel
        channel = await connection.channel()

        # Allow at most one unacknowledged message on this channel.
        # NOTE(review): prefetch limits apply to consuming; this channel only
        # publishes, so this call is probably unnecessary - confirm.
        await channel.set_qos(prefetch_count=1)

        # Declare the queue so it exists before publishing; the returned queue
        # object is not needed here.
        # NOTE(review): messages are marked persistent but the queue is declared
        # with default arguments - confirm whether it should be durable.
        await channel.declare_queue(queue_name)

        message = Message(body, delivery_mode=DeliveryMode.PERSISTENT)

        # The default exchange routes by queue name.
        await channel.default_exchange.publish(
            message,
            routing_key=queue_name,
        )

View File

@ -1,4 +1,6 @@
import types
from enum import Enum
from inspect import Traceback
class ResultTypeEnum(Enum):
@ -8,9 +10,9 @@ class ResultTypeEnum(Enum):
class Result:
    """Outcome of one processed task: a result type plus an optional payload.

    Attributes are plain values so instances can be passed back from the
    executor that produced them.
    """

    def __init__(self, result_type: ResultTypeEnum, value: str | dict | None = None):
        """Store the outcome.

        Args:
            result_type: which kind of outcome this is.
            value: payload of the outcome - a message dict for finished
                tasks or an error text for failures; ``None`` when absent.
        """
        self.result_type = result_type
        self.value = value

    def __repr__(self):
        # An empty payload ('' or {}) is shown as None, same as a missing one.
        return f'Result: {self.result_type.value}. Value: {self.value or None}'

View File

@ -4,3 +4,9 @@ from src.exceptions.base_exception import DefaultException
class SiteNotImplementedException(DefaultException):
    """Signals that no parser is available for the requested site."""

    error_code = "SiteNotImplemented"
    default_message = "This site is not supported"
class FileAlreadyExistException(DefaultException):
    """Signals that the requested video is already present on disk."""

    error_code = "FileAlreadyExist"
    default_message = "This file already exist"

View File

@ -1,12 +1,10 @@
import os
import uuid
import requests
from playwright.sync_api import Playwright
from playwright.sync_api import sync_playwright
from src.core.result import Result, ResultTypeEnum
from src.exceptions.download_exceptions import FileAlreadyExistException
from src.parsers.base_parser import BaseParser
@ -33,7 +31,7 @@ class MyMailParser(BaseParser):
link, title, cookies = self.get_video_link(playwright)
if os.path.exists(os.path.join(os.getcwd() + f"/downloads/MyMailRu/{title}.mp4")):
return f"MyMailRu/{title}.mp4"#Result(result_type=ResultTypeEnum.EXIST)
raise FileAlreadyExistException(message=f"MyMailRu/{title}.mp4")
self.make_sure_path_exists()
video_response = requests.get(link, cookies=cookies)

View File

@ -1,11 +1,9 @@
import os
import uuid
import requests
from bs4 import BeautifulSoup
from src.core.result import ResultTypeEnum, Result
from src.exceptions.download_exceptions import FileAlreadyExistException
from src.parsers.base_parser import BaseParser
@ -26,7 +24,7 @@ class YappyParser(BaseParser):
link, title = self.get_video_link()
if os.path.exists(os.path.join(os.getcwd() + f"/downloads/Yappy/{title}.mp4")):
return f"Yappy/{title}.mp4"
raise FileAlreadyExistException(message=f"Yappy/{title}.mp4")
video_response = requests.get(link)
self.make_sure_path_exists()

View File

@ -1,11 +1,8 @@
import errno
import os
from fastapi import HTTPException
from src.core.result import ResultTypeEnum, Result
from src.core.ydl import VideoDownloader
from src.exceptions.download_exceptions import SiteNotImplementedException
from src.exceptions.download_exceptions import FileAlreadyExistException
class BaseParser:
@ -24,20 +21,12 @@ class BaseParser:
}
downloader = VideoDownloader(link=self.params["link"], ydl_opts=ydl_opts)
video_info = downloader.get_info()
#TODO Добавить динамеческое имя директории сервиса для проверки дублирования
if os.path.exists(
os.path.join(os.getcwd() + f"Youtube/{video_info['id']}_{video_info['width']}.{video_info['ext']}")
):
return Result(result_type=ResultTypeEnum.EXIST)
try:
downloader.ydl_opts["quiet"] = False
result = downloader.download()
return f"{video_info['extractor_key']}/{result['id']}_{result['width']}p.{result['ext']}"
except SiteNotImplementedException as ex:
raise HTTPException(
status_code=400,
detail=ex.message
)
path_to_video = f"{video_info['extractor_key']}/{video_info['id']}_{video_info['width']}p.{video_info['ext']}"
if os.path.exists(os.path.join(os.getcwd() + "/downloads/" + path_to_video)):
raise FileAlreadyExistException(message=path_to_video)
downloader.ydl_opts["quiet"] = False
downloader.download()
return path_to_video
def make_sure_path_exists(self,):
try:

View File

@ -5,12 +5,13 @@ from ast import literal_eval
import uvicorn
from aio_pika import connect, Message, DeliveryMode
from fastapi import FastAPI, Request, Form, HTTPException
from fastapi import FastAPI, Request, Depends
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import JSONResponse, FileResponse, StreamingResponse
from starlette.templating import Jinja2Templates
from src.core.redis_client import RedisClient
from src.web.schemes.submit import SubmitIn
app = FastAPI(
title="video_downloader", openapi_url=f"/api/v1/openapi.json"
@ -27,25 +28,43 @@ app.add_middleware(
)
async def is_task_already_done_or_exist(redis: "RedisClient", link: str):
    """Return the finished task for ``link`` if one is already recorded.

    Scans the entries returned by ``redis.get_task_done_queue()`` in order and
    picks the first one whose ``"link"`` equals ``link`` and whose
    ``"status"`` is ``"done"`` or ``"exist"``. The matching entry is removed
    from the finished-task storage before it is returned.

    Entries without a ``"status"`` key are skipped instead of raising.

    Args:
        redis: client exposing ``get_task_done_queue()`` and
            ``del_task_from_task_done_queue(task)``.
        link: video URL to look for.

    Returns:
        The matching task dict, or ``None`` when there is no such task.
    """
    messages = await redis.get_task_done_queue()
    for message in messages:
        # Decode and parse each entry exactly once.
        task = literal_eval(message.decode("utf-8"))
        if task.get("link") == link and task.get("status") in ("done", "exist"):
            await redis.del_task_from_task_done_queue(task)
            return task
    return None
@app.get("/")
async def index(request: Request):
    """Render the ``index.html`` template for the current request."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.post('/submit/')
async def get_url_for_download_video(request: Request, link: str = Form(...)):
connection = await connect("amqp://guest:guest@localhost/")
async def get_url_for_download_video(request: Request, data: SubmitIn = Depends()):
red = RedisClient()
task_done = await is_task_already_done_or_exist(red, data.link)
if task_done:
link_to_download_video = str(request.base_url) + "get/?file_path=" + task_done["result"]
return JSONResponse({"result": link_to_download_video})
async with connection:
async with await connect("amqp://guest:guest@localhost/") as connection:
# Creating a channel
channel = await connection.channel()
body = [
{
"link": link,
"parser": "base",
"format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
"merge_output_format": "mp4",
"link": data.link,
"format": f"bestvideo[ext={data.format.value}]+bestaudio[ext={data.format.value}]/best[ext={data.format.value}]/best",
"merge_output_format": data.merge_output_format.value,
"outtmpl": f"downloads/%(extractor_key)s/%(id)s_%(width)sp.%(ext)s",
}, ]
# Sending the message
@ -63,13 +82,19 @@ async def get_url_for_download_video(request: Request, link: str = Form(...)):
)
print(f" [x] Sent '{link}'")
red = RedisClient()
while True:
try:
mes = await red.get_task_done_queue()
task = literal_eval(list(mes)[0].decode('utf-8'))
if task["link"] == link["link"]:
messages = await red.get_task_done_queue()
tasks = [
literal_eval(message.decode('utf-8')) for message in messages
if literal_eval(message.decode('utf-8'))["link"] == link["link"]
]
error_tasks = [tasks.pop(tasks.index(error_task)) for error_task in tasks if error_task["status"] == "error"]
if len(error_tasks) > 0:
return JSONResponse({"result": f"STATUS: ERROR {error_tasks[-1]['result']}"})
if len(tasks) > 0:
task = tasks[0]
await red.del_task_from_task_done_queue(task)
break
await asyncio.sleep(5)

30
src/web/schemes/submit.py Normal file
View File

@ -0,0 +1,30 @@
from dataclasses import dataclass
from enum import Enum
from fastapi import Form
class FormatEnum(Enum):
    """File extensions accepted in the ``format`` field of the submit form.

    Members are looked up by value, e.g. ``FormatEnum("mp4")``.
    """

    format_3gp = "3gp"
    format_aac = "aac"
    format_flv = "flv"
    format_m4a = "m4a"
    format_mp3 = "mp3"
    format_mp4 = "mp4"
    format_ogg = "ogg"
class MergeOutputFormatEnum(Enum):
    """Container formats accepted in the ``merge_output_format`` form field.

    Members are looked up by value, e.g. ``MergeOutputFormatEnum("mkv")``.
    """

    format_avi = "avi"
    format_flv = "flv"
    format_mkv = "mkv"
    format_mov = "mov"
    format_mp4 = "mp4"
    format_webm = "webm"
@dataclass
class SubmitIn:
    """Input of the submit form: the video link and the requested formats.

    Each field's default is ``Form(...)``, i.e. the value is declared as a
    form field with no default of its own.
    """

    # URL of the video to download.
    link: str = Form(...)
    # Requested stream extension; one of the FormatEnum values.
    format: FormatEnum = Form(...)
    # Requested container for the merged output; one of the MergeOutputFormatEnum values.
    merge_output_format: MergeOutputFormatEnum = Form(...)

View File

@ -71,7 +71,9 @@
</style>
<body>
<form method="post" action="/submit" id="download">
<input type="text" name="link">
<input type="text" name="link" placeholder="link">
<input type="text" name="format" placeholder="format">
<input type="text" name="merge_output_format" placeholder="merge_output_format">
<button type="submit" class="custom-btn btn-1"><span class="submit-spinner submit-spinner_hide"></span> Download</button>
</form>
<div class="col">