From a672d2e421282b9f63d4539b919587b8975aa70b Mon Sep 17 00:00:00 2001 From: nikili0n Date: Wed, 20 Sep 2023 14:43:59 +0300 Subject: [PATCH] minor fixes, added web serer --- src/core/master_service.py | 6 +- src/core/redis_client.py | 20 ++++- src/core/uploader.py | 6 +- src/parsers/MyMail/my_mail_parser.py | 20 +++-- src/parsers/Yappy/yappy_parser.py | 8 +- src/parsers/base_parser.py | 3 +- src/web/main.py | 97 ++++++++++++++++++++++ src/web/templates/index.html | 115 +++++++++++++++++++++++++++ 8 files changed, 252 insertions(+), 23 deletions(-) create mode 100644 src/web/main.py create mode 100644 src/web/templates/index.html diff --git a/src/core/master_service.py b/src/core/master_service.py index 59f8936..d76eedc 100644 --- a/src/core/master_service.py +++ b/src/core/master_service.py @@ -48,7 +48,7 @@ class MasterService: ) result = await download_task - await redis.del_task_from_tasks_and_add_to_task_done(task=video_params) + await redis.del_task_from_tasks_and_add_to_task_done(task={"link": video_params["link"], "result": result}) # TODO process result self.queue.task_done() @@ -76,8 +76,8 @@ class MasterService: @staticmethod def video_processing_executor(video_params: dict): try: - MasterService.video_download(video_params=video_params) - return Result(result_type=ResultTypeEnum.DONE) + result = MasterService.video_download(video_params=video_params) + return result except Exception as ex: return Result(result_type=ResultTypeEnum.EXCEPTION, value=ex) # TODO upload to server diff --git a/src/core/redis_client.py b/src/core/redis_client.py index 7666286..428a033 100644 --- a/src/core/redis_client.py +++ b/src/core/redis_client.py @@ -18,8 +18,8 @@ class RedisClient: async def _set_task_done(self, task: dict) -> int: async with self.connection as connection: - res = await connection.set( - f'{self.TASKS_DONE_NAME}:1:{task["link"]}', + res = await connection.sadd( + f'{self.TASKS_DONE_NAME}:1', json.dumps(task, indent=4).encode('utf-8') ) return res @@ -53,3 +53,19 @@ class RedisClient: await self._del_task(task) return await self._set_task_done(task) + async def get_task_done_queue(self) -> set: + async with self.connection as connection: + res = await connection.smembers(self.TASKS_DONE_NAME + f":1") + return res + + async def del_task_from_task_done_queue(self, task) -> int: + async with self.connection as connection: + res = await connection.srem(self.TASKS_DONE_NAME + f":1", json.dumps(task, indent=4).encode('utf-8')) + return res + + async def get_tasks_queue(self) -> set: + async with self.connection as connection: + res = await connection.json().get(self.TASKS_NAME) + return res + + diff --git a/src/core/uploader.py b/src/core/uploader.py index e14e28c..fe6cedf 100644 --- a/src/core/uploader.py +++ b/src/core/uploader.py @@ -15,7 +15,7 @@ def main(): if not found: client.make_bucket("clean-internet-oculus-integration-dev") else: - print("Bucket 'asiatrip' already exists") + print("Bucket 'clean-internet-oculus-integration-dev' already exists") # Upload '/home/user/Photos/asiaphotos.zip' as object name # 'asiaphotos-2015.zip' to bucket 'asiatrip'. @@ -23,8 +23,8 @@ def main(): "clean-internet-oculus-integration-dev", "4uv2GNc_ybc_1080p.mp4", "/Users/garickbadalov/PycharmProjects/video_downloader_service/downloads/Youtube/4uv2GNc_ybc_1080p.mp4", ) print( - "'/home/user/Photos/asiaphotos.zip' is successfully uploaded as " - "object 'asiaphotos-2015.zip' to bucket 'asiatrip'." + "'/Users/garickbadalov/PycharmProjects/video_downloader_service/downloads/Youtube/4uv2GNc_ybc_1080p.mp4' is successfully uploaded as " + "object '4uv2GNc_ybc_1080p.mp4' to bucket 'clean-internet-oculus-integration-dev'." ) diff --git a/src/parsers/MyMail/my_mail_parser.py b/src/parsers/MyMail/my_mail_parser.py index 2394775..653ac8c 100644 --- a/src/parsers/MyMail/my_mail_parser.py +++ b/src/parsers/MyMail/my_mail_parser.py @@ -1,7 +1,7 @@ import os +import uuid import requests -from http.cookies import SimpleCookie from playwright.sync_api import Playwright from playwright.sync_api import sync_playwright @@ -19,25 +19,23 @@ class MyMailParser(BaseParser): page = context.new_page() mobile_url = f"{self.params['link'][0:8]}m.{self.params['link'][8:]}" page.goto(url=mobile_url) + cc = context.cookies() + cookies = {cookie["name"]: cookie["value"] for cookie in cc} link = page.get_attribute("xpath=//video", "src") link = "https:" + link - title = page.locator("xpath=//div[@class='event-text__title']").text_content() - return link, title + title = cookies["video_key"] + return link, title, cookies def video_download(self, link: str = None, title: str = None): if not link and not title: with sync_playwright() as playwright: - link, title = self.get_video_link(playwright) + link, title, cookies = self.get_video_link(playwright) - if os.path.exists(os.path.join(os.getcwd() + f"MyMailRu/{title}.mp4")): - return Result(result_type=ResultTypeEnum.EXIST) - - rawdata = "searchuid=4515257701686610918; p=ki8AAAYkJdcA; act=064d11655c924c9f8f2aad0181a06a4b; o=:1763:AUAQ.m; oid=22SgCdFE5g2ZEFHy1FkYW; mrcu=5A5B64F228CC3AC265485FC5AE55; re_theme=system; re_theme_actual=dark; s=fver=0|rt=1|dpr=2|ww=1728|wh=963; ph_tp_horo-mail-ru=t=1|d=1693591768453; tmr_lvid=26ef811c203f1c0c0e5d1c8af1a4671b; tmr_lvidTS=1693591768481; _ym_uid=1693591769619458564; _ym_d=1693591769; ph_v_my-mail-ru=1; mrhc=CB75tAx8UrwCaiqE85YXWoCM2+CTT6/VsTcMdxv4iCM=; mr_my_b=1; _ga=GA1.2.2000378679.1694259228; mtrc=%7B%22mytrackerid%22%3A52867%2C%22tmr_lvid%22%3A%2226ef811c203f1c0c0e5d1c8af1a4671b%22%7D; c=FuoAZQEAsHsTAAAUAQgACQAAgLrElILY4CDYNvMTASDQrSUa; b=nUwAAJBoPmMDosXR5CCG5oQO4ltqxSBq54QOYpLl6yBiWW0VIvzl6zAOfNwNOtuHt6ADAAAIpgR06GAGp1YMpgB06AAA; i=AQAQDgNlCQATAAguDyABAYwDApADARgHAewHATwJAUMLARkUAXseAjAgAfUgAfYgAfcgAfggAfEiAZMCCHYnbgABAQIBAgIBBwIBCAIBCQIBDgIBDwIBEQIBEgIBFwIBGAIBUQUBVgUBaAUBdAUBdQUBoAUBoQUBpAUBpgUBqQUBegYBDgsBKQsBLgsBxQsBxwsByQsBzAsBzQsBcA0BdQ0BeA0BvQ0B6BAB6RAB6hABw2MB3AQIBAEBAAHhBAkBAeIECgQGB80HOgUIDQQqAgEACAELCAEeEAHWBggEAQEAAb0HCAQBoxUBiQ0FAgHz; video_key=192bed9054db7a4efa7943ad834c7a2e05a55237; VID=0eXAI6071-IK00000t1kP4oK:::0-0-a1b105a-9aaf457:CAASEL33YAsZEz357mCA71F8QJgacM9HfhwzMJ-j3X3e-iJIE0DIiLWfRhfTc3GgyUNfH8_EwadLkVinwp0LA-QyaRe9p0A_ZR0y1i9Hk8aVl8Q8ZB_Qd_hCZN_SfHmeOvHeoe6QBCvz5w2SHcI2iFuAXKJkJMvNuYwSeBLdWhCXvsK5M_M" - cookie = SimpleCookie() - cookie.load(rawdata) - cookies = {k: v.value for k, v in cookie.items()} + if os.path.exists(os.path.join(os.getcwd() + f"/downloads/MyMailRu/{title}.mp4")): + return f"MyMailRu/{title}.mp4"#Result(result_type=ResultTypeEnum.EXIST) self.make_sure_path_exists() video_response = requests.get(link, cookies=cookies) with open(self.BASE_DIR + f"/{title}.mp4", "wb") as output: output.write(video_response.content) + return f"MyMailRu/{title}.mp4" diff --git a/src/parsers/Yappy/yappy_parser.py b/src/parsers/Yappy/yappy_parser.py index 0d4d248..2840369 100644 --- a/src/parsers/Yappy/yappy_parser.py +++ b/src/parsers/Yappy/yappy_parser.py @@ -1,4 +1,5 @@ import os +import uuid import requests @@ -17,17 +18,18 @@ class YappyParser(BaseParser): soup = BeautifulSoup(resp.text, 'lxml') link = soup.find('video').get("src") - title = soup.find('title').get_text() + title = soup.find('video').get("id") return link, title def video_download(self, link: str = None, title: str = None): if not link and not title: link, title = self.get_video_link() - if os.path.exists(os.path.join(os.getcwd() + f"Yappy/{title}.mp4")): - return Result(result_type=ResultTypeEnum.EXIST) + if os.path.exists(os.path.join(os.getcwd() + f"/downloads/Yappy/{title}.mp4")): + return f"Yappy/{title}.mp4" video_response = requests.get(link) self.make_sure_path_exists() with open(self.BASE_DIR + f"/{title}.mp4", "wb") as output: output.write(video_response.content) + return f"Yappy/{title}.mp4" diff --git a/src/parsers/base_parser.py b/src/parsers/base_parser.py index c7450a7..9334f67 100644 --- a/src/parsers/base_parser.py +++ b/src/parsers/base_parser.py @@ -24,6 +24,7 @@ class BaseParser: } downloader = VideoDownloader(link=self.params["link"], ydl_opts=ydl_opts) video_info = downloader.get_info() + #TODO Добавить динамеческое имя директории сервиса для проверки дублирования if os.path.exists( os.path.join(os.getcwd() + f"Youtube/{video_info['id']}_{video_info['width']}.{video_info['ext']}") ): @@ -31,7 +32,7 @@ class BaseParser: try: downloader.ydl_opts["quiet"] = False result = downloader.download() - return result + return f"{video_info['extractor_key']}/{result['id']}_{result['width']}p.{result['ext']}" except SiteNotImplementedException as ex: raise HTTPException( status_code=400, diff --git a/src/web/main.py b/src/web/main.py new file mode 100644 index 0000000..ff7d8af --- /dev/null +++ b/src/web/main.py @@ -0,0 +1,97 @@ +import asyncio +import json +import os +from ast import literal_eval + +import uvicorn +from aio_pika import connect, Message, DeliveryMode +from fastapi import FastAPI, Request, Form, HTTPException +from starlette.middleware.cors import CORSMiddleware +from starlette.responses import JSONResponse, FileResponse, StreamingResponse +from starlette.templating import Jinja2Templates + +from src.core.redis_client import RedisClient + +app = FastAPI( + title="video_downloader", openapi_url=f"/api/v1/openapi.json" +) + +templates = Jinja2Templates(directory="templates") + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.get("/") +async def index(request: Request): + return templates.TemplateResponse("index.html", {"request": request}) + + +@app.post('/submit/') +async def get_url_for_download_video(request: Request, link: str = Form(...)): + connection = await connect("amqp://guest:guest@localhost/") + + async with connection: + # Creating a channel + channel = await connection.channel() + body = [ + + { + "link": link, + "parser": "base", + "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best", + "merge_output_format": "mp4", + "outtmpl": f"downloads/%(extractor_key)s/%(id)s_%(width)sp.%(ext)s", + }, ] + # Sending the message + for link in body: + if "mail" in link["link"]: + link["parser"] = "MyMailRu" + elif "yappy" in link["link"]: + link["parser"] = "Yappy" + message = Message( + json.dumps(link, indent=4).encode('utf-8'), delivery_mode=DeliveryMode.PERSISTENT, + ) + await channel.default_exchange.publish( + message, + routing_key='hello', + ) + + print(f" [x] Sent '{link}'") + red = RedisClient() + + while True: + try: + mes = await red.get_task_done_queue() + task = literal_eval(list(mes)[0].decode('utf-8')) + if task["link"] == link["link"]: + await red.del_task_from_task_done_queue(task) + break + await asyncio.sleep(5) + except (AttributeError, IndexError): + await asyncio.sleep(5) + continue + link_to_download_video = str(request.base_url) + "get/?file_path=" + task["result"] + + return JSONResponse({"result": link_to_download_video}) + + +@app.get('/get/', response_class=FileResponse, status_code=200) +async def download_video(file_path): + base = os.path.dirname(os.path.dirname(os.path.abspath(file_path))) + base_download_dir = os.path.join(base, os.pardir, os.pardir, "downloads") + + def iterfile(): + with open(base_download_dir + f'/{file_path}', mode="rb") as file_like: + yield from file_like + + return StreamingResponse(iterfile(), media_type="video/mp4") + + +if __name__ == '__main__': + uvicorn.run("src.web.main:app", host="localhost", log_level="info") diff --git a/src/web/templates/index.html b/src/web/templates/index.html new file mode 100644 index 0000000..ebf5dcd --- /dev/null +++ b/src/web/templates/index.html @@ -0,0 +1,115 @@ + + + + + Video Downloading + + + +
+ + +
+
+

Ссылка для скачивания:

+
+ +
+
+
+
+ + + \ No newline at end of file