Added parsers for new social networks; reworked the master service
This commit is contained in:
53
src/core/link_parser.py
Normal file
53
src/core/link_parser.py
Normal file
@ -0,0 +1,53 @@
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
from playwright.async_api import Playwright
|
||||
from aio_pika import Message, connect, DeliveryMode
|
||||
|
||||
|
||||
async def run(playwright: Playwright) -> None:
    """Scroll through a my.mail.ru clip feed and publish one download job per clip.

    For each of 10 clips: build a yt-dlp style job dict for the current page URL,
    publish it as a persistent JSON message to the ``hello`` queue on the local
    RabbitMQ broker, then press ArrowDown and wait for the page title to change
    (the feed swaps the title when the next clip loads).

    :param playwright: an active Playwright session (provided by ``async_playwright``).
    """
    # headless=False keeps the browser visible for debugging; set True for servers.
    browser = await playwright.chromium.launch(headless=False)
    try:
        context = await browser.new_context()
        page = await context.new_page()
        await page.goto(url="https://m.my.mail.ru/v/topclips/video/alltop/68100.html")
        # await page.goto(url="https://www.youtube.com/shorts/vJU0Sr3WvmU")

        # NOTE(review): default guest credentials — move broker URL to configuration.
        connection = await connect("amqp://guest:guest@localhost/")
        async with connection:
            # BUG FIX: the channel was previously opened anew on every loop
            # iteration; one channel per connection is enough here.
            channel = await connection.channel()

            for _ in range(10):
                body = {
                    "link": page.url,
                    "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
                    "merge_output_format": "mp4",
                    # plain string: the %(...)s placeholders are for yt-dlp, not Python
                    "outtmpl": "downloads/%(extractor_key)s/%(id)s_%(width)sp.%(ext)s",
                }

                message = Message(
                    json.dumps(body, indent=4).encode('utf-8'),
                    delivery_mode=DeliveryMode.PERSISTENT,
                )
                await channel.default_exchange.publish(
                    message,
                    routing_key='hello',
                )
                print(f" [x] Sent '{body}'")

                # BUG FIX: the title must be re-read each iteration — previously it
                # was captured once before the loop, so from the second clip onward
                # the wait below exited immediately without waiting for navigation.
                title = await page.title()
                await page.keyboard.press("ArrowDown")

                # BUG FIX: sleep between polls instead of busy-spinning on
                # page.title() at full CPU.
                while title == await page.title():
                    await asyncio.sleep(0.1)
    finally:
        # Previously the browser was never closed.
        await browser.close()
|
||||
|
||||
async def main():
    """Open a Playwright session and hand it to the feed scraper."""
    async with async_playwright() as pw_session:
        await run(pw_session)
|
||||
|
||||
|
||||
asyncio.run(main())
|
@ -1,6 +1,5 @@
|
||||
import asyncio
|
||||
import concurrent.futures as pool
|
||||
import os.path
|
||||
import subprocess
|
||||
|
||||
from functools import partial
|
||||
@ -11,8 +10,10 @@ from src.core.async_queue import AsyncQueue
|
||||
from src.core.rabbitmq import get_messages
|
||||
from src.core.redis_client import RedisClient
|
||||
from src.core.result import Result, ResultTypeEnum
|
||||
from src.core.ydl import VideoDownloader
|
||||
from src.exceptions.download_exceptions import SiteNotImplementedException
|
||||
from src.parsers.MyMail.my_mail_parser import MyMailParser
|
||||
from src.parsers.Yappy.yappy_parser import YappyParser
|
||||
from src.parsers.base_parser import BaseParser
|
||||
|
||||
|
||||
class MasterService:
|
||||
@ -53,21 +54,9 @@ class MasterService:
|
||||
|
||||
@staticmethod
|
||||
def video_download(video_params: dict):
|
||||
ydl_opts = {
|
||||
"format": video_params["format"],
|
||||
"merge_output_format": video_params["merge_output_format"],
|
||||
'outtmpl': video_params["outtmpl"],
|
||||
"quiet": True
|
||||
}
|
||||
downloader = VideoDownloader(link=video_params["link"], ydl_opts=ydl_opts)
|
||||
video_info = downloader.get_info()
|
||||
if os.path.exists(
|
||||
os.path.join(os.getcwd() + f"Youtube/{video_info['id']}_{video_info['width']}.{video_info['ext']}")
|
||||
):
|
||||
return Result(result_type=ResultTypeEnum.EXIST)
|
||||
downloader: BaseParser | YappyParser | MyMailParser = MasterService.get_parser(video_params)
|
||||
try:
|
||||
downloader.ydl_opts["quiet"] = False
|
||||
result = downloader.download()
|
||||
result = downloader.video_download()
|
||||
return result
|
||||
except SiteNotImplementedException as ex:
|
||||
raise HTTPException(
|
||||
@ -75,6 +64,15 @@ class MasterService:
|
||||
detail=ex.message
|
||||
)
|
||||
|
||||
@staticmethod
def get_parser(params: dict):
    """Select and construct the parser matching ``params["parser"]``.

    Maps the parser name to its class and instantiates only the one that is
    requested — previously all three parsers were constructed eagerly on every
    call just to return one of them.

    :param params: job parameters; ``params["parser"]`` names the parser
        ("MyMailRu", "base", or "Yappy") and the whole dict is passed to its
        constructor.
    :raises KeyError: if ``params["parser"]`` names an unknown parser
        (same failure mode as the original mapping lookup).
    """
    parser_classes = {
        "MyMailRu": MyMailParser,
        "base": BaseParser,
        "Yappy": YappyParser,
    }
    return parser_classes[params["parser"]](params)
|
||||
|
||||
@staticmethod
|
||||
def video_processing_executor(video_params: dict):
|
||||
try:
|
||||
|
35
src/core/uploader.py
Normal file
35
src/core/uploader.py
Normal file
@ -0,0 +1,35 @@
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
|
||||
|
||||
def main(
    bucket: str = "clean-internet-oculus-integration-dev",
    object_name: str = "4uv2GNc_ybc_1080p.mp4",
    file_path: str = "/Users/garickbadalov/PycharmProjects/video_downloader_service/downloads/Youtube/4uv2GNc_ybc_1080p.mp4",
) -> None:
    """Upload a downloaded video file to the MinIO S3 bucket.

    The bucket, object name, and local file path were previously hard-coded;
    they are now keyword parameters whose defaults preserve the original
    behavior, so the script can be reused for other files.

    :param bucket: target S3 bucket; created if it does not exist yet.
    :param object_name: object key to store the file under.
    :param file_path: local path of the file to upload.
    :raises S3Error: propagated from the MinIO client on any storage failure.
    """
    # SECURITY(review): credentials are hard-coded in source — move them to
    # environment variables / a secrets store and rotate this key.
    client = Minio(
        "s3.grfc.ru:443",
        access_key="cl-i-oculus-dev1",
        secret_key="Nom8qKEU6IYtQSrNt5ZPN1XncQTZdtUM",
        secure=True
    )

    # Create the target bucket on first use.
    if not client.bucket_exists(bucket):
        client.make_bucket(bucket)
    else:
        # BUG FIX: the message previously referred to 'asiatrip' — a leftover
        # from the MinIO documentation example, not the real bucket.
        print(f"Bucket '{bucket}' already exists")

    client.fput_object(bucket, object_name, file_path)
    # BUG FIX: the success message previously referenced the MinIO docs example
    # paths ('asiaphotos.zip' / 'asiatrip') instead of the actual upload.
    print(
        f"'{file_path}' is successfully uploaded as "
        f"object '{object_name}' to bucket '{bucket}'."
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Report storage failures to the console instead of letting the
    # traceback escape.
    try:
        main()
    except S3Error as err:
        print("error occurred.", err)
|
@ -1,18 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import youtube_dl
|
||||
|
||||
from src.exceptions.download_exceptions import SiteNotImplementedException
|
||||
from yt_dlp import YoutubeDL
|
||||
|
||||
|
||||
class VideoDownloader:
|
||||
SUPPORTING_WEBSITES = [
|
||||
"ok.ru", "vk.com", "www.youtube.com",
|
||||
"ok.ru", "vk.com", "www.youtube.com", "livejournal.com"
|
||||
]
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
BASE_DOWNLOAD_DIR = os.path.join(BASE_DIR, "downloads")
|
||||
@ -25,15 +21,15 @@ class VideoDownloader:
|
||||
self.password = password
|
||||
|
||||
def get_info(self):
|
||||
with youtube_dl.YoutubeDL(self.ydl_opts if self.ydl_opts else {}) as ydl:
|
||||
with YoutubeDL(self.ydl_opts if self.ydl_opts else {}) as ydl:
|
||||
return ydl.extract_info(self.link, download=False)
|
||||
|
||||
def download(self):
|
||||
domain = urlparse(self.link).netloc
|
||||
if domain not in self.SUPPORTING_WEBSITES:
|
||||
raise SiteNotImplementedException
|
||||
# if domain not in self.SUPPORTING_WEBSITES:
|
||||
# raise SiteNotImplementedException
|
||||
|
||||
with youtube_dl.YoutubeDL(self.ydl_opts if self.ydl_opts else {}) as ydl:
|
||||
with YoutubeDL(self.ydl_opts if self.ydl_opts else {}) as ydl:
|
||||
ydl.download([self.link])
|
||||
result = ydl.extract_info(self.link, download=False)
|
||||
return result
|
||||
|
Reference in New Issue
Block a user