added parsers for new social networks, rework master service
This commit is contained in:
43
src/parsers/MyMail/my_mail_parser.py
Normal file
43
src/parsers/MyMail/my_mail_parser.py
Normal file
@ -0,0 +1,43 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
from http.cookies import SimpleCookie
|
||||
|
||||
from playwright.sync_api import Playwright
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
from src.core.result import Result, ResultTypeEnum
|
||||
from src.parsers.base_parser import BaseParser
|
||||
|
||||
|
||||
class MyMailParser(BaseParser):
    """Parser for my.mail.ru video pages.

    Resolves the direct video URL by loading the mobile version of the page
    with Playwright (the mobile page exposes a plain ``<video src>``), then
    downloads the file with ``requests`` using the session cookies below.
    """

    # Absolute destination directory for downloaded files.
    BASE_DIR = os.path.abspath("downloads/MyMailRu")

    def get_video_link(self, playwright: Playwright) -> tuple:
        """Scrape the direct video URL and title from the mobile page.

        :param playwright: an active sync Playwright instance.
        :returns: ``(link, title)`` — direct video URL and page title.
        """
        browser = playwright.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            page = context.new_page()
            # Insert "m." right after the scheme ("https://" is 8 chars) to
            # build the mobile URL, which carries a scrapeable <video> tag.
            mobile_url = f"{self.params['link'][0:8]}m.{self.params['link'][8:]}"
            page.goto(url=mobile_url)
            link = page.get_attribute("xpath=//video", "src")
            # The src attribute is protocol-relative ("//..."): prepend scheme.
            link = "https:" + link
            title = page.locator("xpath=//div[@class='event-text__title']").text_content()
            return link, title
        finally:
            # Always release the browser process, even if scraping fails.
            browser.close()

    def video_download(self, link: str = None, title: str = None):
        """Download the video to ``BASE_DIR/{title}.mp4``.

        :param link: direct video URL; resolved via Playwright when missing.
        :param title: file title; resolved together with ``link``.
        :returns: ``Result(EXIST)`` when the file is already downloaded,
            otherwise ``None`` after writing the file.
        """
        # Bug fix: original used "and", so supplying only one of the pair
        # skipped resolution and produced e.g. "None.mp4". Refetch if either
        # is missing.
        if not link or not title:
            with sync_playwright() as playwright:
                link, title = self.get_video_link(playwright)

        # Bug fix: original checked os.getcwd() + "MyMailRu/..." (no separator,
        # no "downloads/" segment), which never matched the actual save path.
        file_path = os.path.join(self.BASE_DIR, f"{title}.mp4")
        if os.path.exists(file_path):
            return Result(result_type=ResultTypeEnum.EXIST)

        # NOTE(review): hard-coded session cookies — these expire and should
        # be moved to configuration/auth flow.
        rawdata = "searchuid=4515257701686610918; p=ki8AAAYkJdcA; act=064d11655c924c9f8f2aad0181a06a4b; o=:1763:AUAQ.m; oid=22SgCdFE5g2ZEFHy1FkYW; mrcu=5A5B64F228CC3AC265485FC5AE55; re_theme=system; re_theme_actual=dark; s=fver=0|rt=1|dpr=2|ww=1728|wh=963; ph_tp_horo-mail-ru=t=1|d=1693591768453; tmr_lvid=26ef811c203f1c0c0e5d1c8af1a4671b; tmr_lvidTS=1693591768481; _ym_uid=1693591769619458564; _ym_d=1693591769; ph_v_my-mail-ru=1; mrhc=CB75tAx8UrwCaiqE85YXWoCM2+CTT6/VsTcMdxv4iCM=; mr_my_b=1; _ga=GA1.2.2000378679.1694259228; mtrc=%7B%22mytrackerid%22%3A52867%2C%22tmr_lvid%22%3A%2226ef811c203f1c0c0e5d1c8af1a4671b%22%7D; c=FuoAZQEAsHsTAAAUAQgACQAAgLrElILY4CDYNvMTASDQrSUa; b=nUwAAJBoPmMDosXR5CCG5oQO4ltqxSBq54QOYpLl6yBiWW0VIvzl6zAOfNwNOtuHt6ADAAAIpgR06GAGp1YMpgB06AAA; i=AQAQDgNlCQATAAguDyABAYwDApADARgHAewHATwJAUMLARkUAXseAjAgAfUgAfYgAfcgAfggAfEiAZMCCHYnbgABAQIBAgIBBwIBCAIBCQIBDgIBDwIBEQIBEgIBFwIBGAIBUQUBVgUBaAUBdAUBdQUBoAUBoQUBpAUBpgUBqQUBegYBDgsBKQsBLgsBxQsBxwsByQsBzAsBzQsBcA0BdQ0BeA0BvQ0B6BAB6RAB6hABw2MB3AQIBAEBAAHhBAkBAeIECgQGB80HOgUIDQQqAgEACAELCAEeEAHWBggEAQEAAb0HCAQBoxUBiQ0FAgHz; video_key=192bed9054db7a4efa7943ad834c7a2e05a55237; VID=0eXAI6071-IK00000t1kP4oK:::0-0-a1b105a-9aaf457:CAASEL33YAsZEz357mCA71F8QJgacM9HfhwzMJ-j3X3e-iJIE0DIiLWfRhfTc3GgyUNfH8_EwadLkVinwp0LA-QyaRe9p0A_ZR0y1i9Hk8aVl8Q8ZB_Qd_hCZN_SfHmeOvHeoe6QBCvz5w2SHcI2iFuAXKJkJMvNuYwSeBLdWhCXvsK5M_M"
        cookie = SimpleCookie()
        cookie.load(rawdata)
        cookies = {k: v.value for k, v in cookie.items()}

        self.make_sure_path_exists()
        # Robustness fix: a timeout so a stalled CDN response cannot hang the
        # service forever.
        video_response = requests.get(link, cookies=cookies, timeout=60)
        with open(file_path, "wb") as output:
            output.write(video_response.content)
|
33
src/parsers/Yappy/yappy_parser.py
Normal file
33
src/parsers/Yappy/yappy_parser.py
Normal file
@ -0,0 +1,33 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.core.result import ResultTypeEnum, Result
|
||||
from src.parsers.base_parser import BaseParser
|
||||
|
||||
|
||||
class YappyParser(BaseParser):
    """Parser for Yappy short-video pages.

    Scrapes the direct video URL and title straight out of the page HTML
    with BeautifulSoup, then downloads the file with ``requests``.
    """

    # Absolute destination directory for downloaded files.
    BASE_DIR = os.path.abspath("downloads/Yappy")

    def get_video_link(self) -> tuple:
        """Fetch the page and extract the ``<video src>`` and ``<title>``.

        :returns: ``(link, title)`` — direct video URL and page title.
        """
        # Robustness fix: timeout so a stalled server cannot hang the service.
        resp = requests.get(self.params["link"], timeout=60)
        resp.encoding = self.BASE_ENCODING
        soup = BeautifulSoup(resp.text, 'lxml')

        link = soup.find('video').get("src")
        title = soup.find('title').get_text()
        return link, title

    def video_download(self, link: str = None, title: str = None):
        """Download the video to ``BASE_DIR/{title}.mp4``.

        :param link: direct video URL; scraped from the page when missing.
        :param title: file title; scraped together with ``link``.
        :returns: ``Result(EXIST)`` when the file is already downloaded,
            otherwise ``None`` after writing the file.
        """
        # Bug fix: original used "and", so supplying only one of the pair
        # skipped scraping and produced e.g. "None.mp4".
        if not link or not title:
            link, title = self.get_video_link()

        # Bug fix: original checked os.getcwd() + "Yappy/..." (no separator,
        # no "downloads/" segment), which never matched the actual save path.
        file_path = os.path.join(self.BASE_DIR, f"{title}.mp4")
        if os.path.exists(file_path):
            return Result(result_type=ResultTypeEnum.EXIST)

        video_response = requests.get(link, timeout=60)
        self.make_sure_path_exists()
        with open(file_path, "wb") as output:
            output.write(video_response.content)
|
46
src/parsers/base_parser.py
Normal file
46
src/parsers/base_parser.py
Normal file
@ -0,0 +1,46 @@
|
||||
import errno
|
||||
import os
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
from src.core.result import ResultTypeEnum, Result
|
||||
from src.core.ydl import VideoDownloader
|
||||
from src.exceptions.download_exceptions import SiteNotImplementedException
|
||||
|
||||
|
||||
class BaseParser:
    """Common base for site parsers.

    Provides shared configuration, the default yt-dlp download path, and
    download-directory creation for subclasses.
    """

    # Encoding used when decoding scraped HTTP responses.
    BASE_ENCODING = 'utf-8'
    # Subclasses override this with their absolute download directory.
    BASE_DIR = None

    def __init__(self, params: dict):
        """Store the request parameters.

        :param params: request payload; expected to contain "link" and, for
            the yt-dlp path, "format", "merge_output_format" and "outtmpl".
        """
        self.params = params

    def video_download(self):
        """Download ``params["link"]`` via the VideoDownloader (yt-dlp) wrapper.

        :returns: ``Result(EXIST)`` when the target file is already present,
            otherwise the downloader's result.
        :raises HTTPException: 400 when the site is not supported.
        """
        ydl_opts = {
            "format": self.params["format"],
            "merge_output_format": self.params["merge_output_format"],
            'outtmpl': self.params["outtmpl"],
            "quiet": True,
        }
        downloader = VideoDownloader(link=self.params["link"], ydl_opts=ydl_opts)
        video_info = downloader.get_info()
        # Bug fix: original concatenated cwd and the relative path with no
        # separator ("...cwdYoutube/..."), so the existence check could never
        # match. NOTE(review): other parsers save under "downloads/<site>";
        # confirm whether this path should include a "downloads" segment too.
        expected_file = os.path.join(
            os.getcwd(),
            f"Youtube/{video_info['id']}_{video_info['width']}.{video_info['ext']}",
        )
        if os.path.exists(expected_file):
            return Result(result_type=ResultTypeEnum.EXIST)
        try:
            # Switch to verbose output for the actual download run.
            downloader.ydl_opts["quiet"] = False
            return downloader.download()
        except SiteNotImplementedException as ex:
            # Chain the cause so the original traceback is preserved.
            raise HTTPException(
                status_code=400,
                detail=ex.message
            ) from ex

    def make_sure_path_exists(self):
        """Create ``BASE_DIR`` (including parents) if it does not exist yet."""
        # Idiom fix: os.makedirs(exist_ok=True) replaces the pre-3.2
        # errno.EEXIST dance; concurrent creation by another worker is safe.
        os.makedirs(self.BASE_DIR, exist_ok=True)
|
Reference in New Issue
Block a user