import os

from playwright.sync_api import Playwright
from playwright.sync_api import sync_playwright

from src.parsers.base_parser import BaseParser


class YahooParser(BaseParser):
    """Parser for Yahoo pages whose video is embedded through an ``<iframe>``.

    The page given in ``self.params["link"]`` is opened in a headless
    browser, the ``src`` of the first ``<iframe>`` is extracted, and the
    actual download is delegated to ``BaseParser.video_download`` with that
    URL substituted for the page link.
    """

    # Absolute path of the directory Yahoo downloads are written to.
    BASE_DIR = os.path.abspath("downloads/Yahoo")

    def get_video_link(self, playwright: Playwright):
        """Return the ``src`` attribute of the first ``<iframe>`` on the page.

        Args:
            playwright: A running Playwright instance used to launch a
                headless Chromium browser.

        Returns:
            The value of the iframe's ``src`` attribute, or ``None`` when the
            matched iframe has no ``src`` attribute.
        """
        browser = playwright.chromium.launch(headless=True)
        try:
            page = browser.new_context().new_page()
            page.goto(url=self.params["link"], wait_until='domcontentloaded')
            return page.get_attribute("xpath=//iframe", "src")
        finally:
            # Always release the browser (and its contexts/pages), including
            # when navigation or the iframe lookup raises.
            browser.close()

    def video_download(self, link: str = None, title: str = None):
        """Download the video embedded in the Yahoo page.

        Args:
            link: Unused; kept so the signature stays call-compatible. The
                page URL is always read from ``self.params["link"]``.
            title: Unused; kept so the signature stays call-compatible.

        Returns:
            Whatever ``BaseParser.video_download`` returns for the embedded
            video (treated by this class as the downloaded file path).
        """
        page_link = self.params["link"]

        # The browser is only needed to resolve the iframe URL, so the
        # Playwright session is closed before the download starts.
        with sync_playwright() as playwright:
            embedded_link = self.get_video_link(playwright)

        # NOTE: as before, 'outtmpl' is intentionally left set on params
        # after the call; only the link is restored.
        self.params['outtmpl'] = "downloads/Yahoo/%(id)s_%(resolution)s.%(ext)s"
        self.params["link"] = embedded_link
        try:
            return super().video_download()
        finally:
            # Restore the original page link even when the base download
            # raises, so the parser never keeps pointing at the iframe URL.
            self.params["link"] = page_link
f"Yahoo/{downloader.info['id']}_{resolution}.{downloader.info['ext']}" + else: + path_to_video = f"{downloader.info['extractor_key']}/{downloader.info['id']}_{resolution}.{downloader.info['ext']}" if os.path.exists(os.path.join(os.getcwd() + "/downloads/" + path_to_video)): raise FileAlreadyExistException(message=path_to_video) downloader.ydl_opts["quiet"] = False diff --git a/src/parsers/parser_mapping.py b/src/parsers/parser_mapping.py index 5178c69..0c3f9d5 100644 --- a/src/parsers/parser_mapping.py +++ b/src/parsers/parser_mapping.py @@ -3,7 +3,7 @@ import re from src.parsers.MyMail.my_mail_parser import MyMailParser from src.parsers.Okru.ok_parser import OkParser -from src.parsers.Yappy.yappy_parser import YappyParser +from src.parsers.Yahoo.yahoo_parser import YahooParser from src.parsers.base_parser import BaseParser @@ -22,6 +22,7 @@ parser_mapping = OrderedDict( compile_regex(r"^dzen.ru/"): BaseParser, compile_regex(r"^yappy.media/"): BaseParser, compile_regex(r"^yandex.ru/"): BaseParser, + compile_regex(r"^.*\.yahoo.com/"): YahooParser, compile_regex(r"^.*\.livejournal.com/"): BaseParser, } )