From 7f4e661ea6c5ed215de4e1e5117f582bc32c04e3 Mon Sep 17 00:00:00 2001
From: nikili0n
Date: Wed, 11 Oct 2023 00:04:51 +0300
Subject: [PATCH] Added bing_parser.py, minor fixes

---
Review notes (text in this section is not part of the commit message):
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. The indentation of the context lines in base_parser.py
  and parser_mapping.py is ASSUMED, not known -- verify it against the tree,
  or apply with `git apply --ignore-whitespace`.
* The last parser_mapping.py hunk declares six context lines; the sixth is
  assumed to be a blank line after the closing parenthesis -- TODO confirm.
* bing_parser.py no longer matches the blob id on its `index` line: the
  browser is now closed explicitly, params["link"] is restored in a
  `finally`, no-placeholder f-strings became plain strings, and docstrings
  were added.

 src/parsers/Bing/bing_parser.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 src/parsers/base_parser.py      |  2 ++
 src/parsers/parser_mapping.py   |  2 ++
 3 files changed, 49 insertions(+)
 create mode 100644 src/parsers/Bing/bing_parser.py

diff --git a/src/parsers/Bing/bing_parser.py b/src/parsers/Bing/bing_parser.py
new file mode 100644
index 0000000..8a41a50
--- /dev/null
+++ b/src/parsers/Bing/bing_parser.py
@@ -0,0 +1,45 @@
+import os
+
+from playwright.sync_api import Playwright
+from playwright.sync_api import sync_playwright
+
+from src.parsers.base_parser import BaseParser
+
+
+class BingParser(BaseParser):
+    """Parser for bing.com pages whose video is embedded through an iframe."""
+
+    BASE_DIR = os.path.abspath("downloads/Bing")
+
+    def get_video_link(self, playwright: Playwright):
+        """Return the ``src`` attribute of the ``//iframe`` element on the page.
+
+        The page at ``self.params["link"]`` is opened in headless Chromium.
+        """
+        browser = playwright.chromium.launch(headless=True)
+        try:
+            context = browser.new_context()
+            page = context.new_page()
+            page.goto(url=self.params["link"], wait_until='domcontentloaded')
+            return page.get_attribute("xpath=//iframe", "src")
+        finally:
+            # Close the browser explicitly instead of leaving it running.
+            browser.close()
+
+    def video_download(self, link: str = None, title: str = None):
+        """Download the video embedded in the page at ``self.params["link"]``.
+
+        The ``link`` argument is ignored (the embedded link is always resolved
+        from the page) and ``title`` is unused. Returns whatever
+        ``BaseParser.video_download`` returns.
+        """
+        base_link = self.params["link"]
+        with sync_playwright() as playwright:
+            link = self.get_video_link(playwright)
+        try:
+            self.params["link"] = link
+            self.params['outtmpl'] = "downloads/Bing/%(id)s_%(resolution)s.%(ext)s"
+            return super().video_download()
+        finally:
+            # Restore the page URL even if the download raises.
+            self.params["link"] = base_link
diff --git a/src/parsers/base_parser.py b/src/parsers/base_parser.py
index 87ab363..1ae9811 100644
--- a/src/parsers/base_parser.py
+++ b/src/parsers/base_parser.py
@@ -33,6 +33,8 @@ class BaseParser:
                 path_to_video = f"Yahoo/{downloader.info['id']}_{resolution}.{downloader.info['ext']}"
             elif "ZenYandex" in ydl_opts["outtmpl"]["default"]:
                 path_to_video = f"ZenYandex/{downloader.info['id']}_{resolution}.{downloader.info['ext']}"
+            elif "Bing" in ydl_opts["outtmpl"]["default"]:
+                path_to_video = f"Bing/{downloader.info['id']}_{resolution}.{downloader.info['ext']}"
             else:
                 path_to_video = f"{downloader.info['extractor_key']}/{downloader.info['id']}_{resolution}.{downloader.info['ext']}"
             if os.path.exists(os.path.join(os.getcwd() + "/downloads/" + path_to_video)):
diff --git a/src/parsers/parser_mapping.py b/src/parsers/parser_mapping.py
index f1e100a..b149a2c 100644
--- a/src/parsers/parser_mapping.py
+++ b/src/parsers/parser_mapping.py
@@ -1,6 +1,7 @@
 from collections import OrderedDict
 import re
 
+from src.parsers.Bing.bing_parser import BingParser
 from src.parsers.Dzen.dzen_parser import DzenParser
 from src.parsers.MyMail.my_mail_parser import MyMailParser
 from src.parsers.Okru.ok_parser import OkParser
@@ -27,6 +28,7 @@ parser_mapping = OrderedDict(
         compile_regex(r"^.*\.yahoo.com/"): YahooParser,
         compile_regex(r"^.*\.livejournal.com/"): BaseParser,
         compile_regex(r"^.*\.dzen.ru/"): BaseParser,
+        compile_regex(r"^.*\.bing.com/"): BingParser,
     }
 )
 