# video_downloader_service/src/parsers/Yappy/yappy_parser.py

import os
import requests

from bs4 import BeautifulSoup
from playwright.sync_api import Playwright
from playwright.sync_api import sync_playwright

from src.exceptions.download_exceptions import FileAlreadyExistException
from src.parsers.base_parser import BaseParser


class YappyParser(BaseParser):
    """Download a video from a Yappy page into ``downloads/Yappy``.

    The page URL is read from ``self.params["link"]``. The ``<video>``
    element's ``src`` attribute is used as the download URL and its ``id``
    attribute as the file name.
    """

    # Absolute download directory, resolved against the working directory
    # at the time this class is defined.
    BASE_DIR = os.path.abspath("downloads/Yappy")
    # Seconds to wait for the video host to connect / send data before the
    # request is aborted; without it a stalled server blocks forever.
    REQUEST_TIMEOUT = 30

    def get_video_link(self, playwright: Playwright):
        """Open the page in headless Chromium and read the ``<video>`` tag.

        NOTE(review): an earlier version parsed the raw HTML with
        requests + BeautifulSoup; presumably the ``<video>`` element is
        only available after the page is rendered in a browser -- confirm.

        Args:
            playwright: A started Playwright instance used to launch the
                browser.

        Returns:
            A ``(link, title)`` tuple holding the ``src`` and ``id``
            attributes of the first ``<video>`` element. Either value is
            ``None`` when the attribute is absent.
        """
        browser = playwright.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            page = context.new_page()
            page.goto(url=self.params["link"], wait_until="domcontentloaded")
            link = page.get_attribute("xpath=//video", "src")
            title = page.get_attribute("xpath=//video", "id")
        finally:
            # Always release the browser process, even if navigation or
            # the attribute lookup fails.
            browser.close()
        return link, title

    def video_download(self, link: str = None, title: str = None):
        """Download the video and store it as ``<BASE_DIR>/<title>.mp4``.

        Args:
            link: Direct URL of the video file. Scraped from the page when
                omitted.
            title: File name (without extension). Scraped from the page
                when omitted.

        Returns:
            The path of the saved file relative to the downloads folder,
            i.e. ``"Yappy/<title>.mp4"``.

        Raises:
            FileAlreadyExistException: If the target file already exists.
            requests.RequestException: If the download fails, times out or
                the server answers with an HTTP error status.
        """
        # Only pay for a browser launch when something is actually missing,
        # and keep whichever value the caller did provide.
        if not link or not title:
            with sync_playwright() as playwright:
                page_link, page_title = self.get_video_link(playwright)
            link = link or page_link
            title = title or page_title

        # Use the same path for the existence check and the write so they
        # cannot disagree if the working directory changes after import.
        target_path = os.path.join(self.BASE_DIR, f"{title}.mp4")
        if os.path.exists(target_path):
            raise FileAlreadyExistException(message=f"Yappy/{title}.mp4")

        video_response = requests.get(link, timeout=self.REQUEST_TIMEOUT)
        # Do not save an HTTP error page under an .mp4 name.
        video_response.raise_for_status()

        # Presumably creates the download directory if needed (defined
        # outside this class -- verify against BaseParser).
        self.make_sure_path_exists()
        with open(target_path, "wb") as output:
            output.write(video_response.content)
        return f"Yappy/{title}.mp4"