48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
import os
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
from playwright.sync_api import Playwright
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
from src.exceptions.download_exceptions import FileAlreadyExistException
|
|
from src.parsers.base_parser import BaseParser
|
|
|
|
|
|
class YappyParser(BaseParser):
    """Parser that downloads a Yappy video by reading the page's ``<video>`` tag.

    The page given in ``self.params["link"]`` is rendered in headless Chromium
    via Playwright; the ``src`` attribute of the first ``<video>`` element is
    used as the download URL and its ``id`` attribute as the file title.
    """

    # Absolute download directory, resolved against the working directory at
    # the time this module is imported.
    BASE_DIR = os.path.abspath("downloads/Yappy")

    # Seconds passed to ``requests.get`` as ``timeout`` so a stalled server
    # cannot block the download forever.
    REQUEST_TIMEOUT = 60

    def get_video_link(self, playwright: Playwright) -> tuple:
        """Render the page and return ``(link, title)`` of its ``<video>`` tag.

        Args:
            playwright: A started Playwright instance used to launch Chromium.

        Returns:
            A ``(src, id)`` tuple taken from the first ``<video>`` element.
            Either item is ``None`` when the element lacks that attribute.
        """
        browser = playwright.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            page = context.new_page()
            page.goto(url=self.params["link"], wait_until="domcontentloaded")
            link = page.get_attribute("xpath=//video", "src")
            title = page.get_attribute("xpath=//video", "id")
        finally:
            # Always release the browser process, even if navigation or the
            # attribute lookup fails.
            browser.close()
        return link, title

    def video_download(self, link: str = None, title: str = None) -> str:
        """Download the video into ``BASE_DIR`` and return its relative path.

        Args:
            link: Direct URL of the video file. Scraped from the page when
                missing.
            title: File name (without extension). Scraped from the page when
                missing.

        Returns:
            The string ``"Yappy/<title>.mp4"``.

        Raises:
            FileAlreadyExistException: If ``<title>.mp4`` already exists in
                ``BASE_DIR``.
            requests.HTTPError: If the video server answers with an error
                status.
        """
        # Start the browser only when something actually has to be scraped,
        # and fill in only the values the caller did not supply.
        if not link or not title:
            with sync_playwright() as playwright:
                found_link, found_title = self.get_video_link(playwright)
            link = link or found_link
            title = title or found_title

        # NOTE(review): ``title`` may come from the remote page and is used in
        # a file path as-is — confirm it cannot contain path separators.
        target_path = os.path.join(self.BASE_DIR, f"{title}.mp4")
        if os.path.exists(target_path):
            raise FileAlreadyExistException(message=f"Yappy/{title}.mp4")

        video_response = requests.get(link, timeout=self.REQUEST_TIMEOUT)
        # Do not save an HTTP error page under a .mp4 name.
        video_response.raise_for_status()

        # Inherited helper; presumably creates the download directory —
        # verify against BaseParser.
        self.make_sure_path_exists()
        with open(target_path, "wb") as output:
            output.write(video_response.content)
        return f"Yappy/{title}.mp4"