Rework redis; rework web to work with an array of links

This commit is contained in:
2023-09-29 05:53:27 +03:00
parent ca3cecf271
commit ef6f96bcde
10 changed files with 153 additions and 124 deletions

View File

@ -4,6 +4,7 @@ import requests
from bs4 import BeautifulSoup
from src.exceptions.download_exceptions import FileAlreadyExistException
from src.parsers.base_parser import BaseParser
@ -16,12 +17,24 @@ class OkParser(BaseParser):
resp.encoding = self.BASE_ENCODING
soup = BeautifulSoup(resp.text, 'lxml')
required_div = [div for div in soup.find_all('div', {'class': 'invisible'}) if len(div['class']) < 2][0]
link = required_div.find('span').find('span').find('a').get("href")
self.params["link"] = link
return link
video_tags = required_div.find('span').find_all_next('span', {'itemprop': "video"})
links = [video_tag.find('a').get("href") for video_tag in video_tags]
return links
except Exception as ex:
raise
def video_download(self):
    """Download every video linked from the page in ``self.params["link"]``.

    The page is resolved to a list of direct video links with
    ``get_video_link``; the inherited ``video_download`` is then run once
    per link, with ``self.params["link"]`` temporarily pointing at that
    link.

    Returns:
        list: one entry per resolved link, in page order. Each entry is
        either the value returned by the parent ``video_download`` or, when
        that call raised ``FileAlreadyExistException``, the exception's
        ``message`` (presumably the path of the already-downloaded file —
        confirm against the parent implementation).

    Raises:
        Any exception other than ``FileAlreadyExistException`` raised while
        resolving or downloading propagates to the caller.
    """
    base_link = self.params["link"]
    file_paths = []
    try:
        for link in self.get_video_link():
            # The parent implementation reads the target from self.params.
            self.params["link"] = link
            try:
                file_paths.append(super().video_download())
            except FileAlreadyExistException as ex:
                # An existing file is not an error for the batch: record it
                # and move on to the next link.
                file_paths.append(ex.message)
    finally:
        # Always restore the original page link, even when a download
        # fails, so the parser instance is left in its initial state.
        self.params["link"] = base_link
    return file_paths

View File

@ -20,11 +20,11 @@ class BaseParser:
"logger": logger,
"merge_output_format": self.params["merge_output_format"],
'outtmpl': self.params["outtmpl"],
"quiet": True
# "quiet": True
}
downloader = VideoDownloader(link=self.params["link"], ydl_opts=ydl_opts)
downloader.get_info()
path_to_video = f"{downloader.info['extractor_key']}/{downloader.info['id']}_{downloader.info['width']}p.{downloader.info['ext']}"
path_to_video = f"{downloader.info['extractor_key']}/{downloader.info['id']}_{downloader.info['resolution']}.{downloader.info['ext']}"
if os.path.exists(os.path.join(os.getcwd() + "/downloads/" + path_to_video)):
raise FileAlreadyExistException(message=path_to_video)
downloader.ydl_opts["quiet"] = False

View File

@ -6,12 +6,14 @@ from src.parsers.Okru.ok_parser import OkParser
from src.parsers.Yappy.yappy_parser import YappyParser
from src.parsers.base_parser import BaseParser
def compile_regex(regex):
    """Compile *regex* with the flag set shared by all URL patterns.

    The pattern is compiled case-insensitively, with ``.`` matching
    newlines and ``^``/``$`` matching at line boundaries.

    Args:
        regex: pattern source string.

    Returns:
        The compiled ``re.Pattern`` object.
    """
    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    return re.compile(regex, flags)
parser_mapping = OrderedDict(
{
compile_regex(r"^my.mail.ru/") : MyMailParser,
compile_regex(r"^my.mail.ru/"): MyMailParser,
compile_regex(r"^(?:www.)?(?:youtube.com|youtu.be)/"): BaseParser,
compile_regex(r"^vk.com/"): BaseParser,
compile_regex(r"^ok.ru/okvideo/topic"): OkParser,
@ -24,7 +26,8 @@ parser_mapping = OrderedDict(
}
)
def get_parser(uri, mapping=None):
    """Return the parser registered for *uri*, or ``None`` if none matches.

    Patterns are tried in the mapping's iteration order and the first one
    whose ``match`` succeeds at the start of *uri* wins.

    Args:
        uri: address to classify. NOTE(review): the registered patterns
            start directly with the host name, so callers presumably strip
            the scheme first — confirm.
        mapping: optional ``{compiled pattern: parser}`` table to search
            instead of the module-level ``parser_mapping``.

    Returns:
        The value stored for the first matching pattern, or ``None`` when
        no pattern matches.
    """
    if mapping is None:
        mapping = parser_mapping
    for pattern, parser in mapping.items():
        if pattern.match(uri):
            return parser
    return None