Rework Redis; rework web to work with an array of links
This commit is contained in:
@ -4,6 +4,7 @@ import requests
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.exceptions.download_exceptions import FileAlreadyExistException
|
||||
from src.parsers.base_parser import BaseParser
|
||||
|
||||
|
||||
@ -16,12 +17,24 @@ class OkParser(BaseParser):
|
||||
resp.encoding = self.BASE_ENCODING
|
||||
soup = BeautifulSoup(resp.text, 'lxml')
|
||||
required_div = [div for div in soup.find_all('div', {'class': 'invisible'}) if len(div['class']) < 2][0]
|
||||
link = required_div.find('span').find('span').find('a').get("href")
|
||||
self.params["link"] = link
|
||||
return link
|
||||
video_tags = required_div.find('span').find_all_next('span', {'itemprop': "video"})
|
||||
links = [video_tag.find('a').get("href") for video_tag in video_tags]
|
||||
return links
|
||||
except Exception as ex:
|
||||
raise
|
||||
|
||||
def video_download(self):
    """Download every video linked from the topic page.

    ``get_video_link()`` yields the list of video hrefs found on the page.
    Each one is handed to the parent ``video_download()`` by temporarily
    placing it in ``self.params["link"]``, which is where the parent reads
    its download target from.

    Returns:
        list: one entry per discovered link, in page order. Each entry is
        whatever the parent ``video_download()`` returned for that link
        (presumably the saved file's path -- confirm against BaseParser),
        or, when the file was already on disk, the ``message`` carried by
        the ``FileAlreadyExistException``.

    Raises:
        Any exception other than ``FileAlreadyExistException`` raised while
        collecting links or downloading propagates to the caller.
    """
    base_link = self.params["link"]
    file_paths = []
    try:
        for link in self.get_video_link():
            self.params["link"] = link
            try:
                file_paths.append(super().video_download())
            except FileAlreadyExistException as ex:
                # An existing file is not a failure: report where it lives
                # and carry on with the remaining links.
                file_paths.append(ex.message)
    finally:
        # Restore the topic link even if a download blows up, so the parser
        # is never left pointing at one of the inner video links.
        self.params["link"] = base_link
    return file_paths
|
||||
|
||||
|
@ -20,11 +20,11 @@ class BaseParser:
|
||||
"logger": logger,
|
||||
"merge_output_format": self.params["merge_output_format"],
|
||||
'outtmpl': self.params["outtmpl"],
|
||||
"quiet": True
|
||||
# "quiet": True
|
||||
}
|
||||
downloader = VideoDownloader(link=self.params["link"], ydl_opts=ydl_opts)
|
||||
downloader.get_info()
|
||||
path_to_video = f"{downloader.info['extractor_key']}/{downloader.info['id']}_{downloader.info['width']}p.{downloader.info['ext']}"
|
||||
path_to_video = f"{downloader.info['extractor_key']}/{downloader.info['id']}_{downloader.info['resolution']}.{downloader.info['ext']}"
|
||||
if os.path.exists(os.path.join(os.getcwd() + "/downloads/" + path_to_video)):
|
||||
raise FileAlreadyExistException(message=path_to_video)
|
||||
downloader.ydl_opts["quiet"] = False
|
||||
|
@ -6,12 +6,14 @@ from src.parsers.Okru.ok_parser import OkParser
|
||||
from src.parsers.Yappy.yappy_parser import YappyParser
|
||||
from src.parsers.base_parser import BaseParser
|
||||
|
||||
|
||||
def compile_regex(regex):
    """Compile *regex* with the flag set used for every routing pattern.

    The pattern is compiled case-insensitively (``re.IGNORECASE``), with
    ``.`` matching newlines (``re.DOTALL``) and ``^``/``$`` matching at line
    boundaries (``re.MULTILINE``).

    Args:
        regex: pattern source text.

    Returns:
        The compiled pattern object.
    """
    return re.compile(regex, re.IGNORECASE | re.DOTALL | re.MULTILINE)
|
||||
|
||||
|
||||
parser_mapping = OrderedDict(
|
||||
{
|
||||
compile_regex(r"^my.mail.ru/") : MyMailParser,
|
||||
compile_regex(r"^my.mail.ru/"): MyMailParser,
|
||||
compile_regex(r"^(?:www.)?(?:youtube.com|youtu.be)/"): BaseParser,
|
||||
compile_regex(r"^vk.com/"): BaseParser,
|
||||
compile_regex(r"^ok.ru/okvideo/topic"): OkParser,
|
||||
@ -24,7 +26,8 @@ parser_mapping = OrderedDict(
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def get_parser(uri):
    """Return the parser class registered for *uri*.

    The patterns in ``parser_mapping`` are tried in the mapping's iteration
    order and the first one whose ``match()`` succeeds at the start of
    *uri* wins.

    Args:
        uri: the address to route. The registered patterns visible in this
            module start at the host name (e.g. ``ok.ru/...``), so the
            scheme is presumably stripped by the caller -- confirm.

    Returns:
        The mapped parser class, or ``None`` when no pattern matches.
    """
    for regex, parser in parser_mapping.items():
        if regex.match(uri):
            return parser
    return None
|
||||
|
Reference in New Issue
Block a user