Compare commits

65 Commits

Author SHA1 Message Date
nikili0n 8747643616 added s3_client, refactored web and master services 2023-11-21 23:36:58 +03:00
nikili0n 29bffb6e53 minor fixes 2023-11-21 23:36:58 +03:00
nikili0n aad25c98d9 minor fixes 2023-11-21 23:36:58 +03:00
nikili0n e183953bfa wip 2023-11-21 23:36:58 +03:00
nikili0n 70f25f3e36 timeout 2023-11-21 23:36:58 +03:00
nikili0n be792ca57f up 2023-11-21 23:36:58 +03:00
nikili0n b2806da8ca wip 2023-11-21 23:36:58 +03:00
nikili0n dc066a17ab test wip 2023-11-21 23:36:58 +03:00
nikili0n 69eb65f32b remove loop from app 2023-11-21 23:36:58 +03:00
nikili0n a9575c55e9 up wip 2023-11-21 23:36:58 +03:00
nikili0n c12b966496 wip telegram test 2023-11-21 23:36:58 +03:00
nikili0n 913dc7f9aa minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 724e07179d minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 5bb7a72c6c minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 56a43d212e minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 2c6547f04f fix paths 2023-11-21 23:36:58 +03:00
nikili0n b83017a3d8 fix paths 2023-11-21 23:36:58 +03:00
nikili0n 85e3bb4e8e Merge pull request 'feature/tg_parser' (#2) from feature/tg_parser into main
Reviewed-on: #2
2023-11-21 23:36:58 +03:00
nikili0n 398260eafd Added bing_parser.py, minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 038eba9b2a refactored tg parser 2023-11-21 23:36:58 +03:00
nikili0n 6abd2807aa rework yappy_parser.py, Added dzen_parser.py, minor fixes 2023-11-21 23:36:58 +03:00
nikili0n d2fa090731 refactored tg parser 2023-11-21 23:36:58 +03:00
nikili0n 17a257955c added resolution parameter, minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 3ec8326fcd Added tg_parser 2023-11-21 23:36:58 +03:00
nikili0n 75be832b2d added yahoo parser 2023-11-21 23:36:58 +03:00
nikili0n 93b8e50680 minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 5b848dd3eb rework redis, rework web for work with array of links 2023-11-21 23:36:58 +03:00
nikili0n 1a2975c0d4 fix uri path 2023-11-21 23:36:58 +03:00
nikili0n 9210bcbd07 live_journal 2023-11-21 23:36:58 +03:00
nikili0n a79b29e0fa exracted parser mappings 2023-11-21 23:36:58 +03:00
nikili0n d24d9a3a89 [master_service] added youtube.com, to allowed domains 2023-11-21 23:36:58 +03:00
nikili0n adc191e71f added okru parser 2023-11-21 23:36:58 +03:00
nikili0n 9728287569 Merge remote-tracking branch 'origin/main' 2023-11-21 23:36:58 +03:00
nikili0n 1d8b2e4a0d minor fixes 2023-11-21 23:36:58 +03:00
nikili0n a58b543529 change format defaults 2023-11-21 23:36:58 +03:00
nikili0n df51b59379 minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 9d17a292bd unkown stuff 2023-11-21 23:36:58 +03:00
nikili0n 337885746d minor fixes 2023-11-21 23:36:58 +03:00
nikili0n ad3c7450fb fix post check 2023-11-21 23:36:58 +03:00
nikili0n fa75f980d2 up 2023-11-21 23:36:58 +03:00
nikili0n 00c10a4145 up 2023-11-21 23:36:58 +03:00
nikili0n ed267e065f up link and change to post 2023-11-21 23:36:58 +03:00
nikili0n d7b82e3184 up fix to sitenotimplementedexception 2023-11-21 23:36:58 +03:00
nikili0n 89398ae5d1 minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 90207dd5a7 fix 2023-11-21 23:36:58 +03:00
nikili0n c3a3138c0c up 2023-11-21 23:36:58 +03:00
nikili0n 72c2f50a0c minor fixes 2023-11-21 23:36:58 +03:00
nikili0n ef9dc04458 up 2023-11-21 23:36:58 +03:00
nikili0n 8bd1463401 up 2023-11-21 23:36:58 +03:00
nikili0n 9d6d9947f5 minor fixes, rework web service, add features 2023-11-21 23:36:58 +03:00
nikili0n 801b9f2e52 up 2023-11-21 23:36:58 +03:00
nikili0n 79afa55e73 minor fixes, added result processing 2023-11-21 23:36:58 +03:00
nikili0n bce40ee341 up 2023-11-21 23:36:58 +03:00
nikili0n 53b3481c0e change to host in web 2023-11-21 23:36:58 +03:00
nikili0n 1fea12fc29 main file for web 2023-11-21 23:36:58 +03:00
nikili0n a672d2e421 minor fixes, added web serer 2023-11-21 23:36:58 +03:00
nikili0n 5f9b092832 added parsers for new social networks, rework master service 2023-11-21 23:36:58 +03:00
nikili0n 26740ef9ed refactoring for new arch, added Redis, fixed filename, added video exists check 2023-11-21 23:36:58 +03:00
nikili0n dbd1f19c95 refactoring for new arch, minor fixes 2023-11-21 23:36:58 +03:00
nikili0n 338d2c58a1 refactoring master service 2023-11-21 23:36:58 +03:00
nikili0n e909c80178 added aio rmq client, added internal queue for master service 2023-11-21 23:36:58 +03:00
nikili0n c1a4972889 fix extension and downloading 2023-11-21 23:36:58 +03:00
nikili0n 934fde77ec added loader, added video format check, fix video download 2023-11-21 23:36:58 +03:00
nikili0n 87cc8a0648 added link 2023-11-21 23:36:58 +03:00
nikili0n 46ae3a7077 initial commit 2023-11-21 23:36:58 +03:00
8 changed files with 13 additions and 108 deletions
-17
View File
@@ -1,17 +0,0 @@
FROM python:3.11.4
WORKDIR /app
COPY poetry.lock pyproject.toml /app/
RUN apt-get -y update
RUN apt-get -y upgrade
RUN apt-get install -y ffmpeg
RUN pip install poetry
RUN poetry install --no-root
COPY .. /app
CMD poetry run python main.py
+2 -43
View File
@@ -1,60 +1,19 @@
version: "2.1" version: "2.1"
networks:
network:
services: services:
web_service:
container_name: web_service
build:
context: .
dockerfile: web.Dockerfile
ports:
- "8000:8000"
depends_on:
redis:
condition: service_started
rabbitmq:
condition: service_healthy
restart: always
networks:
- network
download_service:
container_name: download_service
build:
context: .
dockerfile: Dockerfile
depends_on:
redis:
condition: service_started
rabbitmq:
condition: service_healthy
restart: always
networks:
- network
rabbitmq: rabbitmq:
container_name: rabbitmq
image: rabbitmq:3.10.7-management image: rabbitmq:3.10.7-management
hostname: rabbitmq hostname: rabbitmq
restart: always restart: always
healthcheck:
test: rabbitmq-diagnostics -q ping
interval: 30s
timeout: 30s
retries: 3
environment: environment:
- RABBITMQ_DEFAULT_USER=guest - RABBITMQ_DEFAULT_USER=guest
- RABBITMQ_DEFAULT_PASS=guest - RABBITMQ_DEFAULT_PASS=guest
volumes: volumes:
- ./rabbitmq:/var/lib/rabbitmq - ./rabbitmq:/var/lib/rabbitmq
ports: ports:
- "15672:15672" - 15672:15672
- "5672:5672" - 5672:5672
networks:
- network
redis: redis:
container_name: redis_video_downloader container_name: redis_video_downloader
image: redis:latest image: redis:latest
ports: ports:
- "6379:6379" - "6379:6379"
networks:
- network
-22
View File
@@ -1,30 +1,8 @@
import asyncio import asyncio
import json
from typing import Any
from multiprocessing import freeze_support from multiprocessing import freeze_support
from src.core.master_service import MasterService from src.core.master_service import MasterService
from loguru import logger
def json_logs(message: Any) -> None:
record = message.record
data = {
"timestamp": record["time"].strftime("%d.%m.%y %H.%M.%S %Z%z"),
"level": record["level"].name,
"message": record["message"],
"path": record["file"].path,
"function": record["function"],
"line": record["line"],
}
print(json.dumps(data))
logger.remove(0)
logger.add(json_logs)
if __name__ == '__main__': if __name__ == '__main__':
freeze_support() freeze_support()
loop = asyncio.new_event_loop() loop = asyncio.new_event_loop()
+1 -3
View File
@@ -1,7 +1,6 @@
import asyncio import asyncio
import json import json
from loguru import logger
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
from playwright.async_api import Playwright from playwright.async_api import Playwright
from aio_pika import Message, connect, DeliveryMode from aio_pika import Message, connect, DeliveryMode
@@ -39,13 +38,12 @@ async def run(playwright: Playwright):
routing_key='hello', routing_key='hello',
) )
logger.info(f" [x] Sent '{body}'") print(f" [x] Sent '{body}'")
await page.keyboard.press("ArrowDown") await page.keyboard.press("ArrowDown")
while title == await page.title(): while title == await page.title():
await page.title() await page.title()
async def main(): async def main():
async with async_playwright() as playwright: async with async_playwright() as playwright:
await run(playwright) await run(playwright)
+5 -5
View File
@@ -4,17 +4,16 @@ from functools import partial
from aio_pika import connect, Message, DeliveryMode from aio_pika import connect, Message, DeliveryMode
from aio_pika.abc import AbstractIncomingMessage from aio_pika.abc import AbstractIncomingMessage
from loguru import logger
async def on_message(message: AbstractIncomingMessage, queue) -> None: async def on_message(message: AbstractIncomingMessage, queue) -> None:
async with message.process(): async with message.process():
await queue.put(json.loads(message.body)) await queue.put(json.loads(message.body))
logger.info(f" Message body is: {message.body!r}") print(f" Message body is: {message.body!r}")
async def get_messages(inner_queue) -> None: async def get_messages(inner_queue) -> None:
async with await connect("amqp://guest:guest@rabbitmq/") as connection: async with await connect("amqp://guest:guest@localhost/") as connection:
channel = await connection.channel() channel = await connection.channel()
await channel.set_qos(prefetch_count=1) await channel.set_qos(prefetch_count=1)
@@ -24,13 +23,14 @@ async def get_messages(inner_queue) -> None:
) )
await queue.consume(partial(on_message, queue=inner_queue)) await queue.consume(partial(on_message, queue=inner_queue))
logger.info("[*] Waiting for messages. To exit press CTRL+C")
print(" [*] Waiting for messages. To exit press CTRL+C")
await asyncio.Future() await asyncio.Future()
async def publish_message_with_task_done(task: dict | list) -> None: async def publish_message_with_task_done(task: dict | list) -> None:
queue_name = "tasks_done" queue_name = "tasks_done"
async with await connect("amqp://guest:guest@rabbitmq/") as connection: async with await connect("amqp://guest:guest@localhost/") as connection:
# Creating channel # Creating channel
channel = await connection.channel() channel = await connection.channel()
+1 -1
View File
@@ -9,7 +9,7 @@ class RedisClient:
TASKS_DONE_NAME = "tasks_done" TASKS_DONE_NAME = "tasks_done"
def __init__(self): def __init__(self):
self.connection = redis.Redis(host="redis_video_downloader", port=6379, db=0) self.connection = redis.Redis(host="localhost", port=6379, db=0)
async def _set_task(self, queue_name: str, link: str, task: dict | list, ) -> int: async def _set_task(self, queue_name: str, link: str, task: dict | list, ) -> int:
async with self.connection as connection: async with self.connection as connection:
+4 -4
View File
@@ -1,5 +1,4 @@
import json import json
import os
import uvicorn import uvicorn
import logging import logging
@@ -16,7 +15,8 @@ from src.web.schemes.submit import SubmitIn, CheckIn, DeleteFromS3, CopyToAnothe
app = FastAPI( app = FastAPI(
title="video_downloader", openapi_url=f"/api/v1/openapi.json" title="video_downloader", openapi_url=f"/api/v1/openapi.json"
) )
templates = Jinja2Templates(directory=f"{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/web/templates")
templates = Jinja2Templates(directory="templates")
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
@@ -102,7 +102,7 @@ async def get_url_for_download_video(request: Request, data: SubmitIn = Depends(
return JSONResponse({"result": links_to_download_video}) return JSONResponse({"result": links_to_download_video})
# TODO: учесть, что если делать запрос CURL\urllib3\etc, в теле может быть несколько ссылок -> должно быть создано несколько задач # TODO: учесть, что если делать запрос CURL\urllib3\etc, в теле может быть несколько ссылок -> должно быть создано несколько задач
async with await connect("amqp://guest:guest@rabbitmq/") as connection: async with await connect("amqp://guest:guest@localhost/") as connection:
# Creating a channel # Creating a channel
channel = await connection.channel() channel = await connection.channel()
body = [ body = [
@@ -204,4 +204,4 @@ async def delete_video_from_s3(data: CopyToAnotherBucketS3):
) )
uvicorn.run("src.web.main:app", host="0.0.0.0", port=8000, log_level="info") uvicorn.run("src.web.main:app", host="0.0.0.0", log_level="info")
-13
View File
@@ -1,13 +0,0 @@
FROM python:3.11.4
WORKDIR /app
COPY poetry.lock pyproject.toml /app/
RUN pip install poetry
RUN poetry install --no-root
COPY .. /app
CMD poetry run python main_web.py