Compare commits

65 Commits

Author SHA1 Message Date
Garick.badalov f1fdcf8fb4 added s3_client, refactored web and master services 2023-11-21 22:21:00 +03:00
Garick.badalov 0b68675504 minor fixes 2023-10-30 22:06:42 +03:00
Garick.badalov f85a3a7600 minor fixes 2023-10-18 01:44:24 +03:00
Dantenerosas fff27fee0a wip 2023-10-18 00:54:24 +03:00
Dantenerosas 6424c12498 timeout 2023-10-18 00:50:28 +03:00
Dantenerosas 0cc18950db up 2023-10-18 00:48:55 +03:00
Dantenerosas 5eeecd95ed wip 2023-10-18 00:47:58 +03:00
Dantenerosas 93d330e854 test wip 2023-10-18 00:44:31 +03:00
Dantenerosas 7646759ead remove loop from app 2023-10-18 00:38:33 +03:00
Dantenerosas ccb16580be up wip 2023-10-18 00:19:31 +03:00
Dantenerosas 4b4f288919 wip telegram test 2023-10-18 00:15:44 +03:00
Garick.badalov e202f9a1f0 minor fixes 2023-10-17 19:07:26 +03:00
Garick.badalov 6d9f8ae704 minor fixes 2023-10-17 18:14:32 +03:00
Garick.badalov d404aa92a6 minor fixes 2023-10-17 15:50:04 +03:00
Garick.badalov 45811384c3 minor fixes 2023-10-17 14:51:22 +03:00
Garick.badalov a0f33895c6 fix paths 2023-10-17 13:06:40 +03:00
Garick.badalov fed29ad679 fix paths 2023-10-17 03:01:01 +03:00
Garick.badalov 8a467c1499 Merge pull request 'feature/tg_parser' (#2) from feature/tg_parser into main
Reviewed-on: #2
2023-10-17 02:05:14 +03:00
Garick.badalov ad89d518c9 refactored tg parser 2023-10-17 01:59:28 +03:00
Garick.badalov fce408310a refactored tg parser 2023-10-14 03:08:13 +03:00
Garick.badalov cd1026c807 Added tg_parser 2023-10-13 03:17:21 +03:00
Garick.badalov 485634f6b5 Added bing_parser.py, minor fixes 2023-10-11 00:04:51 +03:00
Garick.badalov b4bfde5bd2 rework yappy_parser.py, Added dzen_parser.py, minor fixes 2023-10-10 03:41:55 +03:00
Garick.badalov 002a7efb9c added resolution parameter, minor fixes 2023-10-01 03:18:00 +03:00
Garick.badalov 120e2bd514 added yahoo parser 2023-09-30 03:32:46 +03:00
Garick.badalov 1ddab9a964 minor fixes 2023-09-29 13:46:52 +03:00
Garick.badalov ef6f96bcde rework redis, rework web for work with array of links 2023-09-29 05:53:27 +03:00
Dantenerosas ca3cecf271 fix uri path 2023-09-28 16:03:32 +03:00
Dantenerosas b3a16834d7 live_journal 2023-09-28 16:00:19 +03:00
Dantenerosas 87cf25ed61 exracted parser mappings 2023-09-28 15:56:26 +03:00
Dantenerosas 0b314cfa6c [master_service] added youtube.com, to allowed domains 2023-09-28 07:08:23 +03:00
Garick.badalov b17bed48c3 added okru parser 2023-09-27 02:31:46 +03:00
Garick.badalov 493cde3f29 Merge remote-tracking branch 'origin/main' 2023-09-26 16:33:09 +03:00
Garick.badalov 186f581acc minor fixes 2023-09-26 16:32:36 +03:00
Dantenerosas abd9b0ccf8 change format defaults 2023-09-26 15:41:30 +03:00
Dantenerosas 52da88acaf unkown stuff 2023-09-26 02:04:59 +03:00
Garick.badalov dca266db1f minor fixes 2023-09-26 01:30:36 +03:00
Garick.badalov d3200f0bf2 minor fixes 2023-09-26 01:07:44 +03:00
Dantenerosas 43a4874d4b fix post check 2023-09-26 00:27:07 +03:00
Dantenerosas e21abb2604 up 2023-09-26 00:24:38 +03:00
Dantenerosas 881af00006 up 2023-09-26 00:19:08 +03:00
Dantenerosas a03b9b1a0b up link and change to post 2023-09-26 00:15:04 +03:00
Dantenerosas c0f25383a6 up fix to sitenotimplementedexception 2023-09-26 00:01:09 +03:00
Garick.badalov ad3e5fc5b9 minor fixes 2023-09-25 23:22:33 +03:00
Dantenerosas 0d2798068c fix 2023-09-25 21:23:20 +03:00
Dantenerosas 65472f4dc7 up 2023-09-25 21:20:58 +03:00
Garick.badalov 862fd26dac minor fixes 2023-09-25 18:10:53 +03:00
Dantenerosas 8423e732c4 up 2023-09-25 17:43:24 +03:00
Dantenerosas 67ea636db0 up 2023-09-25 15:48:00 +03:00
Garick.badalov 5eeddf0445 minor fixes, rework web service, add features 2023-09-25 04:05:42 +03:00
Dantenerosas 2ff7cae710 up 2023-09-22 13:35:00 +03:00
Garick.badalov 14b2e5479a minor fixes, added result processing 2023-09-22 00:17:24 +03:00
Dantenerosas 6d9b72247f up 2023-09-21 00:21:03 +03:00
Dantenerosas ee4f4b56ca change to host in web 2023-09-20 15:02:41 +03:00
Dantenerosas 911d32cdab main file for web 2023-09-20 15:00:41 +03:00
Garick.badalov 6d08b7a4a0 minor fixes, added web serer 2023-09-20 14:43:59 +03:00
Garick.badalov f38dcb5807 added parsers for new social networks, rework master service 2023-09-15 01:29:43 +03:00
Garick.badalov 1a479db726 refactoring for new arch, added Redis, fixed filename, added video exists check 2023-08-27 16:27:28 +03:00
Garick.badalov 05a7d7396a refactoring for new arch, minor fixes 2023-08-24 16:45:55 +03:00
Garick.badalov fb586271a9 refactoring master service 2023-08-24 03:28:55 +03:00
Garick.badalov 79732eb843 added aio rmq client, added internal queue for master service 2023-08-23 04:13:56 +03:00
Garick.badalov 634579e10f fix extension and downloading 2023-08-15 20:51:44 +03:00
Garick.badalov 13f2d6f218 added loader, added video format check, fix video download 2023-08-15 01:38:45 +03:00
Garick.badalov 71c860689a added link 2023-08-14 17:47:15 +03:00
Garick.badalov 7331ef166c initial commit 2023-08-12 13:06:41 +03:00
8 changed files with 13 additions and 108 deletions
-17
View File
@@ -1,17 +0,0 @@
FROM python:3.11.4
WORKDIR /app
COPY poetry.lock pyproject.toml /app/
RUN apt-get -y update
RUN apt-get -y upgrade
RUN apt-get install -y ffmpeg
RUN pip install poetry
RUN poetry install --no-root
COPY .. /app
CMD poetry run python main.py
+2 -43
View File
@@ -1,60 +1,19 @@
version: "2.1" version: "2.1"
networks:
network:
services: services:
web_service:
container_name: web_service
build:
context: .
dockerfile: web.Dockerfile
ports:
- "8000:8000"
depends_on:
redis:
condition: service_started
rabbitmq: rabbitmq:
condition: service_healthy
restart: always
networks:
- network
download_service:
container_name: download_service
build:
context: .
dockerfile: Dockerfile
depends_on:
redis:
condition: service_started
rabbitmq:
condition: service_healthy
restart: always
networks:
- network
rabbitmq:
container_name: rabbitmq
image: rabbitmq:3.10.7-management image: rabbitmq:3.10.7-management
hostname: rabbitmq hostname: rabbitmq
restart: always restart: always
healthcheck:
test: rabbitmq-diagnostics -q ping
interval: 30s
timeout: 30s
retries: 3
environment: environment:
- RABBITMQ_DEFAULT_USER=guest - RABBITMQ_DEFAULT_USER=guest
- RABBITMQ_DEFAULT_PASS=guest - RABBITMQ_DEFAULT_PASS=guest
volumes: volumes:
- ./rabbitmq:/var/lib/rabbitmq - ./rabbitmq:/var/lib/rabbitmq
ports: ports:
- "15672:15672" - 15672:15672
- "5672:5672" - 5672:5672
networks:
- network
redis: redis:
container_name: redis_video_downloader container_name: redis_video_downloader
image: redis:latest image: redis:latest
ports: ports:
- "6379:6379" - "6379:6379"
networks:
- network
-22
View File
@@ -1,30 +1,8 @@
import asyncio import asyncio
import json
from typing import Any
from multiprocessing import freeze_support from multiprocessing import freeze_support
from src.core.master_service import MasterService from src.core.master_service import MasterService
from loguru import logger
def json_logs(message: Any) -> None:
record = message.record
data = {
"timestamp": record["time"].strftime("%d.%m.%y %H.%M.%S %Z%z"),
"level": record["level"].name,
"message": record["message"],
"path": record["file"].path,
"function": record["function"],
"line": record["line"],
}
print(json.dumps(data))
logger.remove(0)
logger.add(json_logs)
if __name__ == '__main__': if __name__ == '__main__':
freeze_support() freeze_support()
loop = asyncio.new_event_loop() loop = asyncio.new_event_loop()
+1 -3
View File
@@ -1,7 +1,6 @@
import asyncio import asyncio
import json import json
from loguru import logger
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
from playwright.async_api import Playwright from playwright.async_api import Playwright
from aio_pika import Message, connect, DeliveryMode from aio_pika import Message, connect, DeliveryMode
@@ -39,13 +38,12 @@ async def run(playwright: Playwright):
routing_key='hello', routing_key='hello',
) )
logger.info(f" [x] Sent '{body}'") print(f" [x] Sent '{body}'")
await page.keyboard.press("ArrowDown") await page.keyboard.press("ArrowDown")
while title == await page.title(): while title == await page.title():
await page.title() await page.title()
async def main(): async def main():
async with async_playwright() as playwright: async with async_playwright() as playwright:
await run(playwright) await run(playwright)
+5 -5
View File
@@ -4,17 +4,16 @@ from functools import partial
from aio_pika import connect, Message, DeliveryMode from aio_pika import connect, Message, DeliveryMode
from aio_pika.abc import AbstractIncomingMessage from aio_pika.abc import AbstractIncomingMessage
from loguru import logger
async def on_message(message: AbstractIncomingMessage, queue) -> None: async def on_message(message: AbstractIncomingMessage, queue) -> None:
async with message.process(): async with message.process():
await queue.put(json.loads(message.body)) await queue.put(json.loads(message.body))
logger.info(f" Message body is: {message.body!r}") print(f" Message body is: {message.body!r}")
async def get_messages(inner_queue) -> None: async def get_messages(inner_queue) -> None:
async with await connect("amqp://guest:guest@rabbitmq/") as connection: async with await connect("amqp://guest:guest@localhost/") as connection:
channel = await connection.channel() channel = await connection.channel()
await channel.set_qos(prefetch_count=1) await channel.set_qos(prefetch_count=1)
@@ -24,13 +23,14 @@ async def get_messages(inner_queue) -> None:
) )
await queue.consume(partial(on_message, queue=inner_queue)) await queue.consume(partial(on_message, queue=inner_queue))
logger.info("[*] Waiting for messages. To exit press CTRL+C")
print(" [*] Waiting for messages. To exit press CTRL+C")
await asyncio.Future() await asyncio.Future()
async def publish_message_with_task_done(task: dict | list) -> None: async def publish_message_with_task_done(task: dict | list) -> None:
queue_name = "tasks_done" queue_name = "tasks_done"
async with await connect("amqp://guest:guest@rabbitmq/") as connection: async with await connect("amqp://guest:guest@localhost/") as connection:
# Creating channel # Creating channel
channel = await connection.channel() channel = await connection.channel()
+1 -1
View File
@@ -9,7 +9,7 @@ class RedisClient:
TASKS_DONE_NAME = "tasks_done" TASKS_DONE_NAME = "tasks_done"
def __init__(self): def __init__(self):
self.connection = redis.Redis(host="redis_video_downloader", port=6379, db=0) self.connection = redis.Redis(host="localhost", port=6379, db=0)
async def _set_task(self, queue_name: str, link: str, task: dict | list, ) -> int: async def _set_task(self, queue_name: str, link: str, task: dict | list, ) -> int:
async with self.connection as connection: async with self.connection as connection:
+4 -4
View File
@@ -1,5 +1,4 @@
import json import json
import os
import uvicorn import uvicorn
import logging import logging
@@ -16,7 +15,8 @@ from src.web.schemes.submit import SubmitIn, CheckIn, DeleteFromS3, CopyToAnothe
app = FastAPI( app = FastAPI(
title="video_downloader", openapi_url=f"/api/v1/openapi.json" title="video_downloader", openapi_url=f"/api/v1/openapi.json"
) )
templates = Jinja2Templates(directory=f"{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/web/templates")
templates = Jinja2Templates(directory="templates")
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
@@ -102,7 +102,7 @@ async def get_url_for_download_video(request: Request, data: SubmitIn = Depends(
return JSONResponse({"result": links_to_download_video}) return JSONResponse({"result": links_to_download_video})
# TODO: учесть, что если делать запрос CURL\urllib3\etc, в теле может быть несколько ссылок -> должно быть создано несколько задач # TODO: учесть, что если делать запрос CURL\urllib3\etc, в теле может быть несколько ссылок -> должно быть создано несколько задач
async with await connect("amqp://guest:guest@rabbitmq/") as connection: async with await connect("amqp://guest:guest@localhost/") as connection:
# Creating a channel # Creating a channel
channel = await connection.channel() channel = await connection.channel()
body = [ body = [
@@ -204,4 +204,4 @@ async def delete_video_from_s3(data: CopyToAnotherBucketS3):
) )
uvicorn.run("src.web.main:app", host="0.0.0.0", port=8000, log_level="info") uvicorn.run("src.web.main:app", host="0.0.0.0", log_level="info")
-13
View File
@@ -1,13 +0,0 @@
FROM python:3.11.4
WORKDIR /app
COPY poetry.lock pyproject.toml /app/
RUN pip install poetry
RUN poetry install --no-root
COPY .. /app
CMD poetry run python main_web.py