# yarmarka/seo_helpers.py

# seo_helpers.py
import html
import json
import re  # Добавляем импорт re
from typing import Dict, List, Any, Optional


def generate_resume_seo_tags(resume_data: Dict[str, Any], resume_id: int) -> Dict[str, str]:
    """
    Build the SEO tags for a resume page.

    Text destined for HTML (title, descriptions, keywords, profile names) is
    truncated first and HTML-escaped afterwards, so a cut can never land in
    the middle of an entity such as ``&quot;``. The JSON-LD payload is built
    from the unescaped values, because entities are not decoded inside a
    ``<script>`` element; only the sequence ``</`` is rewritten (to the
    equivalent JSON escape) so the payload cannot close the script tag.

    Args:
        resume_data: resume record; the keys read are ``full_name``,
            ``desired_position``, ``desired_salary``, ``about_me``, ``tags``,
            ``work_experience`` and ``education``. Missing, ``None`` or empty
            values fall back to defaults.
        resume_id: resume ID, used to build the page URL.

    Returns:
        Dict with the SEO tags; ``structured_data`` is a JSON-LD string.
    """

    def text_field(key: str, default: str = "") -> str:
        # Missing/None/empty values use the default; anything else is
        # stringified so numeric fields (e.g. a salary) do not crash.
        value = resume_data.get(key)
        return str(value) if value else default

    full_name = text_field("full_name")
    position = text_field("desired_position", "Специалист")
    salary = text_field("desired_salary", "Зарплата не указана")
    about = text_field("about_me", "Профессиональный опыт и навыки")

    # The key may be present but stored as None, so `.get(key, [])` is not enough.
    work_experience = resume_data.get("work_experience") or []
    education = resume_data.get("education") or []
    tags = [str(tag) for tag in (resume_data.get("tags") or [])]

    # First word is the first name, the rest is the last name.
    name_parts = full_name.strip().split(maxsplit=1)
    first_name = name_parts[0] if name_parts else ''
    last_name = name_parts[1] if len(name_parts) > 1 else ''

    # Description: assemble and truncate the raw text, escape afterwards.
    skills_list = ', '.join(tags)
    short_about = about[:157] + '...' if len(about) > 160 else about
    raw_description = (
        f"{full_name} - {position}. {salary}. Опыт работы: {len(work_experience)} мест. "
        f"Навыки: {skills_list}. {short_about}"
    )[:320]
    seo_description = html.escape(raw_description)
    social_description = html.escape(raw_description[:300])

    keywords = html.escape(
        f"{full_name}, {position}, резюме, поиск сотрудников, навыки: {skills_list[:200]}")
    heading = html.escape(f"{full_name} - {position}")
    page_url = f"https://yarmarka.rabota.today/resume/{resume_id}"

    # Structured data (schema.org), built from raw values.
    work_experience_json = []
    for exp in work_experience:
        period = exp.get("period") or ""
        # The period is "<start>–<end>" (en dash); trim the padding around it.
        period_parts = [part.strip() for part in str(period).split('–')] if period else []
        work_experience_json.append({
            "@type": "OrganizationRole",
            "roleName": exp.get("position", ""),
            "startDate": (period_parts[0] or None) if len(period_parts) > 0 else None,
            "endDate": (period_parts[1] or None) if len(period_parts) > 1 else None,
            "organization": {"@type": "Organization", "name": exp.get("company", "")}
        })

    education_json = [
        {
            "@type": "EducationalOccupationalCredential",
            "credentialCategory": "Degree",
            "name": edu.get("specialty", ""),
            "educationalLevel": edu.get("institution", ""),
            "dateCreated": edu.get("graduation_year", "")
        }
        for edu in education
    ]

    structured_data = {
        "@context": "https://schema.org",
        "@type": "Person",
        "name": full_name,
        "jobTitle": position,
        "description": text_field("about_me"),
        "worksFor": work_experience_json,
        "alumniOf": education_json,
        "knowsAbout": tags,
        "url": page_url
    }
    # "<\/" is a valid JSON spelling of "</" and keeps user text such as
    # "</script>" from terminating the script element the JSON is embedded in.
    structured_json = json.dumps(structured_data, ensure_ascii=False, indent=2).replace("</", "<\\/")

    return {
        "title": f"{heading} | Rabota.Today",
        "description": seo_description,
        "keywords": keywords,
        "og_title": heading,
        "og_description": social_description,
        "og_url": page_url,
        "profile_first_name": html.escape(first_name),
        "profile_last_name": html.escape(last_name),
        "twitter_title": heading,
        "twitter_description": social_description,
        "canonical_url": page_url,
        "structured_data": structured_json
    }


def generate_vacancy_seo_tags(vacancy_data: Dict[str, Any], vacancy_id: int) -> Dict[str, str]:
    """
    Build the SEO tags for a vacancy page.

    Text destined for HTML (title, descriptions, keywords) is truncated first
    and HTML-escaped afterwards, so a cut can never land in the middle of an
    entity. The JSON-LD payload is built from the unescaped values, because
    entities are not decoded inside a ``<script>`` element; only the sequence
    ``</`` is rewritten (to the equivalent JSON escape) so the payload cannot
    close the script tag.

    Args:
        vacancy_data: vacancy record; the keys read are ``title``,
            ``company_name``, ``salary``, ``description``, ``tags``,
            ``created_at``, ``valid_through``, ``company_website``,
            ``company_logo`` and ``company_address``. Missing, ``None`` or
            empty text values fall back to defaults.
        vacancy_id: vacancy ID, used to build the page URL.

    Returns:
        Dict with the SEO tags; ``structured_data`` is a JSON-LD string.
        ``baseSalary`` is included only when a number is found in the salary.
    """

    def text_field(key: str, default: str = "") -> str:
        # Missing/None/empty values use the default; anything else is
        # stringified so numeric fields (e.g. a salary) do not crash.
        value = vacancy_data.get(key)
        return str(value) if value else default

    def json_date(value: Any) -> Any:
        # date/datetime objects are not JSON-serialisable; emit ISO 8601.
        return value.isoformat() if hasattr(value, "isoformat") else value

    title = text_field("title")
    company = text_field("company_name", "Компания")
    salary = text_field("salary", "Зарплата не указана")
    description = text_field("description", "Подробная информация о вакансии")
    tags = [str(tag) for tag in (vacancy_data.get("tags") or [])]

    # Description: assemble and truncate the raw text, escape afterwards.
    tags_str = ', '.join(tags)
    short_description = description[:157] + '...' if len(description) > 160 else description
    raw_description = f"{title} в компании {company}. {salary}. {short_description}"[:320]
    seo_description = html.escape(raw_description)
    social_description = html.escape(raw_description[:300])

    keywords = html.escape(f"{title}, {company}, вакансия, работа, {tags_str}")
    heading = html.escape(f"{title} в {company}")
    page_url = f"https://yarmarka.rabota.today/vacancy/{vacancy_id}"

    # First number in the raw salary text. Thousands groups separated by a
    # (non-breaking) space are read as one number: "100 000" -> 100000.
    salary_value = None
    salary_match = re.search(r'\d{1,3}(?:[ \u00a0\u202f]\d{3})+(?!\d)|\d+', salary)
    if salary_match:
        salary_value = int(re.sub(r'\D', '', salary_match.group()))

    # Structured data (schema.org JobPosting), built from raw values.
    structured_data = {
        "@context": "https://schema.org",
        "@type": "JobPosting",
        "title": title,
        "description": description,
        "datePosted": json_date(vacancy_data.get("created_at")),
        "validThrough": json_date(vacancy_data.get("valid_through")),
        "employmentType": "FULL_TIME",
        "hiringOrganization": {
            "@type": "Organization",
            "name": company,
            "sameAs": vacancy_data.get("company_website", ""),
            # `or` also covers a key that is present but None/empty.
            "logo": vacancy_data.get("company_logo") or "https://yarmarka.rabota.today/static/images/logo.png"
        },
        "jobLocation": {
            "@type": "Place",
            "address": {
                "@type": "PostalAddress",
                "addressLocality": vacancy_data.get("company_address") or "Москва",
                "addressCountry": "RU"
            }
        }
    }
    if salary_value is not None:
        # An unknown salary is omitted rather than advertised as 0 RUB.
        structured_data["baseSalary"] = {
            "@type": "MonetaryAmount",
            "currency": "RUB",
            "value": {
                "@type": "QuantitativeValue",
                "value": salary_value,
                "unitText": "MONTH"
            }
        }
    structured_data["workHours"] = "Полный день"

    # "<\/" is a valid JSON spelling of "</" and keeps user text such as
    # "</script>" from terminating the script element the JSON is embedded in.
    structured_json = json.dumps(structured_data, ensure_ascii=False, indent=2).replace("</", "<\\/")

    return {
        "title": f"{heading} | Rabota.Today",
        "description": seo_description,
        "keywords": keywords,
        "og_title": heading,
        "og_description": social_description,
        "og_url": page_url,
        "twitter_title": heading,
        "twitter_description": social_description,
        "canonical_url": page_url,
        "structured_data": structured_json
    }


def inject_seo_tags(html_template: str, seo_tags: Dict[str, str]) -> str:
    """
    Inject SEO tags into an HTML template.

    Each placeholder element (identified by its ``id`` attribute) is replaced
    with the same element, without the ``id``, carrying the value from
    ``seo_tags``. Values are inserted verbatim: they must already be
    HTML-escaped by the caller. The ``profile:*`` tags are only replaced when
    the corresponding key is present in ``seo_tags``.

    The first ``<script type="application/ld+json">`` block is replaced with
    the ``structured_data`` JSON, and an empty ``id="structuredData"`` script
    block, if any, is removed.

    Args:
        html_template: HTML page containing the placeholder elements.
        seo_tags: mapping of tag names to values.

    Returns:
        The HTML with the SEO tags injected.
    """
    # (pattern, replacement markup, seo_tags key, default, only if key present)
    replacements = [
        (r'<title id="pageTitle">.*?</title>',
         '<title>{}</title>', "title", "Rabota.Today", False),
        (r'<meta name="description" id="metaDescription" content=".*?">',
         '<meta name="description" content="{}">', "description", "", False),
        (r'<meta name="keywords" id="metaKeywords" content=".*?">',
         '<meta name="keywords" content="{}">', "keywords", "", False),
        (r'<meta property="og:title" id="ogTitle" content=".*?">',
         '<meta property="og:title" content="{}">', "og_title", "", False),
        (r'<meta property="og:description" id="ogDescription" content=".*?">',
         '<meta property="og:description" content="{}">', "og_description", "", False),
        (r'<meta property="og:url" id="ogUrl" content=".*?">',
         '<meta property="og:url" content="{}">', "og_url", "", False),
        (r'<meta property="profile:first_name" id="profileFirstName" content=".*?">',
         '<meta property="profile:first_name" content="{}">', "profile_first_name", "", True),
        (r'<meta property="profile:last_name" id="profileLastName" content=".*?">',
         '<meta property="profile:last_name" content="{}">', "profile_last_name", "", True),
        (r'<meta name="twitter:title" id="twitterTitle" content=".*?">',
         '<meta name="twitter:title" content="{}">', "twitter_title", "", False),
        (r'<meta name="twitter:description" id="twitterDescription" content=".*?">',
         '<meta name="twitter:description" content="{}">', "twitter_description", "", False),
        (r'<link rel="canonical" id="canonicalUrl" href=".*?">',
         '<link rel="canonical" href="{}">', "canonical_url", "", False),
    ]

    result = html_template
    for pattern, markup, key, default, only_if_present in replacements:
        if only_if_present and key not in seo_tags:
            continue
        replacement = markup.format(seo_tags.get(key, default))
        # A callable replacement is inserted literally. A string replacement
        # would have its backslashes processed by re.sub ("\n" -> newline,
        # "\d" -> re.error), corrupting or rejecting user-provided text.
        result = re.sub(pattern, lambda _match, text=replacement: text, result)

    # Structured data: replace the whole first block from
    # <script type="application/ld+json"> to </script>.
    # "<\/" is a valid JSON spelling of "</" and keeps the payload from
    # terminating the script element early.
    structured_json = str(seo_tags.get("structured_data", "{}")).replace("</", "<\\/")
    structured_block = f'<script type="application/ld+json">\n{structured_json}\n</script>'
    # "+" must be escaped to match the literal MIME type, and DOTALL lets the
    # pattern span a multi-line script body.
    structured_pattern = r'<script type="application/ld\+json">.*?</script>'
    result = re.sub(structured_pattern, lambda _match: structured_block, result,
                    count=1, flags=re.DOTALL)

    # Remove the second, empty structured-data block if it exists.
    empty_structured_pattern = r'<script type="application/ld\+json" id="structuredData" style="display:none;">\s*</script>'
    result = re.sub(empty_structured_pattern, '', result)

    return result