Files
yarmarka/seo_helpers.py
2026-03-20 19:49:22 +03:00

247 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# seo_helpers.py
import html
import json
import re # Добавляем импорт re
from typing import Dict, List, Any, Optional
def generate_resume_seo_tags(resume_data: Dict[str, Any], resume_id: int) -> Dict[str, str]:
    """Build the SEO tag values for a resume page.

    Text destined for HTML attributes is truncated first and HTML-escaped
    afterwards, so a cut can never land inside an entity such as ``&quot;``.

    Args:
        resume_data: Resume record. Keys read here: ``full_name``,
            ``desired_position``, ``desired_salary``, ``about_me``, ``tags``,
            ``work_experience`` (items with ``position``, ``company``,
            ``period``) and ``education`` (items with ``specialty``,
            ``institution``, ``graduation_year``). Missing, ``None`` or empty
            values fall back to defaults.
        resume_id: Resume identifier used to build the page URL.

    Returns:
        Dict with title, description, keywords, Open Graph / Twitter /
        profile values, the canonical URL and ``structured_data`` (a JSON-LD
        string).
    """

    def text(key: str, default: str = "") -> str:
        # Falsy values fall back to the default; anything else is stringified
        # so that e.g. an integer salary does not crash html.escape().
        value = resume_data.get(key)
        return str(value) if value else default

    raw_name = text("full_name")
    raw_position = text("desired_position", "Специалист")
    raw_salary = text("desired_salary", "Зарплата не указана")
    raw_about = text("about_me", "Профессиональный опыт и навыки")

    # ``or []`` also covers keys that are present but explicitly None.
    work_experience = resume_data.get("work_experience") or []
    education = resume_data.get("education") or []
    tags = resume_data.get("tags") or []

    full_name = html.escape(raw_name)
    position = html.escape(raw_position)
    # Everything before the first space is the first name, the rest the last name.
    raw_first, _, raw_last = raw_name.partition(' ')

    # Description: assemble from raw text, cut, then escape.
    raw_skills = ', '.join(str(tag) for tag in tags)
    short_about = raw_about[:157] + '...' if len(raw_about) > 160 else raw_about
    raw_description = (
        f"{raw_name} - {raw_position}. {raw_salary}. "
        f"Опыт работы: {len(work_experience)} мест. Навыки: {raw_skills}. {short_about}"
    )[:320]
    seo_description = html.escape(raw_description)
    social_description = html.escape(raw_description[:300])

    # Keywords (the skills part is capped at 200 characters before escaping).
    keywords = html.escape(
        f"{raw_name}, {raw_position}, резюме, поиск сотрудников, навыки: {raw_skills[:200]}"
    )

    # Structured data: work history.
    # A period is split on an en/em dash, or on a hyphen surrounded by
    # whitespace; a bare hyphen is left alone because it also occurs inside
    # dates such as "2019-05".
    work_experience_json = []
    for exp in work_experience:
        period = str(exp.get("period") or "").strip()
        period_parts = re.split(r'\s*[–—]\s*|\s+-\s+', period, maxsplit=1) if period else []
        work_experience_json.append({
            "@type": "OrganizationRole",
            "roleName": exp.get("position", ""),
            "startDate": (period_parts[0] or None) if len(period_parts) > 0 else None,
            "endDate": (period_parts[1] or None) if len(period_parts) > 1 else None,
            "organization": {"@type": "Organization", "name": exp.get("company", "")}
        })

    # Structured data: education.
    education_json = [
        {
            "@type": "EducationalOccupationalCredential",
            "credentialCategory": "Degree",
            "name": edu.get("specialty", ""),
            "educationalLevel": edu.get("institution", ""),
            "dateCreated": edu.get("graduation_year", "")
        }
        for edu in education
    ]

    page_url = f"https://yarmarka.rabota.today/resume/{resume_id}"
    structured_data = {
        "@context": "https://schema.org",
        "@type": "Person",
        "name": full_name,
        "jobTitle": position,
        "description": resume_data.get("about_me") or "",
        "worksFor": work_experience_json,
        "alumniOf": education_json,
        "knowsAbout": tags,
        "url": page_url
    }
    # "</" inside a JSON string would terminate the surrounding <script>
    # element; "<\/" is the equivalent JSON escape and is harmless in HTML.
    structured_json = json.dumps(structured_data, ensure_ascii=False, indent=2).replace("</", "<\\/")

    return {
        "title": f"{full_name} - {position} | Rabota.Today",
        "description": seo_description,
        "keywords": keywords,
        "og_title": f"{full_name} - {position}",
        "og_description": social_description,
        "og_url": page_url,
        "profile_first_name": html.escape(raw_first),
        "profile_last_name": html.escape(raw_last),
        "twitter_title": f"{full_name} - {position}",
        "twitter_description": social_description,
        "canonical_url": page_url,
        "structured_data": structured_json
    }
def generate_vacancy_seo_tags(vacancy_data: Dict[str, Any], vacancy_id: int) -> Dict[str, str]:
    """Build the SEO tag values for a vacancy page.

    Text destined for HTML attributes is truncated first and HTML-escaped
    afterwards, so a cut can never land inside an entity such as ``&quot;``.

    Args:
        vacancy_data: Vacancy record. Keys read here: ``title``,
            ``company_name``, ``salary``, ``description``, ``tags``,
            ``created_at``, ``valid_through``, ``company_website``,
            ``company_logo`` and ``company_address``. Missing, ``None`` or
            empty values fall back to defaults.
        vacancy_id: Vacancy identifier used to build the page URL.

    Returns:
        Dict with title, description, keywords, Open Graph / Twitter values,
        the canonical URL and ``structured_data`` (a JSON-LD string).
    """

    def text(key: str, default: str = "") -> str:
        # Falsy values fall back to the default; anything else is stringified
        # so that e.g. an integer salary does not crash html.escape().
        value = vacancy_data.get(key)
        return str(value) if value else default

    raw_title = text("title")
    raw_company = text("company_name", "Компания")
    raw_salary = text("salary", "Зарплата не указана")
    raw_description = text("description", "Подробная информация о вакансии")

    title = html.escape(raw_title)
    company = html.escape(raw_company)
    description = html.escape(raw_description)

    # Description: assemble from raw text, cut, then escape.
    tags = vacancy_data.get("tags") or []
    raw_tags = ', '.join(str(tag) for tag in tags)
    short_description = (
        raw_description[:157] + '...' if len(raw_description) > 160 else raw_description
    )
    raw_seo_description = f"{raw_title} в компании {raw_company}. {raw_salary}. {short_description}"[:320]
    seo_description = html.escape(raw_seo_description)
    social_description = html.escape(raw_seo_description[:300])

    # Keywords.
    keywords = html.escape(f"{raw_title}, {raw_company}, вакансия, работа, {raw_tags}")

    # The first number in the salary text is taken from the RAW string:
    # escaping turns "'" into "&#x27;", which would otherwise parse as 27.
    # Thousands groups separated by a (non-breaking / narrow) space are
    # joined, so "100 000" yields 100000 rather than 100.
    salary_value = 0
    salary_match = re.search(r'\d{1,3}(?:[ \u00a0\u202f]\d{3})+(?!\d)|\d+', raw_salary)
    if salary_match:
        salary_value = int(re.sub(r'\D', '', salary_match.group()))

    page_url = f"https://yarmarka.rabota.today/vacancy/{vacancy_id}"
    structured_data = {
        "@context": "https://schema.org",
        "@type": "JobPosting",
        "title": title,
        "description": description,
        "datePosted": vacancy_data.get("created_at"),
        "validThrough": vacancy_data.get("valid_through"),
        "employmentType": "FULL_TIME",
        "hiringOrganization": {
            "@type": "Organization",
            "name": company,
            "sameAs": vacancy_data.get("company_website") or "",
            "logo": vacancy_data.get("company_logo")
            or "https://yarmarka.rabota.today/static/images/logo.png"
        },
        "jobLocation": {
            "@type": "Place",
            "address": {
                "@type": "PostalAddress",
                "addressLocality": vacancy_data.get("company_address") or "Москва",
                "addressCountry": "RU"
            }
        },
        "workHours": "Полный день"
    }
    # Publish a salary only when one was actually found; a baseSalary of 0
    # would claim the job is unpaid.
    if salary_value > 0:
        structured_data["baseSalary"] = {
            "@type": "MonetaryAmount",
            "currency": "RUB",
            "value": {
                "@type": "QuantitativeValue",
                "value": salary_value,
                "unitText": "MONTH"
            }
        }
    # "</" inside a JSON string would terminate the surrounding <script>
    # element; "<\/" is the equivalent JSON escape and is harmless in HTML.
    structured_json = json.dumps(structured_data, ensure_ascii=False, indent=2).replace("</", "<\\/")

    return {
        "title": f"{title} в {company} | Rabota.Today",
        "description": seo_description,
        "keywords": keywords,
        "og_title": f"{title} в {company}",
        "og_description": social_description,
        "og_url": page_url,
        "twitter_title": f"{title} в {company}",
        "twitter_description": social_description,
        "canonical_url": page_url,
        "structured_data": structured_json
    }
def inject_seo_tags(html_template: str, seo_tags: Dict[str, str]) -> str:
    """Insert SEO tag values into an HTML template.

    Each placeholder element is located by its ``id`` attribute and replaced
    with the same element (without the ``id``) carrying the value from
    ``seo_tags``. Values are inserted verbatim: this function does no HTML
    escaping, so callers must pass attribute-safe text.

    Args:
        html_template: HTML page containing the placeholder elements.
        seo_tags: Tag values. Keys read here: ``title``, ``description``,
            ``keywords``, ``og_title``, ``og_description``, ``og_url``,
            ``profile_first_name``, ``profile_last_name``, ``twitter_title``,
            ``twitter_description``, ``canonical_url``, ``structured_data``.

    Returns:
        The template with the placeholders replaced.
    """

    def substitute(pattern: str, replacement: str, text: str, count: int = 0, flags: int = 0) -> str:
        # A callable replacement is inserted as-is. A plain string would be
        # treated as a regex template, so backslashes in user data or JSON
        # ("\n", "\u001f", "\1") would be rewritten or raise re.error.
        return re.sub(pattern, lambda _match: replacement, text, count=count, flags=flags)

    # (placeholder pattern, replacement element, seo_tags key, default,
    #  replace only when the key is present)
    rules = [
        ('<title id="pageTitle">.*?</title>',
         '<title>{}</title>', "title", "Rabota.Today", False),
        ('<meta name="description" id="metaDescription" content=".*?">',
         '<meta name="description" content="{}">', "description", "", False),
        ('<meta name="keywords" id="metaKeywords" content=".*?">',
         '<meta name="keywords" content="{}">', "keywords", "", False),
        ('<meta property="og:title" id="ogTitle" content=".*?">',
         '<meta property="og:title" content="{}">', "og_title", "", False),
        ('<meta property="og:description" id="ogDescription" content=".*?">',
         '<meta property="og:description" content="{}">', "og_description", "", False),
        ('<meta property="og:url" id="ogUrl" content=".*?">',
         '<meta property="og:url" content="{}">', "og_url", "", False),
        ('<meta property="profile:first_name" id="profileFirstName" content=".*?">',
         '<meta property="profile:first_name" content="{}">', "profile_first_name", "", True),
        ('<meta property="profile:last_name" id="profileLastName" content=".*?">',
         '<meta property="profile:last_name" content="{}">', "profile_last_name", "", True),
        ('<meta name="twitter:title" id="twitterTitle" content=".*?">',
         '<meta name="twitter:title" content="{}">', "twitter_title", "", False),
        ('<meta name="twitter:description" id="twitterDescription" content=".*?">',
         '<meta name="twitter:description" content="{}">', "twitter_description", "", False),
        ('<link rel="canonical" id="canonicalUrl" href=".*?">',
         '<link rel="canonical" href="{}">', "canonical_url", "", False),
    ]

    result = html_template
    for pattern, element, key, default, only_if_present in rules:
        if only_if_present and key not in seo_tags:
            continue
        result = substitute(pattern, element.format(seo_tags.get(key, default)), result)

    # Replace the first JSON-LD block. "+" must be escaped to match the
    # literal "ld+json", and DOTALL lets ".*?" span a multi-line block.
    structured_pattern = r'<script type="application/ld\+json">.*?</script>'
    structured_block = (
        f'<script type="application/ld+json">\n{seo_tags.get("structured_data", "{}")}\n</script>'
    )
    result = substitute(structured_pattern, structured_block, result, count=1, flags=re.DOTALL)

    # Drop the second, empty JSON-LD placeholder if the template has one.
    empty_structured_pattern = r'<script type="application/ld\+json" id="structuredData" style="display:none;">\s*</script>'
    result = re.sub(empty_structured_pattern, '', result)
    return result