Третий коммит, добавление share, share_kb, а также ADMIN_ID
This commit is contained in:
14
myenv/Lib/site-packages/yarl/__init__.py
Normal file
14
myenv/Lib/site-packages/yarl/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from ._query import Query, QueryVariable, SimpleQuery
|
||||
from ._url import URL, cache_clear, cache_configure, cache_info
|
||||
|
||||
__version__ = "1.20.1"
|
||||
|
||||
__all__ = (
|
||||
"URL",
|
||||
"SimpleQuery",
|
||||
"QueryVariable",
|
||||
"Query",
|
||||
"cache_clear",
|
||||
"cache_configure",
|
||||
"cache_info",
|
||||
)
|
203
myenv/Lib/site-packages/yarl/_parse.py
Normal file
203
myenv/Lib/site-packages/yarl/_parse.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""URL parsing utilities."""
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
from functools import lru_cache
|
||||
from typing import Union
|
||||
from urllib.parse import scheme_chars, uses_netloc
|
||||
|
||||
from ._quoters import QUOTER, UNQUOTER_PLUS
|
||||
|
||||
# Leading and trailing C0 control and space to be stripped per WHATWG spec.
# == "".join([chr(i) for i in range(0, 0x20 + 1)])
# NOTE: split_url() only strips these from the *left* side of the input.
WHATWG_C0_CONTROL_OR_SPACE = (
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10"
    "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f "
)

# Unsafe bytes to be removed per WHATWG spec
# (split_url() deletes every occurrence, wherever it appears in the URL).
UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
# Schemes that urllib.parse lists in ``uses_netloc``; frozen for fast
# membership tests in unsplit_result().
USES_AUTHORITY = frozenset(uses_netloc)

# Shape of the value returned by split_url():
# (scheme, netloc, path, query, fragment).
SplitURLType = tuple[str, str, str, str, str]
|
||||
|
||||
|
||||
def split_url(url: str) -> SplitURLType:
    """Split URL into parts.

    Returns a ``(scheme, netloc, path, query, fragment)`` tuple of strings;
    parts that are absent are returned as ``""``. The scheme is lowercased,
    the other parts are returned as found (no unquoting or normalization).

    Raises ValueError when the netloc has unbalanced square brackets, when a
    bracketed host is neither a valid IPvFuture literal nor contains a ``:``,
    or when a non-ASCII netloc fails the NFKC check in ``_check_netloc``.
    """
    # Adapted from urllib.parse.urlsplit
    # Only lstrip url as some applications rely on preserving trailing space.
    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
    url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE)
    # Tabs and newlines are dropped wherever they occur in the input.
    for b in UNSAFE_URL_BYTES_TO_REMOVE:
        if b in url:
            url = url.replace(b, "")

    scheme = netloc = query = fragment = ""
    i = url.find(":")
    # Everything before the first ":" is a scheme only if every character
    # (including the first) is in urllib's ``scheme_chars``.
    if i > 0 and url[0] in scheme_chars:
        for c in url[1:i]:
            if c not in scheme_chars:
                break
        else:
            # Loop finished without ``break``: accept and strip the scheme.
            scheme, url = url[:i].lower(), url[i + 1 :]
    # Computed once, after the scheme is removed; reused below to avoid
    # scanning for delimiters that cannot be present.
    has_hash = "#" in url
    has_question_mark = "?" in url
    if url[:2] == "//":
        delim = len(url)  # position of end of domain part of url, default is end
        if has_hash and has_question_mark:
            delim_chars = "/?#"
        elif has_question_mark:
            delim_chars = "/?"
        elif has_hash:
            delim_chars = "/#"
        else:
            delim_chars = "/"
        for c in delim_chars:  # look for delimiters; the order is NOT important
            wdelim = url.find(c, 2)  # find first of this delim
            if wdelim >= 0 and wdelim < delim:  # if found
                delim = wdelim  # use earliest delim position
        netloc = url[2:delim]
        url = url[delim:]
        has_left_bracket = "[" in netloc
        has_right_bracket = "]" in netloc
        # Exactly one kind of bracket present means the literal is malformed.
        if (has_left_bracket and not has_right_bracket) or (
            has_right_bracket and not has_left_bracket
        ):
            raise ValueError("Invalid IPv6 URL")
        if has_left_bracket:
            # Text between the first "[" and the following "]".
            bracketed_host = netloc.partition("[")[2].partition("]")[0]
            # Valid bracketed hosts are defined in
            # https://www.rfc-editor.org/rfc/rfc3986#page-49
            # https://url.spec.whatwg.org/
            if bracketed_host and bracketed_host[0] == "v":
                if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host):
                    raise ValueError("IPvFuture address is invalid")
            elif ":" not in bracketed_host:
                raise ValueError("The IPv6 content between brackets is not valid")
    # The fragment is split off first so a "?" inside it is not taken as the
    # start of the query.
    if has_hash:
        url, _, fragment = url.partition("#")
    if has_question_mark:
        url, _, query = url.partition("?")
    # Pure-ASCII netlocs cannot change under NFKC, so the check is skipped.
    if netloc and not netloc.isascii():
        _check_netloc(netloc)
    return scheme, netloc, url, query, fragment
|
||||
|
||||
|
||||
def _check_netloc(netloc: str) -> None:
|
||||
# Adapted from urllib.parse._checknetloc
|
||||
# looking for characters like \u2100 that expand to 'a/c'
|
||||
# IDNA uses NFKC equivalence, so normalize for this check
|
||||
|
||||
# ignore characters already included
|
||||
# but not the surrounding text
|
||||
n = netloc.replace("@", "").replace(":", "").replace("#", "").replace("?", "")
|
||||
normalized_netloc = unicodedata.normalize("NFKC", n)
|
||||
if n == normalized_netloc:
|
||||
return
|
||||
# Note that there are no unicode decompositions for the character '@' so
|
||||
# its currently impossible to have test coverage for this branch, however if the
|
||||
# one should be added in the future we want to make sure its still checked.
|
||||
for c in "/?#@:": # pragma: no branch
|
||||
if c in normalized_netloc:
|
||||
raise ValueError(
|
||||
f"netloc '{netloc}' contains invalid "
|
||||
"characters under NFKC normalization"
|
||||
)
|
||||
|
||||
|
||||
@lru_cache  # match the same size as urlsplit
def split_netloc(
    netloc: str,
) -> tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]:
    """Break a netloc into ``(username, password, host, port)``.

    Missing or empty username/host are reported as ``None``. The password is
    ``None`` only when no ``:`` follows the username (an empty password after
    a ``:`` is returned as ``""``). The port is an ``int`` or ``None``.

    Raises ValueError when the port is not an integer or is outside 0-65535.
    """
    username: Union[str, None] = None
    password: Union[str, None] = None
    # The *last* "@" separates userinfo from the host part.
    userinfo, at_sign, hostinfo = netloc.rpartition("@")
    if at_sign:
        username, colon, password = userinfo.partition(":")
        if not colon:
            password = None

    if "[" in hostinfo:
        # Bracketed literal: the host is the text inside the brackets and the
        # port, if any, follows the first ":" after the closing bracket.
        inside_and_after = hostinfo.partition("[")[2]
        hostname, _, after_bracket = inside_and_after.partition("]")
        port_str = after_bracket.partition(":")[2]
    else:
        hostname, _, port_str = hostinfo.partition(":")

    user = username or None
    host = hostname or None
    if not port_str:
        return user, password, host, None

    try:
        port = int(port_str)
    except ValueError:
        raise ValueError("Invalid URL: port can't be converted to integer")
    if port < 0 or port > 65535:
        raise ValueError("Port out of range 0-65535")
    return user, password, host, port
|
||||
|
||||
|
||||
def unsplit_result(
    scheme: str, netloc: str, url: str, query: str, fragment: str
) -> str:
    """Unsplit a URL without any normalization.

    Joins the five components produced by ``split_url`` back into a string.
    ``url`` is the path component. An authority section (``//netloc``) is
    emitted when ``netloc`` is non-empty, when ``scheme`` is one of
    ``USES_AUTHORITY``, or when the path itself starts with ``//``.

    A relative path that follows an authority is separated from it with a
    ``/``. This also applies when ``scheme`` is empty, so
    ``("", "host", "path", "", "")`` yields ``"//host/path"``. Previously
    that combination produced ``":path"``, silently dropping the netloc.
    """
    if netloc or (scheme and scheme in USES_AUTHORITY) or url[:2] == "//":
        if url and url[:1] != "/":
            # A path may not directly follow an authority without a slash.
            url = f"/{url}"
        url = f"{scheme}://{netloc}{url}" if scheme else f"//{netloc}{url}"
    elif scheme:
        url = f"{scheme}:{url}"
    if query:
        url = f"{url}?{query}"
    return f"{url}#{fragment}" if fragment else url
|
||||
|
||||
|
||||
@lru_cache  # match the same size as urlsplit
def make_netloc(
    user: Union[str, None],
    password: Union[str, None],
    host: Union[str, None],
    port: Union[int, None],
    encode: bool = False,
) -> str:
    """Assemble ``[user[:password]@]host[:port]`` from its parts.

    Returns ``""`` when ``host`` is None. When ``encode`` is true, non-empty
    user and the password are passed through ``QUOTER``; the host is used
    as given and must already be encoded with _encode_host.
    """
    if host is None:
        return ""

    host_port = host if port is None else f"{host}:{port}"
    if user is None and password is None:
        return host_port

    if password is None:
        # User only; an empty user produces no "@" prefix at all.
        credentials = QUOTER(user) if (user and encode) else user
    else:
        # A password forces the "user:password" form even with an empty user.
        name = user or ""
        if encode:
            if name:
                name = QUOTER(name)
            password = QUOTER(password)
        credentials = f"{name}:{password}"

    if not credentials:
        return host_port
    return f"{credentials}@{host_port}"
|
||||
|
||||
|
||||
def query_to_pairs(query_string: str) -> list[tuple[str, str]]:
    """Split a raw query string into unquoted ``(key, value)`` pairs.

    Behaves like urllib.parse.parse_qsl with keep empty values: the string
    is split on ``&`` and each item on its first ``=``; a missing value
    becomes the unquoted empty string. An empty query yields an empty list.
    """
    if not query_string:
        return []
    return [
        (UNQUOTER_PLUS(key), UNQUOTER_PLUS(value))
        for key, _, value in (
            item.partition("=") for item in query_string.split("&")
        )
    ]
|
41
myenv/Lib/site-packages/yarl/_path.py
Normal file
41
myenv/Lib/site-packages/yarl/_path.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""Utilities for working with paths."""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from contextlib import suppress
|
||||
|
||||
|
||||
def normalize_path_segments(segments: Sequence[str]) -> list[str]:
    """Resolve '.' and '..' entries in a sequence of path segments.

    ``.`` segments are dropped and each ``..`` removes the segment before
    it. A ``..`` with nothing left to remove is ignored (as when resolving
    for rfc3986) instead of raising.
    """
    stack: list[str] = []

    for segment in segments:
        if segment == ".":
            continue
        if segment != "..":
            stack.append(segment)
        elif stack:
            stack.pop()

    # A trailing '.' or '..' refers to a directory, so the result must end
    # with an empty segment, which becomes a trailing '/' once joined.
    if segments and segments[-1] in (".", ".."):
        stack.append("")

    return stack
|
||||
|
||||
|
||||
def normalize_path(path: str) -> str:
    """Return ``path`` with its '.' and '..' segments resolved."""
    # Keep the "/" root element of an absolute path out of the segment list
    # and copy it to the normalised output unchanged, as per sections 5.2.4
    # and 6.2.2.3 of rfc3986.
    is_absolute = path.startswith("/")
    root = "/" if is_absolute else ""
    remainder = path[1:] if is_absolute else path

    resolved = normalize_path_segments(remainder.split("/"))
    return root + "/".join(resolved)
|
114
myenv/Lib/site-packages/yarl/_query.py
Normal file
114
myenv/Lib/site-packages/yarl/_query.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""Query string handling."""
|
||||
|
||||
import math
|
||||
from collections.abc import Iterable, Mapping, Sequence
|
||||
from typing import Any, SupportsInt, Union
|
||||
|
||||
from multidict import istr
|
||||
|
||||
from ._quoters import QUERY_PART_QUOTER, QUERY_QUOTER
|
||||
|
||||
# A single query value; query_var() converts it to its string form.
SimpleQuery = Union[str, SupportsInt, float]
# A query value, or a sequence of values for the same key.
QueryVariable = Union[SimpleQuery, Sequence[SimpleQuery]]
# Everything get_str_query() is annotated to accept: nothing, a raw query
# string, a mapping, or a sequence of (key, value) pairs.
Query = Union[
    None, str, Mapping[str, QueryVariable], Sequence[tuple[str, QueryVariable]]
]
|
||||
|
||||
|
||||
def query_var(v: SimpleQuery) -> str:
    """Render one query value as the string to be quoted.

    ``str`` values are returned unchanged, ``int`` and ``float`` are
    stringified, and any other non-bool object supporting ``__int__`` is
    converted through ``int()``.

    Raises ValueError for infinite or NaN floats, and TypeError for bool
    and for every other unsupported type.
    """
    kind = type(v)
    if kind is int:  # Fast path for non-subclassed int
        return str(v)
    if isinstance(v, str):
        return v
    if isinstance(v, float):
        if math.isinf(v):
            raise ValueError("float('inf') is not supported")
        if math.isnan(v):
            raise ValueError("float('nan') is not supported")
        return str(float(v))
    # bool supports __int__ but is rejected on purpose.
    if kind is bool or not isinstance(v, SupportsInt):
        raise TypeError(
            "Invalid variable type: value "
            "should be str, int or float, got {!r} "
            "of type {}".format(v, kind)
        )
    return str(int(v))
|
||||
|
||||
|
||||
def get_str_query_from_sequence_iterable(
    items: Iterable[tuple[Union[str, istr], QueryVariable]],
) -> str:
    """Build a query string from (key, value-or-values) pairs.

    Each value is either a single value or a list/tuple of values; a
    list/tuple yields one ``key=value`` item per element. Any other kind of
    sequence is treated as a single value.
    """
    quote = QUERY_PART_QUOTER
    parts = []
    for key, value in items:
        if type(value) is not str and isinstance(value, (list, tuple)):
            values = value
        else:
            values = (value,)
        for item in values:
            parts.append(
                f"{quote(key)}={quote(item if type(item) is str else query_var(item))}"
            )
    return "&".join(parts)
|
||||
|
||||
|
||||
def get_str_query_from_iterable(
    items: Iterable[tuple[Union[str, istr], SimpleQuery]],
) -> str:
    """Build a query string from an iterable of (key, value) pairs.

    Every value must be a single value; sequences are not expanded here.
    For values that may be sequences, use
    `get_str_query_from_sequence_iterable`.
    """
    quote = QUERY_PART_QUOTER
    # Kept as a list comprehension: those are inlined on CPython 3.12+ and
    # are a bit faster than a generator expression.
    encoded = [
        f"{quote(key)}={quote(value if type(value) is str else query_var(value))}"
        for key, value in items
    ]
    return "&".join(encoded)
|
||||
|
||||
|
||||
def get_str_query(*args: Any, **kwargs: Any) -> Union[str, None]:
    """Build a quoted query string from one positional query or from kwargs.

    Exactly one of the two forms must be used. Returns None when the query
    is None and ``""`` when it is otherwise falsy.

    Raises ValueError when both or neither form is supplied (or more than
    one positional argument is given), and TypeError for bytes-like objects
    and any other unsupported query type.
    """
    if (args and kwargs) or not (kwargs or len(args) == 1):
        raise ValueError("Either kwargs or single query parameter must be present")
    query: Union[str, Mapping[str, QueryVariable], None] = kwargs or args[0]

    if query is None:
        return None
    if not query:
        return ""
    if type(query) is dict:  # Fast path for the most common mapping type
        return get_str_query_from_sequence_iterable(query.items())
    if isinstance(query, str):
        return QUERY_QUOTER(query)
    if isinstance(query, Mapping):
        return get_str_query_from_sequence_iterable(query.items())
    if isinstance(query, (bytes, bytearray, memoryview)):  # type: ignore[unreachable]
        raise TypeError(
            "Invalid query type: bytes, bytearray and memoryview are forbidden"
        )
    if isinstance(query, Sequence):
        # A list of pairs may repeat a key, so its values are not expected
        # to be sequences; only mappings, which cannot have the same key
        # pointing to multiple values, get sequence values expanded.
        return get_str_query_from_iterable(query)
    raise TypeError(
        "Invalid query type: only str, mapping or "
        "sequence of (key, value) pairs is allowed"
    )
|
33
myenv/Lib/site-packages/yarl/_quoters.py
Normal file
33
myenv/Lib/site-packages/yarl/_quoters.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Quoting and unquoting utilities for URL parts."""
|
||||
|
||||
from typing import Union
|
||||
from urllib.parse import quote
|
||||
|
||||
from ._quoting import _Quoter, _Unquoter
|
||||
|
||||
# Shared quoter instances, one per URL component.
# ``safe`` characters are emitted literally, in addition to the quoter's
# built-in allowed set. ``protected`` characters are also emitted literally,
# but an existing percent-escape of one is kept escaped.
# ``requote=False`` instances do not interpret "%" as the start of an
# existing escape; the *_REQUOTER variants use the default requote=True.
# ``qs=True`` instances write a space as "+".
QUOTER = _Quoter(requote=False)
REQUOTER = _Quoter()
PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False)
PATH_REQUOTER = _Quoter(safe="@:", protected="/+")
QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False)
QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True)
# For a single key or value: "=+&;" are not protected here.
QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False)
FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False)
FRAGMENT_REQUOTER = _Quoter(safe="?/:@")

# Shared unquoter instances.
UNQUOTER = _Unquoter()
PATH_UNQUOTER = _Unquoter(unsafe="+")
PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+")
QS_UNQUOTER = _Unquoter(qs=True)
UNQUOTER_PLUS = _Unquoter(plus=True)  # to match urllib.parse.unquote_plus
|
||||
|
||||
|
||||
def human_quote(s: Union[str, None], unsafe: str) -> Union[str, None]:
    """Quote only what is needed to keep ``s`` readable and unambiguous.

    ``%`` and every character of ``unsafe`` are replaced by their ``%XX``
    escape (``%`` first, so the escapes written afterwards are not escaped
    again), then any non-printable character is percent-quoted. Printable
    non-ASCII text is left as is. ``None`` and ``""`` are returned
    unchanged.
    """
    if not s:
        return s
    escaped = s
    for special in "%" + unsafe:
        escaped = escaped.replace(special, f"%{ord(special):02X}")
    if escaped.isprintable():
        return escaped
    pieces = [char if char.isprintable() else quote(char) for char in escaped]
    return "".join(pieces)
|
19
myenv/Lib/site-packages/yarl/_quoting.py
Normal file
19
myenv/Lib/site-packages/yarl/_quoting.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import os
|
||||
import sys
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
__all__ = ("_Quoter", "_Unquoter")
|
||||
|
||||
|
||||
# Any non-empty value of YARL_NO_EXTENSIONS disables the C extension.
NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS"))  # type: bool
# The extension is never attempted on implementations other than CPython.
if sys.implementation.name != "cpython":
    NO_EXTENSIONS = True


# Type checkers always see the pure-Python implementation. At runtime the
# compiled one is preferred, falling back to pure Python if it cannot be
# imported.
if TYPE_CHECKING or NO_EXTENSIONS:
    from ._quoting_py import _Quoter, _Unquoter
else:
    try:
        from ._quoting_c import _Quoter, _Unquoter
    except ImportError:  # pragma: no cover
        from ._quoting_py import _Quoter, _Unquoter  # type: ignore[assignment]
|
BIN
myenv/Lib/site-packages/yarl/_quoting_c.cp313-win_amd64.pyd
Normal file
BIN
myenv/Lib/site-packages/yarl/_quoting_c.cp313-win_amd64.pyd
Normal file
Binary file not shown.
453
myenv/Lib/site-packages/yarl/_quoting_c.pyx
Normal file
453
myenv/Lib/site-packages/yarl/_quoting_c.pyx
Normal file
@@ -0,0 +1,453 @@
|
||||
# cython: language_level=3, freethreading_compatible=True
|
||||
|
||||
from cpython.exc cimport PyErr_NoMemory
|
||||
from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc
|
||||
from cpython.unicode cimport (
|
||||
PyUnicode_DATA,
|
||||
PyUnicode_DecodeASCII,
|
||||
PyUnicode_DecodeUTF8Stateful,
|
||||
PyUnicode_GET_LENGTH,
|
||||
PyUnicode_KIND,
|
||||
PyUnicode_READ,
|
||||
)
|
||||
from libc.stdint cimport uint8_t, uint64_t
|
||||
from libc.string cimport memcpy, memset
|
||||
|
||||
from string import ascii_letters, digits
|
||||
|
||||
|
||||
# Character classes (see RFC 3986, sections 2.2 and 2.3).
cdef str GEN_DELIMS = ":/?#[]@"
# Sub-delimiters without the ones that have a meaning in a query string.
cdef str SUB_DELIMS_WITHOUT_QS = "!$'()*,"
# The query-string sub-delimiters are "+&=;". "?" is a gen-delim (already in
# GEN_DELIMS above), so "&" is what belongs here; this also matches
# SUB_DELIMS in _quoting_py.py. In this file only RESERVED derives from it.
cdef str SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + '+&=;'
cdef str RESERVED = GEN_DELIMS + SUB_DELIMS
cdef str UNRESERVED = ascii_letters + digits + '-._~'
# Characters that never need quoting; used to build the lookup tables below.
cdef str ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS
# Characters that are additionally allowed when NOT quoting a query string.
cdef str QS = '+&=;'

# Size of the initial output buffer and of each later growth step.
DEF BUF_SIZE = 8 * 1024  # 8KiB
|
||||
|
||||
# Return the uppercase hexadecimal digit character for ``v``.
# Only meaningful for 0 <= v <= 15; callers pass a single nibble.
cdef inline Py_UCS4 _to_hex(uint8_t v) noexcept:
    if v < 10:
        return <Py_UCS4>(v+0x30)  # ord('0') == 0x30
    else:
        return <Py_UCS4>(v+0x41-10)  # ord('A') == 0x41
|
||||
|
||||
|
||||
# Return the numeric value (0-15) of the hexadecimal digit ``v``, accepting
# both upper- and lowercase letters, or -1 if ``v`` is not a hex digit.
cdef inline int _from_hex(Py_UCS4 v) noexcept:
    if '0' <= v <= '9':
        return <int>(v) - 0x30  # ord('0') == 0x30
    elif 'A' <= v <= 'F':
        return <int>(v) - 0x41 + 10  # ord('A') == 0x41
    elif 'a' <= v <= 'f':
        return <int>(v) - 0x61 + 10  # ord('a') == 0x61
    else:
        return -1
|
||||
|
||||
|
||||
# Return non-zero if ``v`` is a lowercase hexadecimal letter ('a'..'f').
# Digits are not "lowercase" for this purpose.
cdef inline int _is_lower_hex(Py_UCS4 v) noexcept:
    return 'a' <= v <= 'f'
|
||||
|
||||
|
||||
# Combine the two hex digits of a percent-escape into the byte value they
# encode (0-255). Returns -1 if either character is not a hex digit.
cdef inline long _restore_ch(Py_UCS4 d1, Py_UCS4 d2):
    cdef int digit1 = _from_hex(d1)
    if digit1 < 0:
        return -1
    cdef int digit2 = _from_hex(d2)
    if digit2 < 0:
        return -1
    return digit1 << 4 | digit2
|
||||
|
||||
|
||||
# Bitmaps with one bit per ASCII code point (16 bytes * 8 bits = 128).
# A set bit means the character does not need quoting.
cdef uint8_t ALLOWED_TABLE[16]
cdef uint8_t ALLOWED_NOTQS_TABLE[16]
|
||||
|
||||
|
||||
# Test the bit for code point ``ch`` in a bitmap: byte ``ch >> 3``, bit
# ``ch & 7``. Performs no bounds check; every caller in this file ensures
# ``ch < 128`` first, which fits the 16-byte tables.
cdef inline bint bit_at(uint8_t array[], uint64_t ch) noexcept:
    return array[ch >> 3] & (1 << (ch & 7))
|
||||
|
||||
|
||||
# Set the bit for code point ``ch`` in a bitmap (same layout as bit_at).
# Performs no bounds check; callers must keep ``ch`` within the table.
cdef inline void set_bit(uint8_t array[], uint64_t ch) noexcept:
    array[ch >> 3] |= (1 << (ch & 7))
|
||||
|
||||
|
||||
# Build both bitmaps once at import time.
memset(ALLOWED_TABLE, 0, sizeof(ALLOWED_TABLE))
memset(ALLOWED_NOTQS_TABLE, 0, sizeof(ALLOWED_NOTQS_TABLE))

for i in range(128):
    # ALLOWED characters are safe in both tables.
    if chr(i) in ALLOWED:
        set_bit(ALLOWED_TABLE, i)
        set_bit(ALLOWED_NOTQS_TABLE, i)
    # The query-string delimiters are safe only in the non-qs table.
    if chr(i) in QS:
        set_bit(ALLOWED_NOTQS_TABLE, i)
|
||||
|
||||
# ----------------- writer ---------------------------
|
||||
|
||||
# Growable byte buffer the quoter writes its output into.
cdef struct Writer:
    char *buf                 # current output buffer
    bint heap_allocated_buf   # true once buf was obtained from PyMem_Malloc
    Py_ssize_t size           # capacity of buf in bytes
    Py_ssize_t pos            # number of bytes written so far
    bint changed              # true if any write was flagged as a change
|
||||
|
||||
|
||||
# Point ``writer`` at the caller-provided buffer ``buf`` and reset its
# state. ``buf`` must hold at least BUF_SIZE bytes and is not freed by
# _release_writer (heap_allocated_buf starts out false).
cdef inline void _init_writer(Writer* writer, char* buf):
    writer.buf = buf
    writer.heap_allocated_buf = False
    writer.size = BUF_SIZE
    writer.pos = 0
    writer.changed = 0
|
||||
|
||||
|
||||
# Free the writer's buffer, but only if _write_char moved it to the heap;
# the initial caller-provided buffer is left alone.
cdef inline void _release_writer(Writer* writer):
    if writer.heap_allocated_buf:
        PyMem_Free(writer.buf)
|
||||
|
||||
|
||||
# Append the single byte ``ch`` to the writer, growing the buffer by
# BUF_SIZE when it is full, and OR ``changed`` into writer.changed.
# Returns 0 on success, or -1 with MemoryError set if allocation fails.
cdef inline int _write_char(Writer* writer, Py_UCS4 ch, bint changed):
    cdef char * buf
    cdef Py_ssize_t size

    if writer.pos == writer.size:
        # reallocate
        size = writer.size + BUF_SIZE
        if not writer.heap_allocated_buf:
            # First growth: the initial buffer was supplied by the caller,
            # so allocate a heap buffer and copy the existing bytes across.
            buf = <char*>PyMem_Malloc(size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
            memcpy(buf, writer.buf, writer.size)
            writer.heap_allocated_buf = True
        else:
            buf = <char*>PyMem_Realloc(writer.buf, size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
        writer.buf = buf
        writer.size = size
    # Narrowing cast: callers only pass values that fit in one byte.
    writer.buf[writer.pos] = <char>ch
    writer.pos += 1
    writer.changed |= changed
    return 0
|
||||
|
||||
|
||||
# Write byte ``ch`` as a percent-escape: '%' followed by two uppercase hex
# digits. ``changed`` is forwarded to every underlying write.
# Returns 0 on success, -1 if a write failed.
cdef inline int _write_pct(Writer* writer, uint8_t ch, bint changed):
    if _write_char(writer, '%', changed) < 0:
        return -1
    if _write_char(writer, _to_hex(<uint8_t>ch >> 4), changed) < 0:
        return -1
    return _write_char(writer, _to_hex(<uint8_t>ch & 0x0f), changed)
|
||||
|
||||
|
||||
# Percent-encode ``symbol`` as the escapes of its UTF-8 bytes (1 to 4
# bytes depending on the code point). Every write is flagged as a change.
# Surrogate code points and values above 0x10FFFF produce no output and
# report success. Returns 0 on success, -1 if a write failed.
cdef inline int _write_utf8(Writer* writer, Py_UCS4 symbol):
    cdef uint64_t utf = <uint64_t> symbol

    if utf < 0x80:
        # 1 byte: 0xxxxxxx
        return _write_pct(writer, <uint8_t>utf, True)
    elif utf < 0x800:
        # 2 bytes: 110xxxxx 10xxxxxx
        if _write_pct(writer, <uint8_t>(0xc0 | (utf >> 6)), True) < 0:
            return -1
        return _write_pct(writer,  <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif 0xD800 <= utf <= 0xDFFF:
        # surrogate pair, ignored
        return 0
    elif utf < 0x10000:
        # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if _write_pct(writer, <uint8_t>(0xe0 | (utf >> 12)), True) < 0:
            return -1
        if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif utf > 0x10FFFF:
        # symbol is too large
        return 0
    else:
        # 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        if _write_pct(writer,  <uint8_t>(0xf0 | (utf >> 18)), True) < 0:
            return -1
        if _write_pct(writer,  <uint8_t>(0x80 | ((utf >> 12) & 0x3f)),
                      True) < 0:
            return -1
        if _write_pct(writer,  <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
|
||||
|
||||
|
||||
# --------------------- end writer --------------------------
|
||||
|
||||
|
||||
cdef class _Quoter:
    # Callable that percent-encodes a str.
    #
    # safe:      extra ASCII characters that are written literally.
    # protected: ASCII characters that are written literally, but whose
    #            existing percent-escapes are kept escaped when requoting.
    # qs:        query-string mode; a space is written as '+', and the
    #            "+&=;" characters are not safe unless listed explicitly.
    # requote:   when true, valid "%XX" sequences in the input are decoded
    #            and re-emitted in canonical form instead of having their
    #            '%' escaped.
    cdef bint _qs
    cdef bint _requote

    # 128-bit bitmaps indexed by ASCII code point (see bit_at / set_bit).
    cdef uint8_t _safe_table[16]
    cdef uint8_t _protected_table[16]

    def __init__(
            self, *, str safe='', str protected='', bint qs=False, bint requote=True,
    ):
        cdef Py_UCS4 ch

        self._qs = qs
        self._requote = requote

        # Start from the module-level table matching the mode. The non-qs
        # table additionally allows the "+&=;" characters.
        if not self._qs:
            memcpy(self._safe_table,
                   ALLOWED_NOTQS_TABLE,
                   sizeof(self._safe_table))
        else:
            memcpy(self._safe_table,
                   ALLOWED_TABLE,
                   sizeof(self._safe_table))
        for ch in safe:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            set_bit(self._safe_table, ch)

        memset(self._protected_table, 0, sizeof(self._protected_table))
        for ch in protected:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            # Protected characters are safe as literals too.
            set_bit(self._safe_table, ch)
            set_bit(self._protected_table, ch)

    def __call__(self, val):
        # Quote ``val``. None passes through; str subclasses are converted
        # to exact str first; anything else raises TypeError.
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        return self._do_quote_or_skip(<str>val)

    cdef str _do_quote_or_skip(self, str val):
        # Return ``val`` itself when no character needs attention;
        # otherwise run the full quoting pass.
        cdef char[BUF_SIZE] buffer
        cdef Py_UCS4 ch
        cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val)
        cdef Py_ssize_t idx = length
        cdef bint must_quote = 0
        cdef Writer writer
        cdef int kind = PyUnicode_KIND(val)
        cdef const void *data = PyUnicode_DATA(val)

        # If everything in the string is in the safe
        # table and all ASCII, we can skip quoting
        # (the scan runs from the end of the string towards the start).
        while idx:
            idx -= 1
            ch = PyUnicode_READ(kind, data, idx)
            if ch >= 128 or not bit_at(self._safe_table, ch):
                must_quote = 1
                break

        if not must_quote:
            return val

        # The writer starts on the local ``buffer`` array and moves to the
        # heap only if the output outgrows it; the ``finally`` frees any
        # heap buffer on every exit path.
        _init_writer(&writer, &buffer[0])
        try:
            return self._do_quote(<str>val, length, kind, data, &writer)
        finally:
            _release_writer(&writer)

    cdef str _do_quote(
        self,
        str val,
        Py_ssize_t length,
        int kind,
        const void *data,
        Writer *writer
    ):
        # Walk ``val`` once, writing the quoted form into ``writer``.
        # Returns ``val`` itself if no write was flagged as a change.
        # A failed write has already set the Python error; the bare
        # ``raise`` statements below propagate it.
        cdef Py_UCS4 ch
        cdef long chl
        cdef int changed
        cdef Py_ssize_t idx = 0

        while idx < length:
            ch = PyUnicode_READ(kind, data, idx)
            idx += 1
            # A '%' followed by at least two more characters may be an
            # existing escape (only considered when requoting).
            if ch == '%' and self._requote and idx <= length - 2:
                chl = _restore_ch(
                    PyUnicode_READ(kind, data, idx),
                    PyUnicode_READ(kind, data, idx + 1)
                )
                if chl != -1:
                    ch = <Py_UCS4>chl
                    idx += 2
                    if ch < 128:
                        # Protected characters stay escaped.
                        if bit_at(self._protected_table, ch):
                            if _write_pct(writer, ch, True) < 0:
                                raise
                            continue

                        # Safe characters are written decoded.
                        if bit_at(self._safe_table, ch):
                            if _write_char(writer, ch, True) < 0:
                                raise
                            continue

                    # Any other escape is kept, with uppercase hex digits;
                    # it counts as a change only if the input used
                    # lowercase hex letters.
                    changed = (_is_lower_hex(PyUnicode_READ(kind, data, idx - 2)) or
                               _is_lower_hex(PyUnicode_READ(kind, data, idx - 1)))
                    if _write_pct(writer, ch, changed) < 0:
                        raise
                    continue
                else:
                    # Not a valid escape: treat the '%' as a plain character.
                    ch = '%'

            if self._write(writer, ch) < 0:
                raise

        if not writer.changed:
            return val
        else:
            return PyUnicode_DecodeASCII(writer.buf, writer.pos, "strict")

    cdef inline int _write(self, Writer *writer, Py_UCS4 ch):
        # Write one unescaped input character: ' ' becomes '+' in qs mode,
        # safe ASCII is copied as-is, everything else is percent-encoded as
        # UTF-8. Returns 0 on success, -1 on failure.
        if self._qs:
            if ch == ' ':
                return _write_char(writer, '+', True)

        if ch < 128 and bit_at(self._safe_table, ch):
            return _write_char(writer, ch, False)

        return _write_utf8(writer, ch)
|
||||
|
||||
|
||||
cdef class _Unquoter:
    # Callable that decodes percent-escapes in a str.
    #
    # ignore: characters that stay percent-encoded when an escape decodes
    #         to one of them.
    # unsafe: characters that stay percent-encoded when an escape decodes
    #         to one of them, and that are percent-encoded when they appear
    #         literally in the input.
    # qs:     query-string mode; '+' becomes a space and escapes decoding
    #         to one of "+=&;" are kept encoded.
    # plus:   also turn '+' into a space (without the other qs behaviour).
    cdef str _ignore
    cdef bint _has_ignore
    cdef str _unsafe
    cdef bytes _unsafe_bytes
    cdef Py_ssize_t _unsafe_bytes_len
    # Raw pointer obtained from _unsafe_bytes, used by _is_char_unsafe.
    cdef const unsigned char * _unsafe_bytes_char
    cdef bint _qs
    cdef bint _plus  # to match urllib.parse.unquote_plus
    cdef _Quoter _quoter
    cdef _Quoter _qs_quoter

    def __init__(self, *, ignore="", unsafe="", qs=False, plus=False):
        self._ignore = ignore
        self._has_ignore = bool(self._ignore)
        self._unsafe = unsafe
        # unsafe must contain ASCII characters only: the encode() call
        # below raises UnicodeEncodeError for anything above 127.
        self._unsafe_bytes = self._unsafe.encode('ascii')
        self._unsafe_bytes_len = len(self._unsafe_bytes)
        self._unsafe_bytes_char = self._unsafe_bytes
        self._qs = qs
        self._plus = plus
        # Used to re-encode decoded characters that must stay escaped.
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    def __call__(self, val):
        # Unquote ``val``. None passes through; str subclasses are
        # converted to exact str first; anything else raises TypeError.
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        return self._do_unquote(<str>val)

    cdef str _do_unquote(self, str val):
        # Single pass over ``val``. Consecutive escapes are collected in
        # ``buffer`` until they form a complete UTF-8 sequence, which is
        # then emitted decoded (or re-quoted if it must stay escaped).
        # Escapes that do not form valid UTF-8 are copied through verbatim.
        cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val)
        if length == 0:
            return val

        cdef list ret = []
        cdef char buffer[4]          # pending bytes of one UTF-8 sequence
        cdef Py_ssize_t buflen = 0   # number of pending bytes in buffer
        cdef Py_ssize_t consumed
        cdef str unquoted
        cdef Py_UCS4 ch = 0
        cdef long chl = 0
        cdef Py_ssize_t idx = 0
        cdef Py_ssize_t start_pct
        cdef int kind = PyUnicode_KIND(val)
        cdef const void *data = PyUnicode_DATA(val)
        cdef bint changed = 0
        while idx < length:
            ch = PyUnicode_READ(kind, data, idx)
            idx += 1
            if ch == '%' and idx <= length - 2:
                # Flagged as changed even if the escape turns out to be
                # invalid; the output is then rebuilt from the pieces.
                changed = 1
                chl = _restore_ch(
                    PyUnicode_READ(kind, data, idx),
                    PyUnicode_READ(kind, data, idx + 1)
                )
                if chl != -1:
                    ch = <Py_UCS4>chl
                    idx += 2
                    assert buflen < 4
                    buffer[buflen] = ch
                    buflen += 1
                    try:
                        # Stateful decode: an incomplete but so-far-valid
                        # sequence returns '' with consumed == 0.
                        unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                NULL, &consumed)
                    except UnicodeDecodeError:
                        # The pending bytes plus this one are not valid
                        # UTF-8: emit the earlier escapes verbatim and
                        # retry with the current byte alone.
                        start_pct = idx - buflen * 3
                        buffer[0] = ch
                        buflen = 1
                        ret.append(val[start_pct : idx - 3])
                        try:
                            unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                    NULL, &consumed)
                        except UnicodeDecodeError:
                            # The current byte cannot start a sequence
                            # either: emit its escape verbatim as well.
                            buflen = 0
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        # Sequence still incomplete; wait for more escapes.
                        assert consumed == 0
                        continue
                    assert consumed == buflen
                    buflen = 0
                    if self._qs and unquoted in '+=&;':
                        ret.append(self._qs_quoter(unquoted))
                    elif (
                            (self._unsafe_bytes_len and unquoted in self._unsafe) or
                            (self._has_ignore and unquoted in self._ignore)
                    ):
                        ret.append(self._quoter(unquoted))
                    else:
                        ret.append(unquoted)
                    continue
                else:
                    # Not a valid escape: handle the '%' as a plain character.
                    ch = '%'

            # A plain character ends any incomplete escape sequence; emit
            # the pending escapes verbatim.
            if buflen:
                start_pct = idx - 1 - buflen * 3
                ret.append(val[start_pct : idx - 1])
                buflen = 0

            if ch == '+':
                # '+' is kept unless qs/plus decoding applies and '+' is
                # not listed as unsafe.
                if (
                    (not self._qs and not self._plus) or
                    (self._unsafe_bytes_len and self._is_char_unsafe(ch))
                ):
                    ret.append('+')
                else:
                    changed = 1
                    ret.append(' ')
                continue

            # A literal unsafe character is replaced by its escape.
            if self._unsafe_bytes_len and self._is_char_unsafe(ch):
                changed = 1
                ret.append('%')
                h = hex(ord(ch)).upper()[2:]
                for ch in h:
                    ret.append(ch)
                continue

            ret.append(ch)

        if not changed:
            return val

        # Input ended in the middle of a multi-byte sequence: emit the
        # trailing escapes verbatim.
        if buflen:
            ret.append(val[length - buflen * 3 : length])

        return ''.join(ret)

    cdef inline bint _is_char_unsafe(self, Py_UCS4 ch):
        # Linear scan of the unsafe bytes for ``ch``.
        for i in range(self._unsafe_bytes_len):
            if ch == self._unsafe_bytes_char[i]:
                return True
        return False
|
213
myenv/Lib/site-packages/yarl/_quoting_py.py
Normal file
213
myenv/Lib/site-packages/yarl/_quoting_py.py
Normal file
@@ -0,0 +1,213 @@
|
||||
import codecs
|
||||
import re
|
||||
from string import ascii_letters, ascii_lowercase, digits
|
||||
from typing import Union, cast, overload
|
||||
|
||||
# ASCII lowercase letters as bytes; used to uppercase the hex digits of
# escapes while requoting.
BASCII_LOWERCASE = ascii_lowercase.encode("ascii")
# Every canonical (uppercase) percent-escape, b"%00" .. b"%FF".
BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)}
# Character classes (see RFC 3986, sections 2.2 and 2.3).
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS_WITHOUT_QS = "!$'()*,"
SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + "+&=;"
RESERVED = GEN_DELIMS + SUB_DELIMS
UNRESERVED = ascii_letters + digits + "-._~"
# Characters _Quoter never needs to escape.
ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS

# Coarse pre-check on two already-uppercased bytes; it also accepts G-Z,
# which _Quoter then rejects when int(..., base=16) raises ValueError.
_IS_HEX = re.compile(b"[A-Z0-9][A-Z0-9]")
_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]")

utf8_decoder = codecs.getincrementaldecoder("utf-8")
|
||||
|
||||
|
||||
class _Quoter:
    """Callable that percent-encodes a string (pure-Python implementation).

    safe: extra characters that are emitted literally.
    protected: characters that are emitted literally, but whose existing
        percent-escapes are kept escaped when requoting.
    qs: query-string mode; a space is emitted as ``+`` and the ``+&=;``
        characters are not safe unless listed explicitly.
    requote: when true, ``%`` starts an existing escape that is decoded
        and re-emitted in canonical form; when false, ``%`` is treated
        like any other character.
    """

    def __init__(
        self,
        *,
        safe: str = "",
        protected: str = "",
        qs: bool = False,
        requote: bool = True,
    ) -> None:
        self._safe = safe
        self._protected = protected
        self._qs = qs
        self._requote = requote

    @overload
    def __call__(self, val: str) -> str: ...
    @overload
    def __call__(self, val: None) -> None: ...
    def __call__(self, val: Union[str, None]) -> Union[str, None]:
        """Return the quoted form of ``val``.

        ``None`` is passed through and ``""`` yields ``""``. The original
        object is returned when quoting changes nothing.

        Raises TypeError if ``val`` is neither ``None`` nor a ``str``.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        # Work on UTF-8 bytes; characters that cannot be encoded are
        # dropped by errors="ignore".
        bval = val.encode("utf8", errors="ignore")
        ret = bytearray()
        # Bytes of the percent-escape currently being read ("%" + up to
        # two hex digits); empty when not inside an escape.
        pct = bytearray()
        safe = self._safe
        safe += ALLOWED
        if not self._qs:
            safe += "+&=;"
        safe += self._protected
        bsafe = safe.encode("ascii")
        idx = 0
        while idx < len(bval):
            ch = bval[idx]
            idx += 1

            if pct:
                if ch in BASCII_LOWERCASE:
                    ch = ch - 32  # convert to uppercase
                pct.append(ch)
                if len(pct) == 3:  # pragma: no branch   # peephole optimizer
                    buf = pct[1:]
                    if not _IS_HEX.match(buf):
                        # Not an escape: emit the "%" escaped and rewind so
                        # the two following bytes are processed normally.
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue
                    try:
                        unquoted = chr(int(pct[1:].decode("ascii"), base=16))
                    except ValueError:
                        # Passed the coarse regex but is not hex (G-Z).
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue

                    if unquoted in self._protected:
                        # Protected characters stay escaped.
                        ret.extend(pct)
                    elif unquoted in safe:
                        # Safe characters are emitted decoded.
                        ret.append(ord(unquoted))
                    else:
                        # Anything else keeps its (now uppercase) escape.
                        ret.extend(pct)
                    pct.clear()

                # special case, if we have only one char after "%"
                elif len(pct) == 2 and idx == len(bval):
                    ret.extend(b"%25")
                    pct.clear()
                    idx -= 1

                continue

            elif ch == ord("%") and self._requote:
                pct.clear()
                pct.append(ch)

                # special case if "%" is last char
                if idx == len(bval):
                    ret.extend(b"%25")

                continue

            if self._qs and ch == ord(" "):
                ret.append(ord("+"))
                continue
            if ch in bsafe:
                ret.append(ch)
                continue

            ret.extend((f"%{ch:02X}").encode("ascii"))

        ret2 = ret.decode("ascii")
        # Hand back the caller's own object when nothing changed.
        if ret2 == val:
            return val
        return ret2
|
||||
|
||||
|
||||
class _Unquoter:
    """Pure-Python percent-decoder for a URL component.

    ``%XX`` escapes are decoded as UTF-8; escapes that do not form valid
    UTF-8 are copied through unchanged.

    Args:
        ignore: decoded characters found here are passed back through a
            default ``_Quoter`` instead of being emitted raw.
        unsafe: decoded characters found here are treated like *ignore*;
            in addition, literal occurrences in the input are
            percent-encoded, and a literal ``+`` listed here is kept as
            ``+``.
        qs: query-string mode. A literal ``+`` becomes a space and decoded
            ``+``, ``=``, ``&``, ``;`` are re-quoted with ``_Quoter(qs=True)``.
        plus: turn a literal ``+`` into a space (to match
            ``urllib.parse.unquote_plus``).
    """

    def __init__(
        self,
        *,
        ignore: str = "",
        unsafe: str = "",
        qs: bool = False,
        plus: bool = False,
    ) -> None:
        self._ignore = ignore
        self._unsafe = unsafe
        self._qs = qs
        self._plus = plus  # to match urllib.parse.unquote_plus
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    @overload
    def __call__(self, val: str) -> str: ...
    @overload
    def __call__(self, val: None) -> None: ...
    def __call__(self, val: Union[str, None]) -> Union[str, None]:
        """Return *val* with percent-escapes decoded; ``None`` passes through.

        Raises:
            TypeError: if *val* is neither ``None`` nor a ``str``.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        # One incremental decoder per call: a multi-byte UTF-8 sequence is
        # spread over several consecutive "%XX" escapes.
        decoder = cast(codecs.BufferedIncrementalDecoder, utf8_decoder())
        ret = []
        idx = 0
        while idx < len(val):
            ch = val[idx]
            idx += 1
            if ch == "%" and idx <= len(val) - 2:
                pct = val[idx : idx + 2]
                if _IS_HEX_STR.fullmatch(pct):
                    b = bytes([int(pct, base=16)])
                    idx += 2
                    try:
                        unquoted = decoder.decode(b)
                    except UnicodeDecodeError:
                        # The buffered bytes plus this one are not valid
                        # UTF-8: emit the escapes of the buffered bytes
                        # verbatim, then retry this byte on its own.
                        start_pct = idx - 3 - len(decoder.buffer) * 3
                        ret.append(val[start_pct : idx - 3])
                        decoder.reset()
                        try:
                            unquoted = decoder.decode(b)
                        except UnicodeDecodeError:
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        # Sequence still incomplete; wait for more escapes.
                        continue
                    if self._qs and unquoted in "+=&;":
                        to_add = self._qs_quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    elif unquoted in self._unsafe or unquoted in self._ignore:
                        to_add = self._quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    else:
                        ret.append(unquoted)
                    continue

            # An ordinary character interrupts a pending multi-byte
            # sequence: emit its escapes verbatim and start over.
            if decoder.buffer:
                start_pct = idx - 1 - len(decoder.buffer) * 3
                ret.append(val[start_pct : idx - 1])
                decoder.reset()

            if ch == "+":
                if (not self._qs and not self._plus) or ch in self._unsafe:
                    ret.append("+")
                else:
                    ret.append(" ")
                continue

            if ch in self._unsafe:
                # Zero-padded so that code points below 0x10 still yield a
                # well-formed two-digit escape (e.g. "\n" -> "%0A").
                ret.append(f"%{ord(ch):02X}")
                continue

            ret.append(ch)

        # Input ended in the middle of a multi-byte sequence: keep the
        # trailing escapes as they were written.
        if decoder.buffer:
            ret.append(val[-len(decoder.buffer) * 3 :])

        ret2 = "".join(ret)
        # Hand back the caller's own object when nothing had to change.
        if ret2 == val:
            return val
        return ret2
|
1604
myenv/Lib/site-packages/yarl/_url.py
Normal file
1604
myenv/Lib/site-packages/yarl/_url.py
Normal file
File diff suppressed because it is too large
Load Diff
1
myenv/Lib/site-packages/yarl/py.typed
Normal file
1
myenv/Lib/site-packages/yarl/py.typed
Normal file
@@ -0,0 +1 @@
|
||||
# Placeholder
|
Reference in New Issue
Block a user