Skip to content

Make regex methods accept all buffer types as inputs #7158

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
3344316
Make `mmap.mmap` extend `bytearray`
itaisteinherz Feb 7, 2022
6039732
Extend `ReadableBuffer` instead of `bytearray`
itaisteinherz Feb 8, 2022
74aa464
Fixup
itaisteinherz Feb 8, 2022
8666c61
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 8, 2022
fdd90cd
Change `AnyStr` to include `ByteString` instead of `bytes`
itaisteinherz Feb 8, 2022
634c867
Merge branch 'feature/mmap-bytearray' of https://github.com/itaistein…
itaisteinherz Feb 8, 2022
efe5ee0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 8, 2022
6978ce6
Revert unwanted changes
itaisteinherz Feb 14, 2022
24e2a34
Finish reverting
itaisteinherz Feb 14, 2022
b9b1176
Fix PoC
itaisteinherz Feb 14, 2022
2ded7aa
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2022
eb0e953
Attempt to create custom type for regex strings
itaisteinherz Feb 14, 2022
38f5996
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2022
1a56af6
Fixup
itaisteinherz Feb 14, 2022
e26ef4e
Fix regex string typevar
itaisteinherz Feb 14, 2022
884af92
Update `Match` to use `RegexString`
itaisteinherz Feb 14, 2022
c9af022
Fix extended typevar
itaisteinherz Feb 14, 2022
6e320ae
Attempt to fix
itaisteinherz Feb 14, 2022
d3094f9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2022
d775b51
Remove unused imports
itaisteinherz Feb 14, 2022
d97f24a
Merge branch 'feature/mmap-bytearray' of https://github.com/itaistein…
itaisteinherz Feb 14, 2022
c63f691
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2022
ee5573a
Fix linting
itaisteinherz Feb 14, 2022
6d470e2
Merge branch 'feature/mmap-bytearray' of https://github.com/itaistein…
itaisteinherz Feb 14, 2022
85a5004
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2022
46af759
Ignore linting error
itaisteinherz Feb 14, 2022
d9e68ac
Fixup old changes
itaisteinherz Feb 14, 2022
0fe7ca3
Revert last changes
itaisteinherz Feb 18, 2022
c4f23f7
Add second typevar argument to `Match`
itaisteinherz Feb 19, 2022
e26ab0c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 19, 2022
a410b16
`re.match` with `mmap.mmap` PoC
itaisteinherz Feb 19, 2022
7314338
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions stdlib/_typeshed/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ WriteableBuffer = Union[bytearray, memoryview, array.array[Any], mmap.mmap, ctyp
# Same as WriteableBuffer, but also includes read-only buffer types (like bytes).
ReadableBuffer = Union[ReadOnlyBuffer, WriteableBuffer] # stable

StrOrBuffer = TypeVar("StrOrBuffer", str, ReadOnlyBuffer, WriteableBuffer) # noqa Y001

# stable
if sys.version_info >= (3, 10):
from types import NoneType as NoneType
Expand Down
85 changes: 55 additions & 30 deletions stdlib/re.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import enum
import sys
from _typeshed import StrOrBuffer
from sre_constants import error as error
from typing import Any, AnyStr, Callable, Iterator, Union, overload
from typing import Any, Callable, Iterator, Union, overload

# ----- re variables and constants -----
if sys.version_info >= (3, 7):
Expand Down Expand Up @@ -52,66 +53,90 @@ if sys.version_info < (3, 7):
_pattern_type: type

@overload
def compile(pattern: AnyStr, flags: _FlagsType = ...) -> Pattern[AnyStr]: ...
def compile(pattern: StrOrBuffer, flags: _FlagsType = ...) -> Pattern[StrOrBuffer]: ...
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re.compile actually works only on str or bytes

@overload
def compile(pattern: Pattern[AnyStr], flags: _FlagsType = ...) -> Pattern[AnyStr]: ...
def compile(pattern: Pattern[StrOrBuffer], flags: _FlagsType = ...) -> Pattern[StrOrBuffer]: ...
@overload
def search(pattern: AnyStr, string: AnyStr, flags: _FlagsType = ...) -> Match[AnyStr] | None: ...
def search(pattern: StrOrBuffer, string: StrOrBuffer, flags: _FlagsType = ...) -> Match[StrOrBuffer] | None: ...
@overload
def search(pattern: Pattern[AnyStr], string: AnyStr, flags: _FlagsType = ...) -> Match[AnyStr] | None: ...
def search(pattern: Pattern[StrOrBuffer], string: StrOrBuffer, flags: _FlagsType = ...) -> Match[StrOrBuffer] | None: ...
@overload
def match(pattern: AnyStr, string: AnyStr, flags: _FlagsType = ...) -> Match[AnyStr] | None: ...
def match(pattern: StrOrBuffer, string: StrOrBuffer, flags: _FlagsType = ...) -> Match[StrOrBuffer] | None: ...
@overload
def match(pattern: Pattern[AnyStr], string: AnyStr, flags: _FlagsType = ...) -> Match[AnyStr] | None: ...
def match(pattern: Pattern[StrOrBuffer], string: StrOrBuffer, flags: _FlagsType = ...) -> Match[StrOrBuffer] | None: ...

# New in Python 3.4
@overload
def fullmatch(pattern: AnyStr, string: AnyStr, flags: _FlagsType = ...) -> Match[AnyStr] | None: ...
def fullmatch(pattern: StrOrBuffer, string: StrOrBuffer, flags: _FlagsType = ...) -> Match[StrOrBuffer] | None: ...
@overload
def fullmatch(pattern: Pattern[AnyStr], string: AnyStr, flags: _FlagsType = ...) -> Match[AnyStr] | None: ...
def fullmatch(pattern: Pattern[StrOrBuffer], string: StrOrBuffer, flags: _FlagsType = ...) -> Match[StrOrBuffer] | None: ...
@overload
def split(pattern: AnyStr, string: AnyStr, maxsplit: int = ..., flags: _FlagsType = ...) -> list[AnyStr | Any]: ...
def split(pattern: StrOrBuffer, string: StrOrBuffer, maxsplit: int = ..., flags: _FlagsType = ...) -> list[StrOrBuffer | Any]: ...
@overload
def split(pattern: Pattern[AnyStr], string: AnyStr, maxsplit: int = ..., flags: _FlagsType = ...) -> list[AnyStr | Any]: ...
def split(
pattern: Pattern[StrOrBuffer], string: StrOrBuffer, maxsplit: int = ..., flags: _FlagsType = ...
) -> list[StrOrBuffer | Any]: ...
@overload
def findall(pattern: AnyStr, string: AnyStr, flags: _FlagsType = ...) -> list[Any]: ...
def findall(pattern: StrOrBuffer, string: StrOrBuffer, flags: _FlagsType = ...) -> list[Any]: ...
@overload
def findall(pattern: Pattern[AnyStr], string: AnyStr, flags: _FlagsType = ...) -> list[Any]: ...
def findall(pattern: Pattern[StrOrBuffer], string: StrOrBuffer, flags: _FlagsType = ...) -> list[Any]: ...

# Return an iterator yielding match objects over all non-overlapping matches
# for the RE pattern in string. The string is scanned left-to-right, and
# matches are returned in the order found. Empty matches are included in the
# result unless they touch the beginning of another match.
@overload
def finditer(pattern: AnyStr, string: AnyStr, flags: _FlagsType = ...) -> Iterator[Match[AnyStr]]: ...
def finditer(pattern: StrOrBuffer, string: StrOrBuffer, flags: _FlagsType = ...) -> Iterator[Match[StrOrBuffer]]: ...
@overload
def finditer(pattern: Pattern[AnyStr], string: AnyStr, flags: _FlagsType = ...) -> Iterator[Match[AnyStr]]: ...
def finditer(pattern: Pattern[StrOrBuffer], string: StrOrBuffer, flags: _FlagsType = ...) -> Iterator[Match[StrOrBuffer]]: ...
@overload
def sub(pattern: AnyStr, repl: AnyStr, string: AnyStr, count: int = ..., flags: _FlagsType = ...) -> AnyStr: ...
def sub(
pattern: StrOrBuffer, repl: StrOrBuffer, string: StrOrBuffer, count: int = ..., flags: _FlagsType = ...
) -> StrOrBuffer: ...
@overload
def sub(
pattern: AnyStr, repl: Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = ..., flags: _FlagsType = ...
) -> AnyStr: ...
pattern: StrOrBuffer,
repl: Callable[[Match[StrOrBuffer]], StrOrBuffer],
string: StrOrBuffer,
count: int = ...,
flags: _FlagsType = ...,
) -> StrOrBuffer: ...
@overload
def sub(pattern: Pattern[AnyStr], repl: AnyStr, string: AnyStr, count: int = ..., flags: _FlagsType = ...) -> AnyStr: ...
def sub(
pattern: Pattern[StrOrBuffer], repl: StrOrBuffer, string: StrOrBuffer, count: int = ..., flags: _FlagsType = ...
) -> StrOrBuffer: ...
@overload
def sub(
pattern: Pattern[AnyStr], repl: Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = ..., flags: _FlagsType = ...
) -> AnyStr: ...
pattern: Pattern[StrOrBuffer],
repl: Callable[[Match[StrOrBuffer]], StrOrBuffer],
string: StrOrBuffer,
count: int = ...,
flags: _FlagsType = ...,
) -> StrOrBuffer: ...
@overload
def subn(pattern: AnyStr, repl: AnyStr, string: AnyStr, count: int = ..., flags: _FlagsType = ...) -> tuple[AnyStr, int]: ...
def subn(
pattern: StrOrBuffer, repl: StrOrBuffer, string: StrOrBuffer, count: int = ..., flags: _FlagsType = ...
) -> tuple[StrOrBuffer, int]: ...
@overload
def subn(
pattern: AnyStr, repl: Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = ..., flags: _FlagsType = ...
) -> tuple[AnyStr, int]: ...
pattern: StrOrBuffer,
repl: Callable[[Match[StrOrBuffer]], StrOrBuffer],
string: StrOrBuffer,
count: int = ...,
flags: _FlagsType = ...,
) -> tuple[StrOrBuffer, int]: ...
@overload
def subn(
pattern: Pattern[AnyStr], repl: AnyStr, string: AnyStr, count: int = ..., flags: _FlagsType = ...
) -> tuple[AnyStr, int]: ...
pattern: Pattern[StrOrBuffer], repl: StrOrBuffer, string: StrOrBuffer, count: int = ..., flags: _FlagsType = ...
) -> tuple[StrOrBuffer, int]: ...
@overload
def subn(
pattern: Pattern[AnyStr], repl: Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = ..., flags: _FlagsType = ...
) -> tuple[AnyStr, int]: ...
def escape(pattern: AnyStr) -> AnyStr: ...
pattern: Pattern[StrOrBuffer],
repl: Callable[[Match[StrOrBuffer]], StrOrBuffer],
string: StrOrBuffer,
count: int = ...,
flags: _FlagsType = ...,
) -> tuple[StrOrBuffer, int]: ...
def escape(pattern: StrOrBuffer) -> StrOrBuffer: ...
def purge() -> None: ...
def template(pattern: AnyStr | Pattern[AnyStr], flags: _FlagsType = ...) -> Pattern[AnyStr]: ...
def template(pattern: StrOrBuffer | Pattern[StrOrBuffer], flags: _FlagsType = ...) -> Pattern[StrOrBuffer]: ...
66 changes: 34 additions & 32 deletions stdlib/typing.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import collections # Needed by aliases like DefaultDict, see mypy issue 2986
import sys
from _typeshed import Self, SupportsKeysAndGetItem
from _typeshed import Self, StrOrBuffer, SupportsKeysAndGetItem
from abc import ABCMeta, abstractmethod
from types import BuiltinFunctionType, CodeType, FrameType, FunctionType, MethodType, ModuleType, TracebackType
from typing_extensions import Literal as _Literal, ParamSpec as _ParamSpec, final as _final
Expand Down Expand Up @@ -587,69 +587,71 @@ class TextIO(IO[str]):
class ByteString(Sequence[int], metaclass=ABCMeta): ...

@_final
class Match(Generic[AnyStr]):
class Match(Generic[StrOrBuffer]):
pos: int
endpos: int
lastindex: int | None
lastgroup: str | None
string: AnyStr
string: StrOrBuffer

# The regular expression object whose match() or search() method produced
# this match instance.
re: Pattern[AnyStr]
def expand(self, template: AnyStr) -> AnyStr: ...
# group() returns "AnyStr" or "AnyStr | None", depending on the pattern.
re: Pattern[StrOrBuffer]
def expand(self, template: StrOrBuffer) -> StrOrBuffer: ...
# group() returns "StrOrBuffer" or "StrOrBuffer | None", depending on the pattern.
@overload
def group(self, __group: _Literal[0] = ...) -> AnyStr: ...
def group(self, __group: _Literal[0] = ...) -> StrOrBuffer: ...
@overload
def group(self, __group: str | int) -> AnyStr | Any: ...
def group(self, __group: str | int) -> StrOrBuffer | Any: ...
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't right: if you match on a memoryview, you get bytes objects from .group(), not memoryviews.

@overload
def group(self, __group1: str | int, __group2: str | int, *groups: str | int) -> tuple[AnyStr | Any, ...]: ...
# Each item of groups()'s return tuple is either "AnyStr" or
# "AnyStr | None", depending on the pattern.
def group(self, __group1: str | int, __group2: str | int, *groups: str | int) -> tuple[StrOrBuffer | Any, ...]: ...
# Each item of groups()'s return tuple is either "StrOrBuffer" or
# "StrOrBuffer | None", depending on the pattern.
@overload
def groups(self) -> tuple[AnyStr | Any, ...]: ...
def groups(self) -> tuple[StrOrBuffer | Any, ...]: ...
@overload
def groups(self, default: _T) -> tuple[AnyStr | _T, ...]: ...
# Each value in groupdict()'s return dict is either "AnyStr" or
# "AnyStr | None", depending on the pattern.
def groups(self, default: _T) -> tuple[StrOrBuffer | _T, ...]: ...
# Each value in groupdict()'s return dict is either "StrOrBuffer" or
# "StrOrBuffer | None", depending on the pattern.
@overload
def groupdict(self) -> dict[str, AnyStr | Any]: ...
def groupdict(self) -> dict[str, StrOrBuffer | Any]: ...
@overload
def groupdict(self, default: _T) -> dict[str, AnyStr | _T]: ...
def groupdict(self, default: _T) -> dict[str, StrOrBuffer | _T]: ...
def start(self, __group: int | str = ...) -> int: ...
def end(self, __group: int | str = ...) -> int: ...
def span(self, __group: int | str = ...) -> tuple[int, int]: ...
@property
def regs(self) -> tuple[tuple[int, int], ...]: ... # undocumented
# __getitem__() returns "AnyStr" or "AnyStr | None", depending on the pattern.
# __getitem__() returns "StrOrBuffer" or "StrOrBuffer | None", depending on the pattern.
@overload
def __getitem__(self, __key: _Literal[0]) -> AnyStr: ...
def __getitem__(self, __key: _Literal[0]) -> StrOrBuffer: ...
@overload
def __getitem__(self, __key: int | str) -> AnyStr | Any: ...
def __getitem__(self, __key: int | str) -> StrOrBuffer | Any: ...
if sys.version_info >= (3, 9):
def __class_getitem__(cls, item: Any) -> GenericAlias: ...

@_final
class Pattern(Generic[AnyStr]):
class Pattern(Generic[StrOrBuffer]):
flags: int
groupindex: Mapping[str, int]
groups: int
pattern: AnyStr
def search(self, string: AnyStr, pos: int = ..., endpos: int = ...) -> Match[AnyStr] | None: ...
def match(self, string: AnyStr, pos: int = ..., endpos: int = ...) -> Match[AnyStr] | None: ...
def fullmatch(self, string: AnyStr, pos: int = ..., endpos: int = ...) -> Match[AnyStr] | None: ...
def split(self, string: AnyStr, maxsplit: int = ...) -> list[AnyStr | Any]: ...
def findall(self, string: AnyStr, pos: int = ..., endpos: int = ...) -> list[Any]: ...
def finditer(self, string: AnyStr, pos: int = ..., endpos: int = ...) -> Iterator[Match[AnyStr]]: ...
pattern: StrOrBuffer
def search(self, string: StrOrBuffer, pos: int = ..., endpos: int = ...) -> Match[StrOrBuffer] | None: ...
def match(self, string: StrOrBuffer, pos: int = ..., endpos: int = ...) -> Match[StrOrBuffer] | None: ...
def fullmatch(self, string: StrOrBuffer, pos: int = ..., endpos: int = ...) -> Match[StrOrBuffer] | None: ...
def split(self, string: StrOrBuffer, maxsplit: int = ...) -> list[StrOrBuffer | Any]: ...
def findall(self, string: StrOrBuffer, pos: int = ..., endpos: int = ...) -> list[Any]: ...
def finditer(self, string: StrOrBuffer, pos: int = ..., endpos: int = ...) -> Iterator[Match[StrOrBuffer]]: ...
@overload
def sub(self, repl: AnyStr, string: AnyStr, count: int = ...) -> AnyStr: ...
def sub(self, repl: StrOrBuffer, string: StrOrBuffer, count: int = ...) -> StrOrBuffer: ...
@overload
def sub(self, repl: Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = ...) -> AnyStr: ...
def sub(self, repl: Callable[[Match[StrOrBuffer]], StrOrBuffer], string: StrOrBuffer, count: int = ...) -> StrOrBuffer: ...
@overload
def subn(self, repl: AnyStr, string: AnyStr, count: int = ...) -> tuple[AnyStr, int]: ...
def subn(self, repl: StrOrBuffer, string: StrOrBuffer, count: int = ...) -> tuple[StrOrBuffer, int]: ...
@overload
def subn(self, repl: Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = ...) -> tuple[AnyStr, int]: ...
def subn(
self, repl: Callable[[Match[StrOrBuffer]], StrOrBuffer], string: StrOrBuffer, count: int = ...
) -> tuple[StrOrBuffer, int]: ...
if sys.version_info >= (3, 9):
def __class_getitem__(cls, item: Any) -> GenericAlias: ...

Expand Down