Skip to content

Commit 5de8a17

Browse files
committed
refactor: some optimization in searching files
1 parent f116bd8 commit 5de8a17

File tree

1 file changed

+52
-32
lines changed

1 file changed

+52
-32
lines changed

packages/core/src/robotcode/core/utils/glob_path.py

Lines changed: 52 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,19 @@
66
import os
77
import re
88
from pathlib import Path, PurePath
9-
from typing import Any, Iterable, Iterator, Sequence, Tuple, Union, cast
9+
from typing import Any, Iterable, Iterator, Optional, Sequence, Union, cast
1010

1111

12-
def _glob_pattern_to_re(pattern: str) -> Tuple[str, bool]:
12+
def _glob_pattern_to_re(pattern: str) -> str:
1313
result = "(?ms)^"
1414

1515
in_group = False
16-
only_dirs = False
1716

1817
i = 0
1918
while i < len(pattern):
2019
c = pattern[i]
2120
if c in "\\/$^+.()=!|":
22-
if c == "/" and i == len(pattern) - 1:
23-
only_dirs = True
24-
else:
25-
result += "\\" + c
21+
result += "\\" + c
2622
elif c == "?":
2723
result += "."
2824
elif c in "[]":
@@ -64,26 +60,40 @@ def _glob_pattern_to_re(pattern: str) -> Tuple[str, bool]:
6460

6561
result += "$"
6662

67-
return result, only_dirs
63+
return result
6864

6965

7066
@functools.lru_cache(maxsize=256)
71-
def _compile_glob_pattern(pattern: str) -> Tuple[re.Pattern[str], bool]:
72-
re_pattern, only_dirs = _glob_pattern_to_re(pattern)
73-
return re.compile(re_pattern), only_dirs
67+
def _compile_glob_pattern(pattern: str) -> re.Pattern[str]:
68+
return re.compile(_glob_pattern_to_re(pattern))
7469

7570

7671
class Pattern:
7772
def __init__(self, pattern: str) -> None:
78-
self.pattern = pattern
79-
self._re_pattern, self.only_dirs = _compile_glob_pattern(pattern)
73+
pattern = pattern.strip()
74+
75+
self.only_dirs = pattern.endswith("/")
76+
77+
path = PurePath(pattern)
78+
if path.is_absolute():
79+
self.pattern = path.relative_to(path.anchor).as_posix()
80+
else:
81+
self.pattern = path.as_posix()
82+
83+
if "*" in self.pattern or "?" in self.pattern or "[" in self.pattern or "{" in self.pattern:
84+
self.re_pattern: Optional[re.Pattern[str]] = _compile_glob_pattern(self.pattern)
85+
else:
86+
self.re_pattern = None
8087

8188
def matches(self, path: Union[PurePath, str, os.PathLike[str]]) -> bool:
8289
if isinstance(path, PurePath):
8390
path = path.as_posix()
8491
else:
8592
path = str(os.fspath(path))
86-
return self._re_pattern.fullmatch(path) is not None
93+
if self.re_pattern is None:
94+
return path == self.pattern
95+
96+
return self.re_pattern.fullmatch(path) is not None
8797

8898
def __str__(self) -> str:
8999
return self.pattern
@@ -104,56 +114,66 @@ def _is_hidden(entry: os.DirEntry[str]) -> bool:
104114
return True
105115

106116
if os.name == "nt" and (
107-
entry.stat().st_file_attributes & 2 != 0 or entry.name.startswith("$") # type: ignore[attr-defined]
117+
(not entry.is_symlink() and entry.stat().st_file_attributes & 2 != 0) # type: ignore[attr-defined]
118+
or entry.name.startswith("$")
108119
):
109120
return True
110121

111122
return False
112123

113124

114125
def iter_files(
115-
path: Union[Path, str, os.PathLike[str]],
126+
path: Union[PurePath, str, os.PathLike[str]],
116127
patterns: Union[Sequence[Union[Pattern, str]], Pattern, str, None] = None,
117128
ignore_patterns: Union[Sequence[Union[Pattern, str]], Pattern, str, None] = None,
118129
*,
119130
include_hidden: bool = False,
120131
absolute: bool = False,
121-
_base_path: Union[Path, str, os.PathLike[str], None] = None,
122132
) -> Iterator[Path]:
123-
if not isinstance(path, Path):
124-
path = Path(path or ".")
125-
126-
if _base_path is None:
127-
_base_path = path
128-
else:
129-
if not isinstance(_base_path, Path):
130-
path = Path(_base_path)
133+
if not isinstance(path, PurePath):
134+
path = PurePath(path or ".")
131135

132136
if patterns is not None and isinstance(patterns, (str, Pattern)):
133137
patterns = [patterns]
134-
if patterns is not None:
135-
patterns = [p if isinstance(p, Pattern) else Pattern(p) for p in patterns]
136138

137139
if ignore_patterns is not None and isinstance(ignore_patterns, (str, Pattern)):
138140
ignore_patterns = [ignore_patterns]
139-
if ignore_patterns is not None:
140-
ignore_patterns = [p if isinstance(p, Pattern) else Pattern(p) for p in ignore_patterns]
141141

142+
yield from _iter_files_recursive_re(
143+
path=path,
144+
patterns=[] if patterns is None else [p if isinstance(p, Pattern) else Pattern(p) for p in patterns],
145+
ignore_patterns=[]
146+
if ignore_patterns is None
147+
else [p if isinstance(p, Pattern) else Pattern(p) for p in ignore_patterns],
148+
include_hidden=include_hidden,
149+
absolute=absolute,
150+
_base_path=path,
151+
)
152+
153+
154+
def _iter_files_recursive_re(
155+
path: PurePath,
156+
patterns: Sequence[Pattern],
157+
ignore_patterns: Sequence[Pattern],
158+
include_hidden: bool,
159+
absolute: bool,
160+
_base_path: PurePath,
161+
) -> Iterator[Path]:
142162
try:
143163
with os.scandir(path) as it:
144164
for f in it:
145165
if not include_hidden and _is_hidden(f):
146166
continue
147167

148-
relative_path = path / f.name
168+
relative_path = (path / f.name).relative_to(_base_path)
149169

150170
if not ignore_patterns or not any(
151171
p.matches(relative_path) and (not p.only_dirs or p.only_dirs and f.is_dir())
152172
for p in cast(Iterable[Pattern], ignore_patterns)
153173
):
154174
if f.is_dir():
155-
yield from iter_files(
156-
f,
175+
yield from _iter_files_recursive_re(
176+
PurePath(f),
157177
patterns,
158178
ignore_patterns,
159179
include_hidden=include_hidden,

0 commit comments

Comments
 (0)