diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index cc188710c1..07296c0a50 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -106,5 +106,6 @@ function(add_benchmark name) endfunction() add_benchmark(locale_classic src/locale_classic.cpp) +add_benchmark(path_lexically_normal src/path_lexically_normal.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) add_benchmark(std_copy src/std_copy.cpp) diff --git a/benchmarks/src/path_lexically_normal.cpp b/benchmarks/src/path_lexically_normal.cpp new file mode 100644 index 0000000000..48c842de31 --- /dev/null +++ b/benchmarks/src/path_lexically_normal.cpp @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include + +namespace { + void BM_lexically_normal(benchmark::State& state) { + using namespace std::literals; + static constexpr std::wstring_view args[5]{ + LR"(X:DriveRelative)"sv, + LR"(\\server\\\share)"sv, + LR"(STL/.github/workflows/../..)"sv, + LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv, + LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv, + }; + + const auto index = state.range(0); + const std::filesystem::path p(args[index]); + for (auto _ : state) { + benchmark::DoNotOptimize(p.lexically_normal()); + } + } +} // namespace + +BENCHMARK(BM_lexically_normal)->DenseRange(0, 4, 1); + +BENCHMARK_MAIN(); diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 30a92a9a82..7aa48219e3 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -16,7 +16,6 @@ _EMIT_STL_WARNING(STL4038, "The contents of are available only with #include #include #include -#include #include #include #include @@ -1204,80 +1203,91 @@ namespace filesystem { _STD replace(_Normalized.begin(), _Normalized.end(), L'/', L'\\'); // "3. Replace each directory-separator with a preferred-separator. - // [ Note: The generic pathname grammar (29.11.7.1) defines directory-separator + // [ Note 4: The generic pathname grammar defines directory-separator // as one or more slashes and preferred-separators. -end note ]" - list _Lst; // Empty wstring_view means directory-separator - // that will be normalized to a preferred-separator. - // Non-empty wstring_view means filename. - for (auto _Next = _Root_name_end; _Next != _Last;) { - if (_Is_slash(*_Next)) { - if (_Lst.empty() || !_Lst.back().empty()) { - // collapse one or more slashes and preferred-separators to one empty wstring_view - _Lst.emplace_back(); - } - - ++_Next; - } else { - const auto _Filename_end = _STD find_if(_Next + 1, _Last, _Is_slash); - _Lst.emplace_back(_Next, static_cast(_Filename_end - _Next)); - _Next = _Filename_end; + vector _Vec; // Empty wstring_view means directory-separator + // that will be normalized to a preferred-separator. + // Non-empty wstring_view means filename. + _Vec.reserve(13); // avoid frequent re-allocations + bool _Has_root_directory = false; // true: there is a slash right after root-name. + auto _Ptr = _Root_name_end; + if (_Ptr != _Last && _Is_slash(*_Ptr)) { + _Has_root_directory = true; + _Normalized += preferred_separator; + ++_Ptr; + while (_Ptr != _Last && _Is_slash(*_Ptr)) { + ++_Ptr; } } - - // "4. Remove each dot filename and any immediately following directory-separator." - for (auto _Next = _Lst.begin(); _Next != _Lst.end();) { - if (*_Next == _Dot) { - _Next = _Lst.erase(_Next); // erase dot filename - - if (_Next != _Lst.end()) { - _Next = _Lst.erase(_Next); // erase immediately following directory-separator + // _Vec will start with a filename (if not empty). + while (_Ptr != _Last) { + if (_Is_slash(*_Ptr)) { + if (_Vec.empty() || !_Vec.back().empty()) { + // collapse one or more slashes and preferred-separators to one empty wstring_view + _Vec.emplace_back(); } + ++_Ptr; } else { - ++_Next; + const auto _Filename_end = _STD find_if(_Ptr + 1, _Last, _Is_slash); + _Vec.emplace_back(_Ptr, static_cast(_Filename_end - _Ptr)); + _Ptr = _Filename_end; } } + // "4. Remove each dot filename and any immediately following directory-separator." // "5. As long as any appear, remove a non-dot-dot filename immediately followed by a // directory-separator and a dot-dot filename, along with any immediately following directory-separator." - for (auto _Next = _Lst.begin(); _Next != _Lst.end();) { - auto _Prev = _Next; - - ++_Next; // If we aren't going to erase, keep advancing. - // If we're going to erase, _Next now points past the dot-dot filename. - - if (*_Prev == _Dot_dot && _Prev != _Lst.begin() && --_Prev != _Lst.begin() && *--_Prev != _Dot_dot) { - if (_Next != _Lst.end()) { // dot-dot filename has an immediately following directory-separator - ++_Next; - } - - _Lst.erase(_Prev, _Next); // _Next remains valid - } - } - // "6. If there is a root-directory, remove all dot-dot filenames // and any directory-separators immediately following them. - // [ Note: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]" - if (!_Lst.empty() && _Lst.front().empty()) { // we have a root-directory - for (auto _Next = _Lst.begin(); _Next != _Lst.end();) { - if (*_Next == _Dot_dot) { - _Next = _Lst.erase(_Next); // erase dot-dot filename - - if (_Next != _Lst.end()) { - _Next = _Lst.erase(_Next); // erase immediately following directory-separator + // [ Note 5: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]" + auto _New_end = _Vec.begin(); + for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) { + const auto _Elem = *_Pos++; // _Pos points at a filename here; it points at end or a separator after ++. + if (_Elem == _Dot) { + // ignore dot (and following separator). + if (_Pos == _Vec.end()) { + break; + } + } else if (_Elem != _Dot_dot) { + // append normal filename and separator. + *_New_end++ = _Elem; // _New_end points at end or a separator after ++. + if (_Pos == _Vec.end()) { + break; + } + ++_New_end; // _New_end(<=_Pos) doesn't point at end; accept separator. + } else { // _Dot_dot + if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) { + // _New_end == _Vec.begin() + 2n here. + // remove preceding non-dot-dot filename and separator. + _New_end -= 2; + if (_Pos == _Vec.end()) { + break; + } + } else if (!_Has_root_directory) { + // due to 6, append dot-dot and separator only if !_Has_root_directory. + *_New_end++ = _Dot_dot; + if (_Pos == _Vec.end()) { + break; } + ++_New_end; } else { - ++_Next; + // ignore dot-dot and separator. + if (_Pos == _Vec.end()) { + break; + } } } + ++_Pos; // _Pos points at a separator here; it points at end or a filename after ++. } + _Vec.erase(_New_end, _Vec.end()); // "7. If the last filename is dot-dot, remove any trailing directory-separator." - if (_Lst.size() >= 2 && _Lst.back().empty() && *(_STD prev(_Lst.end(), 2)) == _Dot_dot) { - _Lst.pop_back(); + if (_Vec.size() >= 2 && _Vec.back().empty() && _Vec.end()[-2] == _Dot_dot) { + _Vec.pop_back(); } - // Build up _Normalized by flattening _Lst. - for (const auto& _Elem : _Lst) { + // Build up _Normalized by flattening _Vec. + for (const auto& _Elem : _Vec) { if (_Elem.empty()) { _Normalized += preferred_separator; } else { @@ -1291,9 +1301,7 @@ namespace filesystem { } // "The result of normalization is a path in normal form, which is said to be normalized." - path _Result(_STD move(_Normalized)); - - return _Result; + return path(_STD move(_Normalized)); } _NODISCARD inline path lexically_relative(const path& _Base) const;