From 799099e895ad78592a36db57e0144259c3ca8840 Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 05:01:55 +0800 Subject: [PATCH 01/19] optimize lexically_normal and drop inclusion --- stl/inc/filesystem | 117 +++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 58 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 30a92a9a82..367b3b9c0a 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -16,7 +16,6 @@ _EMIT_STL_WARNING(STL4038, "The contents of are available only with #include #include #include -#include #include #include #include @@ -1206,78 +1205,82 @@ namespace filesystem { // "3. Replace each directory-separator with a preferred-separator. // [ Note: The generic pathname grammar (29.11.7.1) defines directory-separator // as one or more slashes and preferred-separators. -end note ]" - list _Lst; // Empty wstring_view means directory-separator - // that will be normalized to a preferred-separator. - // Non-empty wstring_view means filename. - for (auto _Next = _Root_name_end; _Next != _Last;) { - if (_Is_slash(*_Next)) { - if (_Lst.empty() || !_Lst.back().empty()) { + vector _Vec; // Empty wstring_view means directory-separator + // that will be normalized to a preferred-separator. + // Non-empty wstring_view means filename. + _Vec.reserve(13); // avoid frequent re-allocations + bool _Has_root_directory = false; // true: there is a slash right after root name. + auto _Pos = _Root_name_end; + if (_Pos != _Last && _Is_slash(*_Pos)) { + _Has_root_directory = true; + _Normalized += preferred_separator; + ++_Pos; + while (_Pos != _Last && _Is_slash(*_Pos)) { ++_Pos; } + } + // _Vec will start with a filename (if not empty). + while (_Pos != _Last) { + if (_Is_slash(*_Pos)) { + if (_Vec.empty() || !_Vec.back().empty()) { // collapse one or more slashes and preferred-separators to one empty wstring_view - _Lst.emplace_back(); + _Vec.emplace_back(); } - - ++_Next; - } else { - const auto _Filename_end = _STD find_if(_Next + 1, _Last, _Is_slash); - _Lst.emplace_back(_Next, static_cast(_Filename_end - _Next)); - _Next = _Filename_end; + ++_Pos; } - } - - // "4. Remove each dot filename and any immediately following directory-separator." - for (auto _Next = _Lst.begin(); _Next != _Lst.end();) { - if (*_Next == _Dot) { - _Next = _Lst.erase(_Next); // erase dot filename - - if (_Next != _Lst.end()) { - _Next = _Lst.erase(_Next); // erase immediately following directory-separator - } - } else { - ++_Next; + else { + const auto _Filename_end = _STD find_if(_Pos + 1, _Last, _Is_slash); + _Vec.emplace_back(_Pos, static_cast(_Filename_end - _Pos)); + _Pos = _Filename_end; } } + // "4. Remove each dot filename and any immediately following directory-separator." // "5. As long as any appear, remove a non-dot-dot filename immediately followed by a // directory-separator and a dot-dot filename, along with any immediately following directory-separator." - for (auto _Next = _Lst.begin(); _Next != _Lst.end();) { - auto _Prev = _Next; - - ++_Next; // If we aren't going to erase, keep advancing. - // If we're going to erase, _Next now points past the dot-dot filename. - - if (*_Prev == _Dot_dot && _Prev != _Lst.begin() && --_Prev != _Lst.begin() && *--_Prev != _Dot_dot) { - if (_Next != _Lst.end()) { // dot-dot filename has an immediately following directory-separator - ++_Next; - } - - _Lst.erase(_Prev, _Next); // _Next remains valid - } - } - // "6. If there is a root-directory, remove all dot-dot filenames // and any directory-separators immediately following them. // [ Note: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]" - if (!_Lst.empty() && _Lst.front().empty()) { // we have a root-directory - for (auto _Next = _Lst.begin(); _Next != _Lst.end();) { - if (*_Next == _Dot_dot) { - _Next = _Lst.erase(_Next); // erase dot-dot filename - - if (_Next != _Lst.end()) { - _Next = _Lst.erase(_Next); // erase immediately following directory-separator - } - } else { - ++_Next; + auto _New_end = _Vec.begin(); + for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) { + auto _Elem = *_Pos++; // _Pos points at end or a separator after ++. + if (_Elem == _Dot) { + // ignore dot (and following separator). + if (_Pos == _Vec.end()) { break; } + } + else if (_Elem == _Dot_dot) { + if (_New_end != _Vec.begin() && *prev(_New_end, 2) != _Dot_dot) { + // note: _New_end == _Vec.begin() + 2n + // remove preceding non-dot-dot filename and separator. + _New_end -= 2; + if (_Pos == _Vec.end()) { break; } + } + else if (!_Has_root_directory) { + // due to 6, append dot-dot only when !_Has_root_directory. + *_New_end++ = _Dot_dot; + if (_Pos == _Vec.end()) { break; } + *_New_end++ = {}; // as _Pos != _Vec.end(), it points at a separator; add it. + } + else { + // ignore dot-dot (and following separator). + if (_Pos == _Vec.end()) { break; } } } + else { + // append normal filename and separator. + *_New_end++ = _Elem; + if (_Pos == _Vec.end()) { break; } + *_New_end++ = {}; // add separator. + } + ++_Pos; // _Pos points at a separator here in all cases; skip it. } + _Vec.erase(_New_end, _Vec.end()); // "7. If the last filename is dot-dot, remove any trailing directory-separator." - if (_Lst.size() >= 2 && _Lst.back().empty() && *(_STD prev(_Lst.end(), 2)) == _Dot_dot) { - _Lst.pop_back(); + if (_Vec.size() >= 2 && _Vec.back().empty() && *(_STD prev(_Vec.end(), 2)) == _Dot_dot) { + _Vec.pop_back(); } - // Build up _Normalized by flattening _Lst. - for (const auto& _Elem : _Lst) { + // Build up _Normalized by flattening _Vec. + for (const auto& _Elem : _Vec) { if (_Elem.empty()) { _Normalized += preferred_separator; } else { @@ -1291,9 +1294,7 @@ namespace filesystem { } // "The result of normalization is a path in normal form, which is said to be normalized." - path _Result(_STD move(_Normalized)); - - return _Result; + return path(_STD move(_Normalized)); } _NODISCARD inline path lexically_relative(const path& _Base) const; From eaa143f6b59edfe0d5dfd6f145fa8623239b97ec Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 05:04:57 +0800 Subject: [PATCH 02/19] nit --- stl/inc/filesystem | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 367b3b9c0a..46cc75909a 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1254,7 +1254,7 @@ namespace filesystem { if (_Pos == _Vec.end()) { break; } } else if (!_Has_root_directory) { - // due to 6, append dot-dot only when !_Has_root_directory. + // due to 6, append dot-dot only if !_Has_root_directory. *_New_end++ = _Dot_dot; if (_Pos == _Vec.end()) { break; } *_New_end++ = {}; // as _Pos != _Vec.end(), it points at a separator; add it. From 06939580c73f4ae2444a1b75b1c7f238cc4d406a Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 05:44:52 +0800 Subject: [PATCH 03/19] formatting --- stl/inc/filesystem | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 46cc75909a..705c19e6a9 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1210,12 +1210,14 @@ namespace filesystem { // Non-empty wstring_view means filename. _Vec.reserve(13); // avoid frequent re-allocations bool _Has_root_directory = false; // true: there is a slash right after root name. - auto _Pos = _Root_name_end; + auto _Pos = _Root_name_end; if (_Pos != _Last && _Is_slash(*_Pos)) { _Has_root_directory = true; _Normalized += preferred_separator; ++_Pos; - while (_Pos != _Last && _Is_slash(*_Pos)) { ++_Pos; } + while (_Pos != _Last && _Is_slash(*_Pos)) { + ++_Pos; + } } // _Vec will start with a filename (if not empty). while (_Pos != _Last) { @@ -1225,8 +1227,7 @@ namespace filesystem { _Vec.emplace_back(); } ++_Pos; - } - else { + } else { const auto _Filename_end = _STD find_if(_Pos + 1, _Last, _Is_slash); _Vec.emplace_back(_Pos, static_cast(_Filename_end - _Pos)); _Pos = _Filename_end; @@ -1244,30 +1245,36 @@ namespace filesystem { auto _Elem = *_Pos++; // _Pos points at end or a separator after ++. if (_Elem == _Dot) { // ignore dot (and following separator). - if (_Pos == _Vec.end()) { break; } - } - else if (_Elem == _Dot_dot) { + if (_Pos == _Vec.end()) { + break; + } + } else if (_Elem == _Dot_dot) { if (_New_end != _Vec.begin() && *prev(_New_end, 2) != _Dot_dot) { // note: _New_end == _Vec.begin() + 2n // remove preceding non-dot-dot filename and separator. _New_end -= 2; - if (_Pos == _Vec.end()) { break; } - } - else if (!_Has_root_directory) { + if (_Pos == _Vec.end()) { + break; + } + } else if (!_Has_root_directory) { // due to 6, append dot-dot only if !_Has_root_directory. *_New_end++ = _Dot_dot; - if (_Pos == _Vec.end()) { break; } + if (_Pos == _Vec.end()) { + break; + } *_New_end++ = {}; // as _Pos != _Vec.end(), it points at a separator; add it. - } - else { + } else { // ignore dot-dot (and following separator). - if (_Pos == _Vec.end()) { break; } + if (_Pos == _Vec.end()) { + break; + } } - } - else { + } else { // append normal filename and separator. *_New_end++ = _Elem; - if (_Pos == _Vec.end()) { break; } + if (_Pos == _Vec.end()) { + break; + } *_New_end++ = {}; // add separator. } ++_Pos; // _Pos points at a separator here in all cases; skip it. From db751a5e4231deda058434cfedefe2c5b8320d82 Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 06:46:49 +0800 Subject: [PATCH 04/19] renaming outerscope '_Pos' --- stl/inc/filesystem | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 705c19e6a9..e0a72a5fcf 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1210,27 +1210,27 @@ namespace filesystem { // Non-empty wstring_view means filename. _Vec.reserve(13); // avoid frequent re-allocations bool _Has_root_directory = false; // true: there is a slash right after root name. - auto _Pos = _Root_name_end; - if (_Pos != _Last && _Is_slash(*_Pos)) { + auto _Ptr = _Root_name_end; + if (_Ptr != _Last && _Is_slash(*_Ptr)) { _Has_root_directory = true; _Normalized += preferred_separator; - ++_Pos; - while (_Pos != _Last && _Is_slash(*_Pos)) { - ++_Pos; + ++_Ptr; + while (_Ptr != _Last && _Is_slash(*_Ptr)) { + ++_Ptr; } } // _Vec will start with a filename (if not empty). - while (_Pos != _Last) { - if (_Is_slash(*_Pos)) { + while (_Ptr != _Last) { + if (_Is_slash(*_Ptr)) { if (_Vec.empty() || !_Vec.back().empty()) { // collapse one or more slashes and preferred-separators to one empty wstring_view _Vec.emplace_back(); } - ++_Pos; + ++_Ptr; } else { - const auto _Filename_end = _STD find_if(_Pos + 1, _Last, _Is_slash); - _Vec.emplace_back(_Pos, static_cast(_Filename_end - _Pos)); - _Pos = _Filename_end; + const auto _Filename_end = _STD find_if(_Ptr + 1, _Last, _Is_slash); + _Vec.emplace_back(_Ptr, static_cast(_Filename_end - _Ptr)); + _Ptr = _Filename_end; } } From 62640097ce71e58500baaa1e42ca1c5227c30ae9 Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 07:06:30 +0800 Subject: [PATCH 05/19] add benchmark --- benchmarks/src/path_lexically_normal.cpp | 48 ++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 benchmarks/src/path_lexically_normal.cpp diff --git a/benchmarks/src/path_lexically_normal.cpp b/benchmarks/src/path_lexically_normal.cpp new file mode 100644 index 0000000000..93d1b63c99 --- /dev/null +++ b/benchmarks/src/path_lexically_normal.cpp @@ -0,0 +1,48 @@ +#include "benchmark/benchmark.h" +#include + +using namespace std; +namespace fs = std::filesystem; + +namespace { + void BM_lexically_normal_0(benchmark::State& state) { + const fs::path& p(LR"(X:DriveRelative)"sv); + for (auto _ : state) { + benchmark::DoNotOptimize(p.lexically_normal()); + } + } + void BM_lexically_normal_1(benchmark::State& state) { + const fs::path& p(LR"(\\server\\\share)"sv); + for (auto _ : state) { + benchmark::DoNotOptimize(p.lexically_normal()); + } + } + void BM_lexically_normal_2(benchmark::State& state) { + const fs::path& p(LR"(STL/.github/workflows/../..)"sv); + for (auto _ : state) { + benchmark::DoNotOptimize(p.lexically_normal()); + } + } + void BM_lexically_normal_3(benchmark::State& state) { + const fs::path& p( + LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv); + for (auto _ : state) { + benchmark::DoNotOptimize(p.lexically_normal()); + } + } + void BM_lexically_normal_4(benchmark::State& state) { + const fs::path& p( + LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv); + for (auto _ : state) { + benchmark::DoNotOptimize(p.lexically_normal()); + } + } +} // namespace + +BENCHMARK(BM_lexically_normal_0); +BENCHMARK(BM_lexically_normal_1); +BENCHMARK(BM_lexically_normal_2); +BENCHMARK(BM_lexically_normal_3); +BENCHMARK(BM_lexically_normal_4); + +BENCHMARK_MAIN(); From 9b90aecc39c17770526ec253ab4f3554aab5dbde Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 07:14:23 +0800 Subject: [PATCH 06/19] update benchmark --- benchmarks/src/path_lexically_normal.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/src/path_lexically_normal.cpp b/benchmarks/src/path_lexically_normal.cpp index 93d1b63c99..6b6451fd0b 100644 --- a/benchmarks/src/path_lexically_normal.cpp +++ b/benchmarks/src/path_lexically_normal.cpp @@ -1,4 +1,7 @@ -#include "benchmark/benchmark.h" +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include #include using namespace std; From 37a03937112ce373ce2fa4cb08ea03082f6dd2ee Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 15:53:28 +0800 Subject: [PATCH 07/19] refactor benchmark --- benchmarks/src/path_lexically_normal.cpp | 47 ++++++------------------ 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/benchmarks/src/path_lexically_normal.cpp b/benchmarks/src/path_lexically_normal.cpp index 6b6451fd0b..5a69bc85e9 100644 --- a/benchmarks/src/path_lexically_normal.cpp +++ b/benchmarks/src/path_lexically_normal.cpp @@ -3,49 +3,24 @@ #include #include - -using namespace std; -namespace fs = std::filesystem; +#include namespace { - void BM_lexically_normal_0(benchmark::State& state) { - const fs::path& p(LR"(X:DriveRelative)"sv); - for (auto _ : state) { - benchmark::DoNotOptimize(p.lexically_normal()); - } - } - void BM_lexically_normal_1(benchmark::State& state) { - const fs::path& p(LR"(\\server\\\share)"sv); - for (auto _ : state) { - benchmark::DoNotOptimize(p.lexically_normal()); - } - } - void BM_lexically_normal_2(benchmark::State& state) { - const fs::path& p(LR"(STL/.github/workflows/../..)"sv); - for (auto _ : state) { - benchmark::DoNotOptimize(p.lexically_normal()); - } - } - void BM_lexically_normal_3(benchmark::State& state) { - const fs::path& p( - LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv); - for (auto _ : state) { - benchmark::DoNotOptimize(p.lexically_normal()); - } - } - void BM_lexically_normal_4(benchmark::State& state) { - const fs::path& p( - LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv); + void BM_lexically_normal(benchmark::State& state) { + using namespace std::literals; + static constexpr std::wstring_view args[5]{LR"(X:DriveRelative)"sv, LR"(\\server\\\share)"sv, + LR"(STL/.github/workflows/../..)"sv, + LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv, + LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv}; + + const auto index = state.range(0); + const std::filesystem::path p(args[index]); for (auto _ : state) { benchmark::DoNotOptimize(p.lexically_normal()); } } } // namespace -BENCHMARK(BM_lexically_normal_0); -BENCHMARK(BM_lexically_normal_1); -BENCHMARK(BM_lexically_normal_2); -BENCHMARK(BM_lexically_normal_3); -BENCHMARK(BM_lexically_normal_4); +BENCHMARK(BM_lexically_normal)->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4); BENCHMARK_MAIN(); From c93c0c55405e273bfd0ac0ad6db984c65965ce56 Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 15:55:43 +0800 Subject: [PATCH 08/19] apply [] Co-authored-by: A. Jiang --- stl/inc/filesystem | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index e0a72a5fcf..a3310bb7e1 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1282,7 +1282,7 @@ namespace filesystem { _Vec.erase(_New_end, _Vec.end()); // "7. If the last filename is dot-dot, remove any trailing directory-separator." - if (_Vec.size() >= 2 && _Vec.back().empty() && *(_STD prev(_Vec.end(), 2)) == _Dot_dot) { + if (_Vec.size() >= 2 && _Vec.back().empty() && _Vec.end()[-2] == _Dot_dot) { _Vec.pop_back(); } From c5ed88cd311827aa0c987cae49c0e5722cf3f332 Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 15:56:20 +0800 Subject: [PATCH 09/19] apply [] Co-authored-by: A. Jiang --- stl/inc/filesystem | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index a3310bb7e1..7d727cbca6 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1249,7 +1249,7 @@ namespace filesystem { break; } } else if (_Elem == _Dot_dot) { - if (_New_end != _Vec.begin() && *prev(_New_end, 2) != _Dot_dot) { + if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) { // note: _New_end == _Vec.begin() + 2n // remove preceding non-dot-dot filename and separator. _New_end -= 2; From f948375c6e2ac0a815dd60582bb54034212e9fbd Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Thu, 6 Jul 2023 23:02:30 +0800 Subject: [PATCH 10/19] nits --- stl/inc/filesystem | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 7d727cbca6..c3a109d438 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1242,7 +1242,7 @@ namespace filesystem { // [ Note: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]" auto _New_end = _Vec.begin(); for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) { - auto _Elem = *_Pos++; // _Pos points at end or a separator after ++. + auto _Elem = *_Pos++; // _Pos points at a filename here; it points at end or a separator after ++. if (_Elem == _Dot) { // ignore dot (and following separator). if (_Pos == _Vec.end()) { @@ -1250,21 +1250,21 @@ namespace filesystem { } } else if (_Elem == _Dot_dot) { if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) { - // note: _New_end == _Vec.begin() + 2n + // _New_end == _Vec.begin() + 2n here. // remove preceding non-dot-dot filename and separator. _New_end -= 2; if (_Pos == _Vec.end()) { break; } } else if (!_Has_root_directory) { - // due to 6, append dot-dot only if !_Has_root_directory. + // due to 6, append dot-dot and separator only if !_Has_root_directory. *_New_end++ = _Dot_dot; if (_Pos == _Vec.end()) { break; } - *_New_end++ = {}; // as _Pos != _Vec.end(), it points at a separator; add it. + *_New_end++ = {}; } else { - // ignore dot-dot (and following separator). + // ignore dot-dot and separator. if (_Pos == _Vec.end()) { break; } @@ -1275,9 +1275,9 @@ namespace filesystem { if (_Pos == _Vec.end()) { break; } - *_New_end++ = {}; // add separator. + *_New_end++ = {}; } - ++_Pos; // _Pos points at a separator here in all cases; skip it. + ++_Pos; // _Pos points at a separator here; it points at end or a filename after ++. } _Vec.erase(_New_end, _Vec.end()); From a81598eac4c08b2b9e0e122100e9e430b0a93474 Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:13:53 +0800 Subject: [PATCH 11/19] review feedback --- stl/inc/filesystem | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index c3a109d438..b3efd9f01a 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1248,7 +1248,14 @@ namespace filesystem { if (_Pos == _Vec.end()) { break; } - } else if (_Elem == _Dot_dot) { + } else if (_Elem != _Dot_dot) { + // append normal filename and separator. + *_New_end++ = _Elem; // _New_end points at end or a seperator after ++. + if (_Pos == _Vec.end()) { + break; + } + ++_New_end; // accept separator. + } else /*_Dot_dot*/ { if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) { // _New_end == _Vec.begin() + 2n here. // remove preceding non-dot-dot filename and separator. @@ -1262,20 +1269,13 @@ namespace filesystem { if (_Pos == _Vec.end()) { break; } - *_New_end++ = {}; + ++_New_end; } else { // ignore dot-dot and separator. if (_Pos == _Vec.end()) { break; } } - } else { - // append normal filename and separator. - *_New_end++ = _Elem; - if (_Pos == _Vec.end()) { - break; - } - *_New_end++ = {}; } ++_Pos; // _Pos points at a separator here; it points at end or a filename after ++. } From 74f2aa8d150b019a2b5887b48c03602db4588526 Mon Sep 17 00:00:00 2001 From: achabense <60953653+achabense@users.noreply.github.com> Date: Sat, 8 Jul 2023 00:51:41 +0800 Subject: [PATCH 12/19] restore indentation of "// that will be..." & "// Non-empty wstring_view..."; try to improve comment --- stl/inc/filesystem | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index b3efd9f01a..3221edf562 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1206,8 +1206,8 @@ namespace filesystem { // [ Note: The generic pathname grammar (29.11.7.1) defines directory-separator // as one or more slashes and preferred-separators. -end note ]" vector _Vec; // Empty wstring_view means directory-separator - // that will be normalized to a preferred-separator. - // Non-empty wstring_view means filename. + // that will be normalized to a preferred-separator. + // Non-empty wstring_view means filename. _Vec.reserve(13); // avoid frequent re-allocations bool _Has_root_directory = false; // true: there is a slash right after root name. auto _Ptr = _Root_name_end; @@ -1254,7 +1254,7 @@ namespace filesystem { if (_Pos == _Vec.end()) { break; } - ++_New_end; // accept separator. + ++_New_end; // _New_end(<=_Pos) doesn't point at end; accept separator. } else /*_Dot_dot*/ { if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) { // _New_end == _Vec.begin() + 2n here. From 9e58b1d8a57327fc12c27fa1eb4a885be79556d3 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 7 Jul 2023 14:17:46 -0700 Subject: [PATCH 13/19] Add new benchmark to benchmarks/CMakeLists.txt. --- benchmarks/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index cc188710c1..07296c0a50 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -106,5 +106,6 @@ function(add_benchmark name) endfunction() add_benchmark(locale_classic src/locale_classic.cpp) +add_benchmark(path_lexically_normal src/path_lexically_normal.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) add_benchmark(std_copy src/std_copy.cpp) From 9f89fe9eb34dd88a13c2f6f7d83cd5fd9159bb37 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 7 Jul 2023 14:18:18 -0700 Subject: [PATCH 14/19] Include `` instead of ``. --- benchmarks/src/path_lexically_normal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/path_lexically_normal.cpp b/benchmarks/src/path_lexically_normal.cpp index 5a69bc85e9..886e848fe1 100644 --- a/benchmarks/src/path_lexically_normal.cpp +++ b/benchmarks/src/path_lexically_normal.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include namespace { void BM_lexically_normal(benchmark::State& state) { From fef68836be5e35600ba06fc1758fa35bee1fab88 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 7 Jul 2023 14:18:46 -0700 Subject: [PATCH 15/19] Style: Add a trailing comma to get one path per line. --- benchmarks/src/path_lexically_normal.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/path_lexically_normal.cpp b/benchmarks/src/path_lexically_normal.cpp index 886e848fe1..d4766dda26 100644 --- a/benchmarks/src/path_lexically_normal.cpp +++ b/benchmarks/src/path_lexically_normal.cpp @@ -8,10 +8,13 @@ namespace { void BM_lexically_normal(benchmark::State& state) { using namespace std::literals; - static constexpr std::wstring_view args[5]{LR"(X:DriveRelative)"sv, LR"(\\server\\\share)"sv, + static constexpr std::wstring_view args[5]{ + LR"(X:DriveRelative)"sv, + LR"(\\server\\\share)"sv, LR"(STL/.github/workflows/../..)"sv, LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv, - LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv}; + LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv, + }; const auto index = state.range(0); const std::filesystem::path p(args[index]); From df24c8d74a763fa02725deec53278a9bdb5a7a63 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 7 Jul 2023 14:19:35 -0700 Subject: [PATCH 16/19] Update quoted Standardese. --- stl/inc/filesystem | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 3221edf562..52da2b6911 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1203,7 +1203,7 @@ namespace filesystem { _STD replace(_Normalized.begin(), _Normalized.end(), L'/', L'\\'); // "3. Replace each directory-separator with a preferred-separator. - // [ Note: The generic pathname grammar (29.11.7.1) defines directory-separator + // [ Note 4: The generic pathname grammar defines directory-separator // as one or more slashes and preferred-separators. -end note ]" vector _Vec; // Empty wstring_view means directory-separator // that will be normalized to a preferred-separator. @@ -1239,7 +1239,7 @@ namespace filesystem { // directory-separator and a dot-dot filename, along with any immediately following directory-separator." // "6. If there is a root-directory, remove all dot-dot filenames // and any directory-separators immediately following them. - // [ Note: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]" + // [ Note 5: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]" auto _New_end = _Vec.begin(); for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) { auto _Elem = *_Pos++; // _Pos points at a filename here; it points at end or a separator after ++. From dac591cc96a7d017e0f8be07357f47f71e777534 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 7 Jul 2023 14:20:18 -0700 Subject: [PATCH 17/19] Comment nitpicks. --- stl/inc/filesystem | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 52da2b6911..8ddb59ff23 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1209,7 +1209,7 @@ namespace filesystem { // that will be normalized to a preferred-separator. // Non-empty wstring_view means filename. _Vec.reserve(13); // avoid frequent re-allocations - bool _Has_root_directory = false; // true: there is a slash right after root name. + bool _Has_root_directory = false; // true: there is a slash right after root-name. auto _Ptr = _Root_name_end; if (_Ptr != _Last && _Is_slash(*_Ptr)) { _Has_root_directory = true; @@ -1250,12 +1250,12 @@ namespace filesystem { } } else if (_Elem != _Dot_dot) { // append normal filename and separator. - *_New_end++ = _Elem; // _New_end points at end or a seperator after ++. + *_New_end++ = _Elem; // _New_end points at end or a separator after ++. if (_Pos == _Vec.end()) { break; } ++_New_end; // _New_end(<=_Pos) doesn't point at end; accept separator. - } else /*_Dot_dot*/ { + } else { // _Dot_dot if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) { // _New_end == _Vec.begin() + 2n here. // remove preceding non-dot-dot filename and separator. From 753f9b64bf6104b7ec756eb7fcfb167d5c0b0598 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 7 Jul 2023 14:52:10 -0700 Subject: [PATCH 18/19] _Elem can be const. --- stl/inc/filesystem | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 8ddb59ff23..7aa48219e3 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -1242,7 +1242,7 @@ namespace filesystem { // [ Note 5: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]" auto _New_end = _Vec.begin(); for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) { - auto _Elem = *_Pos++; // _Pos points at a filename here; it points at end or a separator after ++. + const auto _Elem = *_Pos++; // _Pos points at a filename here; it points at end or a separator after ++. if (_Elem == _Dot) { // ignore dot (and following separator). if (_Pos == _Vec.end()) { From 426b64ed71c0992deba9fc42f8bbfef3bfdcb3f7 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 7 Jul 2023 15:10:00 -0700 Subject: [PATCH 19/19] Use DenseRange to avoid verbosity. --- benchmarks/src/path_lexically_normal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/path_lexically_normal.cpp b/benchmarks/src/path_lexically_normal.cpp index d4766dda26..48c842de31 100644 --- a/benchmarks/src/path_lexically_normal.cpp +++ b/benchmarks/src/path_lexically_normal.cpp @@ -24,6 +24,6 @@ namespace { } } // namespace -BENCHMARK(BM_lexically_normal)->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4); +BENCHMARK(BM_lexically_normal)->DenseRange(0, 4, 1); BENCHMARK_MAIN();