microsoft · StephanTLavavej · Jul 14, 2023 · Jul 5, 2023 · Jul 5, 2023 · Jul 5, 2023
@@ -106,5 +106,6 @@ function(add_benchmark name)
 endfunction()
 
 add_benchmark(locale_classic src/locale_classic.cpp)
+add_benchmark(path_lexically_normal src/path_lexically_normal.cpp) # times std::filesystem::path::lexically_normal()
 add_benchmark(random_integer_generation src/random_integer_generation.cpp)
 add_benchmark(std_copy src/std_copy.cpp)
@@ -0,0 +1,29 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <benchmark/benchmark.h>
+#include <filesystem>
+#include <string_view>
+
+namespace {
+    // Times std::filesystem::path::lexically_normal() on one fixed sample path.
+    // The benchmark argument state.range(0) is used as the index into `samples`,
+    // so it must be a valid index for that array.
+    void BM_lexically_normal(benchmark::State& state) {
+        using namespace std::literals;
+
+        // Inputs of varying length and shape: root-names, repeated separators,
+        // dot and dot-dot elements, and mixed slash styles.
+        static constexpr std::wstring_view samples[5]{
+            LR"(X:DriveRelative)"sv,
+            LR"(\\server\\\share)"sv,
+            LR"(STL/.github/workflows/../..)"sv,
+            LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv,
+            LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv,
+        };
+
+        // Build the path before the loop so that constructing it is not part of the measured iterations.
+        const std::filesystem::path input(samples[state.range(0)]);
+
+        for (auto _ : state) {
+            // Hand the result to the benchmark library so the call is not optimized away.
+            benchmark::DoNotOptimize(input.lexically_normal());
+        }
+    }
+} // namespace
+
+BENCHMARK(BM_lexically_normal)->DenseRange(0, 4, 1); // arguments 0 through 4: one run per entry of the five-element sample array
+
+BENCHMARK_MAIN(); // benchmark-library macro supplying the program's main()
@@ -16,7 +16,6 @@ _EMIT_STL_WARNING(STL4038, "The contents of <filesystem> are available only with
 #include <chrono>
 #include <cwchar>
 #include <iomanip>
-#include <list>
 #include <locale>
 #include <memory>
 #include <system_error>
@@ -1204,80 +1203,91 @@ namespace filesystem {
             _STD replace(_Normalized.begin(), _Normalized.end(), L'/', L'\\');
 
             // "3. Replace each directory-separator with a preferred-separator.
-            // [ Note: The generic pathname grammar (29.11.7.1) defines directory-separator
+            // [ Note 4: The generic pathname grammar defines directory-separator
             // as one or more slashes and preferred-separators. -end note ]"
-            list<wstring_view> _Lst; // Empty wstring_view means directory-separator
-                                     // that will be normalized to a preferred-separator.
-                                     // Non-empty wstring_view means filename.
-            for (auto _Next = _Root_name_end; _Next != _Last;) {
-                if (_Is_slash(*_Next)) {
-                    if (_Lst.empty() || !_Lst.back().empty()) {
-                        // collapse one or more slashes and preferred-separators to one empty wstring_view
-                        _Lst.emplace_back();
-                    }
-
-                    ++_Next;
-                } else {
-                    const auto _Filename_end = _STD find_if(_Next + 1, _Last, _Is_slash);
-                    _Lst.emplace_back(_Next, static_cast<size_t>(_Filename_end - _Next));
-                    _Next = _Filename_end;
+            vector<wstring_view> _Vec; // Empty wstring_view means directory-separator
+                                       // that will be normalized to a preferred-separator.
+                                       // Non-empty wstring_view means filename.
+            _Vec.reserve(13); // avoid frequent re-allocations
+            bool _Has_root_directory = false; // true: there is a slash right after root-name.
+            auto _Ptr                = _Root_name_end;
+            if (_Ptr != _Last && _Is_slash(*_Ptr)) {
+                _Has_root_directory = true;
+                _Normalized += preferred_separator;
+                ++_Ptr;
+                while (_Ptr != _Last && _Is_slash(*_Ptr)) {
+                    ++_Ptr;
                 }
             }
-
-            // "4. Remove each dot filename and any immediately following directory-separator."
-            for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
-                if (*_Next == _Dot) {
-                    _Next = _Lst.erase(_Next); // erase dot filename
-
-                    if (_Next != _Lst.end()) {
-                        _Next = _Lst.erase(_Next); // erase immediately following directory-separator
+            // _Vec will start with a filename (if not empty).
+            while (_Ptr != _Last) {
+                if (_Is_slash(*_Ptr)) {
+                    if (_Vec.empty() || !_Vec.back().empty()) {
+                        // collapse one or more slashes and preferred-separators to one empty wstring_view
+                        _Vec.emplace_back();
                     }
+                    ++_Ptr;
                 } else {
-                    ++_Next;
+                    const auto _Filename_end = _STD find_if(_Ptr + 1, _Last, _Is_slash);
+                    _Vec.emplace_back(_Ptr, static_cast<size_t>(_Filename_end - _Ptr));
+                    _Ptr = _Filename_end;
                 }
             }
 
+            // "4. Remove each dot filename and any immediately following directory-separator."
             // "5. As long as any appear, remove a non-dot-dot filename immediately followed by a
             // directory-separator and a dot-dot filename, along with any immediately following directory-separator."
-            for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
-                auto _Prev = _Next;
-
-                ++_Next; // If we aren't going to erase, keep advancing.
-                         // If we're going to erase, _Next now points past the dot-dot filename.
-
-                if (*_Prev == _Dot_dot && _Prev != _Lst.begin() && --_Prev != _Lst.begin() && *--_Prev != _Dot_dot) {
-                    if (_Next != _Lst.end()) { // dot-dot filename has an immediately following directory-separator
-                        ++_Next;
-                    }
-
-                    _Lst.erase(_Prev, _Next); // _Next remains valid
-                }
-            }
-
             // "6. If there is a root-directory, remove all dot-dot filenames
             // and any directory-separators immediately following them.
-            // [ Note: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]"
-            if (!_Lst.empty() && _Lst.front().empty()) { // we have a root-directory
-                for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
-                    if (*_Next == _Dot_dot) {
-                        _Next = _Lst.erase(_Next); // erase dot-dot filename
-
-                        if (_Next != _Lst.end()) {
-                            _Next = _Lst.erase(_Next); // erase immediately following directory-separator
+            // [ Note 5: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]"
+            auto _New_end = _Vec.begin();
+            for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) {
+                const auto _Elem = *_Pos++; // _Pos points at a filename here; it points at end or a separator after ++.
+                if (_Elem == _Dot) {
+                    // ignore dot (and following separator).
+                    if (_Pos == _Vec.end()) {
+                        break;
+                    }
+                } else if (_Elem != _Dot_dot) {
+                    // append normal filename and separator.
+                    *_New_end++ = _Elem; // _New_end points at end or a separator after ++.
+                    if (_Pos == _Vec.end()) {
+                        break;
+                    }
+                    ++_New_end; // _New_end(<=_Pos) doesn't point at end; accept separator.
+                } else { // _Dot_dot
+                    if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) {
+                        // _New_end == _Vec.begin() + 2n here.
+                        // remove preceding non-dot-dot filename and separator.
+                        _New_end -= 2;
+                        if (_Pos == _Vec.end()) {
+                            break;
+                        }
+                    } else if (!_Has_root_directory) {
+                        // due to 6, append dot-dot and separator only if !_Has_root_directory.
+                        *_New_end++ = _Dot_dot;
+                        if (_Pos == _Vec.end()) {
+                            break;
                         }
+                        ++_New_end;
                     } else {
-                        ++_Next;
+                        // ignore dot-dot and separator.
+                        if (_Pos == _Vec.end()) {
+                            break;
+                        }
                     }
                 }
+                ++_Pos; // _Pos points at a separator here; it points at end or a filename after ++.
             }
+            _Vec.erase(_New_end, _Vec.end());
 
             // "7. If the last filename is dot-dot, remove any trailing directory-separator."
-            if (_Lst.size() >= 2 && _Lst.back().empty() && *(_STD prev(_Lst.end(), 2)) == _Dot_dot) {
-                _Lst.pop_back();
+            if (_Vec.size() >= 2 && _Vec.back().empty() && _Vec.end()[-2] == _Dot_dot) {
+                _Vec.pop_back();
             }
 
-            // Build up _Normalized by flattening _Lst.
-            for (const auto& _Elem : _Lst) {
+            // Build up _Normalized by flattening _Vec.
+            for (const auto& _Elem : _Vec) {
                 if (_Elem.empty()) {
                     _Normalized += preferred_separator;
                 } else {
@@ -1291,9 +1301,7 @@ namespace filesystem {
             }
 
             // "The result of normalization is a path in normal form, which is said to be normalized."
-            path _Result(_STD move(_Normalized));
-
-            return _Result;
+            return path(_STD move(_Normalized));
         }
 
         _NODISCARD inline path lexically_relative(const path& _Base) const;