Skip to content

Commit

Permalink
enhancements for <filesystem> (#3850)
Browse files Browse the repository at this point in the history
Co-authored-by: A. Jiang <de34@live.cn>
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
  • Loading branch information
3 people committed Jul 14, 2023
1 parent 85eb9b6 commit f6401b1
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 57 deletions.
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,6 @@ function(add_benchmark name)
endfunction()

add_benchmark(locale_classic src/locale_classic.cpp)
add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
add_benchmark(std_copy src/std_copy.cpp)
29 changes: 29 additions & 0 deletions benchmarks/src/path_lexically_normal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <benchmark/benchmark.h>
#include <filesystem>
#include <string_view>

namespace {
void BM_lexically_normal(benchmark::State& state) {
using namespace std::literals;
static constexpr std::wstring_view args[5]{
LR"(X:DriveRelative)"sv,
LR"(\\server\\\share)"sv,
LR"(STL/.github/workflows/../..)"sv,
LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv,
LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv,
};

const auto index = state.range(0);
const std::filesystem::path p(args[index]);
for (auto _ : state) {
benchmark::DoNotOptimize(p.lexically_normal());
}
}
} // namespace

BENCHMARK(BM_lexically_normal)->DenseRange(0, 4, 1);

BENCHMARK_MAIN();
122 changes: 65 additions & 57 deletions stl/inc/filesystem
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ _EMIT_STL_WARNING(STL4038, "The contents of <filesystem> are available only with
#include <chrono>
#include <cwchar>
#include <iomanip>
#include <list>
#include <locale>
#include <memory>
#include <system_error>
Expand Down Expand Up @@ -1204,80 +1203,91 @@ namespace filesystem {
_STD replace(_Normalized.begin(), _Normalized.end(), L'/', L'\\');

// "3. Replace each directory-separator with a preferred-separator.
// [ Note: The generic pathname grammar (29.11.7.1) defines directory-separator
// [ Note 4: The generic pathname grammar defines directory-separator
// as one or more slashes and preferred-separators. -end note ]"
list<wstring_view> _Lst; // Empty wstring_view means directory-separator
// that will be normalized to a preferred-separator.
// Non-empty wstring_view means filename.
for (auto _Next = _Root_name_end; _Next != _Last;) {
if (_Is_slash(*_Next)) {
if (_Lst.empty() || !_Lst.back().empty()) {
// collapse one or more slashes and preferred-separators to one empty wstring_view
_Lst.emplace_back();
}

++_Next;
} else {
const auto _Filename_end = _STD find_if(_Next + 1, _Last, _Is_slash);
_Lst.emplace_back(_Next, static_cast<size_t>(_Filename_end - _Next));
_Next = _Filename_end;
vector<wstring_view> _Vec; // Empty wstring_view means directory-separator
// that will be normalized to a preferred-separator.
// Non-empty wstring_view means filename.
_Vec.reserve(13); // avoid frequent re-allocations
bool _Has_root_directory = false; // true: there is a slash right after root-name.
auto _Ptr = _Root_name_end;
if (_Ptr != _Last && _Is_slash(*_Ptr)) {
_Has_root_directory = true;
_Normalized += preferred_separator;
++_Ptr;
while (_Ptr != _Last && _Is_slash(*_Ptr)) {
++_Ptr;
}
}

// "4. Remove each dot filename and any immediately following directory-separator."
for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
if (*_Next == _Dot) {
_Next = _Lst.erase(_Next); // erase dot filename

if (_Next != _Lst.end()) {
_Next = _Lst.erase(_Next); // erase immediately following directory-separator
// _Vec will start with a filename (if not empty).
while (_Ptr != _Last) {
if (_Is_slash(*_Ptr)) {
if (_Vec.empty() || !_Vec.back().empty()) {
// collapse one or more slashes and preferred-separators to one empty wstring_view
_Vec.emplace_back();
}
++_Ptr;
} else {
++_Next;
const auto _Filename_end = _STD find_if(_Ptr + 1, _Last, _Is_slash);
_Vec.emplace_back(_Ptr, static_cast<size_t>(_Filename_end - _Ptr));
_Ptr = _Filename_end;
}
}

// "4. Remove each dot filename and any immediately following directory-separator."
// "5. As long as any appear, remove a non-dot-dot filename immediately followed by a
// directory-separator and a dot-dot filename, along with any immediately following directory-separator."
for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
auto _Prev = _Next;

++_Next; // If we aren't going to erase, keep advancing.
// If we're going to erase, _Next now points past the dot-dot filename.

if (*_Prev == _Dot_dot && _Prev != _Lst.begin() && --_Prev != _Lst.begin() && *--_Prev != _Dot_dot) {
if (_Next != _Lst.end()) { // dot-dot filename has an immediately following directory-separator
++_Next;
}

_Lst.erase(_Prev, _Next); // _Next remains valid
}
}

// "6. If there is a root-directory, remove all dot-dot filenames
// and any directory-separators immediately following them.
// [ Note: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]"
if (!_Lst.empty() && _Lst.front().empty()) { // we have a root-directory
for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
if (*_Next == _Dot_dot) {
_Next = _Lst.erase(_Next); // erase dot-dot filename

if (_Next != _Lst.end()) {
_Next = _Lst.erase(_Next); // erase immediately following directory-separator
// [ Note 5: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]"
auto _New_end = _Vec.begin();
for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) {
const auto _Elem = *_Pos++; // _Pos points at a filename here; it points at end or a separator after ++.
if (_Elem == _Dot) {
// ignore dot (and following separator).
if (_Pos == _Vec.end()) {
break;
}
} else if (_Elem != _Dot_dot) {
// append normal filename and separator.
*_New_end++ = _Elem; // _New_end points at end or a separator after ++.
if (_Pos == _Vec.end()) {
break;
}
++_New_end; // _New_end(<=_Pos) doesn't point at end; accept separator.
} else { // _Dot_dot
if (_New_end != _Vec.begin() && _New_end[-2] != _Dot_dot) {
// _New_end == _Vec.begin() + 2n here.
// remove preceding non-dot-dot filename and separator.
_New_end -= 2;
if (_Pos == _Vec.end()) {
break;
}
} else if (!_Has_root_directory) {
// due to 6, append dot-dot and separator only if !_Has_root_directory.
*_New_end++ = _Dot_dot;
if (_Pos == _Vec.end()) {
break;
}
++_New_end;
} else {
++_Next;
// ignore dot-dot and separator.
if (_Pos == _Vec.end()) {
break;
}
}
}
++_Pos; // _Pos points at a separator here; it points at end or a filename after ++.
}
_Vec.erase(_New_end, _Vec.end());

// "7. If the last filename is dot-dot, remove any trailing directory-separator."
if (_Lst.size() >= 2 && _Lst.back().empty() && *(_STD prev(_Lst.end(), 2)) == _Dot_dot) {
_Lst.pop_back();
if (_Vec.size() >= 2 && _Vec.back().empty() && _Vec.end()[-2] == _Dot_dot) {
_Vec.pop_back();
}

// Build up _Normalized by flattening _Lst.
for (const auto& _Elem : _Lst) {
// Build up _Normalized by flattening _Vec.
for (const auto& _Elem : _Vec) {
if (_Elem.empty()) {
_Normalized += preferred_separator;
} else {
Expand All @@ -1291,9 +1301,7 @@ namespace filesystem {
}

// "The result of normalization is a path in normal form, which is said to be normalized."
path _Result(_STD move(_Normalized));

return _Result;
return path(_STD move(_Normalized));
}

_NODISCARD inline path lexically_relative(const path& _Base) const;
Expand Down

0 comments on commit f6401b1

Please sign in to comment.