Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhancements for <filesystem> #3850

Merged
merged 22 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions benchmarks/src/path_lexically_normal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
#include <benchmark/benchmark.h>
#include <filesystem>

using namespace std;
namespace fs = std::filesystem;

namespace {
    // Each benchmark constructs its input path once, outside the timed loop, so that only
    // path::lexically_normal() is measured. The path is held by value (not as a const reference
    // bound to a temporary) so its lifetime is evident without relying on lifetime extension.

    // Input: a root-name immediately followed by a filename (no root-directory).
    void BM_lexically_normal_0(benchmark::State& state) {
        const fs::path p(LR"(X:DriveRelative)"sv);
        for (auto _ : state) {
            benchmark::DoNotOptimize(p.lexically_normal());
        }
    }

    // Input: a short path beginning with backslashes and containing a run of repeated separators.
    void BM_lexically_normal_1(benchmark::State& state) {
        const fs::path p(LR"(\\server\\\share)"sv);
        for (auto _ : state) {
            benchmark::DoNotOptimize(p.lexically_normal());
        }
    }

    // Input: a relative path using forward slashes that ends with two dot-dot elements.
    void BM_lexically_normal_2(benchmark::State& state) {
        const fs::path p(LR"(STL/.github/workflows/../..)"sv);
        for (auto _ : state) {
            benchmark::DoNotOptimize(p.lexically_normal());
        }
    }

    // Input: a long drive-rooted path with many elements and no dot or dot-dot elements.
    void BM_lexically_normal_3(benchmark::State& state) {
        const fs::path p(
            LR"(C:\Program Files\Azure Data Studio\resources\app\extensions\bat\snippets\batchfile.code-snippets)"sv);
        for (auto _ : state) {
            benchmark::DoNotOptimize(p.lexically_normal());
        }
    }

    // Input: a long path mixing both slash kinds, doubled separators, and many dot / dot-dot elements.
    void BM_lexically_normal_4(benchmark::State& state) {
        const fs::path p(
            LR"(/\server/\share/\a/\b/\c/\./\./\d/\../\../\../\../\../\../\../\other/x/y/z/.././..\meow.txt)"sv);
        for (auto _ : state) {
            benchmark::DoNotOptimize(p.lexically_normal());
        }
    }
} // namespace

// Register every lexically_normal benchmark defined above with the benchmark library.
BENCHMARK(BM_lexically_normal_0);
BENCHMARK(BM_lexically_normal_1);
BENCHMARK(BM_lexically_normal_2);
BENCHMARK(BM_lexically_normal_3);
BENCHMARK(BM_lexically_normal_4);

// Let the benchmark library supply the program's entry point, which runs the registered benchmarks.
BENCHMARK_MAIN();
118 changes: 63 additions & 55 deletions stl/inc/filesystem
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ _EMIT_STL_WARNING(STL4038, "The contents of <filesystem> are available only with
#include <chrono>
#include <cwchar>
#include <iomanip>
#include <list>
#include <locale>
#include <memory>
#include <system_error>
Expand Down Expand Up @@ -1206,78 +1205,89 @@ namespace filesystem {
// "3. Replace each directory-separator with a preferred-separator.
// [ Note: The generic pathname grammar (29.11.7.1) defines directory-separator
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
// as one or more slashes and preferred-separators. -end note ]"
list<wstring_view> _Lst; // Empty wstring_view means directory-separator
// that will be normalized to a preferred-separator.
// Non-empty wstring_view means filename.
for (auto _Next = _Root_name_end; _Next != _Last;) {
if (_Is_slash(*_Next)) {
if (_Lst.empty() || !_Lst.back().empty()) {
// collapse one or more slashes and preferred-separators to one empty wstring_view
_Lst.emplace_back();
}

++_Next;
} else {
const auto _Filename_end = _STD find_if(_Next + 1, _Last, _Is_slash);
_Lst.emplace_back(_Next, static_cast<size_t>(_Filename_end - _Next));
_Next = _Filename_end;
vector<wstring_view> _Vec; // Empty wstring_view means directory-separator
// that will be normalized to a preferred-separator.
// Non-empty wstring_view means filename.
_Vec.reserve(13); // avoid frequent re-allocations
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
bool _Has_root_directory = false; // true: there is a slash right after root name.
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
auto _Ptr = _Root_name_end;
if (_Ptr != _Last && _Is_slash(*_Ptr)) {
_Has_root_directory = true;
_Normalized += preferred_separator;
++_Ptr;
while (_Ptr != _Last && _Is_slash(*_Ptr)) {
++_Ptr;
}
}

// "4. Remove each dot filename and any immediately following directory-separator."
for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
if (*_Next == _Dot) {
_Next = _Lst.erase(_Next); // erase dot filename

if (_Next != _Lst.end()) {
_Next = _Lst.erase(_Next); // erase immediately following directory-separator
// _Vec will start with a filename (if not empty).
while (_Ptr != _Last) {
if (_Is_slash(*_Ptr)) {
if (_Vec.empty() || !_Vec.back().empty()) {
// collapse one or more slashes and preferred-separators to one empty wstring_view
_Vec.emplace_back();
}
++_Ptr;
} else {
++_Next;
const auto _Filename_end = _STD find_if(_Ptr + 1, _Last, _Is_slash);
_Vec.emplace_back(_Ptr, static_cast<size_t>(_Filename_end - _Ptr));
_Ptr = _Filename_end;
}
}

// "4. Remove each dot filename and any immediately following directory-separator."
// "5. As long as any appear, remove a non-dot-dot filename immediately followed by a
// directory-separator and a dot-dot filename, along with any immediately following directory-separator."
for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
auto _Prev = _Next;

++_Next; // If we aren't going to erase, keep advancing.
// If we're going to erase, _Next now points past the dot-dot filename.

if (*_Prev == _Dot_dot && _Prev != _Lst.begin() && --_Prev != _Lst.begin() && *--_Prev != _Dot_dot) {
if (_Next != _Lst.end()) { // dot-dot filename has an immediately following directory-separator
++_Next;
}

_Lst.erase(_Prev, _Next); // _Next remains valid
}
}

// "6. If there is a root-directory, remove all dot-dot filenames
// and any directory-separators immediately following them.
// [ Note: These dot-dot filenames attempt to refer to nonexistent parent directories. -end note ]"
if (!_Lst.empty() && _Lst.front().empty()) { // we have a root-directory
for (auto _Next = _Lst.begin(); _Next != _Lst.end();) {
if (*_Next == _Dot_dot) {
_Next = _Lst.erase(_Next); // erase dot-dot filename

if (_Next != _Lst.end()) {
_Next = _Lst.erase(_Next); // erase immediately following directory-separator
auto _New_end = _Vec.begin();
for (auto _Pos = _Vec.begin(); _Pos != _Vec.end();) {
auto _Elem = *_Pos++; // _Pos points at end or a separator after ++.
if (_Elem == _Dot) {
// ignore dot (and following separator).
if (_Pos == _Vec.end()) {
break;
}
} else if (_Elem == _Dot_dot) {
if (_New_end != _Vec.begin() && *prev(_New_end, 2) != _Dot_dot) {
achabense marked this conversation as resolved.
Show resolved Hide resolved
// note: _New_end == _Vec.begin() + 2n
// remove preceding non-dot-dot filename and separator.
_New_end -= 2;
if (_Pos == _Vec.end()) {
break;
}
} else if (!_Has_root_directory) {
// due to 6, append dot-dot only if !_Has_root_directory.
*_New_end++ = _Dot_dot;
if (_Pos == _Vec.end()) {
break;
}
*_New_end++ = {}; // as _Pos != _Vec.end(), it points at a separator; add it.
} else {
++_Next;
// ignore dot-dot (and following separator).
if (_Pos == _Vec.end()) {
break;
}
}
} else {
// append normal filename and separator.
*_New_end++ = _Elem;
if (_Pos == _Vec.end()) {
break;
}
*_New_end++ = {}; // add separator.
}
++_Pos; // _Pos points at a separator here in all cases; skip it.
}
_Vec.erase(_New_end, _Vec.end());

// "7. If the last filename is dot-dot, remove any trailing directory-separator."
if (_Lst.size() >= 2 && _Lst.back().empty() && *(_STD prev(_Lst.end(), 2)) == _Dot_dot) {
_Lst.pop_back();
if (_Vec.size() >= 2 && _Vec.back().empty() && *(_STD prev(_Vec.end(), 2)) == _Dot_dot) {
achabense marked this conversation as resolved.
Show resolved Hide resolved
_Vec.pop_back();
}

// Build up _Normalized by flattening _Lst.
for (const auto& _Elem : _Lst) {
// Build up _Normalized by flattening _Vec.
for (const auto& _Elem : _Vec) {
if (_Elem.empty()) {
_Normalized += preferred_separator;
} else {
Expand All @@ -1291,9 +1301,7 @@ namespace filesystem {
}

// "The result of normalization is a path in normal form, which is said to be normalized."
path _Result(_STD move(_Normalized));

return _Result;
return path(_STD move(_Normalized));
}

_NODISCARD inline path lexically_relative(const path& _Base) const;
Expand Down