From b67bf38f31576c22143f51a3b314883e97f39c29 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Tue, 23 Jan 2018 01:17:21 +0100 Subject: [PATCH] src: fix fs.write() externalized string handling * Respect `encoding` argument when the string is externalized. * Copy the string when the write request can outlive the externalized string. This commit removes `StringBytes::GetExternalParts()` because it is fundamentally broken. Fixes: https://github.com/nodejs/node/issues/18146 Fixes: https://github.com/nodejs/node/issues/22728 Backport-PR-URL: https://github.com/nodejs/node/pull/22731 PR-URL: https://github.com/nodejs/node/pull/18216 Reviewed-By: James M Snell --- src/node_file.cc | 50 ++++++++++----- src/string_bytes.cc | 113 +++++++++------------------------ src/string_bytes.h | 6 -- test/parallel/test-fs-write.js | 44 +++++++++++++ 4 files changed, 106 insertions(+), 107 deletions(-) diff --git a/src/node_file.cc b/src/node_file.cc index 39cce2ea6bdd64..e67be4ef4b0b21 100644 --- a/src/node_file.cc +++ b/src/node_file.cc @@ -37,6 +37,7 @@ # include #endif +#include #include namespace node { @@ -1127,39 +1128,54 @@ static void WriteString(const FunctionCallbackInfo& args) { if (!args[0]->IsInt32()) return env->ThrowTypeError("First argument must be file descriptor"); + std::unique_ptr delete_on_return; Local req; - Local string = args[1]; + Local value = args[1]; int fd = args[0]->Int32Value(); char* buf = nullptr; - int64_t pos; size_t len; FSReqWrap::Ownership ownership = FSReqWrap::COPY; - // will assign buf and len if string was external - if (!StringBytes::GetExternalParts(string, - const_cast(&buf), - &len)) { - enum encoding enc = ParseEncoding(env->isolate(), args[3], UTF8); - len = StringBytes::StorageSize(env->isolate(), string, enc); + const int64_t pos = GET_OFFSET(args[2]); + const auto enc = ParseEncoding(env->isolate(), args[3], UTF8); + const auto is_async = args[4]->IsObject(); + + // Avoid copying the string when it is externalized but only when: + // 1. The target encoding is compatible with the string's encoding, and + // 2. The write is synchronous, otherwise the string might get neutered + // while the request is in flight, and + // 3. For UCS2, when the host system is little-endian. Big-endian systems + // need to call StringBytes::Write() to ensure proper byte swapping. + // The const_casts are conceptually sound: memory is read but not written. + if (!is_async && value->IsString()) { + auto string = value.As(); + if ((enc == ASCII || enc == LATIN1) && string->IsExternalOneByte()) { + auto ext = string->GetExternalOneByteStringResource(); + buf = const_cast(ext->data()); + len = ext->length(); + } else if (enc == UCS2 && IsLittleEndian() && string->IsExternal()) { + auto ext = string->GetExternalStringResource(); + buf = reinterpret_cast(const_cast(ext->data())); + len = ext->length() * sizeof(*ext->data()); + } + } + + if (buf == nullptr) { + len = StringBytes::StorageSize(env->isolate(), value, enc); buf = new char[len]; + // SYNC_CALL returns on error. Make sure to always free the memory. + if (!is_async) delete_on_return.reset(buf); // StorageSize may return too large a char, so correct the actual length // by the write size len = StringBytes::Write(env->isolate(), buf, len, args[1], enc); ownership = FSReqWrap::MOVE; } - pos = GET_OFFSET(args[2]); + req = args[4]; - uv_buf_t uvbuf = uv_buf_init(const_cast(buf), len); + uv_buf_t uvbuf = uv_buf_init(buf, len); if (!req->IsObject()) { - // SYNC_CALL returns on error. Make sure to always free the memory. - struct Delete { - inline explicit Delete(char* pointer) : pointer_(pointer) {} - inline ~Delete() { delete[] pointer_; } - char* const pointer_; - }; - Delete delete_on_return(ownership == FSReqWrap::MOVE ? buf : nullptr); SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos) return args.GetReturnValue().Set(SYNC_RESULT); } diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 258bc72cf62a43..2e41c1478c92fc 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -27,6 +27,8 @@ #include #include // memcpy + +#include #include // When creating strings >= this length v8's gc spins up and consumes @@ -269,39 +271,6 @@ static size_t hex_decode(char* buf, } -bool StringBytes::GetExternalParts(Local val, - const char** data, - size_t* len) { - if (Buffer::HasInstance(val)) { - *data = Buffer::Data(val); - *len = Buffer::Length(val); - return true; - } - - if (!val->IsString()) - return false; - - Local str = val.As(); - - if (str->IsExternalOneByte()) { - const String::ExternalOneByteStringResource* ext; - ext = str->GetExternalOneByteStringResource(); - *data = ext->data(); - *len = ext->length(); - return true; - - } else if (str->IsExternal()) { - const String::ExternalStringResource* ext; - ext = str->GetExternalStringResource(); - *data = reinterpret_cast(ext->data()); - *len = ext->length() * sizeof(*ext->data()); - return true; - } - - return false; -} - - size_t StringBytes::WriteUCS2(char* buf, size_t buflen, Local str, @@ -351,17 +320,15 @@ size_t StringBytes::Write(Isolate* isolate, enum encoding encoding, int* chars_written) { HandleScope scope(isolate); - const char* data = nullptr; - size_t nbytes = 0; - const bool is_extern = GetExternalParts(val, &data, &nbytes); - const size_t external_nbytes = nbytes; + size_t nbytes; + int nchars; + + if (chars_written == nullptr) + chars_written = &nchars; CHECK(val->IsString() == true); Local str = val.As(); - if (nbytes > buflen) - nbytes = buflen; - int flags = String::HINT_MANY_WRITES_EXPECTED | String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8; @@ -369,14 +336,15 @@ size_t StringBytes::Write(Isolate* isolate, switch (encoding) { case ASCII: case LATIN1: - if (is_extern && str->IsOneByte()) { - memcpy(buf, data, nbytes); + if (str->IsExternalOneByte()) { + auto ext = str->GetExternalOneByteStringResource(); + nbytes = std::min(buflen, ext->length()); + memcpy(buf, ext->data(), nbytes); } else { uint8_t* const dst = reinterpret_cast(buf); nbytes = str->WriteOneByte(dst, 0, buflen, flags); } - if (chars_written != nullptr) - *chars_written = nbytes; + *chars_written = nbytes; break; case BUFFER: @@ -387,14 +355,8 @@ size_t StringBytes::Write(Isolate* isolate, case UCS2: { size_t nchars; - if (is_extern && !str->IsOneByte()) { - memcpy(buf, data, nbytes); - nchars = nbytes / sizeof(uint16_t); - } else { - nbytes = WriteUCS2(buf, buflen, str, flags, &nchars); - } - if (chars_written != nullptr) - *chars_written = nchars; + nbytes = WriteUCS2(buf, buflen, str, flags, &nchars); + *chars_written = static_cast(nchars); // Node's "ucs2" encoding wants LE character data stored in // the Buffer, so we need to reorder on BE platforms. See @@ -407,27 +369,25 @@ size_t StringBytes::Write(Isolate* isolate, } case BASE64: - if (is_extern) { - nbytes = base64_decode(buf, buflen, data, external_nbytes); + if (str->IsExternalOneByte()) { + auto ext = str->GetExternalOneByteStringResource(); + nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); } else { String::Value value(str); nbytes = base64_decode(buf, buflen, *value, value.length()); } - if (chars_written != nullptr) { - *chars_written = nbytes; - } + *chars_written = nbytes; break; case HEX: - if (is_extern) { - nbytes = hex_decode(buf, buflen, data, external_nbytes); + if (str->IsExternalOneByte()) { + auto ext = str->GetExternalOneByteStringResource(); + nbytes = hex_decode(buf, buflen, ext->data(), ext->length()); } else { String::Value value(str); nbytes = hex_decode(buf, buflen, *value, value.length()); } - if (chars_written != nullptr) { - *chars_written = nbytes; - } + *chars_written = nbytes; break; default: @@ -504,49 +464,34 @@ size_t StringBytes::Size(Isolate* isolate, Local val, enum encoding encoding) { HandleScope scope(isolate); - size_t data_size = 0; - bool is_buffer = Buffer::HasInstance(val); - if (is_buffer && (encoding == BUFFER || encoding == LATIN1)) + if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1)) return Buffer::Length(val); - const char* data; - if (GetExternalParts(val, &data, &data_size)) - return data_size; - Local str = val->ToString(isolate); switch (encoding) { case ASCII: case LATIN1: - data_size = str->Length(); - break; + return str->Length(); case BUFFER: case UTF8: - data_size = str->Utf8Length(); - break; + return str->Utf8Length(); case UCS2: - data_size = str->Length() * sizeof(uint16_t); - break; + return str->Length() * sizeof(uint16_t); case BASE64: { String::Value value(str); - data_size = base64_decoded_size(*value, value.length()); - break; + return base64_decoded_size(*value, value.length()); } case HEX: - data_size = str->Length() / 2; - break; - - default: - CHECK(0 && "unknown encoding"); - break; + return str->Length() / 2; } - return data_size; + UNREACHABLE(); } diff --git a/src/string_bytes.h b/src/string_bytes.h index 17bbd80c0ab1c2..7a70f06f63dec4 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -81,12 +81,6 @@ class StringBytes { v8::Local val, enum encoding enc); - // If the string is external then assign external properties to data and len, - // then return true. If not return false. - static bool GetExternalParts(v8::Local val, - const char** data, - size_t* len); - // Write the bytes from the string or buffer into the char* // returns the number of bytes written, which will always be // <= buflen. Use StorageSize/Size first to know how much diff --git a/test/parallel/test-fs-write.js b/test/parallel/test-fs-write.js index 5dd9ca0e902a1e..f50f1eb314fdc8 100644 --- a/test/parallel/test-fs-write.js +++ b/test/parallel/test-fs-write.js @@ -19,6 +19,7 @@ // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. +// Flags: --expose_externalize_string 'use strict'; const common = require('../common'); const assert = require('assert'); @@ -34,6 +35,49 @@ const fn3 = path.join(tmpdir.path, 'write3.txt'); const expected = 'ümlaut.'; const constants = fs.constants; +/* eslint-disable no-undef */ +common.allowGlobals(externalizeString, isOneByteString, x); + +{ + const expected = 'ümlaut eins'; // Must be a unique string. + externalizeString(expected); + assert.strictEqual(true, isOneByteString(expected)); + const fd = fs.openSync(fn, 'w'); + fs.writeSync(fd, expected, 0, 'latin1'); + fs.closeSync(fd); + assert.strictEqual(expected, fs.readFileSync(fn, 'latin1')); +} + +{ + const expected = 'ümlaut zwei'; // Must be a unique string. + externalizeString(expected); + assert.strictEqual(true, isOneByteString(expected)); + const fd = fs.openSync(fn, 'w'); + fs.writeSync(fd, expected, 0, 'utf8'); + fs.closeSync(fd); + assert.strictEqual(expected, fs.readFileSync(fn, 'utf8')); +} + +{ + const expected = '中文 1'; // Must be a unique string. + externalizeString(expected); + assert.strictEqual(false, isOneByteString(expected)); + const fd = fs.openSync(fn, 'w'); + fs.writeSync(fd, expected, 0, 'ucs2'); + fs.closeSync(fd); + assert.strictEqual(expected, fs.readFileSync(fn, 'ucs2')); +} + +{ + const expected = '中文 2'; // Must be a unique string. + externalizeString(expected); + assert.strictEqual(false, isOneByteString(expected)); + const fd = fs.openSync(fn, 'w'); + fs.writeSync(fd, expected, 0, 'utf8'); + fs.closeSync(fd); + assert.strictEqual(expected, fs.readFileSync(fn, 'utf8')); +} + fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) { assert.ifError(err);