Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Rework SubprocessImpl for extern-worker" #9241

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/nix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ jobs:
- hhvm_clang
os:
- ${{ github.event_name == 'pull_request' && '16-core' || 'ubuntu-latest' }}
flags:
- ''
- --repo
runs-on: ${{matrix.os}}
steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -143,7 +146,7 @@ jobs:
- run: sudo apt-get install ./${{matrix.package}}.deb
- run: echo "HHVM_BIN=$(command -v hhvm)" >> "$GITHUB_ENV"
- run: |
"$HHVM_BIN" hphp/test/run.php -x hphp/test/github_excluded_tests all
"$HHVM_BIN" hphp/test/run.php ${{matrix.flags}} -x hphp/test/github_excluded_tests all
upload-deb:
if: github.event_name == 'push' && github.ref_type == 'tag'
runs-on: ubuntu-latest
Expand Down
2 changes: 0 additions & 2 deletions hphp/hhbbc/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5650,8 +5650,6 @@ materialize_inputs(const IndexData& index,
auto program = std::make_unique<php::Program>();

auto const loadAndParse = [&] (Chunk chunk) -> coro::Task<void> {
HPHP_CORO_RESCHEDULE_ON_CURRENT_EXECUTOR;

auto [classes, funcs, units] = HPHP_CORO_AWAIT(coro::collect(
client->load(std::move(chunk.classes)),
client->load(std::move(chunk.funcs)),
Expand Down
2 changes: 1 addition & 1 deletion hphp/util/blob-encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ struct BlobEncoder {
void writeRaw(const char* ptr, size_t size) {
auto const start = m_blob.size();
m_blob.resize(start + size);
std::copy(ptr, ptr + size, m_blob.data() + start);
std::copy(ptr, ptr + size, &m_blob[start]);
}

/*
Expand Down
41 changes: 7 additions & 34 deletions hphp/util/extern-worker-detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,32 +315,6 @@ auto typesToValues(F&& f) {

//////////////////////////////////////////////////////////////////////

// Abstracts away how a worker should obtain it's inputs, and write
// it's outputs. These functions are tightly coupled with the logic in
// JobBase::serialize and JobBase::deserialize.
struct ISource {
virtual ~ISource() = default;
virtual std::string blob() = 0;
virtual Optional<std::string> optBlob() = 0;
virtual std::vector<std::string> variadic() = 0;
virtual void initDone() = 0;
virtual bool inputEnd() const = 0;
virtual void nextInput() = 0;
virtual void finish() = 0;
};

struct ISink {
virtual ~ISink() = default;
virtual void blob(const std::string&) = 0;
virtual void optBlob(const Optional<std::string>&) = 0;
virtual void variadic(const std::vector<std::string>&) = 0;
virtual void nextOutput() = 0;
virtual void startFini() = 0;
virtual void finish() = 0;
};

//////////////////////////////////////////////////////////////////////

// Base class for Jobs. This provide a consistent interface to invoke
// through.
struct JobBase {
Expand All @@ -349,16 +323,15 @@ struct JobBase {
explicit JobBase(const std::string& name);
virtual ~JobBase() = default;

template <typename T> static T deserialize(ISource&);
template <typename T> static void serialize(const T&, ISink&);
template <typename T> static T deserialize(const std::filesystem::path&);
template <typename T> static void serialize(const T&,
size_t,
const std::filesystem::path&);

private:
template <typename T> static T deserializeBlob(std::string);
template <typename T> static std::string serializeBlob(const T&);

virtual void init(ISource&) const = 0;
virtual void fini(ISink&) const = 0;
virtual void run(ISource&, ISink&) const = 0;
virtual void init(const std::filesystem::path&) const = 0;
virtual void fini(const std::filesystem::path&) const = 0;
virtual void run(const std::filesystem::path&, const std::filesystem::path&) const = 0;

std::string m_name;

Expand Down
174 changes: 75 additions & 99 deletions hphp/util/extern-worker-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,63 +43,63 @@ Job<C>::Job() : detail::JobBase{C::name()} {}
// expects, then invoke the function with those types.

template <typename C>
void Job<C>::init(detail::ISource& source) const {
void Job<C>::init(const std::filesystem::path& root) const {
using namespace detail;
using Args = typename Params<decltype(C::init)>::type;

// For each expected input, load it, and deserialize it into
// For each expected input, load the file, and deserialize it into
// the appropriate type. Return the types as a tuple, which can then
// std::apply to C::init, passing the inputs.
size_t DEBUG_ONLY nextIdx = 0;
std::apply(
C::init,
typesToValues<Args>(
[&] (size_t idx, auto tag) {
assertx(idx == nextIdx++);
return deserialize<typename decltype(tag)::Type>(source);
return deserialize<typename decltype(tag)::Type>(
root / folly::to<std::string>(idx)
);
}
)
);
source.initDone();

using Ret = typename Return<decltype(C::init)>::type;
static_assert(std::is_void_v<Ret>, "init() must return void");
}

template <typename C>
void Job<C>::fini(detail::ISink& sink) const {
void Job<C>::fini(const std::filesystem::path& outputRoot) const {
using namespace detail;

sink.startFini();
using Ret = typename Return<decltype(C::fini)>::type;
if constexpr (std::is_void_v<Ret>) {
C::fini();
} else {
auto const output = outputRoot / "fini";
std::filesystem::create_directory(output, outputRoot);
auto const v = C::fini();
time("writing fini outputs", [&] { return serialize(v, sink); });
time("writing fini outputs", [&] { return serialize(v, 0, output); });
}
}

template <typename C>
void Job<C>::run(detail::ISource& source, detail::ISink& sink) const {
void Job<C>::run(const std::filesystem::path& inputRoot,
const std::filesystem::path& outputRoot) const {
using namespace detail;

// For each expected input, load it, and deserialize it into the
// appropriate type, turning all of the types into a tuple.
// For each expected input, load the file, and deserialize it into
// the appropriate type, turning all of the types into a tuple.
using Args = typename Params<decltype(C::run)>::type;
size_t DEBUG_ONLY nextIdx = 0;
auto inputs = time(
"loading inputs",
[&] {
return typesToValues<Args>(
[&] (size_t idx, auto tag) {
assertx(idx == nextIdx++);
return deserialize<typename decltype(tag)::Type>(source);
return deserialize<typename decltype(tag)::Type>(
inputRoot / folly::to<std::string>(idx)
);
}
);
}
);
source.nextInput();

// Apply the tuple to C::run, passing the types as parameters.
auto outputs = time(
Expand All @@ -110,9 +110,8 @@ void Job<C>::run(detail::ISource& source, detail::ISink& sink) const {
using Ret = typename Return<decltype(C::run)>::type;
static_assert(!std::is_void_v<Ret>, "run() must return something");

// Serialize the outputs
time("writing outputs", [&] { return serialize(outputs, sink); });
sink.nextOutput();
// Serialize the outputs into the output directory.
time("writing outputs", [&] { return serialize(outputs, 0, outputRoot); });
}

//////////////////////////////////////////////////////////////////////
Expand All @@ -121,99 +120,81 @@ namespace detail {

//////////////////////////////////////////////////////////////////////

// Turn a blob into a specific (non-marker) type
// Given a file path, load the contents of the file, deserialize them
// into the type T, and return it.
template <typename T>
T JobBase::deserializeBlob(std::string blob) {
T JobBase::deserialize(const std::filesystem::path& path) {
using namespace detail;
static_assert(!IsMarker<T>::value, "Special markers cannot be nested");
if constexpr (std::is_same<T, std::string>::value) {
// A std::string is always stored as itself (this lets us store
// files directly as their contents without having to encode
// them).
return blob;
} else {
// For most types, the data is encoded using BlobEncoder, so undo
// that.
BlobDecoder decoder{blob.data(), blob.size()};
return decoder.makeWhole<T>();
}
}

// Deserialize the given input source into the type T and return
// it. The type might include markers, which might trigger
// sub-deserializations.
template <typename T>
T JobBase::deserialize(ISource& source) {
using namespace detail;
static_assert(!IsMulti<T>::value, "Multi can only be used as return type");

if constexpr (IsVariadic<T>::value) {
// files as their contents without having to encode them).
return readFile(path);
} else if constexpr (IsVariadic<T>::value) {
static_assert(!IsMarker<typename T::Type>::value,
"Special markers cannot be nested");
auto const blobs = source.variadic();
// Variadic<T> is actually a directory, not a file. Recurse into
// it, and do the deserialization for every file within it.
T out;
out.vals.reserve(blobs.size());
for (auto const& blob : blobs) {
out.vals.emplace_back(deserializeBlob<typename T::Type>(blob));
for (size_t i = 0;; ++i) {
auto const valPath = path / folly::to<std::string>(i);
// A break in the numbering means the end of the vector.
if (!std::filesystem::exists(valPath)) break;
out.vals.emplace_back(deserialize<typename T::Type>(valPath));
}
return out;
} else if constexpr (IsOpt<T>::value) {
static_assert(!IsMarker<typename T::Type>::value,
"Special markers cannot be nested");
// Opt<T> is like T, except the data may not exist (so is nullopt
// Opt<T> is like T, except the file may not exist (so is nullopt
// otherwise).
T out;
if (auto const blob = source.optBlob()) {
out.val.emplace(deserializeBlob<typename T::Type>(*blob));
if (std::filesystem::exists(path)) {
out.val.emplace(deserialize<typename T::Type>(path));
}
return out;
} else {
return deserializeBlob<T>(source.blob());
// For most types, the data is encoded using BlobEncoder, so undo
// that.
static_assert(!IsMulti<T>::value, "Multi can only be used as return type");
auto const data = readFile(path);
BlobDecoder decoder{data.data(), data.size()};
return decoder.makeWhole<T>();
}
}

// Serialize the given (non-marker) value into a blob
// Given a value, an index of that value (its positive in the output
// values), and an output root, serialize the value, and write its
// contents to the appropriate file.
template <typename T>
std::string JobBase::serializeBlob(const T& v) {
void JobBase::serialize(const T& v,
size_t idx,
const std::filesystem::path& root) {
using namespace detail;
static_assert(!IsMarker<T>::value,
"Special markers cannot be nested");
if constexpr (std::is_same<T, std::string>::value) {
// std::string always encodes to itself
return v;
} else {
BlobEncoder encoder;
encoder(v);
return std::string{(const char*)encoder.data(), encoder.size()};
}
}

// Serialize the given value into a blob and write it out to the given
// output sink. The value's type might be a marker, which can trigger
// sub-serializations.
template <typename T>
void JobBase::serialize(const T& v, ISink& sink) {
using namespace detail;
if constexpr (IsVariadic<T>::value) {
// std::string isn't serialized, but written as itself as
// root/idx.
return writeFile(root / folly::to<std::string>(idx), v.data(), v.size());
} else if constexpr (IsVariadic<T>::value) {
// For Variadic<T>, we create a directory root/idx, and under it,
// write a file for every element in the vector.
static_assert(!IsMarker<typename T::Type>::value,
"Special markers cannot be nested");
using namespace folly::gen;
auto const blobs = from(v.vals)
| map([&] (const typename T::Type& t) { return serializeBlob(t); })
| as<std::vector>();
sink.variadic(blobs);
auto const path = root / folly::to<std::string>(idx);
std::filesystem::create_directory(path, root);
for (size_t i = 0; i < v.vals.size(); ++i) {
serialize(v.vals[i], i, path);
}
} else if constexpr (IsOpt<T>::value) {
// Opt<T> is like T, except nothing is written if the value isn't
// present.
static_assert(!IsMarker<typename T::Type>::value,
"Special markers cannot be nested");
sink.optBlob(
v.val.has_value() ? serializeBlob(*v.val) : Optional<std::string>{}
);
if (!v.val.has_value()) return;
serialize(*v.val, idx, root);
} else if constexpr (IsMulti<T>::value) {
// Treat Multi as equivalent to std::tuple (IE, write each element
// separately).
size_t DEBUG_ONLY nextTupleIdx = 0;
// to a separate file).
assertx(idx == 0);
for_each(
v.vals,
[&] (auto const& elem, size_t tupleIdx) {
Expand All @@ -223,12 +204,18 @@ void JobBase::serialize(const T& v, ISink& sink) {
>::value,
"Multi cannot be nested"
);
assertx(tupleIdx == nextTupleIdx++);
serialize(elem, sink);
serialize(elem, tupleIdx, root);
}
);
} else {
sink.blob(serializeBlob(v));
// Most types are just encoded with BlobEncoder and written as
// root/idx
BlobEncoder encoder;
encoder(v);
writeFile(
root / folly::to<std::string>(idx),
(const char*)encoder.data(), encoder.size()
);
}
}

Expand All @@ -238,35 +225,24 @@ void JobBase::serialize(const T& v, ISink& sink) {

//////////////////////////////////////////////////////////////////////

inline RefId::RefId(std::string id, size_t size, size_t extra)
: m_id{std::move(id)}, m_size{size}, m_extra{extra}
inline RefId::RefId(std::string id, size_t size)
: m_id{std::move(id)}, m_size{size}
{}

inline bool RefId::operator==(const RefId& o) const {
return
std::tie(m_id, m_extra, m_size) ==
std::tie(o.m_id, o.m_extra, o.m_size);
return std::tie(m_id, m_size) == std::tie(o.m_id, o.m_size);
}

inline bool RefId::operator!=(const RefId& o) const {
return !(*this == o);
}

inline bool RefId::operator<(const RefId& o) const {
return
std::tie(m_id, m_extra, m_size) <
std::tie(o.m_id, o.m_extra, o.m_size);
return std::tie(m_size, m_id) < std::tie(o.m_size, o.m_id);
}

inline std::string RefId::toString() const {
// Don't print out the extra field if it's zero, to avoid clutter
// for implementations which don't use it. The id might contain
// binary data, so escape it before printing.
if (m_extra) {
return folly::sformat("{}:{}:{}", folly::humanify(m_id), m_extra, m_size);
} else {
return folly::sformat("{}:{}", folly::humanify(m_id), m_size);
}
return folly::sformat("{}:{}", m_id, m_size);
}

//////////////////////////////////////////////////////////////////////
Expand Down
Loading