Skip to content

Commit

Permalink
Add Linux Memory Manager
Browse files Browse the repository at this point in the history
  • Loading branch information
mohsaka authored and minhancao committed Aug 14, 2024
1 parent 51d3f69 commit 2928a7e
Show file tree
Hide file tree
Showing 8 changed files with 239 additions and 0 deletions.
10 changes: 10 additions & 0 deletions presto-docs/src/main/sphinx/presto_cpp/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,16 @@ is valid.
If the time period exceeds the parameter value, the request is rejected as
authentication failure (HTTP 401).

LinuxMemoryChecker
--------------------

Enable the LinuxMemoryChecker by setting ``PRESTO_ENABLE_LINUX_MEMORY_CHECKER``
to ON in presto-native-execution/CMakeLists.txt.

The LinuxMemoryChecker extends from PeriodicMemoryChecker and periodically checks
memory usage using memory calculation from inactive_anon + active_anon in the memory stat
file from Linux cgroups V1 or V2.

Async Data Cache and Prefetching
--------------------------------

Expand Down
2 changes: 2 additions & 0 deletions presto-native-execution/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ option(PRESTO_ENABLE_TESTING "Enable tests" ON)

option(PRESTO_ENABLE_JWT "Enable JWT (JSON Web Token) authentication" OFF)

option(PRESTO_ENABLE_LINUX_MEMORY_CHECKER "Enable LinuxMemoryChecker" OFF)

# Set all Velox options below
add_compile_definitions(FOLLY_HAVE_INT128_T=1)

Expand Down
3 changes: 3 additions & 0 deletions presto-native-execution/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ endif
ifeq ($(PRESTO_ENABLE_JWT), ON)
EXTRA_CMAKE_FLAGS += -DPRESTO_ENABLE_JWT=ON
endif
ifeq ($(PRESTO_ENABLE_LINUX_MEMORY_CHECKER), ON)
EXTRA_CMAKE_FLAGS += -DPRESTO_ENABLE_LINUX_MEMORY_CHECKER=ON
endif
ifneq ($(PRESTO_STATS_REPORTER_TYPE),)
EXTRA_CMAKE_FLAGS += -DPRESTO_STATS_REPORTER_TYPE=$(PRESTO_STATS_REPORTER_TYPE)
endif
Expand Down
1 change: 1 addition & 0 deletions presto-native-execution/presto_cpp/main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_library(
Announcer.cpp
CPUMon.cpp
CoordinatorDiscoverer.cpp
LinuxMemoryChecker.cpp
PeriodicMemoryChecker.cpp
PeriodicTaskManager.cpp
PrestoExchangeSource.cpp
Expand Down
127 changes: 127 additions & 0 deletions presto-native-execution/presto_cpp/main/LinuxMemoryChecker.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "presto_cpp/main/LinuxMemoryChecker.h"
#ifdef __linux__
#include <folly/File.h>
#include <folly/FileUtil.h>
#include <folly/gen/Base.h>
#include <folly/gen/File.h>
#include <folly/gen/String.h>

#include <folly/Conv.h>
#include <folly/CppAttributes.h>
#include <folly/Format.h>
#include <folly/Range.h>
#include <folly/String.h>

#include <sys/stat.h>

#endif // __linux__

namespace facebook::presto {

// Current memory calculation used is inactive_anon + active_anon
// Our first attempt was using memInfo memTotal - memAvailable.
// However memInfo is not containerized so we reserve this as a
// last resort.
//
// Next we tried to use what docker/kubernetes uses for their
// calculation. cgroup usage_in_bytes - total_inactive_files.
// However we found out that usage_in_bytes is a fuzz value
// and has a chance for the sync to occur after the shrink
// polling interval. This would result in double shrinks.
//
// Therefore we decided on values from the memory.stat file
// that are real time statistics. At first we tried to use
// the calculation suggested by the kernel team RSS+CACHE(+SWAP)
// However we noticed that this value was not closely related to the
// value in usage_in_bytes which is used to OOMKill. We then looked
// at all of the values in the stat file and decided that
// inactive_anon + active_anon moves closest to that of
// usage_in_bytes
//
// NOTE: We do not know if cgroup V2 memory.current is a fuzz
// value. It may be better than what we currently use. For
// consistency we will match cgroup V1 and change if
// necessary.

int64_t LinuxMemoryChecker::systemUsedMemoryBytes() {
#ifdef __linux__
static std::string statFile;
size_t memAvailable = 0;
size_t memTotal = 0;
size_t inactiveAnon = 0;
size_t activeAnon = 0;
boost::cmatch match;
std::array<char, 50> buf;
static const boost::regex inactiveAnonRegex(R"!(inactive_anon\s*(\d+)\s*)!");
static const boost::regex activeAnonRegex(R"!(active_anon\s*(\d+)\s*)!");

// Find out what cgroup version (v1 or v2) we have based on the directory it's
// mounted.
static const char* cgroupV1Path = "/sys/fs/cgroup/memory/memory.stat";
static const char* cgroupV2Path = "/sys/fs/cgroup/memory.stat";
if (statFile.empty()) {
struct stat buffer;
if ((stat(cgroupV1Path, &buffer) == 0)) {
statFile = cgroupV1Path;
} else if ((stat(cgroupV2Path, &buffer) == 0)) {
statFile = cgroupV2Path;
} else {
statFile = "None";
}
LOG(INFO) << fmt::format("Using memory stat file {}", statFile);
}

if (statFile != "None") {
folly::gen::byLine(statFile.c_str()) |
[&](folly::StringPiece line) -> void {
inactiveAnon = matchLineWithRegex(line, match, inactiveAnonRegex);
activeAnon = matchLineWithRegex(line, match, activeAnonRegex);
};

// Unit is in bytes.
return inactiveAnon + activeAnon;
}

// Default case variables.
static const boost::regex memAvailableRegex(
R"!(MemAvailable:\s*(\d+)\s*kB)!");
static const boost::regex memTotalRegex(R"!(MemTotal:\s*(\d+)\s*kB)!");
// Last resort use host machine info.
folly::gen::byLine("/proc/meminfo") | [&](folly::StringPiece line) -> void {
memAvailable = matchLineWithRegex(line, match, memAvailableRegex) * 1024;
memTotal = matchLineWithRegex(line, match, memTotalRegex) * 1024;
};
// Unit is in bytes.
return (memAvailable && memTotal) ? memTotal - memAvailable : 0;

#else
return 0;
#endif
}

size_t LinuxMemoryChecker::matchLineWithRegex(
const folly::StringPiece& line,
boost::cmatch& match,
const boost::regex& regex) {
std::string str(line.data(), line.size());
if (boost::regex_match(str.c_str(), match, regex)) {
std::string numStr(match[1].str());
return std::stoul(numStr);
}
return 0;
}
} // namespace facebook::presto
70 changes: 70 additions & 0 deletions presto-native-execution/presto_cpp/main/LinuxMemoryChecker.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <boost/regex.hpp>
#include "presto_cpp/main/PeriodicMemoryChecker.h"

namespace facebook::presto {
class LinuxMemoryChecker : public PeriodicMemoryChecker {
public:
explicit LinuxMemoryChecker(
const PeriodicMemoryChecker::Config& config,
int64_t mallocBytes = 0,
std::function<void()>&& periodicCb = nullptr,
std::function<bool(const std::string&)>&& heapDumpCb = nullptr)
: PeriodicMemoryChecker(config),
mallocBytes_(mallocBytes),
periodicCb_(std::move(periodicCb)),
heapDumpCb_(std::move(heapDumpCb)) {}

~LinuxMemoryChecker() override {}

void setMallocBytes(int64_t mallocBytes) {
mallocBytes_ = mallocBytes;
}

protected:
int64_t systemUsedMemoryBytes() override;

size_t matchLineWithRegex(
const folly::StringPiece& line,
boost::cmatch& match,
const boost::regex& regex);

int64_t mallocBytes() const override {
return mallocBytes_;
}

void periodicCb() const override {
if (periodicCb_) {
periodicCb_();
}
}

bool heapDumpCb(const std::string& filePath) const override {
if (heapDumpCb_) {
return heapDumpCb_(filePath);
}
return false;
}

void removeDumpFile(const std::string& filePath) const override {}

private:
int64_t mallocBytes_{0};
std::function<void()> periodicCb_;
std::function<bool(const std::string&)> heapDumpCb_;
};
} // namespace facebook::presto
18 changes: 18 additions & 0 deletions presto-native-execution/presto_cpp/main/PrestoServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@
#include <sched.h>
#endif

#ifdef PRESTO_ENABLE_LINUX_MEMORY_CHECKER
#include "presto_cpp/main/LinuxMemoryChecker.h"
#endif // PRESTO_ENABLE_LINUX_MEMORY_CHECKER

namespace facebook::presto {
using namespace facebook::velox;

Expand Down Expand Up @@ -546,6 +550,9 @@ void PrestoServer::run() {
addAdditionalPeriodicTasks();
periodicTaskManager_->start();

// Start LinuxMemoryChecker.
startLinuxMemoryChecker();

// Start everything. After the return from the following call we are shutting
// down.
httpServer_->start(getHttpServerFilters());
Expand Down Expand Up @@ -656,6 +663,17 @@ void PrestoServer::run() {
}
}

void PrestoServer::startLinuxMemoryChecker() {
#ifdef PRESTO_ENABLE_LINUX_MEMORY_CHECKER
PeriodicMemoryChecker::Config config;
config.systemMemPushbackEnabled = systemConfig->systemMemPushbackEnabled();
config.systemMemLimitBytes = systemConfig->systemMemLimitGb() << 30;
config.systemMemShrinkBytes = systemConfig->systemMemShrinkGb() << 30;
linuxMemoryChecker_ = std::make_unique<LinuxMemoryChecker>(config);
linuxMemoryChecker_->start();
#endif // PRESTO_ENABLE_LINUX_MEMORY_CHECKER
}

void PrestoServer::yieldTasks() {
const auto timeslice = SystemConfig::instance()->taskRunTimeSliceMicros();
if (timeslice <= 0) {
Expand Down
8 changes: 8 additions & 0 deletions presto-native-execution/presto_cpp/main/PrestoServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class SignalHandler;
class TaskManager;
class TaskResource;
class PeriodicTaskManager;
#ifdef PRESTO_ENABLE_LINUX_MEMORY_CHECKER
class LinuxMemoryChecker;
#endif // PRESTO_ENABLE_LINUX_MEMORY_CHECKER
class SystemConfig;

class PrestoServer {
Expand Down Expand Up @@ -182,6 +185,8 @@ class PrestoServer {

void registerStatsCounters();

void startLinuxMemoryChecker();

protected:
void updateAnnouncerDetails();

Expand Down Expand Up @@ -248,6 +253,9 @@ class PrestoServer {
folly::Synchronized<bool> shuttingDown_{false};
std::chrono::steady_clock::time_point start_;
std::unique_ptr<PeriodicTaskManager> periodicTaskManager_;
#ifdef PRESTO_ENABLE_LINUX_MEMORY_CHECKER
std::unique_ptr<LinuxMemoryChecker> linuxMemoryChecker_;
#endif // PRESTO_ENABLE_LINUX_MEMORY_CHECKER
std::unique_ptr<PrestoServerOperations> prestoServerOperations_;

// We update these members asynchronously and return in http requests w/o
Expand Down

0 comments on commit 2928a7e

Please sign in to comment.