Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 29 #31

Merged
merged 35 commits into from
Jan 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
113bd6a
Fix search for cmonitor_collector PID in cgroups v2
Dec 23, 2021
1219b35
log errors when in debug mode, fix assert
Dec 23, 2021
e78f29e
more cgroup config refactor
Dec 23, 2021
51a6973
f
Dec 23, 2021
04c0621
be resilient against missing cgroup limits
Dec 23, 2021
827af8f
keep monitoring if cannot read limits
Dec 23, 2021
50265c2
f
Dec 23, 2021
0319049
Fix formatting of Python scripts by increasing length used by black
Dec 23, 2021
17eb5b7
f
Dec 23, 2021
cc524a0
add info on how to build
Dec 23, 2021
d774cef
f
Dec 23, 2021
9471c3d
f
Dec 24, 2021
750d031
f
Dec 24, 2021
85acb51
f
Dec 24, 2021
691bce8
during cgroup init functions disable, if needed, monitoring of a cgro…
Dec 24, 2021
438a31b
allow to sample systemd cgroups
Dec 29, 2021
49cc4e5
add new systemd unit tests
Dec 29, 2021
6f41509
f
Dec 29, 2021
2c891fc
better isolate testcases
Dec 29, 2021
e71ab49
make it possible to use systemd cgroups for unit testing with "self" …
Dec 30, 2021
ddb4091
improve unit testing support
Dec 30, 2021
4bdc76f
regen fedora35 unit test data
f18m Dec 31, 2021
2e1819a
fix unit tests
Jan 2, 2022
6a0c7da
enable unit tests on fedora35 systemd cgroups
Jan 2, 2022
c3cbdc6
ensure unit tests are always detached from gtest executable PID
Jan 2, 2022
53a4346
regen ubuntu data
f18m Jan 2, 2022
bed27e6
collect more unit test data files
Jan 3, 2022
961f4bb
f
f18m Jan 3, 2022
a1b1c4e
fix
f18m Jan 3, 2022
5d91244
fix unit tests so they never use local proc/stat
Jan 3, 2022
e992544
fix collection script for ubuntu 20.04
Jan 3, 2022
97d9b30
fix
f18m Jan 3, 2022
4663038
add ubuntu 20.04 systemd
Jan 3, 2022
6a30faf
refactor
Jan 3, 2022
381a0c7
bump ver
Jan 3, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ examples/*/cmonitor_collector
examples/*/example-load.sh

# autogen from jinja2 templates
tools/chart/cmonitor_chart
tools/chart/cmonitor_chart.py
tools/statistics/cmonitor_statistics
tools/statistics/cmonitor_statistics.py

# ides
Expand All @@ -19,14 +21,22 @@ tools/statistics/cmonitor_statistics.py
.vscode

# unit test results
collector/src/tests/centos7-Linux-3.10.0-x86_64/result*.json
collector/src/tests/centos7-Linux-3.10.0-x86_64/current-sample
collector/src/tests/centos7-Linux-3.10.0-x86_64/sample*/proc/
collector/src/tests/centos7-Linux-3.10.0-x86_64/sample*/sys/
collector/src/tests/centos7-Linux-3.10.0-x86_64/sample*/sample-timestamp
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64/result*.json
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64/sample*
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64/current-sample
collector/src/tests/fedora35-Linux-5.14.17-x86_64/result*.json
collector/src/tests/fedora35-Linux-5.14.17-x86_64/sample*
collector/src/tests/fedora35-Linux-5.14.17-x86_64/current-sample
collector/src/tests/centos7-Linux-3.10.0-x86_64-docker/result*.json
collector/src/tests/centos7-Linux-3.10.0-x86_64-docker/sample*
collector/src/tests/centos7-Linux-3.10.0-x86_64-docker/current-sample
collector/src/tests/centos7-Linux-3.10.0-x86_64-systemd/result*.json
collector/src/tests/centos7-Linux-3.10.0-x86_64-systemd/sample*
collector/src/tests/centos7-Linux-3.10.0-x86_64-systemd/current-sample
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64-docker/result*.json
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64-docker/sample*
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64-docker/current-sample
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64-systemd/result*.json
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64-systemd/sample*
collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64-systemd/current-sample
collector/src/tests/fedora35-Linux-5.14.17-x86_64-docker/result*.json
collector/src/tests/fedora35-Linux-5.14.17-x86_64-docker/sample*
collector/src/tests/fedora35-Linux-5.14.17-x86_64-docker/current-sample
collector/src/tests/fedora35-Linux-5.14.17-x86_64-systemd/result*.json
collector/src/tests/fedora35-Linux-5.14.17-x86_64-systemd/sample*
collector/src/tests/fedora35-Linux-5.14.17-x86_64-systemd/current-sample

2 changes: 1 addition & 1 deletion Constants.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RPM_TARBALL_DIR:=/tmp/cmonitor/tarball
# IMPORTANT: other places where the version must be updated:
# - debian/changelog -> to release a new Ubuntu package
# See also https://github.com/f18m/cmonitor/wiki/new-release
CMONITOR_VERSION:=2.1
CMONITOR_VERSION:=2.2
CMONITOR_RELEASE:=0

ifeq ($(DOCKER_LATEST),1)
Expand Down
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ ifndef BINDIR
@exit 1
endif
$(MAKE) -C collector install DESTDIR=$(DESTDIR) BINDIR=$(BINDIR)
$(MAKE) -C tools/chart install DESTDIR=$(DESTDIR) BINDIR=$(BINDIR)
$(MAKE) -C tools/statistics install DESTDIR=$(DESTDIR) BINDIR=$(BINDIR)
$(MAKE) -C tools install DESTDIR=$(DESTDIR) BINDIR=$(BINDIR)

valgrind:
$(MAKE) -C collector valgrind
Expand Down
48 changes: 44 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ Table of contents of this README:
- [RPM (for Fedora, Centos)](#rpm-for-fedora-centos)
- [Debian package (for Debian, Ubuntu, etc)](#debian-package-for-debian-ubuntu-etc)
- [Docker](#docker)
- [How to build from sources](#how-to-build-from-sources)
- [On Fedora, Centos](#on-fedora-centos)
- [On Debian, Ubuntu](#on-debian-ubuntu)
- [How to use](#how-to-use)
- [Step 1: collect stats](#step-1-collect-stats)
- [Step 2: plot stats collected as JSON](#step-2-plot-stats-collected-as-json)
Expand Down Expand Up @@ -107,12 +110,22 @@ This means that the `cmonitor-collector` utility can run on any Linux kernel reg
(since boot options may alter the cgroups technology in use).

Note that the `cmonitor-collector` utility is currently unit-tested against:
* Centos 7 (Linux kernel v3.10.0)
* Ubuntu 20.04 (Linux kernel v5.4.0)
* Fedora 35 (Linux kernel v5.14.17)
* cgroups created by Docker/systemd on Centos 7 (Linux kernel v3.10.0), click [here](collector/src/tests/centos7-Linux-3.10.0-x86_64-docker/README.md) for more info
* cgroups created by Docker/systemd on Ubuntu 20.04 (Linux kernel v5.4.0), click [here](collector/src/tests/ubuntu20.04-Linux-5.4.0-x86_64-docker/README.md) for more info
* cgroups created by Docker/systemd on Fedora 35 (Linux kernel v5.14.17), click [here](collector/src/tests/fedora35-Linux-5.14.17-x86_64-docker/README.md) for more info

Other kernels will be tested in the near future. Of course, pull requests are welcome to extend coverage.
See [tests folder](collector/src/tests) for more details.

Regarding the cgroup driver, `cmonitor-collector` is tested against both the `cgroupfs` driver (used e.g. by Docker to create cgroups
for containers using cgroups v1) and the `systemd` driver (which creates cgroups for the bare-metal environment, not for containers).
To find out which cgroup driver and which cgroup version you are using when launching e.g. Docker containers, you can run:

```
docker info | grep -i cgroup
```

You may also be interested in this article https://lwn.net/Articles/676831/ for more details on the Docker vs systemd friction in the Linux world.


## How to install

Expand Down Expand Up @@ -165,6 +178,33 @@ which downloads the Docker image for this project from [Docker Hub](https://hub.
See below for examples on how to run the Docker image.


## How to build from sources

### On Fedora, Centos

First of all, check out this repository on your Linux box using git or by decompressing a tarball of a release.
Then run:

```
sudo dnf install -y gcc-c++ make gtest-devel fmt-devel
make all -j
make test # optional step to run unit tests
sudo make install DESTDIR=/usr/local BINDIR=bin # to install in /usr/local/bin
```

### On Debian, Ubuntu

First of all, check out this repository on your Linux box using git or by decompressing a tarball of a release.
Then run:

```
sudo apt install -y libgtest-dev libbenchmark-dev python3 libfmt-dev g++
make all -j
make test # optional step to run unit tests
sudo make install DESTDIR=/usr/local BINDIR=bin # to install in /usr/local/bin
```



## How to use

Expand Down
3 changes: 3 additions & 0 deletions collector/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ all:

test:
$(MAKE) -C src test

test_debug:
$(MAKE) -C src test_debug

clean:
$(MAKE) -C src clean
Expand Down
6 changes: 4 additions & 2 deletions collector/src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,11 @@ ifneq ($(DISABLE_BENCHMARKS_BUILD),1)
endif

test:
$(MAKE) -C tests test
$(MAKE) -C tests test ARGS=$(ARGS)
tests:
$(MAKE) -C tests test
$(MAKE) -C tests test ARGS=$(ARGS)
test_debug:
$(MAKE) -C tests test_debug ARGS=$(ARGS)

benchmark:
$(MAKE) -C benchmarks benchmarks
Expand Down
49 changes: 33 additions & 16 deletions collector/src/cgroups.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
#define CGROUP_COLLECTOR_BUFF_SIZE (8192)

enum CGroupDetected {
CG_NONE, // force newline
CG_VERSION1, // force newline
CG_VERSION2 // force newline
CG_NONE = 0, // force newline
CG_VERSION1 = 1, // force newline
CG_VERSION2 = 2 // force newline
};

std::string CGroupDetected2string(CGroupDetected k);
Expand All @@ -65,6 +65,8 @@ typedef struct {
uint64_t throttled_time_nsec;
} cpuacct_throttling_t;

typedef std::map<std::string /* controller type */, std::string /* path */> cgroup_paths_map_t;

//------------------------------------------------------------------------------
// The CMonitorCgroups object
//------------------------------------------------------------------------------
Expand All @@ -82,13 +84,14 @@ class CMonitorCgroups : public CMonitorAppHelper {
memset(&m_cpuacct_prev_values_for_throttling, 0, sizeof(cpuacct_throttling_t));
}

~CMonitorCgroups()
{
}
~CMonitorCgroups() { }

// main setup
// NOTE: arguments are used only during unit testing
void init(bool include_threads, const std::string& cgroup_prefix_for_test = "", const std::string& proc_prefix_for_test = "");
// NOTE: arguments _for_test are used only during unit testing
void init(bool include_threads, // force newline
const std::string& cgroup_prefix_for_test = "", // force newline
const std::string& proc_prefix_for_test = "", // force newline
uint64_t my_own_pid_for_test = UINT64_MAX);

// one-shot configuration info
void output_config();
Expand All @@ -103,10 +106,18 @@ class CMonitorCgroups : public CMonitorAppHelper {
// misc helpers
bool cgroup_still_exists();
std::set<uint64_t> get_cgroup_cpus() const { return m_cgroup_cpus; }
CGroupDetected get_detected_cgroup_version() const { return m_nCGroupsFound; }

private:
// cgroups config
bool init_check_for_our_pid();
bool get_cgroup_paths_for_this_pid(cgroup_paths_map_t& cgroup_pathsOUT);
bool are_cgroups_v2_enabled(std::string& cgroup_pathOUT);
bool get_cgroup_v1_abs_path_prefix_for_this_pid(const std::string& cgroup_type, std::string& cgroup_pathOUT);
bool detect_cgroup_ver_and_paths_from_myself(const std::string& cgroup_prefix_for_test, uint64_t my_own_pid_for_test);
bool detect_my_own_cgroup();
bool detect_user_provided_cgroup();
bool search_my_pid_in_cgroups(); // sets m_cgroup_processes_path
bool search_processes_cgroup_path(); // sets m_cgroup_processes_path
void v1_read_limits();
void v2_read_limits();
void init_cpuacct(const std::string& cgroup_prefix_for_test);
Expand Down Expand Up @@ -136,15 +147,20 @@ class CMonitorCgroups : public CMonitorAppHelper {
private:
// main switch that indicates if init() was successful or not
CGroupDetected m_nCGroupsFound = CG_NONE;
pid_t m_my_pid = 0;

//------------------------------------------------------------------------------
// paths of cgroups controllers to monitor (either our own cgroup or another one):
//------------------------------------------------------------------------------
std::string m_cgroup_systemd_name;
std::string m_cgroup_memory_kernel_path;
std::string m_cgroup_cpuacct_kernel_path;
std::string m_cgroup_cpuset_kernel_path;
std::string m_cgroup_systemd_name; // contains the "name" of the cgroup
std::string m_cgroup_memory_kernel_path; // contains the abs path to the folder with memory controller files
std::string m_cgroup_cpuacct_kernel_path; // contains the abs path to the folder with cpuacct controller files
std::string m_cgroup_cpuset_kernel_path; // contains the abs path to the folder with cpuset controller files
std::string m_cgroup_processes_path; // contains the abs path to the folder which contains either the "tasks"
// (v1) or "cgroups.procs|threads" (v2) files
std::string m_proc_prefix; // used only during unit testing to insert an arbitrary prefix in front of "/proc"
std::string m_proc_self_cgroup; // defaults to "/proc/self/cgroup" but is changed during unit testing
std::string m_proc_self_mounts; // defaults to "/proc/self/mounts" but is changed during unit testing

//------------------------------------------------------------------------------
// counters of how many times each cgroup_proc_*() main API has been invoked
Expand All @@ -157,14 +173,15 @@ class CMonitorCgroups : public CMonitorAppHelper {
//------------------------------------------------------------------------------
// limits read from the cgroups controllers:
//------------------------------------------------------------------------------
uint64_t m_cgroup_memory_limit_bytes = 0;
uint64_t m_cgroup_memory_limit_bytes = 0; // if UINT64_MAX indicates no memory limit is present
std::set<uint64_t> m_cgroup_cpus;
uint64_t m_cgroup_cpuacct_period_us = 0;
uint64_t m_cgroup_cpuacct_quota_us = 0;
uint64_t m_cgroup_cpuacct_quota_us = 0; // if UINT64_MAX indicates there's no cpu limit

//------------------------------------------------------------------------------
// cpuacct controller
//------------------------------------------------------------------------------
std::string m_cpuacct_controller_name;
FastFileReader m_cgroup_cpuacct_v1_reader_sys_stat; // if has split user/system time
FastFileReader m_cgroup_cpuacct_v1_reader_user_stat; // if has split user/system time
FastFileReader m_cgroup_cpuacct_v1_reader_combined_stat; // if has COMBINED user/system time
Expand All @@ -184,7 +201,7 @@ class CMonitorCgroups : public CMonitorAppHelper {
FastFileReader m_cgroup_memory_v2_current;
FastFileReader m_cgroup_memory_v1v2_stat;
FastFileReader m_cgroup_memory_v1_failcnt;
FastFileReader m_cgroup_memory_v2_events;
FastFileReader m_cgroup_memory_v2_events;

//------------------------------------------------------------------------------
// cgroup network
Expand Down
Loading