From 7f2d0dabee104ad2aac3a276fc8e5667d6b1bb18 Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Mon, 27 May 2019 00:47:53 +0300 Subject: [PATCH 1/8] new benchmark module --- app/include/user_modules.h | 1 + app/modules/benchmark.c | 495 ++++++++++++++++++++++++++++++++ docs/modules/benchmark.md | 565 +++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 4 files changed, 1062 insertions(+) create mode 100644 app/modules/benchmark.c create mode 100644 docs/modules/benchmark.md diff --git a/app/include/user_modules.h b/app/include/user_modules.h index ecb2f4461f..bac23fcde3 100644 --- a/app/include/user_modules.h +++ b/app/include/user_modules.h @@ -12,6 +12,7 @@ //#define LUA_USE_MODULES_ADXL345 //#define LUA_USE_MODULES_AM2320 //#define LUA_USE_MODULES_APA102 +//#define LUA_USE_MODULES_BENCHMARK #define LUA_USE_MODULES_BIT //#define LUA_USE_MODULES_BLOOM //#define LUA_USE_MODULES_BMP085 diff --git a/app/modules/benchmark.c b/app/modules/benchmark.c new file mode 100644 index 0000000000..4fc4203536 --- /dev/null +++ b/app/modules/benchmark.c @@ -0,0 +1,495 @@ +/* + * Source code inspired or based on https://sub.nanona.fi/esp8266/timing-and-ticks.html + * + * This way one can compare measurements of different setups directly against idle, SDK-only based results. + * + */ + +#include +#include "c_types.h" +#include "lauxlib.h" +#include "mem.h" +#include "module.h" +#include "pin_map.h" +#include "platform.h" +#include "hw_timer.h" + +/* need to prevent compiler optimizations */ +#define __NOINLINE __attribute__((noinline)) + +/* local helpers... 
*/ +#define __asize(x) (sizeof(x) / sizeof(x[0])) + +#define luaL_argcheck2(L, cond, numarg, extramsg) \ + if (!(cond)) return luaL_argerror(L, (numarg), (extramsg)) + +#define assertNoErr(L, cond) \ + if (cond != 0) return + +// ##################### +// static test buffers to prevent compiler optimizations + +static volatile uint32_t __dummy_val; +static char __dummy_buf1[1024]; +static char __dummy_buf2[1024]; +static struct { /* used by timer function */ + bool isManualMode; + bool isTimerExclusive; + uint16_t cnt; + uint16_t exitWhenCnt; + uint32_t ticks; + uint32_t btime; + uint32_t duration; + uint32_t ownTime; + int lua_cb_fnc; + uint32_t *individualDelays; +} __timerData; +static uint16_t repetitions; +static uint16_t maskOnW1TSC; + +// ##################### +// helpers + +static inline int32_t asm_ccount(void) { + int32_t r; + asm volatile("rsr %0, ccount" + : "=r"(r)); + return r; +} + +static inline void asm_nop() { + asm volatile("nop"); +} + +static uint32_t __repeat_cb(int cnt, void (*callback)()) { + while (cnt-- > 0) { + callback(); + } +} + +static uint32_t __measure(void (*callback)()) { + const int32_t btime = asm_ccount(); + __repeat_cb(repetitions, callback); + const int32_t etime = asm_ccount(); + return ((uint32_t)(etime - btime)) / repetitions; +} + +// ######################## +// timing-tests + +static void __NOINLINE __timing_empty() { + /* nothing */ +} + +static void __NOINLINE __timing_single_nop() { + asm_nop(); +} + +static void __NOINLINE __timing_re_recurse() { + __timing_single_nop(); +} + +static void __NOINLINE __timing_1us_sleep() { + os_delay_us(1); +} + +static void __NOINLINE __timing_10us_sleep() { + os_delay_us(10); +} + +static void __NOINLINE __timing_100us_sleep() { + os_delay_us(100); +} + +static void __NOINLINE __timing_gpio_read_pin() { + /* read gpio4 -state */ + __dummy_val = GPIO_INPUT_GET(4); +} + +static void __NOINLINE __timing_get_cpu_counts() { + /* read gpio4 -state */ + __dummy_val = asm_ccount(); +} + 
+static void __NOINLINE __timing_gpio_pull_updown() { + GPIO_OUTPUT_SET(5, 1); + GPIO_OUTPUT_SET(5, 0); +} + +static void __NOINLINE __timing_gpio_status_read() { + __dummy_val = GPIO_REG_READ(GPIO_STATUS_ADDRESS); +} + +static void __NOINLINE __timing_gpio_status_write() { + GPIO_REG_WRITE(GPIO_STATUS_W1TC_ADDRESS, 0); +} + +static void __NOINLINE __timing_system_get_time() { + __dummy_val = system_get_time(); +} + +static void __NOINLINE __timing_system_get_rtc_time() { + __dummy_val = system_get_rtc_time(); +} + +static void __NOINLINE __timing_system_get_cpufreq() { + __dummy_val = system_get_cpu_freq(); +} + +static void __NOINLINE __timing_memcpy_1k_bytes() { + os_memcpy(__dummy_buf1, __dummy_buf2, sizeof(__dummy_buf1)); +} + +static void __NOINLINE __timing_memset_1k_bytes() { + os_memset(__dummy_buf1, 0, sizeof(__dummy_buf1)); +} + +static void __NOINLINE __timing_bzero_1k_bytes() { + os_bzero(__dummy_buf1, sizeof(__dummy_buf1)); +} + +static void __NOINLINE __timing_gpio_set_one() { + GPIO_REG_WRITE(GPIO_OUT_W1TS_ADDRESS, maskOnW1TSC); +} + +// timer interrupt handler related + +static void __report_interrupt_times() { + if (__timerData.lua_cb_fnc == 0) return; + lua_State *L = lua_getstate(); + lua_rawgeti(L, LUA_REGISTRYINDEX, __timerData.lua_cb_fnc); + lua_pushnumber(L, (double)__timerData.duration - __timerData.ownTime); + lua_pushnumber(L, (double)__timerData.ownTime); + lua_pushnumber(L, (double)__timerData.duration); + lua_call(L, 3, 0); + luaL_unref(L, LUA_REGISTRYINDEX, __timerData.lua_cb_fnc); +} + +static inline void __set_start_time() { + if (__timerData.exitWhenCnt == 1) { + __timerData.btime = asm_ccount(); + } +} + +static inline void __calc_duration() { + __timerData.duration = ((uint32_t)(asm_ccount() - __timerData.btime)) / (__timerData.cnt - 1); +} + +static inline void __close_timer() { + if (__timerData.isTimerExclusive) { + platform_hw_timer_close_exclusive(); + } else { + platform_hw_timer_close('B'); + } +} + +static inline void 
__interrupt_test_is_over() { + __calc_duration(); + __close_timer(); + __report_interrupt_times(); +} + +static inline void __recordInterruptTimeIfAsked() { + if (__timerData.individualDelays != NULL) { + *(__timerData.individualDelays + __timerData.exitWhenCnt) = asm_ccount(); + } +} + +static inline void __scheduleNextInterruptIfNeeded() { + if (__timerData.isManualMode) { + if (__timerData.isTimerExclusive) { + RTC_REG_WRITE(FRC1_LOAD_ADDRESS, __timerData.ticks); + } else { + platform_hw_timer_arm_ticks('B', __timerData.ticks); + } + } +} + +static void ICACHE_RAM_ATTR __timer_interrupt(os_param_t arg) { + __timerData.exitWhenCnt++; + if (__timerData.exitWhenCnt > __timerData.cnt) { + __close_timer(); + } else { + __recordInterruptTimeIfAsked(); + if (__timerData.exitWhenCnt == __timerData.cnt) { + __interrupt_test_is_over(); + } else { + __set_start_time(); + __scheduleNextInterruptIfNeeded(); + } + } +} + +// ########################### +// lua methods + +static int lbench_empty_call(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_empty)); + return 1; +} + +static int lbench_single_nop(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_single_nop)); + return 1; +} + +static int lbench_get_cpu_counts(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_get_cpu_counts)); + return 1; +} + +static int lbench_re_recurse(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_re_recurse)); + return 1; +} + +static int lbench_1us_sleep(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_1us_sleep)); + return 1; +} + +static int lbench_10us_sleep(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_10us_sleep)); + return 1; +} + +static int lbench_100us_sleep(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_100us_sleep)); + return 1; +} + +static int lbench_gpio_read_pin(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_gpio_read_pin)); + return 1; +} + +static int 
lbench_gpio_pull_updown(lua_State *L) { + PIN_FUNC_SELECT(PERIPHS_IO_MUX_GPIO5_U, FUNC_GPIO5); + GPIO_OUTPUT_SET(5, 0); + lua_pushnumber(L, (double)__measure(__timing_gpio_pull_updown)); + return 1; +} + +static int lbench_gpio_status_read(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_gpio_status_read)); + return 1; +} + +static int lbench_gpio_status_write(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_gpio_status_write)); + return 1; +} + +static int lbench_system_get_time(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_system_get_time)); + return 1; +} + +static int lbench_system_get_rtc_time(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_system_get_rtc_time)); + return 1; +} + +static int lbench_system_get_cpufreq(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_system_get_cpufreq)); + return 1; +} + +static int lbench_memcpy_1k_bytes(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_memcpy_1k_bytes)); + return 1; +} + +static int lbench_memset_1k_bytes(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_memset_1k_bytes)); + return 1; +} + +static int lbench_bzero_1k_bytes(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_bzero_1k_bytes)); + return 1; +} + +static uint16_t __getPinGpioMask(uint8_t pin) { + return 1 << GPIO_ID_PIN(pin_num[pin]); +} + +static int lbench_gpio_set_one(lua_State *L) { + const int pin = lua_tointeger(L, 1); + luaL_argcheck2(L, pin > 0 && pin < GPIO_PIN_NUM, 1, "invalid pin number"); + maskOnW1TSC = __getPinGpioMask(pin); + PIN_FUNC_SELECT(pin_mux[pin], pin_func[pin]); + GPIO_REG_WRITE(GPIO_ENABLE_W1TS_ADDRESS, maskOnW1TSC); + lua_pushnumber(L, (double)__measure(__timing_gpio_set_one)); + GPIO_REG_WRITE(GPIO_ENABLE_W1TC_ADDRESS, maskOnW1TSC); + return 1; +} + +static int __get_lua_cb_arg(lua_State *L, const int argPos) { + lua_pushvalue(L, argPos); + return luaL_ref(L, LUA_REGISTRYINDEX); +} + +static void 
__setup_timer_interrupt(lua_State *L) { + if (__timerData.isTimerExclusive) { + const bool flg = platform_hw_timer_init_exclusive(FRC1_SOURCE, !__timerData.isManualMode, __timer_interrupt, (os_param_t)NULL, (void (*)(void))NULL); + if (!flg) { + luaL_error(L, "Currently platform timer1 is being used by another module.\n"); + return; + } + platform_hw_timer_arm_ticks_exclusive(__timerData.ticks); + } else { + if (!platform_hw_timer_init('B', FRC1_SOURCE, !__timerData.isManualMode)) { + luaL_error(L, "Currently platform timer1 is being used by another module.\n"); + return; + } + if (!platform_hw_timer_set_func('B', __timer_interrupt, (os_param_t)NULL)) { + luaL_error(L, "Failed to assign timer interrupt handler for benchmark module.\n"); + return; + } + if (!platform_hw_timer_arm_ticks('B', __timerData.ticks)) { + luaL_error(L, "Failed to assign arm the timer for benchmark module.\n"); + } + } +} + +static void __benchmark_interrupt_handler_own_time() { + __timerData.ownTime = 0; + const int oldVal = __timerData.lua_cb_fnc; + __timerData.lua_cb_fnc = 0; + const bool oldVal2 = __timerData.isManualMode; + __timerData.isManualMode = false; + for (int i = 0; i <= __timerData.cnt; i++) { + __timer_interrupt((os_param_t)NULL); + } + __timerData.ownTime = __timerData.duration; + __timerData.lua_cb_fnc = oldVal; + __timerData.isManualMode = oldVal2; + __timerData.exitWhenCnt = 0; +} + +static int __benchmark_timer_common(lua_State *L) { + __benchmark_interrupt_handler_own_time(); + __setup_timer_interrupt(L); + return 0; +} + +static void __setup_timer_args(lua_State *L, const bool isManualMode, const bool isTimerExclusive) { + __timerData.ticks = lua_tointeger(L, 1); + __timerData.lua_cb_fnc = __get_lua_cb_arg(L, 3); + __timerData.cnt = repetitions + 1; + __timerData.isTimerExclusive = isTimerExclusive; + __timerData.isManualMode = isManualMode; + __timerData.exitWhenCnt = 0; + __timerData.duration = 0; + if (NULL != __timerData.individualDelays) { + 
free(__timerData.individualDelays); + __timerData.individualDelays = NULL; + } + const int shouldGatherIndividualTimes = lua_toboolean(L, 2); + if (shouldGatherIndividualTimes) { + __timerData.individualDelays = malloc((repetitions + 2) * sizeof(uint32_t)); + } +} + +#define AssertLuaArgs \ + luaL_argcheck2(L, ((uint32_t)lua_tointeger(L, 1)) >= 0, 1, "invalid timerTicks number"); \ + luaL_argcheck2(L, lua_type(L, 3) == LUA_TFUNCTION || lua_type(L, 3) == LUA_TLIGHTFUNCTION, 3, "invalid callback function"); + +static int lbench_timing_frc1_manual_overhead_exclusive(lua_State *L) { + AssertLuaArgs; + __setup_timer_args(L, true, true); + return __benchmark_timer_common(L); +} + +static int lbench_timing_frc1_autoload_overhead_exclusive(lua_State *L) { + AssertLuaArgs; + __setup_timer_args(L, false, true); + return __benchmark_timer_common(L); +} + +static int lbench_timing_frc1_manual_overhead_shared(lua_State *L) { + AssertLuaArgs; + __setup_timer_args(L, true, false); + return __benchmark_timer_common(L); +} + +static int lbench_timing_frc1_autoload_overhead_shared(lua_State *L) { + AssertLuaArgs; + __setup_timer_args(L, false, false); + return __benchmark_timer_common(L); +} + +static int lbench_set_repetitions(lua_State *L) { + const int r = lua_tointeger(L, 1); + luaL_argcheck2(L, r > 0, 1, "invalid repetitions number"); + repetitions = r; + return 0; +} + +static int lbench_get_repetitions(lua_State *L) { + lua_pushinteger(L, repetitions); + return 1; +} + +static int lbench_get_recorded_interrupt_counter(lua_State *L) { + const int r = lua_tointeger(L, 1); + luaL_argcheck2(L, r > 0 && r <= repetitions, 1, "out of range repetitions number"); + if (__timerData.individualDelays != NULL) { + lua_pushnumber(L, (double)*(__timerData.individualDelays + r)); + } else { + lua_pushnumber(L, 0); + } + return 1; +} + +static int lbench_open(lua_State *L) { + gpio_init(); + repetitions = 2000; + __timerData.individualDelays = NULL; +} + +static int 
lbench_print_timer_data(lua_State *L) { + ets_printf("isManualMode = %d\n", __timerData.isManualMode); + ets_printf("isTimerExclusive = %d\n", __timerData.isTimerExclusive); + ets_printf("cnt = %d\n", __timerData.cnt); + ets_printf("exitWhenCnt = %d\n", __timerData.exitWhenCnt); + ets_printf("ticks = %d\n", __timerData.ticks); + ets_printf("btime = %d\n", __timerData.btime); + ets_printf("duration = %d\n", __timerData.duration); + ets_printf("ownTime = %d\n", __timerData.ownTime); + ets_printf("lua_cb_fnc = %d\n", __timerData.lua_cb_fnc); + ets_printf("individualDelays = %d\n", __timerData.individualDelays); +} + +// Module function map +LROT_BEGIN(benchmark) +LROT_FUNCENTRY(print_timer_data, lbench_print_timer_data) +LROT_FUNCENTRY(set_repetitions, lbench_set_repetitions) +LROT_FUNCENTRY(get_repetitions, lbench_get_repetitions) +LROT_FUNCENTRY(empty_call, lbench_empty_call) +LROT_FUNCENTRY(single_nop, lbench_single_nop) +LROT_FUNCENTRY(get_cpu_counts, lbench_get_cpu_counts) +LROT_FUNCENTRY(re_recurse, lbench_re_recurse) +LROT_FUNCENTRY(us1_sleep, lbench_1us_sleep) +LROT_FUNCENTRY(us10_sleep, lbench_10us_sleep) +LROT_FUNCENTRY(us100_sleep, lbench_100us_sleep) +LROT_FUNCENTRY(gpio_read_pin, lbench_gpio_read_pin) +LROT_FUNCENTRY(gpio_pull_updown, lbench_gpio_pull_updown) +LROT_FUNCENTRY(gpio_status_read, lbench_gpio_status_read) +LROT_FUNCENTRY(gpio_status_write, lbench_gpio_status_write) +LROT_FUNCENTRY(system_get_time, lbench_system_get_time) +LROT_FUNCENTRY(system_get_rtc_time, lbench_system_get_rtc_time) +LROT_FUNCENTRY(system_get_cpufreq, lbench_system_get_cpufreq) +LROT_FUNCENTRY(memcpy_1k_bytes, lbench_memcpy_1k_bytes) +LROT_FUNCENTRY(memset_1k_bytes, lbench_memset_1k_bytes) +LROT_FUNCENTRY(bzero_1k_bytes, lbench_bzero_1k_bytes) +LROT_FUNCENTRY(gpio_set_one, lbench_gpio_set_one) +LROT_FUNCENTRY(frc1_manual_exclusive, lbench_timing_frc1_manual_overhead_exclusive) +LROT_FUNCENTRY(frc1_autoload_exclusive, lbench_timing_frc1_autoload_overhead_exclusive) 
+LROT_FUNCENTRY(frc1_manual_shared, lbench_timing_frc1_manual_overhead_shared) +LROT_FUNCENTRY(frc1_autoload_shared, lbench_timing_frc1_autoload_overhead_shared) +LROT_FUNCENTRY(get_recorded_interrupt_counter, lbench_get_recorded_interrupt_counter) +LROT_END(benchmark, NULL, 0) + +NODEMCU_MODULE(BENCHMARK, "benchmark", benchmark, lbench_open); diff --git a/docs/modules/benchmark.md b/docs/modules/benchmark.md new file mode 100644 index 0000000000..c9cca9bbfe --- /dev/null +++ b/docs/modules/benchmark.md @@ -0,0 +1,565 @@ +# PWM Module + +| Since | Origin / Contributor | Maintainer | Source | +| :----- | :-------------------- | :---------- | :------ | +| 2019-05-24 | [fikin](https://github.com/fikin) | [fikin](https://github.com/fikin) | [benchmark.c](../../../app/modules/benchmark.c)| + +Module to benchmark performance of various NodeMCU and ESP8266 operations. + +All benchmarking internally is done using CPU ticks. + +By default every test is repeated 2000 times before its duration is calculated. But for CPU160 one would probably want to increase it in order to obtain more precise results. + +This module is thought to be useful mainly to other modules developers to understand better low level API timing aspects. And to provide with basis for implmenenting other custom benchmarking metrics. + +Supported are CPU80 and CPU160. 
+ +Typical usage is as following: + +```lua +print(benchmark.empty_call()) +print(benchmark.single_nop()) +print(benchmark.get_cpu_counts()) +print(benchmark.re_recurse()) +print(benchmark.us1_sleep()) +print(benchmark.us10_sleep()) +print(benchmark.us100_sleep()) +print(benchmark.gpio_read_pin()) +print(benchmark.gpio_pull_updown()) +print(benchmark.gpio_status_read()) +print(benchmark.gpio_status_write()) +print(benchmark.system_get_time()) +print(benchmark.system_get_rtc_time()) +print(benchmark.system_get_cpufreq()) +print(benchmark.memcpy_1k_bytes()) +print(benchmark.memset_1k_bytes()) +print(benchmark.bzero_1k_bytes()) +print(benchmark.gpio_set_one(4)) +benchmark.frc1_manual_exclusive(10, false, + function(osOverhead, testingHandlerOverhed, totalTestTime) + print(osOverhead, testingHandlerOverhed, totalTestTime) + end) +benchmark.frc1_autoload_exclusive(10, false, + function(osOverhead, testingHandlerOverhed, totalTestTime) + print(osOverhead, testingHandlerOverhed, totalTestTime) + end) +benchmark.frc1_manual_shared(10, false, + function(osOverhead, testingHandlerOverhed, totalTestTime) + print(osOverhead, testingHandlerOverhed, totalTestTime) + end) +benchmark.frc1_autoload_shared(10, false, + function(osOverhead, testingHandlerOverhed, totalTestTime) + print(osOverhead, testingHandlerOverhed, totalTestTime) + end +``` + +Credits to https://sub.nanona.fi/esp8266/timing-and-ticks.html for inspiring some of the tests and setting up the standard for the rest. + +## benchmark.set_repetitions() + +Assign new repetitions value. By default the module uses 2000. + +### Syntax + +`benchmark.set_repetitions(repetitionsCnt)` + +### Parameters + +- `repetitionsCnt` int, new repetitions count to be used by benchmarking tests. + +### Returns + +`nil` + +### See also + +- [benchmark.get_repetitions()](#benchmarkget_repetitions) + +## benchmark.get_repetitions() + +Get number of repetitions each test is repeated before its time is benchmarked. 
+ +### Syntax + +`benchmark.get_repetitions()` + +### Parameters + +`nil` + +### Returns + +- `repetitionsCnt` int, repetitions count to be used by benchmarking tests. + +### See also + +- [benchmark.set_repetitions()](#benchmarkset_repetitions) + +## benchmark.empty_call() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.empty_call()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.single_nop() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.single_nop()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.get_cpu_counts() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.get_cpu_counts()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.re_recurse() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.re_recurse()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.us1_sleep() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.us1_sleep()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.us10_sleep() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.us10_sleep()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.us100_sleep() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). 
+ +### Syntax + +`benchmark.us100_sleep()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.gpio_read_pin() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.gpio_read_pin()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.gpio_pull_updown() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). +Test is using GPIO_OUTPUT_SET to pull pin 5 up and then again to pull it down. + +### Syntax + +`benchmark.gpio_pull_updown()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.gpio_status_read() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.gpio_status_read()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.gpio_status_write() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.gpio_status_write()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.system_get_time() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.system_get_time()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.system_get_rtc_time() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.system_get_rtc_time()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.system_get_cpufreq() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). 
+ +### Syntax + +`benchmark.system_get_cpufreq()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.memcpy_1k_bytes() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.memcpy_1k_bytes()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.memset_1k_bytes() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.memset_1k_bytes()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.bzero_1k_bytes() + +See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). + +### Syntax + +`benchmark.bzero_1k_bytes()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.gpio_set_one() + +Pulls given pin up using `GPIO_REG_WRITE(GPIO_OUT_W1TS_ADDRESS, pinAsMask)`. +This is perhaps the fastest gpio operation from all available. + +### Syntax + +`benchmark.gpio_set_one(pin)` + +### Parameters + +- `pin` 1~12, IO index + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + +## benchmark.frc1_manual_exclusive() + +Test the operating system overhead when dispatching TIMER1 FRC1 events using MANUAL mode. + +Timer is used in exclusive mode i.e. no other module can use it at the same time. + +The test handler is coded to start measurement after first interrupt. + +This test uses as input as timer ticks which equals to 16 CPU ticks for CPU80 and 32 CPU ticks for CPU160. + +Times measured is interrupt switching time inside operating system, test's own handler time and total test duration. + +Operating system time is also including the waiting time between interrupts. If one specifies 0 timer ticks this delay is none. 
But there is a good chance the board would brick. + +This method allows for recording CPU ticks of each interrupt call. Recording adds a little overhead to internal timer handler and allocates memory for all repetitions. The recorded values can be read via `benchmark.get_recorded_interrupt_counter(repetitionNbr)`. These values allow for further analysis of timing precisions and quality. + +### Syntax + +`benchmark.frc1_manual_exclusive(timerTicks,gatherDelays,callback_function)` + +### Parameters + +- `timerTicks` int, timer ticks to load TIMER1 FRC1 each time with. +- `gatherDelays` bool, should it gather current CPU ticks counter of each individual interrupt. +- `callback_function(osOverhead,testHandlerOwnTime,testDuration)` callback function when timer test ends. + - `osOverhead` int, CPU ticks of operating system overhead plus delay needed to fulfil timerTicks interrupt frequency. + - `testHandlerOwnTime` int, CPU ticks of test's own handler overhead + - `testDuration` int, total test duration as CPU ticks. + +### Returns + +`nil` + +### See also + +- [benchmark.get_recorded_interrupt_counter()](#benchmarkget_recorded_interrupt_counter) + +## benchmark.frc1_autoload_exclusive() + +Test the operating system overhead when dispatching TIMER1 FRC1 events using AUTO mode. + +Timer is used in exclusive mode i.e. no other module can use it at the same time. + +The test handler is coded to start measurement after first interrupt. + +This test uses as input as timer ticks which equals to 16 CPU ticks for CPU80 and 32 CPU ticks for CPU160. + +Times measured is interrupt switching time inside operating system, test's own handler time and total test duration. + +Operating system time is also including the waiting time between interrupts. If one specifies 0 timer ticks this delay is none. But there is a good chance the board would brick. + +This method allows for recording CPU ticks of each interrupt call. 
Recording adds a little overhead to internal timer handler and allocates memory for all repetitions. The recorded values can be read via `benchmark.get_recorded_interrupt_counter(repetitionNbr)`. These values allow for further analysis of timing precisions and quality. + +### Syntax + +`benchmark.frc1_autoload_exclusive(timerTicks,gatherDelays,callback_function)` + +### Parameters + +- `timerTicks` int, timer ticks to load TIMER1 FRC1 each time with. +- `gatherDelays` bool, should it gather current CPU ticks counter of each individual interrupt. +- `callback_function(osOverhead,testHandlerOwnTime,testDuration)` callback function when timer test ends. + - `osOverhead` int, CPU ticks of operating system overhead plus delay needed to fulfil timerTicks interrupt frequency. + - `testHandlerOwnTime` int, CPU ticks of test's own handler overhead + - `testDuration` int, total test duration as CPU ticks. + +### Returns + +`nil` + +### See also + +- [benchmark.get_recorded_interrupt_counter()](#benchmarkget_recorded_interrupt_counter) + +## benchmark.frc1_manual_shared() + +Test the operating system overhead when dispatching TIMER1 FRC1 events using MANUAL mode. + +Timer is used in shared mode i.e. other modules can use it at the same time. + +The test handler is coded to start measurement after first interrupt. + +This test uses as input as timer ticks which equals to 16 CPU ticks for CPU80 and 32 CPU ticks for CPU160. + +Times measured is interrupt switching time inside operating system, test's own handler time and total test duration. + +Operating system time is also including the waiting time between interrupts. If one specifies 0 timer ticks this delay is none. But there is a good chance the board would brick. + +This method allows for recording CPU ticks of each interrupt call. Recording adds a little overhead to internal timer handler and allocates memory for all repetitions. 
The recorded values can be read via `benchmark.get_recorded_interrupt_counter(repetitionNbr)`. These values allow for further analysis of timing precisions and quality. + +### Syntax + +`benchmark.frc1_manual_shared(timerTicks,gatherDelays,callback_function)` + +### Parameters + +- `timerTicks` int, timer ticks to load TIMER1 FRC1 each time with. +- `gatherDelays` bool, should it gather current CPU ticks counter of each individual interrupt. +- `callback_function(osOverhead,testHandlerOwnTime,testDuration)` callback function when timer test ends. + - `osOverhead` int, CPU ticks of operating system overhead plus delay needed to fulfil timerTicks interrupt frequency. + - `testHandlerOwnTime` int, CPU ticks of test's own handler overhead + - `testDuration` int, total test duration as CPU ticks. + +### Returns + +`nil` + +### See also + +- [benchmark.get_recorded_interrupt_counter()](#benchmarkget_recorded_interrupt_counter) + +## benchmark.frc1_autoload_shared() + +Test the operating system overhead when dispatching TIMER1 FRC1 events using AUTO mode. + +Timer is used in shared mode i.e. other modules can use it at the same time. + +The test handler is coded to start measurement after first interrupt. + +This test uses as input as timer ticks which equals to 16 CPU ticks for CPU80 and 32 CPU ticks for CPU160. + +Times measured is interrupt switching time inside operating system, test's own handler time and total test duration. + +Operating system time is also including the waiting time between interrupts. If one specifies 0 timer ticks this delay is none. But there is a good chance the board would brick. + +This method allows for recording CPU ticks of each interrupt call. Recording adds a little overhead to internal timer handler and allocates memory for all repetitions. The recorded values can be read via `benchmark.get_recorded_interrupt_counter(repetitionNbr)`. These values allow for further analysis of timing precisions and quality. 
+ +### Syntax + +`benchmark.frc1_autoload_shared(timerTicks,gatherDelays,callback_function)` + +### Parameters + +- `timerTicks` int, timer ticks to load TIMER1 FRC1 each time with. +- `gatherDelays` bool, should it gather current CPU ticks counter of each individual interrupt. +- `callback_function(osOverhead,testHandlerOwnTime,testDuration)` callback function when timer test ends. + - `osOverhead` int, CPU ticks of operating system overhead plus delay needed to fulfil timerTicks interrupt frequency. + - `testHandlerOwnTime` int, CPU ticks of test's own handler overhead + - `testDuration` int, total test duration as CPU ticks. + +### Returns + +`nil` + +### See also + +- [benchmark.get_recorded_interrupt_counter()](#benchmarkget_recorded_interrupt_counter) + +## benchmark.get_recorded_interrupt_counter() + +Get recorded in previous run CPU ticks counter at the moment of timer interrupt. Methods testing timer interrupts are `benchmark.frc1_XXX_XXX()`. If one runs them with `gatherDelays = True` this method will provide access to the records. + +### Syntax + +`benchmark.get_recorded_interrupt_counter(repetitionNbr)` + +### Parameters + +- `repetitionNbr` 1-repetitions, CPU ticks counter of which repetitions to return. + +### Returns + +- `cpuTicksCounter` int, CPU ticks counter or 0 if there is not such recording + +### Example + +```lua +-- run some interrupt test and record the CPU counter at each interrupt +print(benchmark.set_repetitions(50)) +benchmark.frc1_manual_exclusive(10, true, + function(osOverhead, testingHandlerOverhed, totalTestTime) + print(osOverhead, testingHandlerOverhed, totalTestTime) + for i=1,benchmark.get_repetitions() do + print("CPU counter at interrupt "..i.." 
= "..benchmark.get_recorded_interrupt_counter(i)) + end + end) + +``` diff --git a/mkdocs.yml b/mkdocs.yml index bd79d837ca..dfdecc2981 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -57,6 +57,7 @@ pages: - 'adxl345' : 'modules/adxl345.md' - 'am2320': 'modules/am2320.md' - 'apa102': 'modules/apa102.md' + - 'benchmark': 'modules/benchmark.md' - 'bit': 'modules/bit.md' - 'bloom' : 'modules/bloom.md' - 'bme280': 'modules/bme280.md' From a9cfee73aab524dd189285944b85e5ae86d99cfe Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Mon, 27 May 2019 00:58:27 +0300 Subject: [PATCH 2/8] copy paste error fix --- docs/modules/benchmark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/benchmark.md b/docs/modules/benchmark.md index c9cca9bbfe..93cbc27cc3 100644 --- a/docs/modules/benchmark.md +++ b/docs/modules/benchmark.md @@ -1,4 +1,4 @@ -# PWM Module +# Benchmark Module | Since | Origin / Contributor | Maintainer | Source | | :----- | :-------------------- | :---------- | :------ | From 7593baec082762ddf1bc96b46ccfed9ca6e8fe6c Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Mon, 27 May 2019 01:01:17 +0300 Subject: [PATCH 3/8] fixing link ref syntax --- docs/modules/benchmark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/benchmark.md b/docs/modules/benchmark.md index 93cbc27cc3..044a5a8330 100644 --- a/docs/modules/benchmark.md +++ b/docs/modules/benchmark.md @@ -53,7 +53,7 @@ benchmark.frc1_autoload_shared(10, false, end ``` -Credits to https://sub.nanona.fi/esp8266/timing-and-ticks.html for inspiring some of the tests and setting up the standard for the rest. +Credits to [https://sub.nanona.fi/esp8266/timing-and-ticks.html](https://sub.nanona.fi/esp8266/timing-and-ticks.html) for inspiring some of the tests and setting up the standard for the rest. 
## benchmark.set_repetitions() From 30cb7659956cbc51b0308f074c6f288c53d0079f Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Mon, 12 Aug 2019 18:58:04 +0300 Subject: [PATCH 4/8] using stdint imports instead of c-types --- app/modules/benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/benchmark.c b/app/modules/benchmark.c index 4fc4203536..f8c862b959 100644 --- a/app/modules/benchmark.c +++ b/app/modules/benchmark.c @@ -6,7 +6,7 @@ */ #include -#include "c_types.h" +#include #include "lauxlib.h" #include "mem.h" #include "module.h" From 8290195257c4714f31ef268125f1ad90c360372d Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Mon, 12 Aug 2019 19:11:12 +0300 Subject: [PATCH 5/8] adding benchmarking for syste_adc_read function --- app/modules/benchmark.c | 10 ++++++++++ docs/modules/benchmark.md | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/app/modules/benchmark.c b/app/modules/benchmark.c index f8c862b959..a14bc4d3ae 100644 --- a/app/modules/benchmark.c +++ b/app/modules/benchmark.c @@ -124,6 +124,10 @@ static void __NOINLINE __timing_gpio_status_write() { GPIO_REG_WRITE(GPIO_STATUS_W1TC_ADDRESS, 0); } +static void __NOINLINE __timing_adc_read() { + system_adc_read(); +} + static void __NOINLINE __timing_system_get_time() { __dummy_val = system_get_time(); } @@ -280,6 +284,11 @@ static int lbench_gpio_status_write(lua_State *L) { return 1; } +static int lbench_adc_read(lua_State *L) { + lua_pushnumber(L, (double)__measure(__timing_adc_read)); + return 1; +} + static int lbench_system_get_time(lua_State *L) { lua_pushnumber(L, (double)__measure(__timing_system_get_time)); return 1; @@ -478,6 +487,7 @@ LROT_FUNCENTRY(gpio_read_pin, lbench_gpio_read_pin) LROT_FUNCENTRY(gpio_pull_updown, lbench_gpio_pull_updown) LROT_FUNCENTRY(gpio_status_read, lbench_gpio_status_read) LROT_FUNCENTRY(gpio_status_write, lbench_gpio_status_write) +LROT_FUNCENTRY(adc_read, lbench_adc_read) LROT_FUNCENTRY(system_get_time, 
lbench_system_get_time) LROT_FUNCENTRY(system_get_rtc_time, lbench_system_get_rtc_time) LROT_FUNCENTRY(system_get_cpufreq, lbench_system_get_cpufreq) diff --git a/docs/modules/benchmark.md b/docs/modules/benchmark.md index 044a5a8330..bc593d2689 100644 --- a/docs/modules/benchmark.md +++ b/docs/modules/benchmark.md @@ -28,6 +28,7 @@ print(benchmark.gpio_read_pin()) print(benchmark.gpio_pull_updown()) print(benchmark.gpio_status_read()) print(benchmark.gpio_status_write()) +print(benchmark.adc_read()) print(benchmark.system_get_time()) print(benchmark.system_get_rtc_time()) print(benchmark.system_get_cpufreq()) @@ -272,6 +273,22 @@ See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html - `cpuTicks` int, CPU ticks needed to perform the operation +## benchmark.adc_read() + +Times reading of analog input (ADC) using `system_adc_read()` function. + +### Syntax + +`benchmark.adc_read()` + +### Parameters + +`nil` + +### Returns + +- `cpuTicks` int, CPU ticks needed to perform the operation + ## benchmark.system_get_time() See [external documentation](https://sub.nanona.fi/esp8266/timing-and-ticks.html). From 4b23e84f0a9ca9f39d615d208f06e22f05a7aa3d Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Mon, 9 Sep 2019 19:43:34 +0300 Subject: [PATCH 6/8] adding missing include statements --- app/modules/benchmark.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/modules/benchmark.c b/app/modules/benchmark.c index a14bc4d3ae..a75eac79e4 100644 --- a/app/modules/benchmark.c +++ b/app/modules/benchmark.c @@ -7,10 +7,12 @@ #include #include +#include "osapi.h" #include "lauxlib.h" #include "mem.h" #include "module.h" #include "pin_map.h" +#include "user_interface.h" #include "platform.h" #include "hw_timer.h" From 3340353825439d995868f4b8cb8c302681b4966b Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Thu, 3 Oct 2019 21:18:49 +0300 Subject: [PATCH 7/8] adding benchmarking of lua functions. and explicit getter for ccount register. 
--- app/modules/benchmark.c | 40 +++++++++++++++++++++++++++++++++++---- docs/modules/benchmark.md | 40 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/app/modules/benchmark.c b/app/modules/benchmark.c index a75eac79e4..beea56bca6 100644 --- a/app/modules/benchmark.c +++ b/app/modules/benchmark.c @@ -5,16 +5,16 @@ * */ -#include #include -#include "osapi.h" +#include +#include "platform.h" +#include "hw_timer.h" #include "lauxlib.h" #include "mem.h" #include "module.h" +#include "osapi.h" #include "pin_map.h" #include "user_interface.h" -#include "platform.h" -#include "hw_timer.h" /* need to prevent compiler optimizations */ #define __NOINLINE __attribute__((noinline)) @@ -48,6 +48,10 @@ static struct { /* used by timer function */ } __timerData; static uint16_t repetitions; static uint16_t maskOnW1TSC; +static struct { /* used by benchmarking lua functions */ + int cb; + lua_State *L; +} __luaFunc; // ##################### // helpers @@ -226,6 +230,13 @@ static void ICACHE_RAM_ATTR __timer_interrupt(os_param_t arg) { } } +// lua function benchmarking + +static void __run_lua_func() { + lua_rawgeti(__luaFunc.L, LUA_REGISTRYINDEX, __luaFunc.cb); + lua_call(__luaFunc.L, 0, 0); +} + // ########################### // lua methods @@ -460,6 +471,8 @@ static int lbench_open(lua_State *L) { __timerData.individualDelays = NULL; } + + static int lbench_print_timer_data(lua_State *L) { ets_printf("isManualMode = %d\n", __timerData.isManualMode); ets_printf("isTimerExclusive = %d\n", __timerData.isTimerExclusive); @@ -473,8 +486,27 @@ static int lbench_print_timer_data(lua_State *L) { ets_printf("individualDelays = %d\n", __timerData.individualDelays); } +static int lbench_ccount(lua_State *L) { + lua_pushinteger(L, asm_ccount()); + return 1; +} + +static int lbench_lua_func(lua_State *L) { + // set up the new callback if present + lua_pushvalue(L, 1); + __luaFunc.cb = luaL_ref(L, LUA_REGISTRYINDEX); + __luaFunc.L = L; + 
const double val = (double)__measure(__run_lua_func); + luaL_unref(__luaFunc.L, LUA_REGISTRYINDEX, __luaFunc.cb); + lua_pushnumber(L, val); + // lua_pushnumber(L, (double)__measure(__run_lua_func)); + return 1; +} + // Module function map LROT_BEGIN(benchmark) +LROT_FUNCENTRY(ccount, lbench_ccount) +LROT_FUNCENTRY(bench_lua_func, lbench_lua_func) LROT_FUNCENTRY(print_timer_data, lbench_print_timer_data) LROT_FUNCENTRY(set_repetitions, lbench_set_repetitions) LROT_FUNCENTRY(get_repetitions, lbench_get_repetitions) diff --git a/docs/modules/benchmark.md b/docs/modules/benchmark.md index bc593d2689..8914039754 100644 --- a/docs/modules/benchmark.md +++ b/docs/modules/benchmark.md @@ -56,6 +56,46 @@ benchmark.frc1_autoload_shared(10, false, Credits to [https://sub.nanona.fi/esp8266/timing-and-ticks.html](https://sub.nanona.fi/esp8266/timing-and-ticks.html) for inspiring some of the tests and setting up the standard for the rest. +## benchmark.ccount() + +Get value of CPU CCOUNT register which contains CPU ticks. It supports CPU80 and CPU160. + +This allows for calculation of elapsed time with microsecond precision. For example for CPU80 there are 80 ticks/us (80000 ticks/ms). + +Note the register is 32-bits and rolls over. + +### Syntax + +`benchmark.ccount()` + +### Returns + +The current value of CCOUNT register. + +### Example + +```lua +print ("benchmark.ccount() takes ", benchmark.ccount()-benchmark.ccount(), " CPU ticks to execute.") +``` + +## benchmark.bench_lua_func() + +Benchmark the given lua function. + +### Syntax + +`benchmark.bench_lua_func(function() end)` + +### Returns + +The time to execute the function in terms of CPU ticks. + +### Example + +```lua +print ("empty lua function takes ", benchmark.bench_lua_func(function()end), " CPU ticks to execute.") +``` + ## benchmark.set_repetitions() Assign new repetitions value. By default the module uses 2000. 
From c2f375c97250d226eaff7b7f47077a35ee597fcd Mon Sep 17 00:00:00 2001 From: Nikolay Fiykov Date: Sun, 3 Nov 2019 23:32:37 +0200 Subject: [PATCH 8/8] more examples --- docs/modules/benchmark.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/modules/benchmark.md b/docs/modules/benchmark.md index 8914039754..1efe5a81e7 100644 --- a/docs/modules/benchmark.md +++ b/docs/modules/benchmark.md @@ -80,7 +80,13 @@ print ("benchmark.ccount() takes ", benchmark.ccount()-benchmark.ccount(), " CPU ## benchmark.bench_lua_func() -Benchmark the given lua function. +Benchmark the given lua function. + +The returned value includes the Lua function's own logic plus the low-level cost of calling a Lua function. In order to obtain only the function's logic time, one has to offset the value by the result for an empty function: + +```lua +print ("print AA takes ", benchmark.bench_lua_func(function() print("AA") end)-benchmark.bench_lua_func(function()end), " CPU ticks to execute.") +``` ### Syntax @@ -93,7 +99,7 @@ The time to execute the function in terms of CPU ticks. ### Example ```lua -print ("empty lua function takes ", benchmark.bench_lua_func(function()end), " CPU ticks to execute.") +print ("empty lua function takes ", benchmark.bench_lua_func(function()end)-benchmark.bench_lua_func(function()end), " CPU ticks to execute.") ``` ## benchmark.set_repetitions()