From 221bd86f04d204f6a7fd9cc2f5abbe0ae7162bf7 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 20 Jul 2023 09:52:28 +0000 Subject: [PATCH 1/4] Remove MMTk counted malloc --- src/gc-common.c | 62 ++++++++++++++++++++++++++++++++++++++++++ src/gc.c | 62 ------------------------------------------ src/gc.h | 3 +++ src/mmtk-gc.c | 71 +++++-------------------------------------------- 4 files changed, 71 insertions(+), 127 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index cfb83c08a7a6b..769ca8377432e 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -540,6 +540,68 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); } +// malloc wrappers, aligned allocation + +#if defined(_OS_WINDOWS_) +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ + return _aligned_malloc(sz ? sz : 1, align); +} +inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, + size_t align) +{ + (void)oldsz; + return _aligned_realloc(p, sz ? sz : 1, align); +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + _aligned_free(p); +} +#else +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return malloc(sz); +#endif + void *ptr; + if (posix_memalign(&ptr, align, sz)) + return NULL; + return ptr; +} +inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, + size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return realloc(d, sz); +#endif + void *b = jl_malloc_aligned(sz, align); + if (b != NULL) { + memcpy(b, d, oldsz > sz ? 
sz : oldsz); + free(d); + } + return b; +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + free(p); +} +#endif + +void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +{ + if (a->flags.how == 2) { + char *d = (char*)a->data - a->offset*a->elsize; + if (a->flags.isaligned) + jl_free_aligned(d); + else + free(d); + gc_num.freed += jl_array_nbytes(a); + gc_num.freecall++; + } +} + uv_mutex_t gc_perm_lock; JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) diff --git a/src/gc.c b/src/gc.c index 90eae32f0affc..52eef196d45d4 100644 --- a/src/gc.c +++ b/src/gc.c @@ -376,55 +376,6 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) } } -// malloc wrappers, aligned allocation - -#if defined(_OS_WINDOWS_) -inline void *jl_malloc_aligned(size_t sz, size_t align) -{ - return _aligned_malloc(sz ? sz : 1, align); -} -inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, - size_t align) -{ - (void)oldsz; - return _aligned_realloc(p, sz ? sz : 1, align); -} -inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - _aligned_free(p); -} -#else -inline void *jl_malloc_aligned(size_t sz, size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return malloc(sz); -#endif - void *ptr; - if (posix_memalign(&ptr, align, sz)) - return NULL; - return ptr; -} -inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, - size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return realloc(d, sz); -#endif - void *b = jl_malloc_aligned(sz, align); - if (b != NULL) { - memcpy(b, d, oldsz > sz ? 
sz : oldsz); - free(d); - } - return b; -} -inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - free(p); -} -#endif - static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT { arraylist_push(&to_finalize, o); @@ -1154,19 +1105,6 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT gc_time_big_end(); } -static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT -{ - if (a->flags.how == 2) { - char *d = (char*)a->data - a->offset*a->elsize; - if (a->flags.isaligned) - jl_free_aligned(d); - else - free(d); - gc_num.freed += jl_array_nbytes(a); - gc_num.freecall++; - } -} - static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT { gc_time_mallocd_array_start(); diff --git a/src/gc.h b/src/gc.h index 701c2c769e1b4..652e17063a42d 100644 --- a/src/gc.h +++ b/src/gc.h @@ -33,10 +33,12 @@ extern void maybe_collect(jl_ptls_t ptls); extern void run_finalizer(jl_task_t *ct, void *o, void *ff); extern void *jl_malloc_aligned(size_t sz, size_t align); +extern void jl_free_aligned(void *p); extern void *jl_gc_counted_calloc(size_t nm, size_t sz); extern void jl_gc_counted_free_with_size(void *p, size_t sz); extern void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align); +extern void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT; extern void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f); extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value); @@ -47,6 +49,7 @@ extern void gc_premark(jl_ptls_t ptls2); extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner, int8_t can_collect); extern size_t jl_array_nbytes(jl_array_t *a); +extern void run_finalizers(jl_task_t *ct); #ifdef OBJPROFILE void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT; diff --git a/src/mmtk-gc.c 
b/src/mmtk-gc.c index 6f7e5f124e4b0..c53f9213174c4 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -53,31 +53,6 @@ static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) } } - -// malloc wrappers, aligned allocation -// --- - -inline void *jl_malloc_aligned(size_t sz, size_t align) -{ - return mmtk_malloc_aligned(sz ? sz : 1, align); // XXX sz -} -inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, - size_t align) -{ - void *res = jl_malloc_aligned(sz, align); - if (res != NULL) { - memcpy(res, d, oldsz > sz ? sz : oldsz); - mmtk_free_aligned(d); - } - return res; -} -inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - mmtk_free_aligned(p); -} - - -// finalizers // --- JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) @@ -190,20 +165,6 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o return v; } -void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT -{ - if (a->flags.how == 2) { - char *d = (char*)a->data - a->offset*a->elsize; - if (a->flags.isaligned) - mmtk_free_aligned(d); - else - mmtk_free(d); - gc_num.freed += jl_array_nbytes(a); - gc_num.freecall++; - } -} - - // roots // --- @@ -384,11 +345,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; malloc_maybe_collect(ptls, sz); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - return mmtk_counted_malloc(sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz); } return malloc(sz); } @@ -400,11 +357,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; malloc_maybe_collect(ptls, sz); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - 
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - return mmtk_counted_calloc(nm, sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); } return calloc(nm, sz); } @@ -413,16 +366,10 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; + free(p); if (pgcstack && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.freecall, - jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); - mmtk_free_with_size(p, sz); - return; + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz); } - free(p); } JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) @@ -433,16 +380,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; malloc_maybe_collect(ptls, sz); if (sz < old) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz); else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - return mmtk_realloc_with_old_size(p, sz, old); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old); } - // TODO: correct? 
return realloc(p, sz); } From a9bbaa78acbd16ce23ddda44a76e32b93d365636 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 24 Jul 2023 00:45:42 +0000 Subject: [PATCH 2/4] Account for element count in jl_gc_counted_calloc's malloc_maybe_collect call --- src/mmtk-gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index c53f9213174c4..065a46ad557d4 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -356,7 +356,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_task_t *ct = jl_current_task; if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; - malloc_maybe_collect(ptls, sz); + malloc_maybe_collect(ptls, num * sz); jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); } return calloc(nm, sz); From 09a4f7b16a89cc21fe06425d071ba7f42671e102 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 24 Jul 2023 02:07:45 +0000 Subject: [PATCH 3/4] Move malloc methods back to each GC. --- src/gc-common.c | 62 ------------------------------------------------ src/gc.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ src/gc.h | 3 +-- src/mmtk-gc.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 64 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 769ca8377432e..cfb83c08a7a6b 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -540,68 +540,6 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); } -// malloc wrappers, aligned allocation - -#if defined(_OS_WINDOWS_) -inline void *jl_malloc_aligned(size_t sz, size_t align) -{ - return _aligned_malloc(sz ? sz : 1, align); -} -inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, - size_t align) -{ - (void)oldsz; - return _aligned_realloc(p, sz ? 
sz : 1, align); -} -inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - _aligned_free(p); -} -#else -inline void *jl_malloc_aligned(size_t sz, size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return malloc(sz); -#endif - void *ptr; - if (posix_memalign(&ptr, align, sz)) - return NULL; - return ptr; -} -inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, - size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return realloc(d, sz); -#endif - void *b = jl_malloc_aligned(sz, align); - if (b != NULL) { - memcpy(b, d, oldsz > sz ? sz : oldsz); - free(d); - } - return b; -} -inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - free(p); -} -#endif - -void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT -{ - if (a->flags.how == 2) { - char *d = (char*)a->data - a->offset*a->elsize; - if (a->flags.isaligned) - jl_free_aligned(d); - else - free(d); - gc_num.freed += jl_array_nbytes(a); - gc_num.freecall++; - } -} - uv_mutex_t gc_perm_lock; JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) diff --git a/src/gc.c b/src/gc.c index 52eef196d45d4..90eae32f0affc 100644 --- a/src/gc.c +++ b/src/gc.c @@ -376,6 +376,55 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) } } +// malloc wrappers, aligned allocation + +#if defined(_OS_WINDOWS_) +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ + return _aligned_malloc(sz ? sz : 1, align); +} +inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, + size_t align) +{ + (void)oldsz; + return _aligned_realloc(p, sz ? 
sz : 1, align); +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + _aligned_free(p); +} +#else +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return malloc(sz); +#endif + void *ptr; + if (posix_memalign(&ptr, align, sz)) + return NULL; + return ptr; +} +inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, + size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return realloc(d, sz); +#endif + void *b = jl_malloc_aligned(sz, align); + if (b != NULL) { + memcpy(b, d, oldsz > sz ? sz : oldsz); + free(d); + } + return b; +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + free(p); +} +#endif + static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT { arraylist_push(&to_finalize, o); @@ -1105,6 +1154,19 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT gc_time_big_end(); } +static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +{ + if (a->flags.how == 2) { + char *d = (char*)a->data - a->offset*a->elsize; + if (a->flags.isaligned) + jl_free_aligned(d); + else + free(d); + gc_num.freed += jl_array_nbytes(a); + gc_num.freecall++; + } +} + static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT { gc_time_mallocd_array_start(); diff --git a/src/gc.h b/src/gc.h index 652e17063a42d..6c689c4d5478e 100644 --- a/src/gc.h +++ b/src/gc.h @@ -33,12 +33,11 @@ extern void maybe_collect(jl_ptls_t ptls); extern void run_finalizer(jl_task_t *ct, void *o, void *ff); extern void *jl_malloc_aligned(size_t sz, size_t align); +extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align); extern void jl_free_aligned(void *p); extern void *jl_gc_counted_calloc(size_t nm, size_t sz); extern void jl_gc_counted_free_with_size(void *p, size_t sz); extern void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t 
align); -extern void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT; extern void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f); extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 065a46ad557d4..b27d28367a88e 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -53,6 +53,69 @@ static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) } } +// malloc wrappers, aligned allocation +// We currently just duplicate what Julia GC does. We will in the future replace the malloc calls with MMTK's malloc. + +#if defined(_OS_WINDOWS_) +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ + return _aligned_malloc(sz ? sz : 1, align); +} +inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, + size_t align) +{ + (void)oldsz; + return _aligned_realloc(p, sz ? sz : 1, align); +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + _aligned_free(p); +} +#else +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return malloc(sz); +#endif + void *ptr; + if (posix_memalign(&ptr, align, sz)) + return NULL; + return ptr; +} +inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, + size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return realloc(d, sz); +#endif + void *b = jl_malloc_aligned(sz, align); + if (b != NULL) { + memcpy(b, d, oldsz > sz ? 
sz : oldsz); + free(d); + } + return b; +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + free(p); +} +#endif + +static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +{ + if (a->flags.how == 2) { + char *d = (char*)a->data - a->offset*a->elsize; + if (a->flags.isaligned) + jl_free_aligned(d); + else + free(d); + gc_num.freed += jl_array_nbytes(a); + gc_num.freecall++; + } +} + // --- JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) From 81e850257c15fe48f51cf8865606fe4e0ad868fc Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 24 Jul 2023 02:29:22 +0000 Subject: [PATCH 4/4] Use nm (not num) in jl_gc_counted_calloc; make jl_gc_free_array non-static --- src/mmtk-gc.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index b27d28367a88e..fa9c4acd0aa9f 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -103,19 +103,6 @@ inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT } #endif -static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT -{ - if (a->flags.how == 2) { - char *d = (char*)a->data - a->offset*a->elsize; - if (a->flags.isaligned) - jl_free_aligned(d); - else - free(d); - gc_num.freed += jl_array_nbytes(a); - gc_num.freecall++; - } -} - // --- JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) @@ -228,6 +215,19 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o return v; } +void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +{ + if (a->flags.how == 2) { + char *d = (char*)a->data - a->offset*a->elsize; + if (a->flags.isaligned) + jl_free_aligned(d); + else + free(d); + gc_num.freed += jl_array_nbytes(a); + gc_num.freecall++; + } +} + // roots // --- @@ -419,7 +419,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_task_t *ct = jl_current_task; if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; - malloc_maybe_collect(ptls, num * sz); + malloc_maybe_collect(ptls, nm * sz); jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); } return calloc(nm, 
sz);