|
9 | 9 |
|
10 | 10 | #include <linux/args.h>
|
11 | 11 |
|
/*
 * __pick_unrolled(clang_form, gcc_form) - emit the loop pragma spelled for the
 * compiler in use.
 *
 * Clang builds stringify the first argument into a _Pragma(); GCC builds with
 * CONFIG_GCC_VERSION >= 80000 do the same with the second argument. Every
 * other configuration gets an empty expansion, so the loop is left as is.
 */
#if defined(CONFIG_CC_IS_CLANG)
# define __pick_unrolled(clang_form, gcc_form)	_Pragma(#clang_form)
#elif CONFIG_GCC_VERSION >= 80000
# define __pick_unrolled(clang_form, gcc_form)	_Pragma(#gcc_form)
#else
# define __pick_unrolled(clang_form, gcc_form)	/* not supported */
#endif
| 19 | + |
/**
 * unrolled - loop attributes to ask the compiler to unroll it
 *
 * Usage:
 *
 * #define BATCH 8
 *
 *	unrolled_count(BATCH)
 *	for (u32 i = 0; i < BATCH; i++)
 *		// loop body without cross-iteration dependencies
 *
 * This is only a hint and the compiler is free to disable unrolling if it
 * thinks the count is suboptimal and may hurt performance and/or hugely
 * increase object code size.
 * Avoiding cross-iteration dependencies (i.e. cases where iteration x + 1
 * depends on what iteration x does with variables) is not a strict
 * requirement, but provides the best performance and object code size.
 * Available only on Clang and GCC 8.x onwards.
 */
| 39 | + |
/*
 * Let the compiler choose the unroll count by itself. Only the Clang form is
 * supplied; the GCC slot is intentionally left empty.
 */
#define unrolled	__pick_unrolled(clang loop unroll(enable), /* nothing */)
| 43 | + |
/* Ask for the loop to be unrolled with an unroll count of @n */
#define unrolled_count(n)	__pick_unrolled(clang loop unroll_count(n), GCC unroll n)
| 47 | + |
/*
 * Request that the loop be unrolled completely. Clang is given unroll(full);
 * GCC is given an unroll count of 65534 instead.
 */
#define unrolled_full	__pick_unrolled(clang loop unroll(full), GCC unroll 65534)
| 51 | + |
/* Forbid unrolling: unroll(disable) on Clang, an unroll count of 1 on GCC */
#define unrolled_none	__pick_unrolled(clang loop unroll(disable), GCC unroll 1)
| 55 | + |
/*
 * UNROLL(N, MACRO, args...) - hand (MACRO, args) to the __UNROLL_<N> helper
 * selected by @N. The helper's name is produced by CONCATENATE(); presumably
 * that pastes __UNROLL_ and N into a single token -- see <linux/args.h>.
 */
#define UNROLL(N, MACRO, args...)	CONCATENATE(__UNROLL_, N)(MACRO, args)
|
13 | 57 |
|
/* Zero repetitions requested: MACRO and its arguments are dropped entirely */
#define __UNROLL_0(MACRO, args...)
|
|
0 commit comments