|
| 1 | +/* |
| 2 | + This file is part of darktable, |
| 3 | + copyright (c) 2025 darktable developer. |
| 4 | +
|
| 5 | + darktable is free software: you can redistribute it and/or modify |
| 6 | + it under the terms of the GNU General Public License as published by |
| 7 | + the Free Software Foundation, either version 3 of the License, or |
| 8 | + (at your option) any later version. |
| 9 | +
|
| 10 | + darktable is distributed in the hope that it will be useful, |
| 11 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | + GNU General Public License for more details. |
| 14 | +
|
| 15 | + You should have received a copy of the GNU General Public License |
| 16 | + along with darktable. If not, see <http://www.gnu.org/licenses/>. |
| 17 | +*/ |
| 18 | + |
| 19 | +#include "common.h" |
| 20 | + |
// Distance, in floats, between two consecutive convolution kernels stored in
// the `kernels` buffer (a kernel is located at kernels + ALIGN * table[i]).
#define CAPTURE_KERNEL_ALIGN 32
// Blend weights at or below this value are treated as "no effect": the
// convolution kernels and capture_result leave such pixels untouched.
#define CAPTURE_BLEND_EPS 0.01f
// Pixels whose luminance is below this value are masked out in prepare_blend.
#define CAPTURE_YMIN 0.001f
// Exponent applied to the thresholded local deviation in modify_blend.
#define CAPTURE_THRESHPOWER 0.15f
| 25 | + |
/* Return the square of `a`. */
static inline float sqrf(float a)
{
  const float squared = a * a;
  return squared;
}
| 30 | + |
/*
  Convolve `in` with a per-pixel 9x9 kernel and multiply `out[i]` by the result.

  in      : source buffer, w1 * height floats, row-major
  out     : buffer scaled in place by the convolution result
  blend   : per-pixel blend weight; pixels with a weight <= CAPTURE_BLEND_EPS
            are skipped and keep their current `out` value
  kernels : kernel coefficients, CAPTURE_KERNEL_ALIGN floats per kernel
  table   : per-pixel index selecting which kernel in `kernels` to use
  w1      : row length in pixels
  height  : number of rows

  Coefficients are addressed as kern[5 * |dy| + |dx|]. The unrolled interior
  path uses one coefficient for both (dy, dx) and (dx, dy) and never touches
  the taps with |dy| + |dx| >= 7, so it matches the generic border path only
  for a symmetric kernel whose three outermost corner taps are zero.
*/
__kernel void kernel_9x9_mul(global float *in,
                             global float *out,
                             global float *blend,
                             global float *kernels,
                             global unsigned char *table,
                             const int w1,
                             const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w1 || row >= height) return;

  const int idx = mad24(row, w1, col);
  // nothing to do where the blend weight is negligible
  if(blend[idx] <= CAPTURE_BLEND_EPS) return;

  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[idx];

  const int interior = (row >= 4) && (row < height - 4) && (col >= 4) && (col < w1 - 4);

  float sum = 0.0f;
  if(interior)
  {
    // the full 9x9 window is inside the image: unrolled, no bounds checks
    global float *p = in + idx;
    const int s2 = 2 * w1;
    const int s3 = 3 * w1;
    const int s4 = 4 * w1;

    sum  = kern[5*2 + 4] * (p[-s4-2] + p[-s4+2] + p[-s2-4] + p[-s2+4] + p[s2-4] + p[s2+4] + p[s4-2] + p[s4+2]);
    sum += kern[5*1 + 4] * (p[-s4-1] + p[-s4+1] + p[-w1-4] + p[-w1+4] + p[w1-4] + p[w1+4] + p[s4-1] + p[s4+1]);
    sum += kern[5*0 + 4] * (p[-s4] + p[-4] + p[4] + p[s4]);
    sum += kern[5*3 + 3] * (p[-s3-3] + p[-s3+3] + p[s3-3] + p[s3+3]);
    sum += kern[5*2 + 3] * (p[-s3-2] + p[-s3+2] + p[-s2-3] + p[-s2+3] + p[s2-3] + p[s2+3] + p[s3-2] + p[s3+2]);
    sum += kern[5*1 + 3] * (p[-s3-1] + p[-s3+1] + p[-w1-3] + p[-w1+3] + p[w1-3] + p[w1+3] + p[s3-1] + p[s3+1]);
    sum += kern[5*0 + 3] * (p[-s3] + p[-3] + p[3] + p[s3]);
    sum += kern[5*2 + 2] * (p[-s2-2] + p[-s2+2] + p[s2-2] + p[s2+2]);
    sum += kern[5*1 + 2] * (p[-s2-1] + p[-s2+1] + p[-w1-2] + p[-w1+2] + p[w1-2] + p[w1+2] + p[s2-1] + p[s2+1]);
    sum += kern[5*0 + 2] * (p[-s2] + p[-2] + p[2] + p[s2]);
    sum += kern[5*1 + 1] * (p[-w1-1] + p[-w1+1] + p[w1-1] + p[w1+1]);
    sum += kern[5*0 + 1] * (p[-w1] + p[-1] + p[1] + p[w1]);
    sum += kern[0] * (p[0]);
  }
  else
  {
    // near the border: generic loop that skips taps falling outside the image
    for(int dy = -4; dy <= 4; dy++)
    {
      const int y = row + dy;
      if(y < 0 || y >= height) continue;

      for(int dx = -4; dx <= 4; dx++)
      {
        const int x = col + dx;
        if(x < 0 || x >= w1) continue;

        sum += kern[5 * abs(dy) + abs(dx)] * in[mad24(y, w1, x)];
      }
    }
  }
  out[idx] *= sum;
}
| 88 | + |
/*
  Convolve `in` with a per-pixel 9x9 kernel and store divbuff / convolution
  in `out`.

  in      : source buffer, w1 * height floats, row-major
  out     : destination buffer
  divbuff : numerator of the division, same layout as `in`
  blend   : per-pixel blend weight; pixels with a weight <= CAPTURE_BLEND_EPS
            are skipped and `out` is not written for them
  kernels : kernel coefficients, CAPTURE_KERNEL_ALIGN floats per kernel
  table   : per-pixel index selecting which kernel in `kernels` to use
  w1      : row length in pixels
  height  : number of rows

  Coefficients are addressed as kern[5 * |dy| + |dx|]. The unrolled interior
  path uses one coefficient for both (dy, dx) and (dx, dy) and never touches
  the taps with |dy| + |dx| >= 7, so it matches the generic border path only
  for a symmetric kernel whose three outermost corner taps are zero.
*/
__kernel void kernel_9x9_div(global float *in,
                             global float *out,
                             global float *divbuff,
                             global float *blend,
                             global float *kernels,
                             global unsigned char *table,
                             const int w1,
                             const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w1 || row >= height) return;

  const int idx = mad24(row, w1, col);
  // nothing to do where the blend weight is negligible
  if(blend[idx] <= CAPTURE_BLEND_EPS) return;

  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[idx];

  const int interior = (row >= 4) && (row < height - 4) && (col >= 4) && (col < w1 - 4);

  float sum = 0.0f;
  if(interior)
  {
    // the full 9x9 window is inside the image: unrolled, no bounds checks
    global float *p = in + idx;
    const int s2 = 2 * w1;
    const int s3 = 3 * w1;
    const int s4 = 4 * w1;

    sum  = kern[5*2 + 4] * (p[-s4-2] + p[-s4+2] + p[-s2-4] + p[-s2+4] + p[s2-4] + p[s2+4] + p[s4-2] + p[s4+2]);
    sum += kern[5*1 + 4] * (p[-s4-1] + p[-s4+1] + p[-w1-4] + p[-w1+4] + p[w1-4] + p[w1+4] + p[s4-1] + p[s4+1]);
    sum += kern[5*0 + 4] * (p[-s4] + p[-4] + p[4] + p[s4]);
    sum += kern[5*3 + 3] * (p[-s3-3] + p[-s3+3] + p[s3-3] + p[s3+3]);
    sum += kern[5*2 + 3] * (p[-s3-2] + p[-s3+2] + p[-s2-3] + p[-s2+3] + p[s2-3] + p[s2+3] + p[s3-2] + p[s3+2]);
    sum += kern[5*1 + 3] * (p[-s3-1] + p[-s3+1] + p[-w1-3] + p[-w1+3] + p[w1-3] + p[w1+3] + p[s3-1] + p[s3+1]);
    sum += kern[5*0 + 3] * (p[-s3] + p[-3] + p[3] + p[s3]);
    sum += kern[5*2 + 2] * (p[-s2-2] + p[-s2+2] + p[s2-2] + p[s2+2]);
    sum += kern[5*1 + 2] * (p[-s2-1] + p[-s2+1] + p[-w1-2] + p[-w1+2] + p[w1-2] + p[w1+2] + p[s2-1] + p[s2+1]);
    sum += kern[5*0 + 2] * (p[-s2] + p[-2] + p[2] + p[s2]);
    sum += kern[5*1 + 1] * (p[-w1-1] + p[-w1+1] + p[w1-1] + p[w1+1]);
    sum += kern[5*0 + 1] * (p[-w1] + p[-1] + p[1] + p[w1]);
    sum += kern[0] * (p[0]);
  }
  else
  {
    // near the border: generic loop that skips taps falling outside the image
    for(int dy = -4; dy <= 4; dy++)
    {
      const int y = row + dy;
      if(y < 0 || y >= height) continue;

      for(int dx = -4; dx <= 4; dx++)
      {
        const int x = col + dx;
        if(x < 0 || x >= w1) continue;

        sum += kern[5 * abs(dy) + abs(dx)] * in[mad24(y, w1, x)];
      }
    }
  }

  // keep the divisor strictly positive
  const float divisor = fmax(sum, 0.00001f);
  out[idx] = divbuff[idx] / divisor;
}
| 147 | + |
/* Set every element of the width x height `mask` buffer to 1.0f. */
__kernel void prefill_clip_mask(global float *mask,
                                const int width,
                                const int height)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  if(x >= width || y >= height) return;

  mask[mad24(y, width, x)] = 1.0f;
}
| 159 | + |
/*
  Store a per-pixel luminance in `Yold` and clear `mask` around pixels that
  must not take part in blending.

  cfa     : sensor data image; the .x channel is compared against `whites`
  dev_out : RGB image the luminance is derived from
  filters : CFA descriptor; the value 9 selects the X-Trans lookup
  xtrans  : X-Trans layout table, used only when filters == 9
  mask    : width x height mask; entries are only ever set to 0.0f here
  Yold    : output luminance, one float per pixel
  whites  : per-colour threshold; a CFA value above it clears the mask
  w       : row length in pixels
  height  : number of rows

  A pixel in the two-pixel border only clears its own mask entry. An inner
  pixel whose CFA value exceeds whites[color], or whose luminance is below
  CAPTURE_YMIN, clears a 21-pixel neighbourhood (a 5x5 square without its
  four corners).
*/
__kernel void prepare_blend(__read_only image2d_t cfa,
                            __read_only image2d_t dev_out,
                            const int filters,
                            global const unsigned char (*const xtrans)[6],
                            global float *mask,
                            global float *Yold,
                            global float *whites,
                            const int w,
                            const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w || row >= height) return;

  const int k = mad24(row, w, col);

  // NOTE(review): the Rec.709 red weight is 0.2126; 0.2626f looks like a typo
  // (the weights sum to 1.05) - confirm against the CPU path before changing.
  const float4 rgb = read_imagef(dev_out, samplerA, (int2)(col, row));
  const float Y = fmax(0.0f, 0.2626f * rgb.x + 0.7152f * rgb.y + 0.0722f * rgb.z);
  Yold[k] = Y;

  const int inner = (row > 1) && (row < height-2) && (col > 1) && (col < w -2);
  if(!inner)
  {
    mask[k] = 0.0f;
    return;
  }

  const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
  const float val = read_imagef(cfa, samplerA, (int2)(col, row)).x;
  if(val > whites[color] || Y < CAPTURE_YMIN)
  {
    // rows at distance 2 cover 3 columns, the three centre rows cover 5
    for(int dy = -2; dy <= 2; dy++)
    {
      const int reach = (dy == -2 || dy == 2) ? 1 : 2;
      for(int dx = -reach; dx <= reach; dx++)
        mask[k + dy * w + dx] = 0.0f;
    }
  }
}
| 196 | + |
/*
  Attenuate the blend mask in flat image areas and copy the luminance.

  blend     : per-pixel blend weight, multiplied in place by a factor in [0, 1]
  Yold      : input luminance, width * height floats, row-major
  luminance : receives a copy of Yold for the processed pixel
  threshold : subtracted from 5 * local standard deviation before the power
  width     : row length in pixels
  height    : number of rows

  The local statistics use a 21-pixel footprint (a 5x5 square without its
  four corners) centred on the pixel, with the centre clamped so that the
  footprint stays inside the image. This requires width >= 5 and height >= 5.
*/
__kernel void modify_blend(global float *blend,
                           global float *Yold,
                           global float *luminance,
                           const float threshold,
                           const int width,
                           const int height)
{
  const int icol = get_global_id(0);
  const int irow = get_global_id(1);
  if(icol >= width || irow >= height) return;

  // centre of the footprint, pulled inwards near the image border
  const int row = clamp(irow, 2, height-3);
  const int col = clamp(icol, 2, width-3);

  // mean over the 21-pixel footprint: three full rows of 5 ...
  float av = 0.0f;
  for(int y = row-1; y < row+2; y++)
  {
    for(int x = col-2; x < col+3; x++)
      av += Yold[mad24(y, width, x)];
  }
  // ... plus 3 pixels each in the rows at distance 2
  for(int x = col-1; x < col+2; x++)
  {
    av += Yold[mad24(row-2, width, x)];
    av += Yold[mad24(row+2, width, x)];
  }
  av /= 21.0f;

  // squared deviations over the same 21 pixels
  float sv = 0.0f;
  for(int y = row-1; y < row+2; y++)
  {
    for(int x = col-2; x < col+3; x++)
      sv += sqrf(Yold[mad24(y, width, x)] - av);
  }
  // The outer rows must use the 3-pixel span used for the mean; the previous
  // 5-pixel span summed 25 samples while still dividing by 21.
  for(int x = col-1; x < col+2; x++)
  {
    sv += sqrf(Yold[mad24(row-2, width, x)] - av);
    sv += sqrf(Yold[mad24(row+2, width, x)] - av);
  }
  sv = dtcl_pow(fmax(0.0f, 5.0f * dtcl_sqrt(sv / 21.f) - threshold), CAPTURE_THRESHPOWER);

  // results are written at the unclamped position
  const int k = mad24(irow, width, icol);

  blend[k] *= clamp(sv, 0.0f, 1.0f);
  luminance[k] = Yold[k];
}
| 241 | + |
/*
  Copy `in` to `out`, replacing the fourth channel with the blend mask value.

  in         : source image
  out        : destination image
  blend_mask : per-pixel blend weight, width * height floats, row-major
  width      : image width in pixels
  height     : image height in pixels

  Weights below CAPTURE_BLEND_EPS are written as 0.0f.
*/
__kernel void show_blend_mask(__read_only image2d_t in,
                              __write_only image2d_t out,
                              global float *blend_mask,
                              const int width,
                              const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= width || row >= height) return;

  const int2 pos = (int2)(col, row);
  float4 pix = read_imagef(in, samplerA, pos);

  const float weight = blend_mask[mad24(row, width, col)];
  if(weight < CAPTURE_BLEND_EPS)
    pix.w = 0.0f;
  else
    pix.w = weight;

  write_imagef(out, pos, pix);
}
| 257 | + |
/*
  Write the final image: scale each pixel so that its luminance moves from the
  old value towards the new one according to the blend mask.

  in        : source image
  out       : destination image
  blendmask : per-pixel blend weight; pixels with a weight <= CAPTURE_BLEND_EPS
              are copied unchanged
  luminance : original luminance per pixel
  tmp       : new luminance per pixel
  width     : image width in pixels
  height    : image height in pixels

  All four channels of the pixel are multiplied by the same factor.
*/
__kernel void capture_result( __read_only image2d_t in,
                              __write_only image2d_t out,
                              global float *blendmask,
                              global float *luminance,
                              global float *tmp,
                              const int width,
                              const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= width || row >= height) return;

  const int2 pos = (int2)(col, row);
  const int k = mad24(row, width, col);

  float4 pix = read_imagef(in, samplerA, pos);

  const float weight = blendmask[k];
  if(weight > CAPTURE_BLEND_EPS)
  {
    const float mixer = clamp(weight, 0.0f, 1.0f);
    // both luminances are floored so the ratio below stays finite
    const float lumold = fmax(luminance[k], 0.000001f);
    const float lumtmp = fmax(tmp[k], 0.0000001f);
    const float luminance_new = mix(lumold, lumtmp, mixer);
    const float4 factor = luminance_new / lumold;
    pix *= factor;
  }
  write_imagef(out, pos, pix);
}
| 284 | + |
// Drop every file-local tuning macro, not just the kernel stride.
#undef CAPTURE_KERNEL_ALIGN
#undef CAPTURE_BLEND_EPS
#undef CAPTURE_YMIN
#undef CAPTURE_THRESHPOWER
0 commit comments