Skip to content

Commit 3d40d1f

Browse files
Implement capture sharpening inside demosaic module
Capture sharpening has been implemented to work inside the demosaic module, so it is raw-only. Credits to Ingo Weyrich (heckflosse67@gmx.de): he implemented the original algorithm for RawTherapee, and this implementation is based on his work, especially the convolution kernels. CPU and OpenCL code paths are both available. The demosaic module gets more parameters, so there is a version bump; one float parameter has been reserved.

A "mini manual": Capture sharpening (CS) tries to recover details lost due to in-camera blurring, which can be caused by diffraction, the anti-aliasing filter or other sources of Gaussian-type blur.

Prerequisites are:
- good white balance parameters (same requirement as for highlights reconstruction or demosaic)
- no chromatic aberration; you might want to add the "raw chromatic aberration" module
- be aware that luminance noise will be amplified by CS

Controls:
1. capture sharpen: switches CS on if above zero and defines the strength of the overall effect. CS works in an iterative process and this value defines the number of iterations; a setting of 10 will mostly be enough. As downscaling after demosaicing reduces the visibility of CS, we reduce the iterations in such cases for performance.
2. radius: defines the basic convolution Gaussian sigma. This should not be set by "creative means" but to the blurring radius of the optical system and sensor. If set too high, halos will be introduced. A click on the button beside the slider forces a pipe run that approximates and sets the radius from sensor data; UI feedback is given via the control log. If radius is set to zero, the automatic radius will be used, which is an easy way to build a generic preset.
3. contrast threshold: as sensor noise will be amplified by CS, we take some care about this by a per-pixel variance analysis and use a logistic function with this threshold to avoid CS in noisy areas. The default is good for low ISO images.
4. corner boost: increases the radius in image corners. We assume a circle of 1/2 of the image size to be safe (only the main radius is used there); locations outside this center circle get an increased convolution radius.
1 parent 952d604 commit 3d40d1f

File tree

4 files changed

+1357
-36
lines changed

4 files changed

+1357
-36
lines changed

data/kernels/capture.cl

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
/*
2+
This file is part of darktable,
3+
copyright (c) 2025 darktable developer.
4+
5+
darktable is free software: you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation, either version 3 of the License, or
8+
(at your option) any later version.
9+
10+
darktable is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with darktable. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
#include "common.h"
20+
21+
#define CAPTURE_KERNEL_ALIGN 32
22+
#define CAPTURE_BLEND_EPS 0.01f
23+
#define VARLUT_SIZE 4096
24+
25+
/*
  Evaluate the 9x9 convolution of `in` around pixel (col, row).

  in      single-channel buffer, w1 floats per row, `height` rows
  kern    coefficient table of the kernel selected for this pixel; entry
          5*a + b is the weight for a tap |a| rows and |b| columns away
  col,row pixel position, must be inside the w1 x height buffer
  returns the weighted sum of the taps

  Interior pixels (at least 4 pixels away from every border) use a fully
  unrolled sum. That path only reads table entries 5*a + b with a <= b and
  applies each to both (a,b) and (b,a) offsets, i.e. it relies on the table
  being symmetric; it also leaves out the taps at offsets (3,4), (4,3) and
  (4,4). Border pixels use a bounds-checked loop that reads 5*|ir| + |ic|
  directly and simply skips taps that fall outside the buffer (no
  renormalisation).
  The order of the additions is kept exactly as in the original kernels so
  results stay bit-identical.
*/
static inline float capture_convolve_9x9(global const float *in,
                                         global const float *kern,
                                         const int col,
                                         const int row,
                                         const int w1,
                                         const int height)
{
  float val = 0.0f;
  if(col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4)
  {
    const int w2 = 2 * w1;
    const int w3 = 3 * w1;
    const int w4 = 4 * w1;
    global const float *d = in + mad24(row, w1, col);

    val = kern[10+4] * (d[-w4-2] + d[-w4+2] + d[-w2-4] + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
          kern[5 +4] * (d[-w4-1] + d[-w4+1] + d[-w1-4] + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
          kern[4]    * (d[-w4  ] + d[   -4] + d[    4] + d[ w4  ]) +
          kern[15+3] * (d[-w3-3] + d[-w3+3] + d[ w3-3] + d[ w3+3]) +
          kern[10+3] * (d[-w3-2] + d[-w3+2] + d[-w2-3] + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
          kern[ 5+3] * (d[-w3-1] + d[-w3+1] + d[-w1-3] + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
          kern[   3] * (d[-w3  ] + d[   -3] + d[    3] + d[ w3  ]) +
          kern[10+2] * (d[-w2-2] + d[-w2+2] + d[ w2-2] + d[ w2+2]) +
          kern[ 5+2] * (d[-w2-1] + d[-w2+1] + d[-w1-2] + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
          kern[   2] * (d[-w2  ] + d[   -2] + d[    2] + d[ w2  ]) +
          kern[ 5+1] * (d[-w1-1] + d[-w1+1] + d[ w1-1] + d[ w1+1]) +
          kern[   1] * (d[-w1  ] + d[   -1] + d[    1] + d[ w1  ]) +
          kern[   0] * (d[0]);
  }
  else
  {
    for(int ir = -4; ir <= 4; ir++)
    {
      const int irow = row+ir;
      if(irow >= 0 && irow < height)
      {
        for(int ic = -4; ic <= 4; ic++)
        {
          const int icol = col+ic;
          if(icol >=0 && icol < w1)
            val += kern[5 * abs(ir) + abs(ic)] * in[mad24(irow, w1, icol)];
        }
      }
    }
  }
  return val;
}

/*
  Multiply out[] in place by the 9x9 convolution of in[].

  in       buffer to be convolved (w1 x height floats)
  out      buffer scaled in place: out[i] *= convolution(in)[i]
  blend    per-pixel blend weight; pixels with a weight not above
           CAPTURE_BLEND_EPS are skipped and out[i] is left untouched
  kernels  coefficient tables, one table every CAPTURE_KERNEL_ALIGN floats
  table    per-pixel index of the table to use
  w1       row length of all buffers in pixels
  height   number of rows
*/
__kernel void kernel_9x9_mul(global float *in,
                             global float *out,
                             global float *blend,
                             global float *kernels,
                             global unsigned char *table,
                             const int w1,
                             const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w1 || row >= height) return;

  const int i = mad24(row, w1, col);
  if(blend[i] <= CAPTURE_BLEND_EPS)
    return;

  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
  out[i] *= capture_convolve_9x9(in, kern, col, row, w1, height);
}

/*
  Write divbuff[] divided by the 9x9 convolution of in[] to out[].

  in       buffer to be convolved (w1 x height floats)
  out      result: out[i] = divbuff[i] / max(convolution(in)[i], 0.00001)
  divbuff  numerator buffer
  blend    per-pixel blend weight; pixels with a weight not above
           CAPTURE_BLEND_EPS are skipped and out[i] is left untouched
  kernels  coefficient tables, one table every CAPTURE_KERNEL_ALIGN floats
  table    per-pixel index of the table to use
  w1       row length of all buffers in pixels
  height   number of rows
*/
__kernel void kernel_9x9_div(global float *in,
                             global float *out,
                             global float *divbuff,
                             global float *blend,
                             global float *kernels,
                             global unsigned char *table,
                             const int w1,
                             const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w1 || row >= height) return;

  const int i = mad24(row, w1, col);
  if(blend[i] <= CAPTURE_BLEND_EPS)
    return;

  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
  const float val = capture_convolve_9x9(in, kern, col, row, w1, height);
  // the lower bound on the divisor avoids a division by (nearly) zero
  out[i] = divbuff[i] / fmax(val, 0.00001f);
}
141+
142+
/*
  Set every element of the width x height mask buffer to 1.0f.
  One work item handles one pixel; work items outside the buffer do nothing.
*/
__kernel void prefill_clip_mask(global float *mask,
                                const int width,
                                const int height)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);

  if(x < width && y < height)
    mask[mad24(y, width, x)] = 1.0f;
}
153+
154+
/*
  Piecewise transfer function: cube root above the threshold 216/24389,
  the linear segment (24389/27 * x + 16) / 116 at or below it.
  (These are the constants commonly used for the CIE Lab f() function.)
*/
static inline float flab_f(float x)
{
  const float cutoff = 216.0f / 24389.0f;
  const float slope = 24389.0f / 27.0f;

  if(x > cutoff)
    return dtcl_pow(x, 1.0f/3.0f);

  return (slope * x + 16.0f) / 116.0f;
}
160+
161+
/*
  Store the per-pixel luminance and knock out mask pixels around clipped or
  very dark photosites.

  cfa        raw sensor data, one value per photosite (read from .x)
  dev_out    RGB image the luminance is derived from
  filters    CFA descriptor; the value 9 selects the xtrans lookup
  xtrans     xtrans colour layout, only used when filters == 9
  mask       w x height mask; this kernel only ever stores 0.0f into it
             (presumably it has been set to 1.0f by prefill_clip_mask before
             -- verify against the host code)
  Yold       w x height output, receives the luminance of every pixel
  whites     per-colour white level, indexed by the CFA colour
  threshold  raises the minimum luminance: Ymin = 0.005 + 0.005 * threshold
  w, height  image dimensions

  Pixels in the outer two rows/columns always get mask = 0. For any other
  pixel whose raw value exceeds 90% of its white level, or whose luminance is
  below Ymin, a 21-pixel neighbourhood (5x5 without the four corners) is
  cleared. Neighbouring work items may store to the same mask entry; every
  store in this kernel writes the same value 0.0f.
*/
__kernel void prepare_blend(__read_only image2d_t cfa,
                            __read_only image2d_t dev_out,
                            const int filters,
                            global const unsigned char (*const xtrans)[6],
                            global float *mask,
                            global float *Yold,
                            global float *whites,
                            const float threshold,
                            const int w,
                            const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w || row >= height) return;

  const int2 pos = (int2)(col, row);
  const int k = mad24(row, w, col);

  // NOTE(review): 0.2626f looks like a typo for the Rec.709 red weight 0.2126
  // (the green and blue weights are the Rec.709 values). Left unchanged here
  // so the result does not drift from other code paths -- please confirm.
  const float4 rgb = read_imagef(dev_out, samplerA, pos);
  const float Y = fmax(0.0f, 0.2626f * rgb.x + 0.7152f * rgb.y + 0.0722f * rgb.z);
  Yold[k] = Y;

  const bool interior = row > 1 && col > 1 && row < height-2 && col < w -2;
  if(!interior)
  {
    mask[k] = 0.0f;
    return;
  }

  const float Ymin = 0.005f + 0.005f * threshold;
  const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
  const float raw = read_imagef(cfa, samplerA, pos).x;
  if(!(raw > 0.9f * whites[color] || Y < Ymin)) return;

  // clear rows -2..+2; the outermost rows are 3 pixels wide, the others 5
  for(int dy = -2; dy <= 2; dy++)
  {
    const int reach = (dy == -2 || dy == 2) ? 1 : 2;
    const int base = k + dy * w;
    for(int dx = -reach; dx <= reach; dx++)
      mask[base + dx] = 0.0f;
  }
}
199+
200+
/*
  Attenuate the blend mask according to the local luminance deviation and
  copy the luminance.

  blend      width x height blend mask, multiplied in place by a LUT weight
  Yold       width x height luminance input
  luminance  width x height output, receives a copy of Yold
  lut        weight table; entries 0 .. VARLUT_SIZE-1 are read
  threshold  shifts the deviation before the LUT lookup (0.08 * threshold)
  width, height  buffer dimensions

  The statistics are taken over a 21-pixel footprint (5x5 without the four
  corners) around the pixel. Near the borders the footprint centre is clamped
  so that it stays inside the buffer.
  Assumes width >= 5 and height >= 5: clamp() with min > max is undefined.
*/
__kernel void modify_blend(global float *blend,
                           global float *Yold,
                           global float *luminance,
                           global float *lut,
                           const float threshold,
                           const int width,
                           const int height)
{
  const int icol = get_global_id(0);
  const int irow = get_global_id(1);
  if(icol >= width || irow >= height) return;

  // centre of the footprint, kept 2 pixels away from every border
  const int row = clamp(irow, 2, height-3);
  const int col = clamp(icol, 2, width-3);

  // mean: three full rows of 5 pixels plus 3 pixels in each of rows -2 / +2
  float av = 0.0f;
  for(int y = row-1; y < row+2; y++)
  {
    for(int x = col-2; x < col+3; x++)
      av += Yold[mad24(y, width, x)];
  }
  for(int x = col-1; x < col+2; x++)
  {
    av += Yold[mad24(row-2, width, x)];
    av += Yold[mad24(row+2, width, x)];
  }
  av /= 21.0f;

  // squared deviations over exactly the same 21 pixels as the mean
  float sv = 0.0f;
  for(int y = row-1; y < row+2; y++)
  {
    for(int x = col-2; x < col+3; x++)
    {
      const float v = Yold[mad24(y, width, x)] - av;
      sv += v*v;
    }
  }
  // Rows -2 / +2 contribute 3 pixels each. This loop used to run over 5
  // pixels, which summed 25 terms (including corners the mean never saw)
  // while still dividing by 21.
  for(int x = col-1; x < col+2; x++)
  {
    float v = Yold[mad24(row-2, width, x)] - av;
    sv += v*v;
    v = Yold[mad24(row+2, width, x)] - av;
    sv += v*v;
  }

  // standard deviation minus the threshold term, scaled to LUT resolution
  sv = (float)VARLUT_SIZE * 10.0f * (sqrt(sv / 21.f) - 0.08f * threshold);
  // the index is offset by VARLUT_SIZE, so values in [-VARLUT_SIZE, -1] select
  // distinct entries and everything from 0 upwards hits the last entry
  const int idx = clamp((int)sv + VARLUT_SIZE, 0, VARLUT_SIZE-1);
  const int k = mad24(irow, width, icol);

  blend[k] *= clamp(lut[idx], 0.0f, 1.0f);
  luminance[k] = Yold[k];
}
251+
252+
/*
  Copy the image and put the blend mask into its fourth channel.

  in          source image
  out         destination image; x/y/z are copied unchanged from `in`
  blend_mask  width x height mask; values below CAPTURE_BLEND_EPS are
              written as 0.0f, all others as they are
  width, height  image dimensions
*/
__kernel void show_blend_mask(__read_only image2d_t in,
                              __write_only image2d_t out,
                              global float *blend_mask,
                              const int width,
                              const int height)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  if(x >= width || y >= height) return;

  const int2 pos = (int2)(x, y);
  float4 pixel = read_imagef(in, samplerA, pos);

  const float weight = blend_mask[mad24(y, width, x)];
  pixel.w = weight;
  if(weight < CAPTURE_BLEND_EPS)
    pixel.w = 0.0f;

  write_imagef(out, pos, pixel);
}
267+
268+
/*
  Scale every pixel by the ratio of its blended new luminance to its old
  luminance.

  in         source image
  out        destination image
  blendmask  width x height blend weight; pixels with a weight not above
             CAPTURE_BLEND_EPS are copied unchanged
  luminance  width x height original luminance
  tmp        width x height new luminance
  width, height  image dimensions

  All four channels of the pixel are multiplied by the same factor.
*/
__kernel void capture_result( __read_only image2d_t in,
                              __write_only image2d_t out,
                              global float *blendmask,
                              global float *luminance,
                              global float *tmp,
                              const int width,
                              const int height)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  if(x >= width || y >= height) return;

  const int2 pos = (int2)(x, y);
  const int k = mad24(y, width, x);
  float4 pixel = read_imagef(in, samplerA, pos);

  const float weight = blendmask[k];
  if(weight > CAPTURE_BLEND_EPS)
  {
    // both luminances get a tiny positive floor so the ratio stays finite
    const float lum_old = fmax(luminance[k], 0.000001f);
    const float lum_sharp = fmax(tmp[k], 0.0000001f);
    const float lum_new = mix(lum_old, lum_sharp, clamp(weight, 0.0f, 1.0f));
    const float4 gain = lum_new / lum_old;
    pixel = pixel * gain;
  }

  write_imagef(out, pos, pixel);
}
294+
295+
#undef CAPTURE_KERNEL_ALIGN

data/kernels/programs.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,4 @@ blurs.cl 34
3838
bspline.cl 35
3939
sigmoid.cl 36
4040
colorequal.cl 37
41+
capture.cl 38

0 commit comments

Comments
 (0)