Skip to content

Commit 203a60a

Browse files
Implement capture sharpening inside demosaic module
Capture sharpening has been implemented inside the demosaic module, so it works on raw images only. Credits to Ingo Weyrich (heckflosse67@gmx.de), who implemented the original algorithm for RawTherapee; this implementation is based on his work, especially the convolution kernels. Both CPU and OpenCL code paths are available. The demosaic module gets more parameters, so there is a version bump; one still-unused float parameter has been reserved.

A "mini manual":

Capture sharpening (CS) tries to recover details lost due to in-camera blurring, which can be caused by diffraction, the anti-aliasing filter or other sources of gaussian-type blur.

Prerequisites:
- good white balance parameters (same requirement as for highlights reconstruction or demosaic)
- no chromatic aberration; you might want to add the "raw chromatic aberration" module
- keep in mind that sensor noise will be amplified by CS

Controls:
1. capture sharpen: switches CS on if above zero and defines the strength of the overall effect. CS works as an iterative process and this value defines the number of iterations; in most cases a setting of 10 will be enough.
2. radius: defines the basic gaussian sigma of the convolution. This should not be chosen by "creative means" but set to the blurring radius of the optical system and sensor; values that are too large will lead to artifacts like halos. A correct radius can be calculated internally. This is done when you either a) click on the button beside the slider, or b) activate capture sharpen for the first time after resetting to demosaic defaults or when developing old edits.
3. contrast threshold: as sensor noise will be amplified by CS, we take some care about this with a per-pixel variance analysis and restrict CS to locations with higher variance. The default is good for low to medium ISO images.
4. corner boost: increases the radius in the image corners. We assume a circle of 1/2 of the image size to be "sharp" (only the main radius is used there); locations outside this center circle get an increased convolution radius.
1 parent d2f15c8 commit 203a60a

File tree

4 files changed

+1371
-40
lines changed

4 files changed

+1371
-40
lines changed

data/kernels/capture.cl

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
/*
2+
This file is part of darktable,
3+
copyright (c) 2025 darktable developer.
4+
5+
darktable is free software: you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation, either version 3 of the License, or
8+
(at your option) any later version.
9+
10+
darktable is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with darktable. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
#include "common.h"
20+
21+
/* Stride, in floats, between consecutive convolution kernels in the `kernels`
   buffer; a kernel is addressed as kernels + CAPTURE_KERNEL_ALIGN * table[i]. */
#define CAPTURE_KERNEL_ALIGN 32
22+
/* Blend weights at or below this value are skipped by kernel_9x9_mul and
   kernel_9x9_div and are left unscaled by capture_result. */
#define CAPTURE_BLEND_EPS 0.01f
23+
/* prepare_blend zeroes the mask around pixels whose luminance is below this value. */
#define CAPTURE_YMIN 0.001f
24+
/* Exponent applied by modify_blend to the thresholded local deviation. */
#define CAPTURE_THRESHPOWER 0.15f
25+
26+
/* Returns the square of `a`. */
static inline float sqrf(float a)
{
  const float squared = a * a;
  return squared;
}
30+
31+
/*
  Per-pixel 9x9 convolution of `in`; the result is multiplied into `out`.

  in      : source plane, indexed row-major as row * w1 + col
  out     : plane updated in place, out[i] *= convolution(in)[i]
  blend   : per-pixel weight; pixels with blend <= CAPTURE_BLEND_EPS are left untouched
  kernels : kernel coefficients, one kernel every CAPTURE_KERNEL_ALIGN floats
  table   : per-pixel index selecting the kernel to use
  w1      : row length in pixels
  height  : number of rows

  A kernel stores one quadrant as kern[5 * |dy| + |dx|] with |dy|, |dx| in 0..4.
  Taps that would fall outside the image are simply skipped (no renormalisation).

  NOTE(review): the interior fast path reads kern[5*a + b] (a <= b) for both the
  (a,b) and (b,a) offsets and never touches the (3,4), (4,3) and (4,4) taps,
  whereas the border path visits every tap of the 9x9 window. Presumably the
  kernels are symmetric and zero at those taps - confirm against the host code.
*/
__kernel void kernel_9x9_mul(global float *in,
                             global float *out,
                             global float *blend,
                             global float *kernels,
                             global unsigned char *table,
                             const int w1,
                             const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w1 || row >= height) return;

  const int i = mad24(row, w1, col);
  if(blend[i] <= CAPTURE_BLEND_EPS) return;

  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
  float val = 0.0f;

  const bool interior = col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4;
  if(interior)
  {
    // row pointers centred on the current column
    global const float *ctr = in + i;
    global const float *up1 = ctr - w1;
    global const float *up2 = ctr - 2 * w1;
    global const float *up3 = ctr - 3 * w1;
    global const float *up4 = ctr - 4 * w1;
    global const float *dn1 = ctr + w1;
    global const float *dn2 = ctr + 2 * w1;
    global const float *dn3 = ctr + 3 * w1;
    global const float *dn4 = ctr + 4 * w1;

    // sums of all taps sharing one coefficient, named s<a><b> for kern[5*a + b]
    const float s24 = up4[-2] + up4[2] + up2[-4] + up2[4] + dn2[-4] + dn2[4] + dn4[-2] + dn4[2];
    const float s14 = up4[-1] + up4[1] + up1[-4] + up1[4] + dn1[-4] + dn1[4] + dn4[-1] + dn4[1];
    const float s04 = up4[0] + ctr[-4] + ctr[4] + dn4[0];
    const float s33 = up3[-3] + up3[3] + dn3[-3] + dn3[3];
    const float s23 = up3[-2] + up3[2] + up2[-3] + up2[3] + dn2[-3] + dn2[3] + dn3[-2] + dn3[2];
    const float s13 = up3[-1] + up3[1] + up1[-3] + up1[3] + dn1[-3] + dn1[3] + dn3[-1] + dn3[1];
    const float s03 = up3[0] + ctr[-3] + ctr[3] + dn3[0];
    const float s22 = up2[-2] + up2[2] + dn2[-2] + dn2[2];
    const float s12 = up2[-1] + up2[1] + up1[-2] + up1[2] + dn1[-2] + dn1[2] + dn2[-1] + dn2[1];
    const float s02 = up2[0] + ctr[-2] + ctr[2] + dn2[0];
    const float s11 = up1[-1] + up1[1] + dn1[-1] + dn1[1];
    const float s01 = up1[0] + ctr[-1] + ctr[1] + dn1[0];

    val = kern[5*2 + 4] * s24 +
          kern[5*1 + 4] * s14 +
          kern[4]       * s04 +
          kern[5*3 + 3] * s33 +
          kern[5*2 + 3] * s23 +
          kern[5*1 + 3] * s13 +
          kern[3]       * s03 +
          kern[5*2 + 2] * s22 +
          kern[5*1 + 2] * s12 +
          kern[2]       * s02 +
          kern[5*1 + 1] * s11 +
          kern[1]       * s01 +
          kern[0]       * ctr[0];
  }
  else
  {
    // border: walk the full window and skip taps outside the image
    for(int dy = -4; dy <= 4; dy++)
    {
      const int y = row + dy;
      if(y < 0 || y >= height) continue;

      for(int dx = -4; dx <= 4; dx++)
      {
        const int x = col + dx;
        if(x < 0 || x >= w1) continue;

        val += kern[5 * abs(dy) + abs(dx)] * in[mad24(y, w1, x)];
      }
    }
  }

  out[i] = out[i] * val;
}
88+
89+
/*
  Per-pixel 9x9 convolution of `in`; `divbuff` is divided by the result.

  in      : source plane, indexed row-major as row * w1 + col
  out     : destination, out[i] = divbuff[i] / max(convolution(in)[i], 0.00001)
  divbuff : numerator plane
  blend   : per-pixel weight; pixels with blend <= CAPTURE_BLEND_EPS are left untouched
  kernels : kernel coefficients, one kernel every CAPTURE_KERNEL_ALIGN floats
  table   : per-pixel index selecting the kernel to use
  w1      : row length in pixels
  height  : number of rows

  A kernel stores one quadrant as kern[5 * |dy| + |dx|] with |dy|, |dx| in 0..4.
  Taps that would fall outside the image are simply skipped (no renormalisation).

  NOTE(review): the interior fast path reads kern[5*a + b] (a <= b) for both the
  (a,b) and (b,a) offsets and never touches the (3,4), (4,3) and (4,4) taps,
  whereas the border path visits every tap of the 9x9 window. Presumably the
  kernels are symmetric and zero at those taps - confirm against the host code.
*/
__kernel void kernel_9x9_div(global float *in,
                             global float *out,
                             global float *divbuff,
                             global float *blend,
                             global float *kernels,
                             global unsigned char *table,
                             const int w1,
                             const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w1 || row >= height) return;

  const int i = mad24(row, w1, col);
  if(blend[i] <= CAPTURE_BLEND_EPS) return;

  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
  float val = 0.0f;

  const bool interior = col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4;
  if(interior)
  {
    // row pointers centred on the current column
    global const float *ctr = in + i;
    global const float *up1 = ctr - w1;
    global const float *up2 = ctr - 2 * w1;
    global const float *up3 = ctr - 3 * w1;
    global const float *up4 = ctr - 4 * w1;
    global const float *dn1 = ctr + w1;
    global const float *dn2 = ctr + 2 * w1;
    global const float *dn3 = ctr + 3 * w1;
    global const float *dn4 = ctr + 4 * w1;

    // sums of all taps sharing one coefficient, named s<a><b> for kern[5*a + b]
    const float s24 = up4[-2] + up4[2] + up2[-4] + up2[4] + dn2[-4] + dn2[4] + dn4[-2] + dn4[2];
    const float s14 = up4[-1] + up4[1] + up1[-4] + up1[4] + dn1[-4] + dn1[4] + dn4[-1] + dn4[1];
    const float s04 = up4[0] + ctr[-4] + ctr[4] + dn4[0];
    const float s33 = up3[-3] + up3[3] + dn3[-3] + dn3[3];
    const float s23 = up3[-2] + up3[2] + up2[-3] + up2[3] + dn2[-3] + dn2[3] + dn3[-2] + dn3[2];
    const float s13 = up3[-1] + up3[1] + up1[-3] + up1[3] + dn1[-3] + dn1[3] + dn3[-1] + dn3[1];
    const float s03 = up3[0] + ctr[-3] + ctr[3] + dn3[0];
    const float s22 = up2[-2] + up2[2] + dn2[-2] + dn2[2];
    const float s12 = up2[-1] + up2[1] + up1[-2] + up1[2] + dn1[-2] + dn1[2] + dn2[-1] + dn2[1];
    const float s02 = up2[0] + ctr[-2] + ctr[2] + dn2[0];
    const float s11 = up1[-1] + up1[1] + dn1[-1] + dn1[1];
    const float s01 = up1[0] + ctr[-1] + ctr[1] + dn1[0];

    val = kern[5*2 + 4] * s24 +
          kern[5*1 + 4] * s14 +
          kern[4]       * s04 +
          kern[5*3 + 3] * s33 +
          kern[5*2 + 3] * s23 +
          kern[5*1 + 3] * s13 +
          kern[3]       * s03 +
          kern[5*2 + 2] * s22 +
          kern[5*1 + 2] * s12 +
          kern[2]       * s02 +
          kern[5*1 + 1] * s11 +
          kern[1]       * s01 +
          kern[0]       * ctr[0];
  }
  else
  {
    // border: walk the full window and skip taps outside the image
    for(int dy = -4; dy <= 4; dy++)
    {
      const int y = row + dy;
      if(y < 0 || y >= height) continue;

      for(int dx = -4; dx <= 4; dx++)
      {
        const int x = col + dx;
        if(x < 0 || x >= w1) continue;

        val += kern[5 * abs(dy) + abs(dx)] * in[mad24(y, w1, x)];
      }
    }
  }

  // the lower bound on the divisor avoids a division by zero
  const float divisor = fmax(val, 0.00001f);
  out[i] = divbuff[i] / divisor;
}
147+
148+
/*
  Sets every element of `mask` (width * height floats, row-major) to 1.0.
  Work items outside the width x height range do nothing.
*/
__kernel void prefill_clip_mask(global float *mask,
                                const int width,
                                const int height)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);

  if(x < width && y < height)
    mask[mad24(y, width, x)] = 1.0f;
}
159+
160+
/*
  Stores a per-pixel luminance and knocks holes into the blend mask.

  cfa     : sensor data; the .x channel is compared against the white level
  dev_out : RGB image the luminance is derived from
  filters : CFA descriptor; the value 9 selects the X-Trans lookup
  xtrans  : X-Trans layout, passed to FCxtrans()
  mask    : blend mask; written with 0.0 only, never with any other value here
  Yold    : receives the luminance of every pixel
  whites  : per-colour white level, indexed by the CFA colour of the pixel
  w       : row length in pixels
  height  : number of rows

  Pixels in the outer two rows/columns get mask = 0. For every other pixel whose
  CFA value exceeds its white level, or whose luminance is below CAPTURE_YMIN,
  the mask is zeroed in a 5x5 neighbourhood without its four corners (21 pixels).
  Several work items may write the same mask element; all of them write 0.0.

  NOTE(review): the red weight 0.2626 differs from the Rec.709 value 0.2126 while
  the green and blue weights match Rec.709 exactly - possibly a typo; confirm
  against the CPU code path before changing it.
*/
__kernel void prepare_blend(__read_only image2d_t cfa,
                            __read_only image2d_t dev_out,
                            const int filters,
                            global const unsigned char (*const xtrans)[6],
                            global float *mask,
                            global float *Yold,
                            global float *whites,
                            const int w,
                            const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= w || row >= height) return;

  const int k = mad24(row, w, col);
  const float4 rgb = read_imagef(dev_out, samplerA, (int2)(col, row));
  const float Y = fmax(0.0f, 0.2626f * rgb.x + 0.7152f * rgb.y + 0.0722f * rgb.z);
  Yold[k] = Y;

  const bool inner = row > 1 && col > 1 && row < height-2 && col < w -2;
  if(!inner)
  {
    mask[k] = 0.0f;
    return;
  }

  const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
  const float val = read_imagef(cfa, samplerA, (int2)(col, row)).x;
  const bool unusable = val > whites[color] || Y < CAPTURE_YMIN;
  if(!unusable) return;

  // three centre rows, five columns each
  for(int dy = -1; dy <= 1; dy++)
  {
    for(int dx = -2; dx <= 2; dx++)
      mask[k + dy * w + dx] = 0.0f;
  }
  // outermost rows, three columns each (corners stay untouched)
  for(int dx = -1; dx <= 1; dx++)
  {
    mask[k - 2 * w + dx] = 0.0f;
    mask[k + 2 * w + dx] = 0.0f;
  }
}
196+
197+
/*
  Attenuates the blend mask in flat image regions and copies the luminance.

  blend     : blend mask, scaled in place by a factor in [0, 1]
  Yold      : luminance plane, read only
  luminance : receives a copy of Yold
  threshold : subtracted from 5 * local standard deviation before the power curve
  width     : row length in pixels
  height    : number of rows

  The statistics window is a 5x5 neighbourhood without its four corners
  (21 samples): three centre rows with five columns plus the rows at +/-2 with
  three columns. The window centre is clamped to [2, dim-3] so pixels near the
  border reuse the nearest fully-inside window; results are still written at the
  work item's own position.
  Assumes width >= 5 and height >= 5 (otherwise the clamp bounds are inverted).

  Fix: the squared-deviation pass previously visited five columns in the rows at
  +/-2 (25 samples) while the mean and both divisors use 21 samples; it now walks
  exactly the same 21 samples as the mean.
  NOTE(review): if the CPU code path has the same loop bounds it should get the
  same correction to keep both paths in agreement.
*/
__kernel void modify_blend(global float *blend,
                           global float *Yold,
                           global float *luminance,
                           const float threshold,
                           const int width,
                           const int height)
{
  const int icol = get_global_id(0);
  const int irow = get_global_id(1);
  if(icol >= width || irow >= height) return;

  const int row = clamp(irow, 2, height-3);
  const int col = clamp(icol, 2, width-3);

  // number of samples in the corner-less 5x5 window
  const float samples = 21.0f;

  // mean of the window
  float av = 0.0f;
  for(int y = row-1; y < row+2; y++)
  {
    for(int x = col-2; x < col+3; x++)
      av += Yold[mad24(y, width, x)];
  }
  for(int x = col-1; x < col+2; x++)
  {
    av += Yold[mad24(row-2, width, x)];
    av += Yold[mad24(row+2, width, x)];
  }
  av /= samples;

  // sum of squared deviations over the very same samples
  float sv = 0.0f;
  for(int y = row-1; y < row+2; y++)
  {
    for(int x = col-2; x < col+3; x++)
      sv += sqrf(Yold[mad24(y, width, x)] - av);
  }
  for(int x = col-1; x < col+2; x++)
  {
    sv += sqrf(Yold[mad24(row-2, width, x)] - av);
    sv += sqrf(Yold[mad24(row+2, width, x)] - av);
  }

  // thresholded standard deviation, shaped by a power curve
  sv = dtcl_pow(fmax(0.0f, 5.0f * dtcl_sqrt(sv / samples) - threshold), CAPTURE_THRESHPOWER);

  const int k = mad24(irow, width, icol);
  blend[k] *= clamp(sv, 0.0f, 1.0f);
  luminance[k] = Yold[k];
}
241+
242+
/*
  Copies `in` to `out` and stores the blend weight in the fourth channel.

  in         : source image
  out        : destination image
  blend_mask : per-pixel weights, width * height floats, row-major
  width      : row length in pixels
  height     : number of rows

  Weights below CAPTURE_BLEND_EPS are written as 0.
*/
__kernel void show_blend_mask(__read_only image2d_t in,
                              __write_only image2d_t out,
                              global float *blend_mask,
                              const int width,
                              const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= width || row >= height) return;

  const int2 pos = (int2)(col, row);
  const float weight = blend_mask[mad24(row, width, col)];

  float alpha = weight;
  if(weight < CAPTURE_BLEND_EPS)
    alpha = 0.0f;

  float4 pix = read_imagef(in, samplerA, pos);
  pix.w = alpha;
  write_imagef(out, pos, pix);
}
257+
258+
/*
  Writes the final image: each pixel of `in` is scaled by the ratio of the
  blended new luminance to the old luminance.

  in        : source image
  out       : destination image
  blendmask : per-pixel weight; only pixels with weight > CAPTURE_BLEND_EPS are scaled
  luminance : old luminance per pixel
  tmp       : new luminance per pixel
  width     : row length in pixels
  height    : number of rows

  The new luminance is mix(old, new, clamp(weight, 0, 1)). Both luminances are
  bounded from below so the ratio stays finite. The scale is applied to all four
  channels of the pixel, including .w.
*/
__kernel void capture_result( __read_only image2d_t in,
                              __write_only image2d_t out,
                              global float *blendmask,
                              global float *luminance,
                              global float *tmp,
                              const int width,
                              const int height)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  if(col >= width || row >= height) return;

  const int2 pos = (int2)(col, row);
  const int k = mad24(row, width, col);
  float4 pix = read_imagef(in, samplerA, pos);

  // written as a negated '>' so a NaN weight takes the pass-through route
  if(!(blendmask[k] > CAPTURE_BLEND_EPS))
  {
    write_imagef(out, pos, pix);
    return;
  }

  const float mixer = clamp(blendmask[k], 0.0f, 1.0f);
  const float lumold = fmax(luminance[k], 0.000001f);
  const float lumtmp = fmax(tmp[k], 0.0000001f);
  const float luminance_new = mix(lumold, lumtmp, mixer);

  // scalar ratio broadcast to all four channels
  const float4 factor = luminance_new / lumold;
  pix = pix * factor;

  write_imagef(out, pos, pix);
}
284+
285+
#undef CAPTURE_KERNEL_ALIGN

data/kernels/programs.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,4 @@ blurs.cl 34
3838
bspline.cl 35
3939
sigmoid.cl 36
4040
colorequal.cl 37
41+
capture.cl 38

0 commit comments

Comments
 (0)