diff --git a/data/kernels/capture.cl b/data/kernels/capture.cl
new file mode 100644
index 000000000000..5c311163dd31
--- /dev/null
+++ b/data/kernels/capture.cl
@@ -0,0 +1,285 @@
+/*
+    This file is part of darktable,
+    copyright (c) 2025 darktable developer.
+
+    darktable is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    darktable is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "common.h"
+
+#define CAPTURE_KERNEL_ALIGN 32 // stride, in floats, between consecutive coefficient sets in the kernels buffer
+#define CAPTURE_BLEND_EPS 0.01f // blend weights at or below this are skipped by the convolution and result kernels
+#define CAPTURE_YMIN 0.001f // luminance below this makes prepare_blend clear the mask around the pixel
+#define CAPTURE_THRESHPOWER 0.15f // exponent applied to the thresholded deviation in modify_blend
+
+static inline float sqrf(float a)
+{ // returns a squared
+  return a * a;
+}
+
+__kernel void kernel_9x9_mul(global float *in,
+                             global float *out,
+                             global float *blend,
+                             global float *kernels,
+                             global unsigned char *table,
+                             const int w1,
+                             const int height)
+{ // multiplies out[i] by a 9x9 weighted sum of in centred on pixel i; w1 is the row stride in pixels
+  const int col = get_global_id(0);
+  const int row = get_global_id(1);
+  if(col >= w1 || row >= height) return;
+
+  const int i = mad24(row, w1, col);
+  const int w2 = 2 * w1;
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+  if(blend[i] <= CAPTURE_BLEND_EPS) // out[i] is left untouched for pixels with a negligible blend weight
+     return;
+
+  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i]; // table[i] selects this pixel's coefficient set
+  global float *d = in + i; // d[0] is the centre pixel, d[n*w1 + m] the tap n rows down and m columns right
+
+  float val = 0.0f;
+  if(col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4) // whole 9x9 window lies inside the image
+  { // NOTE(review): offsets (a,b) and (b,a) share one coefficient here and (3,4), (4,3), (4,4) have no term, while the border loop reads kern[5*|ir|+|ic|] for every offset - results match only if the table is symmetric with zero corner entries; confirm against the host code filling kernels
+    val = kern[10+4] * (d[-w4-2]  + d[-w4+2]  + d[-w2-4]  + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+          kern[5 +4] * (d[-w4-1]  + d[-w4+1]  + d[-w1-4]  + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+          kern[4]    * (d[-w4  ]  + d[   -4]  + d[    4]  + d[ w4  ]) +
+          kern[15+3] * (d[-w3-3]  + d[-w3+3]  + d[ w3-3]  + d[ w3+3]) +
+          kern[10+3] * (d[-w3-2]  + d[-w3+2]  + d[-w2-3]  + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+          kern[ 5+3] * (d[-w3-1]  + d[-w3+1]  + d[-w1-3]  + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+          kern[   3] * (d[-w3  ]  + d[   -3]  + d[    3]  + d[ w3  ]) +
+          kern[10+2] * (d[-w2-2]  + d[-w2+2]  + d[ w2-2]  + d[ w2+2]) +
+          kern[ 5+2] * (d[-w2-1]  + d[-w2+1]  + d[-w1-2]  + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+          kern[   2] * (d[-w2  ]  + d[   -2]  + d[    2]  + d[ w2  ]) +
+          kern[ 5+1] * (d[-w1-1]  + d[-w1+1]  + d[ w1-1]  + d[ w1+1]) +
+          kern[   1] * (d[-w1  ]  + d[   -1]  + d[    1]  + d[ w1  ]) +
+          kern[   0] * (d[0]);
+  }
+  else
+  { // near the border: visit the window tap by tap and skip taps that fall outside the image
+    for(int ir = -4; ir <= 4; ir++)
+    {
+      const int irow = row+ir;
+      if(irow >= 0 && irow < height)
+      {
+        for(int ic = -4; ic <= 4; ic++)
+        {
+          const int icol = col+ic;
+          if(icol >=0 && icol < w1)
+            val += kern[5 * abs(ir) + abs(ic)] * in[mad24(irow, w1, icol)]; // coefficient depends only on |row offset| and |column offset|
+        }
+      }
+    }
+  }
+  out[i] *= val; // in-place scaling of the existing out value by the filtered input
+}
+
+__kernel void kernel_9x9_div(global float *in,
+                             global float *out,
+                             global float *divbuff,
+                             global float *blend,
+                             global float *kernels,
+                             global unsigned char *table,
+                             const int w1,
+                             const int height)
+{ // writes out[i] = divbuff[i] / (9x9 weighted sum of in centred on pixel i); w1 is the row stride in pixels
+  const int col = get_global_id(0);
+  const int row = get_global_id(1);
+  if(col >= w1 || row >= height) return;
+
+  const int i = mad24(row, w1, col);
+  const int w2 = 2 * w1;
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+  if(blend[i] <= CAPTURE_BLEND_EPS) // out[i] is not written for pixels with a negligible blend weight
+    return;
+
+  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i]; // table[i] selects this pixel's coefficient set
+  global float *d = in + i; // d[0] is the centre pixel, d[n*w1 + m] the tap n rows down and m columns right
+
+  float val = 0.0f;
+  if(col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4) // whole 9x9 window lies inside the image
+  { // NOTE(review): offsets (a,b) and (b,a) share one coefficient here and (3,4), (4,3), (4,4) have no term, while the border loop reads kern[5*|ir|+|ic|] for every offset - results match only if the table is symmetric with zero corner entries; confirm against the host code filling kernels
+    val = kern[10+4] * (d[-w4-2]  + d[-w4+2]  + d[-w2-4]  + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+          kern[5 +4] * (d[-w4-1]  + d[-w4+1]  + d[-w1-4]  + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+          kern[4]    * (d[-w4  ]  + d[   -4]  + d[    4]  + d[ w4  ]) +
+          kern[15+3] * (d[-w3-3]  + d[-w3+3]  + d[ w3-3]  + d[ w3+3]) +
+          kern[10+3] * (d[-w3-2]  + d[-w3+2]  + d[-w2-3]  + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+          kern[ 5+3] * (d[-w3-1]  + d[-w3+1]  + d[-w1-3]  + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+          kern[   3] * (d[-w3  ]  + d[   -3]  + d[    3]  + d[ w3  ]) +
+          kern[10+2] * (d[-w2-2]  + d[-w2+2]  + d[ w2-2]  + d[ w2+2]) +
+          kern[ 5+2] * (d[-w2-1]  + d[-w2+1]  + d[-w1-2]  + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+          kern[   2] * (d[-w2  ]  + d[   -2]  + d[    2]  + d[ w2  ]) +
+          kern[ 5+1] * (d[-w1-1]  + d[-w1+1]  + d[ w1-1]  + d[ w1+1]) +
+          kern[   1] * (d[-w1  ]  + d[   -1]  + d[    1]  + d[ w1  ]) +
+          kern[   0] * (d[0]);
+  }
+  else
+  { // near the border: visit the window tap by tap and skip taps that fall outside the image
+    for(int ir = -4; ir <= 4; ir++)
+    {
+      const int irow = row+ir;
+      if(irow >= 0 && irow < height)
+      {
+        for(int ic = -4; ic <= 4; ic++)
+        {
+          const int icol = col+ic;
+          if(icol >=0 && icol < w1)
+            val += kern[5 * abs(ir) + abs(ic)] * in[mad24(irow, w1, icol)]; // coefficient depends only on |row offset| and |column offset|
+        }
+      }
+    }
+  }
+  out[i] = divbuff[i] / fmax(val, 0.00001f); // divisor is floored at 0.00001f so the quotient never divides by zero
+}
+
+__kernel void prefill_clip_mask(global float *mask,
+                                const int width,
+                                const int height)
+{ // sets every entry of the width x height mask buffer to 1.0f
+  const int x = get_global_id(0);
+  const int y = get_global_id(1);
+
+  // work-items beyond the buffer extent have nothing to write
+  if(x < width && y < height)
+    mask[mad24(y, width, x)] = 1.0f;
+}
+
+__kernel void prepare_blend(__read_only image2d_t cfa,
+                            __read_only image2d_t dev_out,
+                            const int filters,
+                            global const unsigned char (*const xtrans)[6],
+                            global float *mask,
+                            global float *Yold,
+                            global float *whites,
+                            const int w,
+                            const int height)
+{ // stores the luminance of dev_out in Yold and zeroes mask around photosites above their white level or too dark
+  const int col = get_global_id(0);
+  const int row = get_global_id(1);
+  if(col >= w || row >= height) return;
+
+  const int k = mad24(row, w, col);
+  const float4 rgb = read_imagef(dev_out, samplerA, (int2)(col, row));
+  // NOTE(review): these weights sum to 1.05 (Rec.709 red weight is 0.2126) - confirm 0.2626f is intended
+  const float Y = fmax(0.0f, 0.2626f * rgb.x + 0.7152f * rgb.y + 0.0722f * rgb.z);
+  Yold[k] = Y;
+
+  if(!(row > 1 && col > 1 && row < height-2 && col < w -2))
+  { // the two outermost rows and columns are always masked out
+    mask[k] = 0.0f;
+    return;
+  }
+  const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
+  const float val = read_imagef(cfa, samplerA, (int2)(col, row)).x;
+  if(!(val > whites[color] || Y < CAPTURE_YMIN)) return;
+  // clear the 5x5 neighbourhood minus its four corners (21 entries)
+  for(int dy = -2; dy <= 2; dy++)
+  {
+    const int reach = (dy == -2 || dy == 2) ? 1 : 2;
+    for(int dx = -reach; dx <= reach; dx++)
+      mask[k + dy * w + dx] = 0.0f;
+  }
+}
+
+__kernel void modify_blend(global float *blend,
+                           global float *Yold,
+                           global float *luminance,
+                           const float threshold,
+                           const int width,
+                           const int height)
+{ // scales blend[] by a local-deviation weight computed from Yold and copies Yold into luminance
+  const int icol = get_global_id(0);
+  const int irow = get_global_id(1);
+  if(icol >= width || irow >= height) return;
+
+  // the window centre is pulled inside the image so no tap is out of bounds (assumes width, height >= 5)
+  const int row = clamp(irow, 2, height-3);
+  const int col = clamp(icol, 2, width-3);
+
+  // mean over a 5x5 window without its corners: 3 rows of 5 plus 2 rows of 3 = 21 samples
+  float av = 0.0f;
+  for(int y = row-1; y < row+2; y++)
+    for(int x = col-2; x < col+3; x++)
+      av += Yold[mad24(y, width, x)];
+  for(int x = col-1; x < col+2; x++)
+  {
+    av += Yold[mad24(row-2, width, x)];
+    av += Yold[mad24(row+2, width, x)];
+  }
+  av /= 21.0f;
+
+  // squared deviations over the same 21 samples: rows +-2 span col-1..col+1 exactly as for the mean
+  float sv = 0.0f;
+  for(int y = row-1; y < row+2; y++)
+    for(int x = col-2; x < col+3; x++)
+      sv += sqrf(Yold[mad24(y, width, x)] - av);
+  for(int x = col-1; x < col+2; x++)
+  {
+    sv += sqrf(Yold[mad24(row-2, width, x)] - av);
+    sv += sqrf(Yold[mad24(row+2, width, x)] - av);
+  }
+  // 5 * standard deviation, reduced by threshold, floored at 0 and raised to CAPTURE_THRESHPOWER
+  sv = dtcl_pow(fmax(0.0f, 5.0f * dtcl_sqrt(sv / 21.f) - threshold), CAPTURE_THRESHPOWER);
+
+  const int k = mad24(irow, width, icol); // results go to the work-item's own, unclamped position
+  blend[k] *= clamp(sv, 0.0f, 1.0f);
+  luminance[k] = Yold[k];
+}
+
+__kernel void show_blend_mask(__read_only image2d_t in,
+                              __write_only image2d_t out,
+                              global float *blend_mask,
+                              const int width,
+                              const int height)
+{ // copies in to out with the alpha channel replaced by the blend mask (0 where below CAPTURE_BLEND_EPS)
+  const int x = get_global_id(0);
+  const int y = get_global_id(1);
+  if(x >= width || y >= height) return;
+  const int2 pos = (int2)(x, y);
+  const float weight = blend_mask[mad24(y, width, x)];
+  float4 pix = read_imagef(in, samplerA, pos);
+  pix.w = (weight < CAPTURE_BLEND_EPS) ? 0.0f : weight;
+  write_imagef(out, pos, pix);
+}
+
+__kernel void capture_result( __read_only image2d_t in,
+                              __write_only image2d_t out,
+                              global float *blendmask,
+                              global float *luminance,
+                              global float *tmp,
+                              const int width,
+                              const int height)
+{ // where the mask is active, rescales the pixel by (blended luminance / old luminance); otherwise copies it
+  const int x = get_global_id(0);
+  const int y = get_global_id(1);
+  if(x >= width || y >= height) return;
+
+  const int2 pos = (int2)(x, y);
+  const int k = mad24(y, width, x);
+  float4 pix = read_imagef(in, samplerA, pos);
+  const float weight = blendmask[k];
+  if(weight > CAPTURE_BLEND_EPS)
+  {
+    // both luminances are floored away from zero before the ratio is formed
+    const float lum_old = fmax(luminance[k], 0.000001f);
+    const float lum_tmp = fmax(tmp[k], 0.0000001f);
+    const float lum_new = mix(lum_old, lum_tmp, clamp(weight, 0.0f, 1.0f));
+    pix *= lum_new / lum_old;
+  }
+  write_imagef(out, pos, pix);
+}
+
+#undef CAPTURE_KERNEL_ALIGN
diff --git a/data/kernels/demosaic_ppg.cl b/data/kernels/demosaic_ppg.cl
index 8498700782a9..0c499f5d626e 100644
--- a/data/kernels/demosaic_ppg.cl
+++ b/data/kernels/demosaic_ppg.cl
@@ -36,8 +36,13 @@ backtransformf (float2 p, const int r_x, const int r_y, const int r_wd, const in
 }
 
 kernel void
-green_equilibration_lavg(read_only image2d_t in, write_only image2d_t out, const int width, const int height, const unsigned int filters,
-                         const int r_x, const int r_y, const float thr, local float *buffer)
+green_equilibration_lavg(read_only image2d_t in,
+                         write_only image2d_t out,
+                         const int width,
+                         const int height,
+                         const unsigned int filters,
+                         const float thr,
+                         local float *buffer)
 {
   const int x = get_global_id(0);
   const int y = get_global_id(1);
@@ -79,11 +84,11 @@ green_equilibration_lavg(read_only image2d_t in, write_only image2d_t out, const
 
   if(x >= width || y >= height) return;
 
-  const int c = FC(y + r_y, x + r_x, filters);
+  const int c = FC(y, x, filters);
   const float maximum = 1.0f;
   float o = buffer[0];
 
-  if(c == 1 && ((y + r_y) & 1))
+  if(c == 1 && (y & 1))
   {
     const float o1_1 = buffer[-1 * stride - 1];
     const float o1_2 = buffer[-1 * stride + 1];
@@ -112,8 +117,12 @@ green_equilibration_lavg(read_only image2d_t in, write_only image2d_t out, const
 
 
 kernel void
-green_equilibration_favg_reduce_first(read_only image2d_t in, const int width, const int height,
-                                      global float2 *accu, const unsigned int filters, const int r_x, const int r_y, local float2 *buffer)
+green_equilibration_favg_reduce_first(read_only image2d_t in,
+                                      const int width,
+                                      const int height,
+                                      global float2 *accu,
+                                      const unsigned int filters,
+                                      local float2 *buffer)
 {
   const int x = get_global_id(0);
   const int y = get_global_id(1);
@@ -124,11 +133,11 @@ green_equilibration_favg_reduce_first(read_only image2d_t in, const int width, c
 
   const int l = mad24(ylid, xlsz, xlid);
 
-  const int c = FC(y + r_y, x + r_x, filters);
+  const int c = FC(y, x, filters);
 
   const int isinimage = (x < 2 * (width / 2) && y < 2 * (height / 2));
-  const int isgreen1 = (c == 1 && !((y + r_y) & 1));
-  const int isgreen2 = (c == 1 && ((y + r_y) & 1));
+  const int isgreen1 = (c == 1 && !(y & 1));
+  const int isgreen2 = (c == 1 && (y & 1));
 
   float pixel = read_imagef(in, sampleri, (int2)(x, y)).x;
 
@@ -194,8 +203,12 @@ green_equilibration_favg_reduce_second(const global float2* input, global float2
 
 
 kernel void
-green_equilibration_favg_apply(read_only image2d_t in, write_only image2d_t out, const int width, const int height, const unsigned int filters,
-                               const int r_x, const int r_y, const float gr_ratio)
+green_equilibration_favg_apply(read_only image2d_t in,
+                               write_only image2d_t out,
+                               const int width,
+                               const int height,
+                               const unsigned int filters,
+                               const float gr_ratio)
 {
   const int x = get_global_id(0);
   const int y = get_global_id(1);
@@ -204,9 +217,9 @@ green_equilibration_favg_apply(read_only image2d_t in, write_only image2d_t out,
 
   float pixel = read_imagef(in, sampleri, (int2)(x, y)).x;
 
-  const int c = FC(y + r_y, x + r_x, filters);
+  const int c = FC(y, x, filters);
 
-  const int isgreen1 = (c == 1 && !((y + r_y) & 1));
+  const int isgreen1 = (c == 1 && !(y & 1));
 
   pixel *= (isgreen1 ? gr_ratio : 1.0f);
 
diff --git a/data/kernels/programs.conf b/data/kernels/programs.conf
index d2f38ad82157..dc1dc287ba46 100644
--- a/data/kernels/programs.conf
+++ b/data/kernels/programs.conf
@@ -38,3 +38,4 @@ blurs.cl                34
 bspline.cl              35
 sigmoid.cl              36
 colorequal.cl           37
+capture.cl              38
diff --git a/src/common/iop_order.c b/src/common/iop_order.c
index 8ff8e3d1dd2d..fa7f60d761cc 100644
--- a/src/common/iop_order.c
+++ b/src/common/iop_order.c
@@ -88,6 +88,7 @@ const dt_iop_order_entry_t legacy_order[] = {
   { { 6.0f }, "hotpixels", 0},
   { { 7.0f }, "rawdenoise", 0},
   { { 8.0f }, "demosaic", 0},
+  { { 8.5f }, "pipescale", 0},
   { { 9.0f }, "mask_manager", 0},
   { {10.0f }, "denoiseprofile", 0},
   { {11.0f }, "tonemap", 0},
@@ -184,6 +185,7 @@ const dt_iop_order_entry_t v30_order[] = {
   { { 6.0f }, "hotpixels", 0},
   { { 7.0f }, "rawdenoise", 0},
   { { 8.0f }, "demosaic", 0},
+  { { 8.5f }, "pipescale", 0},
   { { 9.0f }, "denoiseprofile", 0},
   { {10.0f }, "bilateral", 0},
   { {11.0f }, "rotatepixels", 0},
@@ -301,6 +303,7 @@ const dt_iop_order_entry_t v50_order[] = {
   { { 6.0f }, "hotpixels", 0},
   { { 7.0f }, "rawdenoise", 0},
   { { 8.0f }, "demosaic", 0},
+  { { 8.5f }, "pipescale", 0},
   { { 9.0f }, "denoiseprofile", 0},
   { {10.0f }, "bilateral", 0},
   { {11.0f }, "rotatepixels", 0},
@@ -420,6 +423,7 @@ const dt_iop_order_entry_t v30_jpg_order[] = {
   { { 6.0f }, "hotpixels", 0 },
   { { 7.0f }, "rawdenoise", 0 },
   { { 8.0f }, "demosaic", 0 },
+  { { 8.5f }, "pipescale", 0},
   // all the modules between [8; 28] expect linear RGB, so they need to be moved after colorin
   { { 28.0f }, "colorin", 0 },
   // moved modules : (copy-pasted in the same order)
@@ -540,6 +544,7 @@ const dt_iop_order_entry_t v50_jpg_order[] = {
   { { 6.0f }, "hotpixels", 0 },
   { { 7.0f }, "rawdenoise", 0 },
   { { 8.0f }, "demosaic", 0 },
+  { { 8.5f }, "pipescale", 0},
   // all the modules between [8; 28] expect linear RGB, so they need to be moved after colorin
   { { 28.0f }, "colorin", 0 },
   // moved modules : (copy-pasted in the same order)
@@ -1179,6 +1184,7 @@ GList *dt_ioppr_get_iop_order_list(const dt_imgid_t imgid,
           _insert_before(iop_order_list, "filmicrgb", "sigmoid");
           _insert_before(iop_order_list, "colorbalancergb", "colorequal");
           _insert_before(iop_order_list, "highlights", "rasterfile");
+          _insert_before(iop_order_list, "denoiseprofile", "pipescale"); // TODO(review): confirm this is the intended position for pipescale when migrating older orders
         }
       }
       else if(version >= DT_IOP_ORDER_LEGACY
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index 991cb4ae25b4..b5f6f91a3abe 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -1737,9 +1737,6 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
   // 3b) recurse and obtain output array in &input
 
   // get region of interest which is needed in input
-  if(dt_pipe_shutdown(pipe))
-    return TRUE;
-
   module->modify_roi_in(module, piece, roi_out, &roi_in);
   if((darktable.unmuted & DT_DEBUG_PIPE) && memcmp(roi_out, &roi_in, sizeof(dt_iop_roi_t)))
   {
@@ -1775,18 +1772,11 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
   const size_t out_bpp = dt_iop_buffer_dsc_to_bpp(*out_format);
 
   // reserve new cache line: output
-  if(dt_pipe_shutdown(pipe))
-    return TRUE;
-
   const gboolean important = module
       && (pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_NONE)
-      && (((pipe->type & DT_DEV_PIXELPIPE_PREVIEW)
-           && dt_iop_module_is(module->so, "colorout"))
-       || ((pipe->type & DT_DEV_PIXELPIPE_FULL)
-           && dt_iop_module_is(module->so, "gamma")));
+      && dt_iop_module_is(module->so, "pipescale");
 
-  dt_dev_pixelpipe_cache_get(pipe, hash, bufsize,
-                             output, out_format, module, important);
+  dt_dev_pixelpipe_cache_get(pipe, hash, bufsize, output, out_format, module, important);
 
   if(dt_pipe_shutdown(pipe))
     return TRUE;
@@ -2546,8 +2536,8 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
            && dev->gui_attached
            && ((module == dt_dev_gui_module())
                 || darktable.develop->history_last_module == module
-                || dt_iop_module_is(module->so, "colorout")
-                || dt_iop_module_is(module->so, "finalscale"));
+                || dt_iop_module_is(module->so, "finalscale")
+                || dt_iop_module_is(module->so, "pipescale"));
 
         if(important_cl)
         {
diff --git a/src/iop/CMakeLists.txt b/src/iop/CMakeLists.txt
index 89a6e7453e1b..7eb71fd384cb 100644
--- a/src/iop/CMakeLists.txt
+++ b/src/iop/CMakeLists.txt
@@ -155,6 +155,7 @@ add_iop(sigmoid "sigmoid.c")
 add_iop(primaries "primaries.c")
 add_iop(colorequal "colorequal.c")
 add_iop(rasterfile "rasterfile.c")
+add_iop(pipescale "pipescale.c")
 
 if(Rsvg2_FOUND)
   add_iop(watermark "watermark.c")
diff --git a/src/iop/cacorrect.c b/src/iop/cacorrect.c
index 269c7ad81e86..2b780043103e 100644
--- a/src/iop/cacorrect.c
+++ b/src/iop/cacorrect.c
@@ -1215,37 +1215,6 @@ DT_OMP_PRAGMA(barrier)
 /*==================================================================================
  * end raw therapee code
  *==================================================================================*/
-void modify_roi_out(dt_iop_module_t *self,
-                    dt_dev_pixelpipe_iop_t *piece,
-                    dt_iop_roi_t *roi_out,
-                    const dt_iop_roi_t *const roi_in)
-{
-  *roi_out = *roi_in;
-  roi_out->x = MAX(0, roi_in->x);
-  roi_out->y = MAX(0, roi_in->y);
-}
-void modify_roi_in(dt_iop_module_t *self,
-                   dt_dev_pixelpipe_iop_t *piece,
-                   const dt_iop_roi_t *const roi_out,
-                   dt_iop_roi_t *roi_in)
-{
-  *roi_in = *roi_out;
-  roi_in->x = 0;
-  roi_in->y = 0;
-  roi_in->width = piece->buf_in.width;
-  roi_in->height = piece->buf_in.height;
-  roi_in->scale = 1.0f;
-}
-
-void distort_mask(dt_iop_module_t *self,
-                  dt_dev_pixelpipe_iop_t *piece,
-                  const float *const in,
-                  float *const out,
-                  const dt_iop_roi_t *const roi_in,
-                  const dt_iop_roi_t *const roi_out)
-{
-  dt_iop_copy_image_roi(out, in, 1, roi_in, roi_out);
-}
 
 void reload_defaults(dt_iop_module_t *self)
 {
diff --git a/src/iop/demosaic.c b/src/iop/demosaic.c
index 96d50748c391..c1bc7016d592 100644
--- a/src/iop/demosaic.c
+++ b/src/iop/demosaic.c
@@ -46,7 +46,7 @@
 #include <string.h>
 #include <time.h>
 
-DT_MODULE_INTROSPECTION(4, dt_iop_demosaic_params_t)
+DT_MODULE_INTROSPECTION(5, dt_iop_demosaic_params_t)
 
 #define DT_DEMOSAIC_XTRANS 1024 // masks for non-Bayer demosaic ops
 #define DT_DEMOSAIC_DUAL 2048   // masks for dual demosaicing methods
@@ -89,15 +89,6 @@ typedef enum dt_iop_demosaic_greeneq_t
   DT_IOP_GREEN_EQ_BOTH = 3   // $DESCRIPTION: "full and local average"
 } dt_iop_demosaic_greeneq_t;
 
-typedef enum dt_iop_demosaic_qual_flags_t
-{
-  // either perform full scale demosaicing or choose simple half scale
-  // or third scale interpolation instead
-  DT_DEMOSAIC_DEFAULT                 = 0,
-  DT_DEMOSAIC_FULL_SCALE              = 1 << 0,
-  DT_DEMOSAIC_ONLY_VNG_LINEAR         = 1 << 1,
-} dt_iop_demosaic_qual_flags_t;
-
 typedef enum dt_iop_demosaic_smooth_t
 {
   DT_DEMOSAIC_SMOOTH_OFF = 0, // $DESCRIPTION: "disabled"
@@ -124,7 +115,12 @@ typedef struct dt_iop_demosaic_params_t
   dt_iop_demosaic_smooth_t color_smoothing;     // $DEFAULT: DT_DEMOSAIC_SMOOTH_OFF $DESCRIPTION: "color smoothing"
   dt_iop_demosaic_method_t demosaicing_method;  // $DEFAULT: DT_IOP_DEMOSAIC_RCD $DESCRIPTION: "method"
   dt_iop_demosaic_lmmse_t lmmse_refine;         // $DEFAULT: DT_LMMSE_REFINE_1 $DESCRIPTION: "LMMSE refine"
-  float dual_thrs;                              // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.20 $DESCRIPTION: "dual threshold"
+  float dual_thrs;                              // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.2 $DESCRIPTION: "dual threshold"
+  float cs_radius;                              // $MIN: 0.0 $MAX: 2.0 $DEFAULT: 0.0 $DESCRIPTION: "radius"
+  float cs_thrs;                                // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.35 $DESCRIPTION: "contrast threshold"
+  float cs_boost;                               // $MIN: 0.0 $MAX: 1.5 $DEFAULT: 0.0 $DESCRIPTION: "corner boost"
+  int cs_strength;                              // $MIN: 0 $MAX: 20 $DEFAULT: 0 $DESCRIPTION: "capture sharpen"
+  float reserved;
 } dt_iop_demosaic_params_t;
 
 typedef struct dt_iop_demosaic_gui_data_t
@@ -137,7 +133,13 @@ typedef struct dt_iop_demosaic_gui_data_t
   GtkWidget *demosaic_method_bayerfour;
   GtkWidget *dual_thrs;
   GtkWidget *lmmse_refine;
+  GtkWidget *cs_thrs;
+  GtkWidget *cs_radius;
+  GtkWidget *cs_boost;
+  GtkWidget *cs_strength;
+  gboolean cs_mask;
   gboolean dual_mask;
+  gboolean autoradius;
 } dt_iop_demosaic_gui_data_t;
 
 typedef struct dt_iop_demosaic_global_data_t
@@ -152,14 +154,10 @@ typedef struct dt_iop_demosaic_global_data_t
   int kernel_passthrough_color;
   int kernel_ppg_green;
   int kernel_ppg_redblue;
-  int kernel_zoom_half_size;
-  int kernel_downsample;
   int kernel_border_interpolate;
   int kernel_color_smoothing;
-  int kernel_zoom_passthrough_monochrome;
   int kernel_vng_border_interpolate;
   int kernel_vng_lin_interpolate;
-  int kernel_zoom_third_size;
   int kernel_vng_green_equilibrate;
   int kernel_vng_interpolate;
   int kernel_markesteijn_initial_copy;
@@ -193,6 +191,14 @@ typedef struct dt_iop_demosaic_global_data_t
   int kernel_rcd_border_redblue;
   int kernel_rcd_border_green;
   int kernel_write_blended_dual;
+  int gaussian_9x9_mul;
+  int gaussian_9x9_div;
+  int prefill_clip_mask;
+  int prepare_blend;
+  int modify_blend;
+  int show_blend_mask;
+  int capture_result;
+  float *gauss_coeffs;
 } dt_iop_demosaic_global_data_t;
 
 typedef struct dt_iop_demosaic_data_t
@@ -204,6 +210,10 @@ typedef struct dt_iop_demosaic_data_t
   float median_thrs;
   double CAM_to_RGB[3][4];
   float dual_thrs;
+  float cs_radius;
+  float cs_thrs;
+  float cs_boost;
+  int cs_strength;
 } dt_iop_demosaic_data_t;
 
 static gboolean _get_thumb_quality(const int width, const int height)
@@ -216,57 +226,6 @@ static gboolean _get_thumb_quality(const int width, const int height)
   return (level >= min_s);
 }
 
-// set flags for demosaic quality based on factors besides demosaic
-// method (e.g. config, scale, pixelpipe type)
-static dt_iop_demosaic_qual_flags_t demosaic_qual_flags(const dt_dev_pixelpipe_iop_t *const piece,
-                                                        const dt_image_t *const img,
-                                                        const dt_iop_roi_t *const roi_out)
-{
-  const uint32_t filters = piece->pipe->dsc.filters;
-  const gboolean is_xtrans = filters == 9u;
-
-  dt_iop_demosaic_qual_flags_t flags = DT_DEMOSAIC_DEFAULT;
-  switch(piece->pipe->type & DT_DEV_PIXELPIPE_ANY)
-  {
-    case DT_DEV_PIXELPIPE_PREVIEW2:
-      flags |= DT_DEMOSAIC_FULL_SCALE;
-      break;
-    case DT_DEV_PIXELPIPE_FULL:
-      flags |= DT_DEMOSAIC_FULL_SCALE;
-      break;
-    case DT_DEV_PIXELPIPE_EXPORT:
-      flags |= DT_DEMOSAIC_FULL_SCALE;
-      break;
-    case DT_DEV_PIXELPIPE_THUMBNAIL:
-      flags |= (piece->pipe->want_detail_mask || _get_thumb_quality(roi_out->width, roi_out->height))
-                  ? DT_DEMOSAIC_FULL_SCALE
-                  : DT_DEMOSAIC_DEFAULT;
-      break;
-    default: // make C not complain about missing enum members
-      break;
-  }
-
-  // For sufficiently small scaling, one or more repetitition of the
-  // CFA pattern can be merged into a single pixel, hence it is
-  // possible to skip the full demosaic and perform a quick downscale.
-  // Note even though the X-Trans CFA is 6x6, for this purposes we can
-  // see each 6x6 tile as four fairly similar 3x3 tiles
-  if(roi_out->scale > (is_xtrans ? 0.667f : 0.5f))
-    flags |= DT_DEMOSAIC_FULL_SCALE;
-
-  // half_size_f doesn't support 4bayer images
-  if(img->flags & DT_IMAGE_4BAYER)
-    flags |= DT_DEMOSAIC_FULL_SCALE;
-
-  // we check if we can stop at the linear interpolation step in VNG
-  // instead of going the full way
-  if(((flags & DT_DEMOSAIC_FULL_SCALE) && (roi_out->scale < (is_xtrans ? 0.5f : 0.667f)))
-    || piece->pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_PASSTHRU)
-    flags |= DT_DEMOSAIC_ONLY_VNG_LINEAR;
-
-  return flags;
-}
-
 // Implemented in demosaicing/amaze.cc
 void amaze_demosaic(dt_dev_pixelpipe_iop_t *piece,
                     const float *const in,
@@ -282,6 +241,7 @@ void amaze_demosaic(dt_dev_pixelpipe_iop_t *piece,
 #include "iop/demosaicing/rcd.c"
 #include "iop/demosaicing/lmmse.c"
 #include "iop/demosaicing/dual.c"
+#include "iop/demosaicing/capture.c"
 
 const char *name()
 {
@@ -314,6 +274,18 @@ dt_iop_colorspace_type_t default_colorspace(dt_iop_module_t *self,
   return IOP_CS_RAW;
 }
 
+// The input roi is the output roi with its origin snapped onto the sensor pattern grid (never below 0)
+void modify_roi_in(dt_iop_module_t *self,
+                   dt_dev_pixelpipe_iop_t *piece,
+                   const dt_iop_roi_t *roi_out,
+                   dt_iop_roi_t *roi_in)
+{
+  const int grid = (piece->pipe->dsc.filters == 9u) ? DT_XTRANS_SNAPPER : DT_BAYER_SNAPPER;
+  *roi_in = *roi_out;
+  roi_in->x = MAX(0, roi_in->x - roi_in->x % grid);
+  roi_in->y = MAX(0, roi_in->y - roi_in->y % grid);
+}
+
 int legacy_params(dt_iop_module_t *self,
                   const void *const old_params,
                   const int old_version,
@@ -321,7 +293,7 @@ int legacy_params(dt_iop_module_t *self,
                   int32_t *new_params_size,
                   int *new_version)
 {
-  typedef struct dt_iop_demosaic_params_v4_t
+  typedef struct dt_iop_demosaic_params_v5_t
   {
     dt_iop_demosaic_greeneq_t green_eq;
     float median_thrs;
@@ -329,7 +301,12 @@ int legacy_params(dt_iop_module_t *self,
     dt_iop_demosaic_method_t demosaicing_method;
     dt_iop_demosaic_lmmse_t lmmse_refine;
     float dual_thrs;
-  } dt_iop_demosaic_params_v4_t;
+    float cs_radius;
+    float cs_thrs;
+    float cs_boost;
+    int cs_strength;
+    float reserved;
+  } dt_iop_demosaic_params_v5_t;
 
   if(old_version == 2)
   {
@@ -340,17 +317,22 @@ int legacy_params(dt_iop_module_t *self,
     } dt_iop_demosaic_params_v2_t;
 
     const dt_iop_demosaic_params_v2_t *o = (dt_iop_demosaic_params_v2_t *)old_params;
-    dt_iop_demosaic_params_v4_t *n = malloc(sizeof(dt_iop_demosaic_params_v4_t));
+    dt_iop_demosaic_params_v5_t *n = malloc(sizeof(dt_iop_demosaic_params_v5_t));
     n->green_eq = o->green_eq;
     n->median_thrs = o->median_thrs;
     n->color_smoothing = DT_DEMOSAIC_SMOOTH_OFF;
     n->demosaicing_method = DT_IOP_DEMOSAIC_PPG;
     n->lmmse_refine = DT_LMMSE_REFINE_1;
     n->dual_thrs = 0.20f;
+    n->cs_radius = 0.0f;
+    n->cs_thrs = 0.35f;
+    n->cs_boost = 0.0f;
+    n->cs_strength = 0;
+    n->reserved = 0.0f;
 
     *new_params = n;
-    *new_params_size = sizeof(dt_iop_demosaic_params_v4_t);
-    *new_version = 4;
+    *new_params_size = sizeof(dt_iop_demosaic_params_v5_t);
+    *new_version = 5;
     return 0;
   }
 
@@ -366,13 +348,45 @@ int legacy_params(dt_iop_module_t *self,
     } dt_iop_demosaic_params_v3_t;
 
     const dt_iop_demosaic_params_v3_t *o = (dt_iop_demosaic_params_v3_t *)old_params;
-    dt_iop_demosaic_params_v4_t *n = malloc(sizeof(dt_iop_demosaic_params_v4_t));
+    dt_iop_demosaic_params_v5_t *n = malloc(sizeof(dt_iop_demosaic_params_v5_t));
     memcpy(n, o, sizeof *o);
     n->dual_thrs = 0.20f;
+    n->cs_radius = 0.0f;
+    n->cs_thrs = 0.35f;
+    n->cs_boost = 0.0f;
+    n->cs_strength = 0;
+    n->reserved = 0.0f;
 
     *new_params = n;
-    *new_params_size = sizeof(dt_iop_demosaic_params_v4_t);
-    *new_version = 4;
+    *new_params_size = sizeof(dt_iop_demosaic_params_v5_t);
+    *new_version = 5;
+    return 0;
+  }
+
+  if(old_version == 4)
+  {
+    typedef struct dt_iop_demosaic_params_v4_t
+    {
+      dt_iop_demosaic_greeneq_t green_eq;
+      float median_thrs;
+      dt_iop_demosaic_smooth_t color_smoothing;
+      dt_iop_demosaic_method_t demosaicing_method;
+      dt_iop_demosaic_lmmse_t lmmse_refine;
+      float dual_thrs;
+    } dt_iop_demosaic_params_v4_t;
+
+    const dt_iop_demosaic_params_v4_t *o = (dt_iop_demosaic_params_v4_t *)old_params;
+    dt_iop_demosaic_params_v5_t *n = malloc(sizeof(dt_iop_demosaic_params_v5_t));
+    memcpy(n, o, sizeof *o);
+    n->cs_radius = 0.0f;
+    n->cs_thrs = 0.35f;
+    n->cs_boost = 0.0f;
+    n->cs_strength = 0;
+    n->reserved = 0.0f;
+
+    *new_params = n;
+    *new_params_size = sizeof(dt_iop_demosaic_params_v5_t);
+    *new_version = 5;
     return 0;
   }
 
@@ -393,80 +407,6 @@ dt_iop_colorspace_type_t output_colorspace(dt_iop_module_t *self,
   return IOP_CS_RGB;
 }
 
-void distort_mask(dt_iop_module_t *self,
-                  dt_dev_pixelpipe_iop_t *piece,
-                  const float *const in,
-                  float *const out,
-                  const dt_iop_roi_t *const roi_in,
-                  const dt_iop_roi_t *const roi_out)
-{
-  if(roi_out->scale != roi_in->scale)
-  {
-    const dt_interpolation_t *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF_WARP);
-    dt_interpolation_resample_roi_1c(itor, out, roi_out, in, roi_in);
-  }
-  else
-    dt_iop_copy_image_roi(out, in, 1, roi_in, roi_out);
-}
-
-void modify_roi_out(dt_iop_module_t *self,
-                    dt_dev_pixelpipe_iop_t *piece,
-                    dt_iop_roi_t *roi_out,
-                    const dt_iop_roi_t *const roi_in)
-{
-  *roi_out = *roi_in;
-  roi_out->x = 0;
-  roi_out->y = 0;
-}
-
-void modify_roi_in(dt_iop_module_t *self,
-                   dt_dev_pixelpipe_iop_t *piece,
-                   const dt_iop_roi_t *roi_out,
-                   dt_iop_roi_t *roi_in)
-{
-  *roi_in = *roi_out;
-  // need 1:1, demosaic and then sub-sample. or directly sample half-size
-  roi_in->x /= roi_out->scale;
-  roi_in->y /= roi_out->scale;
-  roi_in->width /= roi_out->scale;
-  roi_in->height /= roi_out->scale;
-  roi_in->scale = 1.0f;
-
-  dt_iop_demosaic_data_t *d = piece->data;
-  const dt_iop_demosaic_method_t method = d->demosaicing_method;
-  const gboolean passthrough = method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME ||
-                               method == DT_IOP_DEMOSAIC_PASSTHR_MONOX ||
-                               method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR ||
-                               method == DT_IOP_DEMOSAIC_PASSTHR_COLORX;
-  // set position to closest top/left sensor pattern snap
-  if(!passthrough)
-  {
-    const int aligner = (piece->pipe->dsc.filters != 9u) ? DT_BAYER_SNAPPER : DT_XTRANS_SNAPPER;
-    const int dx = roi_in->x % aligner;
-    const int dy = roi_in->y % aligner;
-
-/*
-    // This implements snapping to closest position, meant for optimized xtrans position
-    // but with problems at extreme zoom levels
-    const int shift_x = (dx > aligner / 2) ? aligner - dx : -dx;
-    const int shift_y = (dy > aligner / 2) ? aligner - dy : -dy;
-
-    roi_in->x += shift_x;
-    roi_in->y += shift_y;
-*/
-
-    // currently we always snap to left & upper
-    roi_in->x -= dx;
-    roi_in->y -= dy;
-  }
-
-  // clamp to full buffer fixing numeric inaccuracies
-  roi_in->x = MAX(0, roi_in->x);
-  roi_in->y = MAX(0, roi_in->y);
-  roi_in->width = MIN(roi_in->width, piece->buf_in.width);
-  roi_in->height = MIN(roi_in->height, piece->buf_in.height);
-}
-
 void tiling_callback(dt_iop_module_t *self,
                      dt_dev_pixelpipe_iop_t *piece,
                      const dt_iop_roi_t *roi_in,
@@ -475,18 +415,11 @@ void tiling_callback(dt_iop_module_t *self,
 {
   dt_iop_demosaic_data_t *d = piece->data;
 
-  const float ioratio = (float)roi_out->width * roi_out->height / ((float)roi_in->width * roi_in->height);
-  const float smooth = d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF ? ioratio : 0.0f;
+  const float smooth = d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF ? 1.0f : 0.0f;
   const gboolean is_xtrans = piece->pipe->dsc.filters == 9u;
   const float greeneq = (!is_xtrans && (d->green_eq != DT_IOP_GREEN_EQ_NO)) ? 0.25f : 0.0f;
   const dt_iop_demosaic_method_t demosaicing_method = d->demosaicing_method & ~DT_DEMOSAIC_DUAL;
 
-  const int qual_flags = demosaic_qual_flags(piece, &self->dev->image_storage, roi_out);
-  const int full_scale = qual_flags & DT_DEMOSAIC_FULL_SCALE;
-
-  // check if output buffer has same dimension as input buffer (thus avoiding one
-  // additional temporary buffer)
-  const gboolean unscaled = roi_out->width == roi_in->width && roi_out->height == roi_in->height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
   const gboolean is_opencl = piece->pipe->devid > DT_DEVICE_CPU;
   // define aligners
   tiling->xalign = is_xtrans ? DT_XTRANS_SNAPPER : DT_BAYER_SNAPPER;
@@ -501,15 +434,8 @@ void tiling_callback(dt_iop_module_t *self,
      demosaicing_method == DT_IOP_DEMOSAIC_AMAZE)
   {
     // Bayer pattern with PPG, Passthrough or Amaze
-    tiling->factor = 1.0f + ioratio;         // in + out
-
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + geeneq | + smooth
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);  // + tmp + aux + greeneq | + smooth
-    else
-      tiling->factor += smooth;                        // + smooth
-
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + greeneq | + smooth
     tiling->overhead = 0;
     tiling->overlap = 5; // take care of border handling
   }
@@ -521,67 +447,43 @@ void tiling_callback(dt_iop_module_t *self,
     const int ndir = demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3 ? 8 : 4;
     const int overlap = demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3 ? 18 : 12;
 
-    tiling->factor = 1.0f + ioratio;
+    tiling->factor = 2.0f;
     tiling->factor += ndir * 1.0f      // rgb
                       + ndir * 0.25f   // drv
                       + ndir * 0.125f  // homo + homosum
                       + 1.0f;          // aux
-
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);
-    else
-      tiling->factor += smooth;
-
+    tiling->factor += MAX(1.0f + greeneq, smooth);
     tiling->overlap = overlap;
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_RCD)
   {
-    tiling->factor = 1.0f + ioratio;
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + geeneq | + smooth
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);  // + tmp + aux + greeneq | + smooth
-    else
-      tiling->factor += smooth;                        // + smooth
-
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + greeneq | + smooth
     tiling->overhead = is_opencl ? 0 : sizeof(float) * DT_RCD_TILESIZE * DT_RCD_TILESIZE * 8 * dt_get_num_threads();
     tiling->overlap = 10;
     tiling->factor_cl = tiling->factor + 3.0f;
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_LMMSE)
   {
-    tiling->factor = 1.0f + ioratio;
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + geeneq | + smooth
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);  // + tmp + aux + greeneq | + smooth
-    else
-      tiling->factor += smooth;                        // + smooth
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + greeneq | + smooth
     tiling->overhead = sizeof(float) * DT_LMMSE_TILESIZE * DT_LMMSE_TILESIZE * 6 * dt_get_num_threads();
     tiling->overlap = 10;
   }
   else
   {
     // VNG
-    tiling->factor = 1.0f + ioratio;
-
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);
-    else
-      tiling->factor += smooth;
-
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);
     tiling->overlap = 6;
   }
 
-  if(d->demosaicing_method & DT_DEMOSAIC_DUAL)
+  if((d->demosaicing_method & DT_DEMOSAIC_DUAL) || d->cs_strength)
   {
-    // make sure VNG4 is also possible
+    // internals plus 2 output
     tiling->factor += 1.0f;
-    tiling->overlap = MAX(6, tiling->overlap);
+    // works for bayer and xtrans
+    tiling->overlap = MAX(d->cs_strength ? 18 : 6, tiling->overlap);
   }
   return;
 }
@@ -601,14 +503,11 @@ void process(dt_iop_module_t *self,
   const gboolean run_fast = pipe->type & DT_DEV_PIXELPIPE_FAST;
   const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
   const gboolean previewpipe = pipe->type & DT_DEV_PIXELPIPE_PREVIEW;
-
   const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])pipe->dsc.xtrans;
 
   const dt_iop_demosaic_data_t *d = piece->data;
   const dt_iop_demosaic_gui_data_t *g = self->gui_data;
 
-  const int qual_flags = demosaic_qual_flags(piece, img, roi_out);
-  const gboolean fullscale = qual_flags & DT_DEMOSAIC_FULL_SCALE;
   const gboolean is_xtrans = pipe->dsc.filters == 9u;
   const gboolean is_4bayer = img->flags & DT_IMAGE_4BAYER;
   const gboolean is_bayer = !is_xtrans && pipe->dsc.filters != 0;
@@ -622,42 +521,44 @@ void process(dt_iop_module_t *self,
           && demosaicing_method != DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR))
     demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
 
-  gboolean showmask = FALSE;
+  gboolean show_dualmask = FALSE;
+  gboolean show_capturemask = FALSE;
+  gboolean vng_linear = FALSE;
   if(self->dev->gui_attached && fullpipe)
   {
     if(g->dual_mask)
     {
-      showmask = TRUE;
+      show_dualmask = TRUE;
+      pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
+    }
+    if(g->cs_mask)
+    {
+      show_capturemask = TRUE;
       pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
     }
     // take care of passthru modes
     if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_PASSTHRU)
+    {
       demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
+      vng_linear = TRUE;
+    }
   }
 
   float *in  = (float *)i;
   float *out = (float *)o;
 
-  if(!fullscale)
-  {
-    dt_print_pipe(DT_DEBUG_PIPE, "demosaic approx zoom", pipe, self, DT_DEVICE_CPU, roi_in, roi_out);
-    if(demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME || demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR)
-      dt_iop_clip_and_zoom_demosaic_passthrough_monochrome_f(out, in, roi_out, roi_in, roi_out->width, width);
-    else if(is_xtrans)
-      dt_iop_clip_and_zoom_demosaic_third_size_xtrans_f(out, in, roi_out, roi_in, roi_out->width, width, xtrans);
-    else
-      dt_iop_clip_and_zoom_demosaic_half_size_f(out, in, roi_out, roi_in, roi_out->width, width, pipe->dsc.filters);
-
-    return;
-  }
-
   const int base_demosaicing_method = demosaicing_method & ~DT_DEMOSAIC_DUAL;
   const gboolean dual = (demosaicing_method & DT_DEMOSAIC_DUAL) && !run_fast && !previewpipe;
 
-  const gboolean direct = roi_out->width == width && roi_out->height == height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
+  const gboolean passthru = demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME
+                         || demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR;
 
-  if(!direct)
-    out = dt_alloc_align_float((size_t)4 * width * height);
+  const gboolean do_capture = !passthru
+                          &&  !is_4bayer
+                          &&  !show_dualmask
+                          &&  !run_fast
+                          &&  !previewpipe
+                          &&  d->cs_strength;
 
   if(is_bayer && d->green_eq != DT_IOP_GREEN_EQ_NO)
   {
@@ -668,15 +569,15 @@ void process(dt_iop_module_t *self,
     switch(d->green_eq)
     {
       case DT_IOP_GREEN_EQ_FULL:
-        green_equilibration_favg(in, (float *)i, width, height, pipe->dsc.filters, roi_in->x, roi_in->y);
+        green_equilibration_favg(in, (float *)i, width, height, pipe->dsc.filters);
         break;
       case DT_IOP_GREEN_EQ_LOCAL:
-        green_equilibration_lavg(in, (float *)i, width, height, pipe->dsc.filters, roi_in->x, roi_in->y, threshold);
+        green_equilibration_lavg(in, (float *)i, width, height, pipe->dsc.filters, threshold);
         break;
       case DT_IOP_GREEN_EQ_BOTH:
         aux = dt_alloc_align_float((size_t)height * width);
-        green_equilibration_favg(aux, (float *)i, width, height, pipe->dsc.filters, roi_in->x, roi_in->y);
-        green_equilibration_lavg(in, aux, width, height, pipe->dsc.filters, roi_in->x, roi_in->y, threshold);
+        green_equilibration_favg(aux, (float *)i, width, height, pipe->dsc.filters);
+        green_equilibration_lavg(in, aux, width, height, pipe->dsc.filters, threshold);
         dt_free_align(aux);
         break;
       default:
@@ -696,13 +597,13 @@ void process(dt_iop_module_t *self,
     else if(base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN || base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3)
       xtrans_markesteijn_interpolate(out, in, roi_in, xtrans, passes);
     else
-      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, qual_flags & DT_DEMOSAIC_ONLY_VNG_LINEAR);
+      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, vng_linear);
   }
   else
   {
     if(demosaicing_method == DT_IOP_DEMOSAIC_VNG4 || is_4bayer)
     {
-      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, qual_flags & DT_DEMOSAIC_ONLY_VNG_LINEAR);
+      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, vng_linear);
       if(is_4bayer)
       {
         dt_colorspaces_cygm_to_rgb(out, width * height, d->CAM_to_RGB);
@@ -722,24 +623,16 @@ void process(dt_iop_module_t *self,
   if(pipe->want_detail_mask)
     dt_dev_write_scharr_mask(piece, out, roi_in, TRUE);
 
-  if(dual)
-    dual_demosaic(piece, out, in, roi_in, pipe->dsc.filters, xtrans, showmask, d->dual_thrs);
+  if(do_capture)
+    _capture_sharpen(self, piece, in, out, roi_in, show_capturemask);
+
+  if(dual && !show_capturemask)
+    dual_demosaic(piece, out, in, roi_in, pipe->dsc.filters, xtrans, show_dualmask, d->dual_thrs);
 
   if((float *)i != in) dt_free_align(in);
 
   if(d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF)
     color_smoothing(out, roi_in, d->color_smoothing);
-
-  dt_print_pipe(DT_DEBUG_VERBOSE, direct ? "demosaic inplace" : "demosaic clip_and_zoom", pipe, self, DT_DEVICE_CPU, roi_in, roi_out);
-  if(!direct)
-  {
-    dt_iop_roi_t roo = *roi_out;
-    roo.width = width;
-    roo.height = height;
-    roo.scale = 1.0f;
-    dt_iop_clip_and_zoom_roi((float *)o, out, roi_out, &roo);
-    dt_free_align(out);
-  }
 }
 
 #ifdef HAVE_OPENCL
@@ -750,13 +643,10 @@ int process_cl(dt_iop_module_t *self,
                const dt_iop_roi_t *const roi_in,
                const dt_iop_roi_t *const roi_out)
 {
-  const dt_image_t *img = &self->dev->image_storage;
   dt_dev_pixelpipe_t *const pipe = piece->pipe;
   const gboolean run_fast = pipe->type & DT_DEV_PIXELPIPE_FAST;
   const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
   const gboolean previewpipe = pipe->type & DT_DEV_PIXELPIPE_PREVIEW;
-  const int qual_flags = demosaic_qual_flags(piece, img, roi_out);
-  const gboolean fullscale = qual_flags & DT_DEMOSAIC_FULL_SCALE;
   const gboolean is_xtrans = pipe->dsc.filters == 9u;
   const gboolean is_bayer = !is_xtrans && pipe->dsc.filters != 0;
 
@@ -764,7 +654,6 @@ int process_cl(dt_iop_module_t *self,
 
   const dt_iop_demosaic_data_t *d = piece->data;
   const dt_iop_demosaic_gui_data_t *g = self->gui_data;
-  const dt_iop_demosaic_global_data_t *gd = self->global_data;
 
   int demosaicing_method = d->demosaicing_method;
 
@@ -782,17 +671,27 @@ int process_cl(dt_iop_module_t *self,
       && demosaicing_method != DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR))
     demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
 
-  gboolean showmask = FALSE;
+  gboolean show_dualmask = FALSE;
+  gboolean show_capturemask = FALSE;
+  gboolean vng_linear = FALSE;
   if(self->dev->gui_attached && fullpipe)
   {
     if(g->dual_mask)
     {
-      showmask = TRUE;
+      show_dualmask = TRUE;
+      pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
+    }
+    if(g->cs_mask)
+    {
+      show_capturemask = TRUE;
       pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
     }
     // take care of passthru modes
     if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_PASSTHRU)
+    {
       demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
+      vng_linear = TRUE;
+    }
   }
 
   const int devid = pipe->devid;
@@ -801,40 +700,18 @@ int process_cl(dt_iop_module_t *self,
 
   if(dev_in  == NULL || dev_out == NULL) return err;
 
-  if(!fullscale)
-  {
-    dt_print_pipe(DT_DEBUG_PIPE, "demosaic approx zoom", pipe, self, devid, roi_in, roi_out);
-    const int zero = 0;
-    if(is_xtrans)
-    {
-      cl_mem dev_xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(pipe->dsc.xtrans), pipe->dsc.xtrans);
-      if(dev_xtrans == NULL) return err;
-      // sample third-size image
-      err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_zoom_third_size, roi_out->width, roi_out->height,
-          CLARG(dev_in), CLARG(dev_out), CLARG(roi_out->width), CLARG(roi_out->height), CLARG(roi_in->x), CLARG(roi_in->y),
-          CLARG(width), CLARG(height), CLARG(roi_out->scale), CLARG(dev_xtrans));
-      dt_opencl_release_mem_object(dev_xtrans);
-      return err;
-    }
-    else if(demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME)
-      return dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_zoom_passthrough_monochrome, roi_out->width, roi_out->height,
-          CLARG(dev_in), CLARG(dev_out), CLARG(roi_out->width), CLARG(roi_out->height), CLARG(zero), CLARG(zero), CLARG(width),
-          CLARG(height), CLARG(roi_out->scale), CLARG(pipe->dsc.filters));
-    else // bayer
-      return dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_zoom_half_size, roi_out->width, roi_out->height,
-          CLARG(dev_in), CLARG(dev_out), CLARG(roi_out->width), CLARG(roi_out->height), CLARG(zero), CLARG(zero), CLARG(width),
-          CLARG(height), CLARG(roi_out->scale), CLARG(pipe->dsc.filters));
-  }
-
-  const gboolean direct = roi_out->width == width && roi_out->height == height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
   const int base_demosaicing_method = demosaicing_method & ~DT_DEMOSAIC_DUAL;
   const gboolean dual = (demosaicing_method & DT_DEMOSAIC_DUAL) && !run_fast && !previewpipe;
+  const gboolean passthru = demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME
+                         || demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR;
 
-  cl_mem out_image = direct ? dev_out : dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
-  cl_mem in_image = dev_in;
+  const gboolean do_capture = !passthru
+                          &&  !run_fast
+                          &&  !show_dualmask
+                          &&  !previewpipe
+                          &&  d->cs_strength;
 
-  if(out_image == NULL)
-    goto finish;
+  cl_mem in_image = dev_in;
 
   if(is_bayer && d->green_eq != DT_IOP_GREEN_EQ_NO)
   {
@@ -845,26 +722,24 @@ int process_cl(dt_iop_module_t *self,
     if(err != CL_SUCCESS) goto finish;
   }
 
-  if(demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME ||
-     demosaicing_method == DT_IOP_DEMOSAIC_PPG ||
-     demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR)
+  if(passthru || demosaicing_method == DT_IOP_DEMOSAIC_PPG)
   {
-    err = process_default_cl(self, piece, in_image, out_image, roi_in, demosaicing_method);
+    err = process_default_cl(self, piece, in_image, dev_out, roi_in, demosaicing_method);
     if(err != CL_SUCCESS) return err;
   }
   else if(base_demosaicing_method == DT_IOP_DEMOSAIC_RCD)
   {
-    err = process_rcd_cl(self, piece, in_image, out_image, roi_in);
+    err = process_rcd_cl(self, piece, in_image, dev_out, roi_in);
     if(err != CL_SUCCESS) goto finish;
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_VNG4 || demosaicing_method == DT_IOP_DEMOSAIC_VNG)
   {
-    err = process_vng_cl(self, piece, in_image, out_image, roi_in, qual_flags & DT_DEMOSAIC_ONLY_VNG_LINEAR);
+    err = process_vng_cl(self, piece, in_image, dev_out, roi_in, vng_linear);
     if(err != CL_SUCCESS) goto finish;
   }
   else if(base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN || base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3)
   {
-    err = process_markesteijn_cl(self, piece, in_image, out_image, roi_in);
+    err = process_markesteijn_cl(self, piece, in_image, dev_out, roi_in);
     if(err != CL_SUCCESS) goto finish;
   }
   else
@@ -876,11 +751,17 @@ int process_cl(dt_iop_module_t *self,
 
   if(pipe->want_detail_mask)
   {
-    err = dt_dev_write_scharr_mask_cl(piece, out_image, roi_in, TRUE);
+    err = dt_dev_write_scharr_mask_cl(piece, dev_out, roi_in, TRUE);
     if(err != CL_SUCCESS) goto finish;
   }
 
-  if(dual)
+  if(do_capture)
+  {
+    err = _capture_sharpen_cl(self, piece, dev_in, dev_out, roi_in, show_capturemask);
+    if(err != CL_SUCCESS) goto finish;
+  }
+
+  if(dual && !show_capturemask)
   {
     err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
     cl_mem low_image = dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
@@ -889,13 +770,13 @@ int process_cl(dt_iop_module_t *self,
     {
       size_t origin[] = { 0, 0, 0 };
       size_t region[] = { width, height, 1 };
-      err = dt_opencl_enqueue_copy_image(devid, out_image, cp_image, origin, origin, region);
+      err = dt_opencl_enqueue_copy_image(devid, dev_out, cp_image, origin, origin, region);
       if(err == CL_SUCCESS)
         err = process_vng_cl(self, piece, in_image, low_image, roi_in, TRUE);
       if(err == CL_SUCCESS)
         err = color_smoothing_cl(self, piece, low_image, low_image, roi_in, DT_DEMOSAIC_SMOOTH_2);
       if(err == CL_SUCCESS)
-        err = dual_demosaic_cl(self, piece, cp_image, low_image, out_image, roi_in, showmask);
+        err = dual_demosaic_cl(self, piece, cp_image, low_image, dev_out, roi_in, show_dualmask);
       dt_opencl_release_mem_object(cp_image);
       dt_opencl_release_mem_object(low_image);
     }
@@ -909,20 +790,11 @@ int process_cl(dt_iop_module_t *self,
   }
 
   if(d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF)
-  {
-    err = color_smoothing_cl(self, piece, out_image, out_image, roi_in, d->color_smoothing);
-    if(err != CL_SUCCESS) goto finish;
-  }
-
-  dt_print_pipe(DT_DEBUG_VERBOSE, direct ? "demosaic inplace" : "demosaic clip_and_zoom", pipe, self, devid, roi_in, roi_out);
-  if(!direct)
-    err = dt_iop_clip_and_zoom_roi_cl(devid, dev_out, out_image, roi_out, roi_in);
+    err = color_smoothing_cl(self, piece, dev_out, dev_out, roi_in, d->color_smoothing);
 
 finish:
 
   if(in_image != dev_in) dt_opencl_release_mem_object(in_image);
-  if(out_image != dev_out) dt_opencl_release_mem_object(out_image);
-
   return err;
 }
 #endif
@@ -933,7 +805,6 @@ void init_global(dt_iop_module_so_t *self)
   dt_iop_demosaic_global_data_t *gd = malloc(sizeof(dt_iop_demosaic_global_data_t));
   self->data = gd;
 
-  gd->kernel_zoom_half_size = dt_opencl_create_kernel(program, "clip_and_zoom_demosaic_half_size");
   gd->kernel_ppg_green = dt_opencl_create_kernel(program, "ppg_demosaic_green");
   gd->kernel_green_eq_lavg = dt_opencl_create_kernel(program, "green_equilibration_lavg");
   gd->kernel_green_eq_favg_reduce_first = dt_opencl_create_kernel(program, "green_equilibration_favg_reduce_first");
@@ -941,19 +812,16 @@ void init_global(dt_iop_module_so_t *self)
   gd->kernel_green_eq_favg_apply = dt_opencl_create_kernel(program, "green_equilibration_favg_apply");
   gd->kernel_pre_median = dt_opencl_create_kernel(program, "pre_median");
   gd->kernel_ppg_redblue = dt_opencl_create_kernel(program, "ppg_demosaic_redblue");
-  gd->kernel_downsample = dt_opencl_create_kernel(program, "clip_and_zoom");
   gd->kernel_border_interpolate = dt_opencl_create_kernel(program, "border_interpolate");
   gd->kernel_color_smoothing = dt_opencl_create_kernel(program, "color_smoothing");
 
   const int other = 14; // from programs.conf
   gd->kernel_passthrough_monochrome = dt_opencl_create_kernel(other, "passthrough_monochrome");
   gd->kernel_passthrough_color = dt_opencl_create_kernel(other, "passthrough_color");
-  gd->kernel_zoom_passthrough_monochrome = dt_opencl_create_kernel(other, "clip_and_zoom_demosaic_passthrough_monochrome");
 
   const int vng = 15; // from programs.conf
   gd->kernel_vng_border_interpolate = dt_opencl_create_kernel(vng, "vng_border_interpolate");
   gd->kernel_vng_lin_interpolate = dt_opencl_create_kernel(vng, "vng_lin_interpolate");
-  gd->kernel_zoom_third_size = dt_opencl_create_kernel(vng, "clip_and_zoom_demosaic_third_size_xtrans");
   gd->kernel_vng_green_equilibrate = dt_opencl_create_kernel(vng, "vng_green_equilibrate");
   gd->kernel_vng_interpolate = dt_opencl_create_kernel(vng, "vng_interpolate");
 
@@ -991,12 +859,24 @@ void init_global(dt_iop_module_so_t *self)
   gd->kernel_rcd_border_redblue = dt_opencl_create_kernel(rcd, "rcd_border_redblue");
   gd->kernel_rcd_border_green = dt_opencl_create_kernel(rcd, "rcd_border_green");
   gd->kernel_write_blended_dual  = dt_opencl_create_kernel(rcd, "write_blended_dual");
+
+  const int capt = 38; // capture.cl, from programs.conf
+  gd->gaussian_9x9_mul = dt_opencl_create_kernel(capt, "kernel_9x9_mul");
+  gd->gaussian_9x9_div = dt_opencl_create_kernel(capt, "kernel_9x9_div");
+  gd->prefill_clip_mask = dt_opencl_create_kernel(capt, "prefill_clip_mask");
+  gd->prepare_blend = dt_opencl_create_kernel(capt, "prepare_blend");
+  gd->modify_blend = dt_opencl_create_kernel(capt, "modify_blend");
+  gd->show_blend_mask = dt_opencl_create_kernel(capt, "show_blend_mask");
+  gd->capture_result = dt_opencl_create_kernel(capt, "capture_result");
+
+  gd->gauss_coeffs = dt_alloc_align_float(CAPTURE_KERNEL_ALIGN * (UCHAR_MAX+1));
+  for(int i = 0; i <= UCHAR_MAX; i++)
+    _calc_9x9_gauss_coeffs(&gd->gauss_coeffs[i * CAPTURE_KERNEL_ALIGN], MAX(1e-7f, (float)i * CAPTURE_GAUSS_FRACTION));
 }
 
 void cleanup_global(dt_iop_module_so_t *self)
 {
   dt_iop_demosaic_global_data_t *gd = self->data;
-  dt_opencl_free_kernel(gd->kernel_zoom_half_size);
   dt_opencl_free_kernel(gd->kernel_ppg_green);
   dt_opencl_free_kernel(gd->kernel_pre_median);
   dt_opencl_free_kernel(gd->kernel_green_eq_lavg);
@@ -1004,15 +884,12 @@ void cleanup_global(dt_iop_module_so_t *self)
   dt_opencl_free_kernel(gd->kernel_green_eq_favg_reduce_second);
   dt_opencl_free_kernel(gd->kernel_green_eq_favg_apply);
   dt_opencl_free_kernel(gd->kernel_ppg_redblue);
-  dt_opencl_free_kernel(gd->kernel_downsample);
   dt_opencl_free_kernel(gd->kernel_border_interpolate);
   dt_opencl_free_kernel(gd->kernel_color_smoothing);
   dt_opencl_free_kernel(gd->kernel_passthrough_monochrome);
   dt_opencl_free_kernel(gd->kernel_passthrough_color);
-  dt_opencl_free_kernel(gd->kernel_zoom_passthrough_monochrome);
   dt_opencl_free_kernel(gd->kernel_vng_border_interpolate);
   dt_opencl_free_kernel(gd->kernel_vng_lin_interpolate);
-  dt_opencl_free_kernel(gd->kernel_zoom_third_size);
   dt_opencl_free_kernel(gd->kernel_vng_green_equilibrate);
   dt_opencl_free_kernel(gd->kernel_vng_interpolate);
   dt_opencl_free_kernel(gd->kernel_markesteijn_initial_copy);
@@ -1046,6 +923,14 @@ void cleanup_global(dt_iop_module_so_t *self)
   dt_opencl_free_kernel(gd->kernel_rcd_border_redblue);
   dt_opencl_free_kernel(gd->kernel_rcd_border_green);
   dt_opencl_free_kernel(gd->kernel_write_blended_dual);
+  dt_opencl_free_kernel(gd->gaussian_9x9_mul);
+  dt_opencl_free_kernel(gd->gaussian_9x9_div);
+  dt_opencl_free_kernel(gd->prefill_clip_mask);
+  dt_opencl_free_kernel(gd->prepare_blend);
+  dt_opencl_free_kernel(gd->modify_blend);
+  dt_opencl_free_kernel(gd->show_blend_mask);
+  dt_opencl_free_kernel(gd->capture_result);
+  dt_free_align(gd->gauss_coeffs);
   free(self->data);
   self->data = NULL;
   _cleanup_lmmse_gamma();
@@ -1067,7 +952,10 @@ void commit_params(dt_iop_module_t *self,
   d->dual_thrs = p->dual_thrs;
   d->lmmse_refine = p->lmmse_refine;
   dt_iop_demosaic_method_t use_method = p->demosaicing_method;
-
+  d->cs_radius = p->cs_radius;
+  d->cs_thrs = p->cs_thrs;
+  d->cs_boost = p->cs_boost;
+  d->cs_strength = p->cs_strength;
   const gboolean xmethod = use_method & DT_DEMOSAIC_XTRANS;
   const gboolean bayer4  = self->dev->image_storage.flags & DT_IMAGE_4BAYER;
   const gboolean bayer   = self->dev->image_storage.buf_dsc.filters != 9u && !bayer4;
@@ -1154,7 +1042,6 @@ void commit_params(dt_iop_module_t *self,
       piece->process_cl_ready = FALSE;
   }
 
-
   // green-equilibrate over full image excludes tiling
   // The details mask calculation required for dual demosaicing does not allow tiling.
   if(    d->green_eq == DT_IOP_GREEN_EQ_FULL
@@ -1244,10 +1131,18 @@ void gui_changed(dt_iop_module_t *self, GtkWidget *w, void *previous)
     || use_method == DT_IOP_DEMOSAIC_PASSTHR_MONOX
     || use_method == DT_IOP_DEMOSAIC_PASSTHR_COLORX;
 
+  const gboolean capture_support = !passing && !bayer4;
+  const gboolean do_capture = capture_support && p->cs_strength;
+
   gtk_widget_set_visible(g->demosaic_method_bayer, bayer);
   gtk_widget_set_visible(g->demosaic_method_bayerfour, bayer4);
   gtk_widget_set_visible(g->demosaic_method_xtrans, xtrans);
 
+  gtk_widget_set_sensitive(g->cs_radius, do_capture);
+  gtk_widget_set_sensitive(g->cs_thrs, do_capture);
+  gtk_widget_set_sensitive(g->cs_boost, do_capture);
+  gtk_widget_set_sensitive(g->cs_strength, capture_support);
+
   // we might have a wrong method dur to xtrans/bayer - mode mismatch
   if(bayer)
     dt_bauhaus_combobox_set_from_value(g->demosaic_method_bayer, use_method);
@@ -1286,6 +1181,11 @@ void gui_changed(dt_iop_module_t *self, GtkWidget *w, void *previous)
     dt_bauhaus_widget_set_quad_active(g->dual_thrs, FALSE);
     g->dual_mask = FALSE;
   }
+  if(!w || w != g->cs_thrs)
+  {
+    dt_bauhaus_widget_set_quad_active(g->cs_thrs, FALSE);
+    g->cs_mask = FALSE;
+  }
 
   // as the dual modes change behaviour for previous pipeline modules we do a reprocess
   if(isdual && (w == g->demosaic_method_bayer || w == g->demosaic_method_xtrans))
@@ -1296,26 +1196,64 @@ void gui_update(dt_iop_module_t *self)
 {
   gui_changed(self, NULL, NULL);
   gtk_stack_set_visible_child_name(GTK_STACK(self->widget), self->default_enabled ? "raw" : "non_raw");
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  g->autoradius = FALSE;
 }
 
-static void _visualize_callback(GtkWidget *quad, dt_iop_module_t *self)
+static void _dual_quad_callback(GtkWidget *quad, dt_iop_module_t *self) // quad toggle of the dual threshold slider
 {
   if(darktable.gui->reset) return;
   dt_iop_demosaic_gui_data_t *g = self->gui_data;
 
   g->dual_mask = dt_bauhaus_widget_get_quad_active(quad);
+
+  dt_bauhaus_widget_set_quad_active(g->cs_thrs, FALSE); // only one mask is shown at a time: release the capture mask toggle
+  g->cs_mask = FALSE;                                    // and drop its flag, which process() / process_cl() read
   dt_dev_reprocess_center(self->dev);
 }
 
+static void _cs_quad_callback(GtkWidget *quad, dt_iop_module_t *self) // quad toggle for displaying the capture mask
+{
+  if(darktable.gui->reset) return; // do nothing while the gui reset flag is set
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  g->cs_mask = dt_bauhaus_widget_get_quad_active(quad); // read by process() / process_cl() to set show_capturemask
+
+  dt_bauhaus_widget_set_quad_active(g->dual_thrs, FALSE); // only one mask is shown at a time: release the dual mask toggle
+  g->dual_mask = FALSE;
+  dt_dev_reprocess_center(self->dev); // ask for reprocessing so the changed flags take effect
+}
+
+static void _cs_autoradius_callback(GtkWidget *quad, dt_iop_module_t *self) // flags an auto radius request
+{
+  if(darktable.gui->reset) return; // do nothing while the gui reset flag is set
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  g->autoradius = TRUE; // consumed and cleared again by _check_autoradius(); gui_update() also clears it
+  dt_dev_reprocess_center(self->dev); // NOTE(review): presumably the reprocessing run derives the new cs_radius - not visible here, confirm
+}
+
+static void _check_autoradius(gpointer instance, dt_iop_module_t *self) // NOTE(review): signature suggests a signal handler; the connection is outside this view
+{
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  if(g && g->autoradius) // only act when gui data exists and an auto radius request is pending
+  {
+    dt_iop_demosaic_params_t *p = self->params;
+    g->autoradius = FALSE; // request handled, clear the flag
+    dt_bauhaus_slider_set_val(g->cs_radius, p->cs_radius); // put the radius currently stored in the params on the slider
+    dt_dev_add_history_item(darktable.develop, self, TRUE); // and add a history item for this module
+  }
+}
+
 void gui_focus(dt_iop_module_t *self, gboolean in)
 {
   dt_iop_demosaic_gui_data_t *g = self->gui_data;
   if(!in)
   {
-    const gboolean was_dualmask = g->dual_mask;
+    const gboolean was_masking = g->dual_mask || g->cs_mask; // remember whether any mask view was active
     dt_bauhaus_widget_set_quad_active(g->dual_thrs, FALSE);
     g->dual_mask = FALSE;
-    if(was_dualmask) dt_dev_reprocess_center(self->dev);
+    dt_bauhaus_widget_set_quad_active(g->cs_thrs, FALSE); // the capture mask toggle is released as well
+    g->cs_mask = FALSE;
+    if(was_masking) dt_dev_reprocess_center(self->dev); // reprocess only if a mask view had to be switched off
   }
 }
 
@@ -1342,18 +1280,17 @@ void gui_init(dt_iop_module_t *self)
   for(int i=0;i<4;i++) dt_bauhaus_combobox_remove_at(g->demosaic_method_bayerfour, 1);
   gtk_widget_set_tooltip_text(g->demosaic_method_bayerfour, _("Bayer4 sensor demosaicing methods."));
 
-  g->median_thrs = dt_bauhaus_slider_from_params(self, "median_thrs");
-  dt_bauhaus_slider_set_digits(g->median_thrs, 3);
-  gtk_widget_set_tooltip_text(g->median_thrs, _("threshold for edge-aware median.\nset to 0.0 to switch off\n"
-                                                "set to 1.0 to ignore edges"));
-
   g->dual_thrs = dt_bauhaus_slider_from_params(self, "dual_thrs");
   dt_bauhaus_slider_set_digits(g->dual_thrs, 2);
   gtk_widget_set_tooltip_text(g->dual_thrs, _("contrast threshold for dual demosaic.\nset to 0.0 for high frequency content\n"
                                                 "set to 1.0 for flat content"));
-  dt_bauhaus_widget_set_quad(g->dual_thrs, self, dtgtk_cairo_paint_showmask, TRUE, _visualize_callback,
+  dt_bauhaus_widget_set_quad(g->dual_thrs, self, dtgtk_cairo_paint_showmask, TRUE, _dual_quad_callback,
                              _("toggle mask visualization"));
 
+  g->median_thrs = dt_bauhaus_slider_from_params(self, "median_thrs");
+  dt_bauhaus_slider_set_digits(g->median_thrs, 3);
+  gtk_widget_set_tooltip_text(g->median_thrs, _("threshold for edge-aware median.\nset to 0.0 to switch off\n"
+                                                "set to 1.0 to ignore edges"));
   g->lmmse_refine = dt_bauhaus_combobox_from_params(self, "lmmse_refine");
   gtk_widget_set_tooltip_text(g->lmmse_refine, _("LMMSE refinement steps. the median steps average the output,\nrefine adds some recalculation of red & blue channels"));
 
@@ -1363,6 +1300,33 @@ void gui_init(dt_iop_module_t *self)
   g->greeneq = dt_bauhaus_combobox_from_params(self, "green_eq");
   gtk_widget_set_tooltip_text(g->greeneq, _("green channels matching method"));
 
+  g->cs_strength = dt_bauhaus_slider_from_params(self, "cs_strength");
+  gtk_widget_set_tooltip_text(g->cs_strength, _("enable capture sharpening and set effect strength based on iterations"));
+  // radius slider, its quad button requests a calculation of the radius from sensor data
+  g->cs_radius = dt_bauhaus_slider_from_params(self, "cs_radius");
+  dt_bauhaus_slider_set_digits(g->cs_radius, 2);
+  dt_bauhaus_slider_set_format(g->cs_radius, _(" px"));
+  gtk_widget_set_tooltip_text(g->cs_radius, _("capture sharpen radius should reflect the gaussian type blur by camera\n"
+                                              "sensor, possibly the anti-aliasing filter and the lens.\n"
+                                              "increasing this too far will lead to artifacts like halos\n"
+                                              "especially at sharp transitions\n"));
+  dt_bauhaus_slider_set_hard_min(g->cs_radius, 0.01f);
+  dt_bauhaus_widget_set_quad(g->cs_radius, self, dtgtk_cairo_paint_reset, FALSE, _cs_autoradius_callback,
+    _("calculate the capture sharpen radius from sensor data"));
+  g->autoradius = FALSE;
+  // threshold slider, its quad button toggles the visualization of the restricting mask
+  g->cs_thrs = dt_bauhaus_slider_from_params(self, "cs_thrs");
+  dt_bauhaus_slider_set_format(g->cs_thrs, "%");
+  dt_bauhaus_slider_set_digits(g->cs_thrs, 0);
+  gtk_widget_set_tooltip_text(g->cs_thrs, _("adjust the threshold to restrict capture sharpening to parts with high contrast"));
+  dt_bauhaus_widget_set_quad(g->cs_thrs, self, dtgtk_cairo_paint_showmask, TRUE, _cs_quad_callback, _("visualize the restricting mask"));
+  // the unit suffix is passed through gettext exactly once, a translated string must not be looked up again
+  g->cs_boost = dt_bauhaus_slider_from_params(self, "cs_boost");
+  dt_bauhaus_slider_set_digits(g->cs_boost, 2);
+  dt_bauhaus_slider_set_format(g->cs_boost, _(" px"));
+  gtk_widget_set_tooltip_text(g->cs_boost, _("further increase radius at image corners.\n"
+                                             "the centre half of the image will not be affected"));
+
   // start building top level widget
   self->widget = gtk_stack_new();
   gtk_stack_set_homogeneous(GTK_STACK(self->widget), FALSE);
@@ -1372,6 +1336,7 @@ void gui_init(dt_iop_module_t *self)
 
   gtk_stack_add_named(GTK_STACK(self->widget), label_non_raw, "non_raw");
   gtk_stack_add_named(GTK_STACK(self->widget), box_raw, "raw");
+  DT_CONTROL_SIGNAL_HANDLE(DT_SIGNAL_DEVELOP_UI_PIPE_FINISHED, _check_autoradius);
 }
 
 // clang-format off
diff --git a/src/iop/demosaicing/basics.c b/src/iop/demosaicing/basics.c
index 95e632b0d465..abf8836b9646 100644
--- a/src/iop/demosaicing/basics.c
+++ b/src/iop/demosaicing/basics.c
@@ -151,16 +151,14 @@ static void green_equilibration_lavg(float *out,
                                      const int width,
                                      const int height,
                                      const uint32_t filters,
-                                     const int x,
-                                     const int y,
                                      const float thr)
 {
   const float maximum = 1.0f;
 
   int oj = 2, oi = 2;
-  if(FC(oj + y, oi + x, filters) != 1) oj++;
-  if(FC(oj + y, oi + x, filters) != 1) oi++;
-  if(FC(oj + y, oi + x, filters) != 1) oj--;
+  if(FC(oj, oi, filters) != 1) oj++;
+  if(FC(oj, oi, filters) != 1) oi++;
+  if(FC(oj, oi, filters) != 1) oj--;
 
   dt_iop_image_copy_by_size(out, in, width, height, 1);
 
@@ -203,15 +201,13 @@ static void green_equilibration_favg(float *out,
                                      const float *const in,
                                      const int width,
                                      const int height,
-                                     const uint32_t filters,
-                                     const int x,
-                                     const int y)
+                                     const uint32_t filters)
 {
   int oj = 0, oi = 0;
   // const float ratio_max = 1.1f;
   double sum1 = 0.0, sum2 = 0.0, gr_ratio;
 
-  if((FC(oj + y, oi + x, filters) & 1) != 1) oi++;
+  if((FC(oj, oi, filters) & 1) != 1) oi++;
   const int g2_offset = oi ? -1 : 1;
   dt_iop_image_copy_by_size(out, in, width, height, 1);
   DT_OMP_FOR(reduction(+ : sum1, sum2) collapse(2))
@@ -387,7 +383,7 @@ static int green_equilibration_cl(const dt_iop_module_t *self,
     size_t flocal[3] = { flocopt.sizex, flocopt.sizey, 1 };
     dt_opencl_set_kernel_args(devid, gd->kernel_green_eq_favg_reduce_first, 0,
       CLARG(dev_in1), CLARG(width),
-      CLARG(height), CLARG(dev_m), CLARG(piece->pipe->dsc.filters), CLARG(roi_in->x), CLARG(roi_in->y),
+      CLARG(height), CLARG(dev_m), CLARG(piece->pipe->dsc.filters),
       CLLOCAL(sizeof(float) * 2 * flocopt.sizex * flocopt.sizey));
     err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_green_eq_favg_reduce_first, fsizes, flocal);
     if(err != CL_SUCCESS) goto error;
@@ -442,7 +438,7 @@ static int green_equilibration_cl(const dt_iop_module_t *self,
 
     err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_green_eq_favg_apply, width, height,
       CLARG(dev_in1), CLARG(dev_out1), CLARG(width), CLARG(height), CLARG(piece->pipe->dsc.filters),
-      CLARG(roi_in->x), CLARG(roi_in->y), CLARG(gr_ratio));
+      CLARG(gr_ratio));
     if(err != CL_SUCCESS) goto error;
   }
 
@@ -466,7 +462,7 @@ static int green_equilibration_cl(const dt_iop_module_t *self,
     size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
     dt_opencl_set_kernel_args(devid, gd->kernel_green_eq_lavg, 0,
       CLARG(dev_in2), CLARG(dev_out2),
-      CLARG(width), CLARG(height), CLARG(piece->pipe->dsc.filters), CLARG(roi_in->x), CLARG(roi_in->y),
+      CLARG(width), CLARG(height), CLARG(piece->pipe->dsc.filters),
       CLARG(threshold), CLLOCAL(sizeof(float) * (locopt.sizex + 4) * (locopt.sizey + 4)));
     err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_green_eq_lavg, sizes, local);
     if(err != CL_SUCCESS) goto error;
diff --git a/src/iop/demosaicing/capture.c b/src/iop/demosaicing/capture.c
new file mode 100644
index 000000000000..d23993676a0f
--- /dev/null
+++ b/src/iop/demosaicing/capture.c
@@ -0,0 +1,832 @@
+/*
+    This file is part of darktable,
+    Copyright (C) 2025 darktable developers.
+
+    darktable is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    darktable is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* remarks:
+    Credits go to: Ingo Weyrich (heckflosse67@gmx.de), he implemented the original algorithm for rawtherapee.
+
+   1) - The gaussian convolution filters take the coeffs from precalculated data in gd->gauss_coeffs,
+        we have CHAR_MAX kernels with a sigma step of CAPTURE_GAUSS_FRACTION.
+      - The chosen kernel is selected per pixel via an index map, this is derived from cs_radius, cs_boost
+        and distance from image centre.
+      - using the index map improves performance and allows runtime modification of the used per pixel
+        gaussian sigma.
+      - Note: this is similar to the per-tile sigma in the RT implementation.
+   2) It's currently not planned to increase the maximum sigma so we can stay with the 9x9 kernels.
+   3) Reminders and possibly left to do:
+      - halo suppression at very strong gradients?
+      - automatic noise detection or reduction?
+      - can we auto-stop? per pixel?
+      - Internal CPU code tiling for performance? tile size would be the same as for rcd
+   4) Notes
+      - If the demosaicer downscales, the effects are less visible, so it seems safe to reduce the
+        number of iterations for performance.
+*/
+
+#ifdef __GNUC__
+  #pragma GCC push_options
+  #pragma GCC optimize ("fast-math", "fp-contract=fast", "finite-math-only", "no-math-errno")
+#endif
+
+#define CAPTURE_KERNEL_ALIGN 32
+#define CAPTURE_BLEND_EPS 0.01f
+#define CAPTURE_GAUSS_FRACTION 0.01f
+#define CAPTURE_YMIN 0.001f
+#define CAPTURE_THRESHPOWER 0.15f
+#define CAPTURE_CFACLIP 0.9f
+
+// Fill coeffs[5*k + j] (k, j = 0..4) with one quadrant of a normalized 9x9 gaussian for sigma,
+// taps with a distance of more than 4.5 from the centre are zero.
+static inline void _calc_9x9_gauss_coeffs(float *coeffs, const float sigma)
+{
+  float kernel[9][9];
+  const float range = 4.5f * 4.5f;
+  // sigma == 0 would result in 0/0 for the centre tap, with the floor we get an identity kernel instead
+  const float temp = -2.0f * fmaxf(sigma * sigma, 1e-14f);
+  float sum = 0.0f;
+  // taps outside of the circular support are zero and thus don't contribute to the sum
+  for(int k = -4; k < 5; k++)
+  {
+    for(int j = -4; j < 5; j++)
+    {
+      const float rad = (float)(k*k + j*j);
+      kernel[k + 4][j + 4] = (rad <= range) ? expf(rad / temp) : 0.0f;
+      sum += kernel[k + 4][j + 4];
+    }
+  }
+
+  // only the quadrant starting at the centre tap is stored, normalized by the sum of all 81 taps
+  for(int k = 0; k < 5; k++)
+    for(int j = 0; j < 5; j++)
+      coeffs[5*k+j] = kernel[k+4][j+4] / sum;
+}
+
+// Build a per-pixel map of indices into the precalculated gaussian kernels, sigma grows by boost
+// towards the image corners. Returns NULL if the allocation failed, the caller releases the map.
+static unsigned char *_cs_precalc_gauss_idx(dt_iop_module_t *self,
+                                   const dt_iop_roi_t *const roi,
+                                   const float isigma,
+                                   const float boost)
+{
+  const int width = roi->width;
+  const int height = roi->height;
+  unsigned char *table = dt_alloc_aligned((size_t)height * width);
+  if(!table) return NULL;
+
+  // distances are taken from half of the image's p_width/p_height in units of the smaller half size
+  const dt_image_t *img = &self->dev->image_storage;
+  const int half_w = img->p_width / 2;
+  const int half_h = img->p_height / 2;
+  const float mdim = MIN(half_w, half_h);
+
+  DT_OMP_FOR()
+  for(int row = 0; row < height; row++)
+  {
+    const float frow = row + roi->y - half_h;
+    for(int col = 0; col < width; col++)
+    {
+      const float fcol = col + roi->x - half_w;
+      const float dist = sqrtf(frow * frow + fcol * fcol) / mdim;
+      float sigma = isigma + boost * powf(MAX(0.0f, dist - 0.5f), 1.5f);
+
+      // fade sigma out close to a border of the roi, only the first matching test is applied
+      if(col < 8)               sigma *= (float)col * 0.125f;
+      else if(row < 8)          sigma *= (float)row * 0.125f;
+      else if(col > width - 8)  sigma *= (float)(width - col) * 0.125f;
+      else if(row > height - 8) sigma *= (float)(height - row) * 0.125f;
+
+      const int kern = sigma / CAPTURE_GAUSS_FRACTION;
+      table[(size_t)row * width + col] = CLAMP(kern, 0, UCHAR_MAX);
+    }
+  }
+  return table;
+}
+
+#define RAWEPS 0.005f
+// Derive a capture sharpen radius for bayer data from the largest ratio found between a
+// photosite and its two diagonal neighbours in the next row. Pairs are skipped if they are
+// too dark (RAWEPS, lowerLimit) or influenced by a clipped (>= upperLimit) green nearby.
+// The result is sqrtf(1 / logf(ratio)), so it is not finite as long as the ratio stays at 1.0.
+static float _calcRadiusBayer(const float *in,
+                              const int width,
+                              const int height,
+                              const float lowerLimit,
+                              const float upperLimit,
+                              const uint32_t filters)
+{
+  // FC() of the first photosite in even and odd rows, it selects the starting column below
+  const unsigned int fc[2] = {FC(0, 0, filters), FC(1, 0, filters)};
+  float maxRatio = 1.f;
+
+  DT_OMP_FOR(reduction(max: maxRatio))
+  for(int row = 4; row < height - 4; ++row)
+  {
+    for(int col = 5 + (fc[row & 1] & 1); col < width - 4; col += 2)
+    {
+      const float *cfa = in + row*width + col;
+      const float centre = cfa[0];
+      // very dark photosites are not evaluated at all
+      if(!(centre > RAWEPS)) continue;
+
+      const float below_left = cfa[width-1];
+      const float below_right = cfa[width+1];
+      // the neighbourhood tested for clipping whenever the centre is the brighter one of a pair
+      const float around_centre = MAX(MAX(cfa[-width-1], cfa[-width+1]), below_right);
+
+      // pair 1: centre and the photosite below left
+      const float maxVal0 = MAX(centre, below_left);
+      if(below_left > RAWEPS && maxVal0 > lowerLimit)
+      {
+        const float minVal = MIN(centre, below_left);
+        if(maxVal0 > maxRatio * minVal)
+        {
+          // check for influence by clipped green in the neighborhood of the brighter photosite
+          const gboolean clipped = (maxVal0 == centre)
+            ? around_centre >= upperLimit
+            : MAX(MAX(MAX(cfa[-2], centre), cfa[2*width-2]), cfa[2*width]) >= upperLimit;
+          if(!clipped)
+            maxRatio = maxVal0 / minVal;
+        }
+      }
+
+      // pair 2: centre and the photosite below right
+      const float maxVal1 = MAX(centre, below_right);
+      if(below_right > RAWEPS && maxVal1 > lowerLimit)
+      {
+        const float minVal = MIN(centre, below_right);
+        if(maxVal1 > maxRatio * minVal)
+        {
+          // check for influence by clipped green in the neighborhood of the brighter photosite
+          const gboolean clipped = (maxVal1 == centre)
+            ? around_centre >= upperLimit
+            : MAX(MAX(MAX(centre, cfa[2]), cfa[2*width]), cfa[2*width+2]) >= upperLimit;
+          if(!clipped)
+            maxRatio = maxVal1 / minVal;
+        }
+      }
+    }
+  }
+
+  return sqrtf(1.0f / logf(maxRatio));
+}
+
+// Derive a capture sharpen radius for X-Trans data from the largest ratio between neighbouring greens,
+// greens next to clipped (>= upperLimit) ones are skipped. Not finite while the ratio stays at 1.0.
+static float _calcRadiusXtrans(const float *in,
+                               const float lowerLimit,
+                               const float upperLimit,
+                               const dt_iop_roi_t *const roi,
+                               const uint8_t(*const xtrans)[6])
+{
+  const int width = roi->width;
+  const int height = roi->height;
+
+  // Locate the top-left photosite of a 2x2 green block: a green with different horizontal
+  // neighbours and neither a green above nor to the left. The loop counters must not be used
+  // after the loops as the outer one is still incremented once after leaving the inner loop.
+  int startx = 6, starty = 6;
+  gboolean found = FALSE;
+  for(int y = 6; y < 12 && !found; y++)
+  {
+    for(int x = 6; x < 12 && !found; x++)
+    {
+      if(FCxtrans(y, x, roi, xtrans) == 1
+         && FCxtrans(y, x - 1, roi, xtrans) != FCxtrans(y, x + 1, roi, xtrans)
+         && FCxtrans(y - 1, x, roi, xtrans) != 1
+         && FCxtrans(y, x - 1, roi, xtrans) != 1)
+      {
+        starty = y;
+        startx = x;
+        found = TRUE;
+      }
+    }
+  }
+
+  float maxRatio = 1.0f;
+  DT_OMP_FOR(reduction(max: maxRatio))
+  for(int row = starty + 2; row < height - 4; row += 3)
+  {
+    for(int col = startx + 2; col < width - 4; col += 3)
+    {
+      const float *cfa = in + row*width + col;
+      const float valp1p1 = cfa[width+1];
+      const gboolean squareClipped = MAX(MAX(MAX(valp1p1, cfa[width+2]), cfa[2*width+1]), cfa[2*width+2]) >= upperLimit;
+      const float greenSolitary = cfa[0];
+      if(greenSolitary > RAWEPS && MAX(cfa[-width-1], cfa[-width+1]) < upperLimit)
+      {
+        if(greenSolitary < upperLimit)
+        {
+          const float valp1m1 = cfa[width-1];
+          if(valp1m1 > RAWEPS && MAX(MAX(MAX(cfa[width-2], valp1m1), cfa[2*width-2]), cfa[2*width-1]) < upperLimit)
+          {
+            const float maxVal = MAX(greenSolitary, valp1m1);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(greenSolitary, valp1m1);
+              if(maxVal > maxRatio * minVal)
+                maxRatio = maxVal / minVal;
+            }
+          }
+          if(valp1p1 > RAWEPS && !squareClipped)
+          {
+            const float maxVal = MAX(greenSolitary, valp1p1);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(greenSolitary, valp1p1);
+              if(maxVal > maxRatio * minVal)
+                maxRatio = maxVal / minVal;
+            }
+          }
+        }
+      }
+
+      if(!squareClipped)
+      {
+        const float valp2p2 = cfa[2*width+2];
+        if(valp2p2 > RAWEPS)
+        {
+          if(valp1p1 > RAWEPS)
+          {
+            const float maxVal = MAX(valp1p1, valp2p2);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(valp1p1, valp2p2);
+              if(maxVal > maxRatio * minVal)
+                 maxRatio = maxVal / minVal;
+            }
+          }
+          const float greenSolitaryRight = cfa[3*width+3];
+          if(MAX(MAX(greenSolitaryRight, cfa[4*width+2]), cfa[4*width+4]) < upperLimit)
+          {
+            if(greenSolitaryRight > RAWEPS)
+            {
+              const float maxVal = MAX(greenSolitaryRight, valp2p2);
+              if(maxVal > lowerLimit)
+              {
+                const float minVal = MIN(greenSolitaryRight, valp2p2);
+                if(maxVal > maxRatio * minVal)
+                  maxRatio = maxVal / minVal;
+              }
+            }
+          }
+        }
+        const float valp1p2 = cfa[width+2];
+        const float valp2p1 = cfa[2*width+1];
+        if(valp2p1 > RAWEPS)
+        {
+          if(valp1p2 > RAWEPS)
+          {
+            const float maxVal = MAX(valp1p2, valp2p1);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(valp1p2, valp2p1);
+              if(maxVal > maxRatio * minVal)
+                maxRatio = maxVal / minVal;
+            }
+          }
+          const float greenSolitaryLeft = cfa[3*width];
+          if(MAX(MAX(greenSolitaryLeft, cfa[4*width-1]), cfa[4*width+1]) < upperLimit)
+          {
+            if(greenSolitaryLeft > RAWEPS)
+            {
+              const float maxVal = MAX(greenSolitaryLeft, valp2p1);
+              if(maxVal > lowerLimit)
+              {
+                const float minVal = MIN(greenSolitaryLeft, valp2p1);
+                if(maxVal > maxRatio * minVal)
+                  maxRatio = maxVal / minVal;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return sqrtf(1.0f / logf(maxRatio));
+}
+#undef RAWEPS
+
+// Multiply out[i] by the value of 'in' convolved with the 9x9 gaussian selected via table[i].
+// Only pixels with a blend value above CAPTURE_BLEND_EPS are touched, w1 is the width of all
+// buffers. The kernels are stored as quadrants, see the index used in the border branch.
+DT_OMP_DECLARE_SIMD(aligned(in, out, kernels:64))
+static inline void _blur_mul(const float *const in,
+                             float *out,
+                             const float *blend,
+                             const float *const kernels,
+                             const unsigned char *const table,
+                             const int w1,
+                             const int height)
+{
+  const int w2 = 2 * w1;
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+
+  DT_OMP_FOR()
+  for(int row = 0; row < height; row++)
+  {
+    const gboolean inner_row = row >= 4 && row < height - 4;
+    for(int col = 0; col < w1; col++)
+    {
+      const size_t i = (size_t)row * w1 + col;
+      // if the blend value is too low we keep the original, _blur_div didn't write those either
+      if(!(blend[i] > CAPTURE_BLEND_EPS)) continue;
+
+      const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
+      float val = 0.0f;
+      if(inner_row && col >= 4 && col < w1 - 4)
+      {
+        // the whole window is inside of the image, taps sharing a coeff are summed up first
+        const float *d = in + i;
+        val =
+              kern[10+4] * (d[-w4-2] + d[-w4+2] + d[-w2-4] + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+              kern[5 +4] * (d[-w4-1] + d[-w4+1] + d[-w1-4] + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+              kern[4]    * (d[-w4  ] + d[   -4] + d[    4] + d[ w4  ]) +
+              kern[15+3] * (d[-w3-3] + d[-w3+3] + d[ w3-3] + d[ w3+3]) +
+              kern[10+3] * (d[-w3-2] + d[-w3+2] + d[-w2-3] + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+              kern[ 5+3] * (d[-w3-1] + d[-w3+1] + d[-w1-3] + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+              kern[   3] * (d[-w3  ] + d[   -3] + d[    3] + d[ w3  ]) +
+              kern[10+2] * (d[-w2-2] + d[-w2+2] + d[ w2-2] + d[ w2+2]) +
+              kern[ 5+2] * (d[-w2-1] + d[-w2+1] + d[-w1-2] + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+              kern[   2] * (d[-w2  ] + d[   -2] + d[    2] + d[ w2  ]) +
+              kern[ 5+1] * (d[-w1-1] + d[-w1+1] + d[ w1-1] + d[ w1+1]) +
+              kern[   1] * (d[-w1  ] + d[   -1] + d[    1] + d[ w1  ]) +
+              kern[   0] * (d[0]);
+      }
+      else
+      {
+        // close to the borders only the taps inside of the image are summed up
+        const int rmin = MAX(row - 4, 0), rmax = MIN(row + 4, height - 1);
+        const int cmin = MAX(col - 4, 0), cmax = MIN(col + 4, w1 - 1);
+        for(int irow = rmin; irow <= rmax; irow++)
+        {
+          const float *line = in + (size_t)irow * w1;
+          for(int icol = cmin; icol <= cmax; icol++)
+            val += kern[5 * ABS(irow - row) + ABS(icol - col)] * line[icol];
+        }
+      }
+
+      out[i] *= val;
+    }
+  }
+}
+
+// Write out[i] = divbuff[i] / (value of 'in' convolved with the 9x9 gaussian selected via table[i]).
+// Only pixels with a blend value above CAPTURE_BLEND_EPS are written, w1 is the width of all buffers.
+DT_OMP_DECLARE_SIMD(aligned(in, out, divbuff, kernels :64))
+static inline void _blur_div(const float *const in,
+                             float *out,
+                             const float *const divbuff,
+                             const float *blend,
+                             const float *const kernels,
+                             const unsigned char *const table,
+                             const int w1,
+                             const int height)
+{
+  const int w2 = 2 * w1;
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+
+  DT_OMP_FOR()
+  for(int row = 0; row < height; row++)
+  {
+    const gboolean inner_row = row >= 4 && row < height - 4;
+    for(int col = 0; col < w1; col++)
+    {
+      const size_t i = (size_t)row * w1 + col;
+      // pixels with a too low blend value are left untouched
+      if(!(blend[i] > CAPTURE_BLEND_EPS)) continue;
+
+      const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
+      float val = 0.0f;
+      if(inner_row && col >= 4 && col < w1 - 4)
+      {
+        // the whole window is inside of the image, taps sharing a coeff are summed up first
+        const float *d = in + i;
+        val =
+              kern[10+4] * (d[-w4-2] + d[-w4+2] + d[-w2-4] + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+              kern[5 +4] * (d[-w4-1] + d[-w4+1] + d[-w1-4] + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+              kern[4]    * (d[-w4  ] + d[   -4] + d[    4] + d[ w4  ]) +
+              kern[15+3] * (d[-w3-3] + d[-w3+3] + d[ w3-3] + d[ w3+3]) +
+              kern[10+3] * (d[-w3-2] + d[-w3+2] + d[-w2-3] + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+              kern[ 5+3] * (d[-w3-1] + d[-w3+1] + d[-w1-3] + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+              kern[   3] * (d[-w3  ] + d[   -3] + d[    3] + d[ w3  ]) +
+              kern[10+2] * (d[-w2-2] + d[-w2+2] + d[ w2-2] + d[ w2+2]) +
+              kern[ 5+2] * (d[-w2-1] + d[-w2+1] + d[-w1-2] + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+              kern[   2] * (d[-w2  ] + d[   -2] + d[    2] + d[ w2  ]) +
+              kern[ 5+1] * (d[-w1-1] + d[-w1+1] + d[ w1-1] + d[ w1+1]) +
+              kern[   1] * (d[-w1  ] + d[   -1] + d[    1] + d[ w1  ]) +
+              kern[   0] * (d[0]);
+      }
+      else
+      {
+        // close to the borders only the taps inside of the image are summed up
+        const int rmin = MAX(row - 4, 0), rmax = MIN(row + 4, height - 1);
+        const int cmin = MAX(col - 4, 0), cmax = MIN(col + 4, w1 - 1);
+        for(int irow = rmin; irow <= rmax; irow++)
+        {
+          const float *line = in + (size_t)irow * w1;
+          for(int icol = cmin; icol <= cmax; icol++)
+            val += kern[5 * ABS(irow - row) + ABS(icol - col)] * line[icol];
+        }
+      }
+      out[i] = divbuff[i] / MAX(val, 0.00001f);
+    }
+  }
+}
+
+// write the BT.709 luminance of rgb to Yold and a 0/1 mask excluding bright (> whites) or dark surroundings
+static void _prepare_blend(const float *cfa,
+                           const float *rgb,
+                           const uint32_t filters,
+                           const uint8_t (*const xtrans)[6],
+                           const dt_iop_roi_t *const roi,
+                           float *mask,
+                           float *Yold,
+                           const float *whites,
+                           const int w1,
+                           const int height)
+{
+  dt_iop_image_fill(mask, 1.0f, w1, height, 1);
+  DT_OMP_FOR(collapse(2))
+  for(size_t row = 0; row < height; row++)
+  {
+    for(size_t col = 0; col < w1; col++)
+    {
+      const size_t k = row * w1 + col;
+      const float *px = rgb + 4 * k;
+      // Photometric/digital ITU BT.709
+      const float Y = MAX(0.0f, 0.2126f*px[0] + 0.7152f*px[1] + 0.0722f*px[2]);
+      Yold[k] = Y;
+      if(row < 2 || col < 2 || row >= height-2 || col >= w1-2)
+        mask[k] = 0.0f; // the two outermost rows and columns are always masked out
+      else
+      {
+        const int color = (filters == 9u) ? FCxtrans(row, col, roi, xtrans) : FC(row, col, filters);
+        if(cfa[k] > whites[color] || Y < CAPTURE_YMIN)
+        {
+          // clear the 5x5 neighbourhood without its four corners
+          for(int dy = -2; dy <= 2; dy++)
+            for(int dx = -2 + ABS(dy) / 2; dx <= 2 - ABS(dy) / 2; dx++)
+              mask[k + dy * w1 + dx] = 0.0f;
+        }
+      }
+    }
+  }
+}
+
+// scale blend by a measure of the local deviation of Yold above threshold, also copy Yold to luminance
+static void _modify_blend(float *blend,
+                          float *Yold,
+                          float *luminance,
+                          const float threshold,
+                          const int width,
+                          const int height)
+{
+  DT_OMP_FOR()
+  for(int irow = 0; irow < height; irow++)
+  {
+    const int row = CLAMP(irow, 2, height-3);
+    for(int icol = 0; icol < width; icol++)
+    {
+      const int col = CLAMP(icol, 2, width-3);
+      const float *top = Yold + (size_t)(row-2)*width;
+      const float *bottom = Yold + (size_t)(row+2)*width;
+      // mean of the 21 values in a 5x5 window without corners, centred at the clamped position
+      float av = 0.0f;
+      for(int y = row-1; y <= row+1; y++)
+        for(int x = col-2; x <= col+2; x++)
+          av += Yold[(size_t)y*width + x];
+      for(int x = col-1; x <= col+1; x++)
+      {
+        av += top[x];
+        av += bottom[x];
+      }
+      av /= 21.0f;
+      // sum of the squared deviations from the mean within the same window
+      float sv = 0.0f;
+      for(int y = row-1; y <= row+1; y++)
+        for(int x = col-2; x <= col+2; x++)
+          sv += sqrf(Yold[(size_t)y*width + x] - av);
+      for(int x = col-1; x <= col+1; x++)
+      {
+        sv += sqrf(top[x] - av);
+        sv += sqrf(bottom[x] - av);
+      }
+      const size_t k = (size_t)irow * width + icol;
+      sv = powf(MAX(0.0f, 5.0f * sqrtf(sv / 21.f) - threshold), CAPTURE_THRESHPOWER);
+      blend[k] *= CLIP(sv);
+      luminance[k] = Yold[k];
+    }
+  }
+}
+
+static inline float _get_threshold(const float threshold)
+{
+  return sqrf(threshold) * 0.06f; // quadratic response of the threshold parameter
+}
+
+// Capture sharpening: runs 'iterations' passes of _blur_div/_blur_mul on the luminance of 'out'
+// (4 floats per pixel) and scales its rgb by the resulting gain. 'in' is the CFA data used for the
+// clip mask and the automatic radius. With showmask only the blend mask is written to out[4*k+3].
+void _capture_sharpen(dt_iop_module_t *self,
+                      dt_dev_pixelpipe_iop_t *piece,
+                      float *in,
+                      float *out,
+                      const dt_iop_roi_t *const roi,
+                      const gboolean showmask)
+{
+  dt_dev_pixelpipe_t *pipe = piece->pipe;
+  const size_t width = roi->width;
+  const size_t height = roi->height;
+  const size_t pixels = width * height;
+  const dt_iop_demosaic_data_t *d = piece->data;
+  const dt_iop_demosaic_global_data_t *gd = self->global_data;
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+
+  // thumbnails are only processed if _get_thumb_quality() reports true for the final size
+  if((pipe->type & DT_DEV_PIXELPIPE_THUMBNAIL)
+     && !_get_thumb_quality(pipe->final_width, pipe->final_height)) return;
+
+  const int iterations = powf((float)d->cs_strength, 1.3f);
+  if(iterations < 1 && !showmask) return;
+
+  const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])pipe->dsc.xtrans;
+  const uint32_t filters = pipe->dsc.filters;
+  const dt_iop_buffer_dsc_t *dsc = &pipe->dsc;
+  const gboolean wbon = dsc->temperature.enabled;
+  const dt_aligned_pixel_t icoeffs = { wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[0] : CAPTURE_CFACLIP,
+                                       wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[1] : CAPTURE_CFACLIP,
+                                       wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[2] : CAPTURE_CFACLIP,
+                                       0.0f };
+  const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
+  const gboolean autoradius = fullpipe && g && g->autoradius;
+  const float old_radius = d->cs_radius;
+  float radius = old_radius;
+  if(autoradius || radius < 0.005f)
+  {
+    radius = filters != 9u
+              ? _calcRadiusBayer(in, width, height, 0.01f, 1.0f, filters)
+              : _calcRadiusXtrans(in, 0.01f, 1.0f, roi, xtrans);
+    dt_print_pipe(DT_DEBUG_PIPE, filters != 9u ? "bayer autoradius" : "xtrans autoradius",
+      pipe, self, DT_DEVICE_CPU, roi, NULL, "autoradius=%.2f", radius);
+    // Without any usable pair the ratio stays at 1.0 and sqrtf(1 / logf(1)) is not finite. Such a
+    // value, or one the kernel index map can't represent, is dropped and we keep the radius we had.
+    if(!(radius >= 0.0f && radius <= UCHAR_MAX * CAPTURE_GAUSS_FRACTION)) radius = old_radius;
+
+    if(!feqf(radius, old_radius, 0.002f))
+    {
+      if(fullpipe)
+      {
+        if(g)
+        {
+          dt_control_log(_("calculated radius: %.2f"), radius);
+          g->autoradius = TRUE;
+        }
+        dt_iop_demosaic_params_t *p = self->params;
+        p->cs_radius = radius;
+      }
+    }
+    else if(g) g->autoradius = FALSE;
+  }
+
+  unsigned char *gauss_idx = NULL;
+  gboolean error = TRUE;
+
+  float *luminance = dt_alloc_align_float(pixels);
+  float *tmp2 = dt_alloc_align_float(pixels);
+  float *tmp1 = dt_alloc_align_float(pixels);
+  float *blendmask = dt_alloc_align_float(pixels);
+  if(!luminance || !tmp2 || !tmp1 || !blendmask) goto finalize;
+
+  const float threshold = _get_threshold(d->cs_thrs);
+  // tmp2 will hold the temporary clipmask, tmp1 holds Y data
+  _prepare_blend(in, out, filters, xtrans, roi, tmp2, tmp1, icoeffs, width, height);
+  // modify clipmask in tmp2 according to Y variance, also write L to luminance
+  _modify_blend(tmp2, tmp1, luminance, threshold, width, height);
+  dt_gaussian_fast_blur(tmp2, blendmask, width, height, 2.0f, 0.0f, 1.0f, 1);
+
+  if(showmask)
+  {
+    DT_OMP_FOR()
+    for(size_t k = 0; k < pixels*4; k +=4)
+    {
+      const float blend = blendmask[k/4];
+      out[k+3] = blend < CAPTURE_BLEND_EPS ? 0.0f : blend;
+    }
+    error = FALSE;
+    goto finalize;
+  }
+
+  gauss_idx = _cs_precalc_gauss_idx(self, roi, radius, d->cs_boost);
+  if(!gauss_idx) goto finalize;
+
+  for(int iter = 0; iter < iterations && !dt_pipe_shutdown(pipe); iter++)
+  {
+    _blur_div(tmp1, tmp2, luminance, blendmask, gd->gauss_coeffs, gauss_idx, width, height);
+    _blur_mul(tmp2, tmp1, blendmask, gd->gauss_coeffs, gauss_idx, width, height);
+  }
+
+  DT_OMP_FOR()
+  for(size_t k = 0; k < pixels; k++)
+  {
+    if(blendmask[k] > CAPTURE_BLEND_EPS)
+    {
+      const float luminance_new = interpolatef(blendmask[k], tmp1[k], luminance[k]);
+      const float factor = luminance_new / MAX(luminance[k], 0.00001f);
+      for_three_channels(c) out[k*4+c] *= factor;
+    }
+  }
+
+  error = FALSE;
+
+  finalize:
+  if(error)
+    dt_print_pipe(DT_DEBUG_ALWAYS, "capture sharpen failed", pipe, self, DT_DEVICE_CPU, NULL, NULL,
+      "unable to allocate memory");
+
+  dt_free_align(gauss_idx);
+  dt_free_align(tmp2);
+  dt_free_align(tmp1);
+  dt_free_align(luminance);
+  dt_free_align(blendmask);
+}
+
+// revert aggressive optimizing: pop_options restores the GCC options saved by a matching push_options (presumably earlier in this file)
+#ifdef __GNUC__
+  #pragma GCC pop_options
+#endif
+
+#if HAVE_OPENCL
+
+// OpenCL capture sharpening: builds a blurred blend mask from dev_in / dev_out, runs the iterated
+// gaussian_9x9_div / gaussian_9x9_mul kernels and finally the capture_result kernel on dev_out.
+// Returns a cl_int status; CL_SUCCESS is also returned when nothing is done (see the early returns).
+int _capture_sharpen_cl(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
+                        cl_mem dev_in, cl_mem dev_out,
+                        const dt_iop_roi_t *const roi, const gboolean showmask)
+{
+  dt_dev_pixelpipe_t *pipe = piece->pipe;
+
+  const int width = roi->width;
+  const int height = roi->height;
+  const size_t bsize = sizeof(float) * (size_t)width * (size_t)height; // one float per pixel; size_t as an int could not hold very large byte counts
+  const int devid = piece->pipe->devid;
+
+  const dt_iop_demosaic_data_t *d = piece->data;
+  dt_iop_demosaic_global_data_t *const gd = self->global_data;
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  // thumbnail pipes are left untouched unless _get_thumb_quality() returns TRUE for the final size
+  if(pipe->type & DT_DEV_PIXELPIPE_THUMBNAIL)
+  {
+    const gboolean hqthumb = _get_thumb_quality(pipe->final_width, pipe->final_height);
+    if(!hqthumb) return CL_SUCCESS;
+  }
+  // iteration count is strength^1.3 truncated to int; with no iterations only the mask display is left to do
+  const int iterations = powf((float)d->cs_strength, 1.3f);
+  if(iterations < 1 && !showmask) return CL_SUCCESS;
+  // per-channel coefficients: CAPTURE_CFACLIP, multiplied by the white balance coefficients if those are enabled
+  const uint32_t filters = pipe->dsc.filters;
+  const dt_iop_buffer_dsc_t *dsc = &pipe->dsc;
+  const gboolean wbon = dsc->temperature.enabled;
+  dt_aligned_pixel_t icoeffs = { wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[0] : CAPTURE_CFACLIP,
+                                 wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[1] : CAPTURE_CFACLIP,
+                                 wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[2] : CAPTURE_CFACLIP,
+                                 0.0f };
+  // the radius is recalculated on the host if the GUI requested it on the full pipe or no usable radius is set
+  const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
+  const gboolean autoradius = fullpipe && g && g->autoradius;
+  const float old_radius = d->cs_radius;
+  float radius = old_radius;
+  if(autoradius || radius < 0.005f)
+  {
+    float *in = dt_alloc_align_float((size_t)width * height);
+    if(in)
+    {
+      if(dt_opencl_copy_device_to_host(devid, in, dev_in, width, height, sizeof(float)) == CL_SUCCESS)
+      {
+        radius = filters != 9u
+                ? _calcRadiusBayer(in, width, height, 0.01f, 1.0f, filters)
+                : _calcRadiusXtrans(in, 0.01f, 1.0f, roi, (const uint8_t(*const)[6])pipe->dsc.xtrans);
+        dt_print_pipe(DT_DEBUG_PIPE, filters != 9u ? "bayer autoradius" : "xtrans autoradius",
+            pipe, self, devid, roi, NULL, "autoradius=%.2f", radius);
+
+        if(!feqf(radius, old_radius, 0.002f))
+        {
+          if(fullpipe)
+          {
+            if(g)
+            {
+              dt_control_log(_("calculated radius: %.2f"), radius);
+              g->autoradius = TRUE;
+            }
+            dt_iop_demosaic_params_t *p = self->params;
+            p->cs_radius = radius;
+          }
+        }
+        else if(g) g->autoradius = FALSE;
+      }
+      dt_free_align(in);
+    }
+  }
+
+  const float threshold = _get_threshold(d->cs_thrs);
+
+  cl_mem gcoeffs = NULL;
+  cl_mem gauss_idx = NULL;
+  // err is preset so that a failed allocation below is reported as CL_MEM_OBJECT_ALLOCATION_FAILURE
+  cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+  cl_mem blendmask = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem luminance = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem tmp2 = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem tmp1 = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(pipe->dsc.xtrans), pipe->dsc.xtrans);
+  cl_mem whites = dt_opencl_copy_host_to_device_constant(devid, 4 * sizeof(float), icoeffs);
+  cl_mem dev_rgb = dt_opencl_duplicate_image(devid, dev_out);
+
+  if(!blendmask || !luminance || !tmp2 || !tmp1 || !xtrans || !whites || !dev_rgb) goto finish;
+  // blend mask: prefill_clip_mask -> prepare_blend -> modify_blend on tmp2, then gaussian blur of tmp2 into blendmask
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->prefill_clip_mask, width, height,
+          CLARG(tmp2), CLARG(width), CLARG(height));
+  if(err != CL_SUCCESS) goto finish;
+
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->prepare_blend, width, height,
+          CLARG(dev_in), CLARG(dev_out), CLARG(filters), CLARG(xtrans), CLARG(tmp2), CLARG(tmp1),
+          CLARG(whites), CLARG(width), CLARG(height));
+  if(err != CL_SUCCESS) goto finish;
+
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->modify_blend, width, height,
+          CLARG(tmp2), CLARG(tmp1), CLARG(luminance), CLARG(threshold), CLARG(width), CLARG(height));
+  if(err != CL_SUCCESS) goto finish;
+
+  err = dt_gaussian_fast_blur_cl_buffer(devid, tmp2, blendmask, width, height, 2.0f, 1, 0.0f, 1.0f);
+  if(err != CL_SUCCESS) goto finish;
+  // mask display: run show_blend_mask only and skip the sharpening iterations
+  if(showmask)
+  {
+    err = dt_opencl_enqueue_kernel_2d_args(devid, gd->show_blend_mask, width, height,
+          CLARG(dev_rgb), CLARG(dev_out), CLARG(blendmask), CLARG(width), CLARG(height));
+    goto finish;
+  }
+  // the per-pixel index table is calculated on the host and uploaded together with the gauss coefficients
+  unsigned char *f_gauss_idx = _cs_precalc_gauss_idx(self, roi, radius, d->cs_boost);
+  if(f_gauss_idx)
+  {
+    gcoeffs = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * (UCHAR_MAX+1) * CAPTURE_KERNEL_ALIGN, gd->gauss_coeffs);
+    gauss_idx = dt_opencl_copy_host_to_device_constant(devid, sizeof(unsigned char) * height * width, f_gauss_idx);
+  }
+  dt_free_align(f_gauss_idx);
+  // a missing index table or a failed upload is reported as an allocation failure as well
+  err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+  if(!gcoeffs || !gauss_idx) goto finish;
+
+  for(int iter = 0; iter < iterations && !dt_pipe_shutdown(pipe); iter++)
+  {
+    err = dt_opencl_enqueue_kernel_2d_args(devid, gd->gaussian_9x9_div, width, height,
+      CLARG(tmp1), CLARG(tmp2), CLARG(luminance), CLARG(blendmask),
+      CLARG(gcoeffs), CLARG(gauss_idx), CLARG(width), CLARG(height));
+    if(err != CL_SUCCESS) goto finish;
+
+    err = dt_opencl_enqueue_kernel_2d_args(devid, gd->gaussian_9x9_mul, width, height,
+      CLARG(tmp2), CLARG(tmp1), CLARG(blendmask),
+      CLARG(gcoeffs), CLARG(gauss_idx), CLARG(width), CLARG(height));
+    if(err != CL_SUCCESS) goto finish;
+  }
+
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->capture_result, width, height,
+    CLARG(dev_rgb), CLARG(dev_out), CLARG(blendmask), CLARG(luminance), CLARG(tmp1),
+    CLARG(width), CLARG(height));
+
+  finish:
+  if(err != CL_SUCCESS)
+    dt_print_pipe(DT_DEBUG_ALWAYS, "capture sharpen failed",
+      pipe, self, devid, NULL, NULL,
+      "Error: %s", cl_errstr(err));
+  // common exit: every device object created above is released here, NULL handles included
+  dt_opencl_release_mem_object(gauss_idx);
+  dt_opencl_release_mem_object(gcoeffs);
+  dt_opencl_release_mem_object(blendmask);
+  dt_opencl_release_mem_object(dev_rgb);
+  dt_opencl_release_mem_object(tmp2);
+  dt_opencl_release_mem_object(tmp1);
+  dt_opencl_release_mem_object(luminance);
+  dt_opencl_release_mem_object(xtrans);
+  dt_opencl_release_mem_object(whites);
+
+  return err;
+}
+#endif // OpenCL
diff --git a/src/iop/demosaicing/vng.c b/src/iop/demosaicing/vng.c
index d4f02ca02874..10970b9504f6 100644
--- a/src/iop/demosaicing/vng.c
+++ b/src/iop/demosaicing/vng.c
@@ -342,10 +342,14 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
   const int prow = (filters4 == 9u) ? 6 : 8;
   const int pcol = (filters4 == 9u) ? 6 : 2;
   const int devid = piece->pipe->devid;
+  const int width = roi_in->width;
+  const int height = roi_in->height;
 
   int *ips = NULL;
 
-  cl_mem dev_tmp = NULL;
+  cl_mem dev_tmp = only_vng_linear ? dev_out : dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
+  if(dev_tmp == NULL) dev_tmp = dev_out;
+
   cl_mem dev_xtrans = NULL;
   cl_mem dev_lookup = NULL;
   cl_mem dev_code = NULL;
@@ -486,15 +490,6 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
     dev_ips = dt_opencl_copy_host_to_device_constant(devid, ips_size, ips);
     if(dev_ips == NULL) goto finish;
 
-    int width = roi_in->width;
-    int height = roi_in->height;
-
-    // need to reserve scaled auxiliary buffer or use dev_out
-    err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
-
-    dev_tmp = dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
-    if(dev_tmp == NULL) goto finish;
-
     // manage borders for linear interpolation part
     int border = 1;
     err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_vng_border_interpolate, width, height,
@@ -524,13 +519,8 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
       if(err != CL_SUCCESS) goto finish;
     }
 
-
-    if(only_vng_linear)
+    if(dev_tmp == dev_out)
     {
-      // leave it at linear interpolation and skip VNG
-      size_t origin[] = { 0, 0, 0 };
-      size_t region[] = { width, height, 1 };
-      err = dt_opencl_enqueue_copy_image(devid, dev_tmp, dev_out, origin, origin, region);
       goto finish;
     }
     else
@@ -577,7 +567,7 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
     }
 
 finish:
-  dt_opencl_release_mem_object(dev_tmp);
+  if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
   dt_opencl_release_mem_object(dev_xtrans);
   dt_opencl_release_mem_object(dev_lookup);
   free(lookup);
diff --git a/src/iop/pipescale.c b/src/iop/pipescale.c
new file mode 100644
index 000000000000..77daa6dd7c51
--- /dev/null
+++ b/src/iop/pipescale.c
@@ -0,0 +1,200 @@
+/*
+    This file is part of darktable,
+    Copyright (C) 2025 darktable developers.
+
+    darktable is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    darktable is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "bauhaus/bauhaus.h"
+#include "common/interpolation.h"
+#include "common/opencl.h"
+#include "common/imagebuf.h"
+#include "develop/imageop.h"
+#include "develop/imageop_math.h"
+#include "develop/tiling.h"
+#include "iop/iop_api.h"
+
+DT_MODULE_INTROSPECTION(1, dt_iop_pipescale_params_t)
+
+typedef struct dt_iop_pipescale_params_t
+{
+  int dummy;
+} dt_iop_pipescale_params_t;
+// the params above only hold a placeholder that is never read in this file; the per-piece data reuses the same type
+typedef dt_iop_pipescale_params_t dt_iop_pipescale_data_t;
+
+typedef struct dt_iop_pipescale_gui_data_t
+{
+  int dummy; // placeholder member, not referenced anywhere in this file
+} dt_iop_pipescale_gui_data_t;
+
+const char *name()
+{
+  return _("pipe scale"); // module name, wrapped in _() for translation
+}
+
+const char **description(dt_iop_module_t *self)
+{
+  return dt_iop_set_description(self, _("crop and scale sensor data to current region of interest"), // main text
+                                      _("mandatory"),                    // presumably: purpose
+                                      _("linear, RGB, scene-referred"),  // presumably: input
+                                      _("linear, RGB"),                  // presumably: processing
+                                      _("linear, RGB, scene-referred")); // presumably: output
+}
+
+int flags()
+{
+  return IOP_FLAGS_ALLOW_TILING | IOP_FLAGS_TILING_FULL_ROI    // tiling related flags
+    | IOP_FLAGS_ONE_INSTANCE | IOP_FLAGS_NO_HISTORY_STACK;     // instance / history related flags, all OR-ed into one bitmask
+}
+
+int default_group()
+{
+  return IOP_GROUP_BASIC; // fixed group constant, independent of any state
+}
+
+
+dt_iop_colorspace_type_t default_colorspace(dt_iop_module_t *self,
+                                            dt_dev_pixelpipe_t *pipe,
+                                            dt_dev_pixelpipe_iop_t *piece)
+{
+  return IOP_CS_RGB; // always RGB; self, pipe and piece are not inspected
+}
+
+// output roi: a copy of the input roi with its origin moved to (0, 0), all other fields unchanged
+void modify_roi_out(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
+                    dt_iop_roi_t *roi_out, const dt_iop_roi_t *const roi_in)
+{
+  dt_iop_roi_t shifted = *roi_in;
+  shifted.x = 0;
+  shifted.y = 0;
+  *roi_out = shifted;
+}
+
+// input roi: always the complete input buffer (piece->buf_in size) at scale 1.0 with origin (0, 0);
+// any remaining roi fields are taken over from roi_out
+void modify_roi_in(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
+                   const dt_iop_roi_t *const roi_out, dt_iop_roi_t *roi_in)
+{
+  dt_iop_roi_t full = *roi_out;
+  full.x = full.y = 0;
+  full.width = piece->buf_in.width;
+  full.height = piece->buf_in.height;
+  full.scale = 1.0f;
+  *roi_in = full;
+}
+
+// Fills the tiling descriptor: factor = 1 + (output pixels / input pixels), plus 0.5 whenever that
+// ratio differs from 1; the OpenCL values mirror the CPU values.
+void tiling_callback(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
+                     const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out,
+                     struct dt_develop_tiling_t *tiling)
+{
+  // multiply as floats: the int product width * height would overflow for very large regions
+  const float ioratio
+      = ((float)roi_out->width * (float)roi_out->height) / ((float)roi_in->width * (float)roi_in->height);
+  tiling->factor = 1.0f + ioratio;
+  tiling->factor += ioratio != 1.0f ? 0.5f : 0.0f; // approximate extra requirements for interpolation
+  tiling->factor_cl = tiling->factor;
+  tiling->maxbuf = 1.0f;
+  tiling->maxbuf_cl = tiling->maxbuf;
+  tiling->overhead = 0;
+  // tiles overlap by 4 (presumably pixels) and use an alignment of 1 in both directions
+  tiling->overlap = 4;
+  tiling->xalign = 1;
+  tiling->yalign = 1;
+}
+
+// Transfers a single channel mask from roi_in to roi_out: a plain roi copy when both scales are equal,
+// otherwise a resampling with the DT_INTERPOLATION_USERPREF_WARP interpolator.
+void distort_mask(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
+                  const float *const in, float *const out,
+                  const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
+{
+  if(roi_out->scale == roi_in->scale)
+  {
+    dt_iop_copy_image_roi(out, in, 1, roi_in, roi_out);
+    return;
+  }
+
+  const dt_interpolation_t *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF_WARP);
+  dt_interpolation_resample_1c(itor, out, roi_out, in, roi_in);
+}
+
+#ifdef HAVE_OPENCL
+// OpenCL path: device-side image copy if input and output roi agree in size and scale, otherwise
+// both buffers are handed to dt_iop_clip_and_zoom_cl(); the status of the call made is returned.
+int process_cl(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
+               cl_mem dev_in, cl_mem dev_out,
+               const dt_iop_roi_t *const roi, const dt_iop_roi_t *const roo)
+{
+  const int devid = piece->pipe->devid;
+  const int same_geometry
+    = roo->width == roi->width && roo->height == roi->height && roi->scale == roo->scale;
+
+  if(!same_geometry) return dt_iop_clip_and_zoom_cl(devid, dev_out, dev_in, roo, roi);
+
+  size_t origin[] = { 0, 0, 0 };
+  size_t region[] = { roo->width, roo->height, 1 };
+  return dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region);
+}
+#endif
+
+// CPU path: straight 4-channel roi copy when input and output roi agree in size and scale,
+// otherwise input and output are handed to dt_iop_clip_and_zoom().
+void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
+             const void *const ivoid, void *const ovoid,
+             const dt_iop_roi_t *const roi, const dt_iop_roi_t *const roo)
+{
+  float *const out = (float *)ovoid;
+  float *const in = (float *)ivoid;
+  const int resize = roo->width != roi->width || roo->height != roi->height || roi->scale != roo->scale;
+  if(resize) dt_iop_clip_and_zoom(out, in, roo, roi);
+  else       dt_iop_copy_image_roi(out, in, 4, roi, roo);
+}
+
+void init_pipe(dt_iop_module_t *self,
+               dt_dev_pixelpipe_t *pipe,
+               dt_dev_pixelpipe_iop_t *piece)
+{
+  piece->data = calloc(1, sizeof(dt_iop_pipescale_data_t)); // zero-filled per-piece data (result unchecked), freed in cleanup_pipe()
+}
+
+void cleanup_pipe(dt_iop_module_t *self,
+                  dt_dev_pixelpipe_t *pipe,
+                  dt_dev_pixelpipe_iop_t *piece)
+{
+  free(piece->data);   // releases the buffer allocated in init_pipe()
+  piece->data = NULL;  // do not leave a dangling pointer in the piece
+}
+
+void init(dt_iop_module_t *self)
+{
+  self->params = calloc(1, sizeof(dt_iop_pipescale_params_t)); // zero-filled params, allocation result is not checked
+  self->default_params = calloc(1, sizeof(dt_iop_pipescale_params_t)); // separate zero-filled defaults
+  self->default_enabled = TRUE; // the module is enabled by default ...
+  self->hide_enable_button = TRUE; // ... and, going by the field name, offers no enable button
+  self->params_size = sizeof(dt_iop_pipescale_params_t); // size of the params struct allocated above
+}
+
+void gui_init(dt_iop_module_t *self)
+{
+  IOP_GUI_ALLOC(pipescale); // presumably allocates dt_iop_pipescale_gui_data_t for this module - TODO confirm against the macro
+  self->widget = dt_ui_label_new(""); // no controls: the module widget is just an empty label
+}
+
+// clang-format off
+// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
+// vim: shiftwidth=2 expandtab tabstop=2 cindent
+// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
+// clang-format on