From 279e7b19856a100d1708b30f9500f7d4c6efeea3 Mon Sep 17 00:00:00 2001
From: Hanno Schwalm <hanno@schwalm-bremen.de>
Date: Thu, 19 Jun 2025 07:39:56 +0200
Subject: [PATCH 1/3] Implement capture sharpening inside demosaic module

Capture sharpening has been implemented to work inside the demosaic module so it's raw only.
Credits to: Ingo Weyrich (heckflosse67@gmx.de), he implemented the original algorithm for rawtherapee,
this implementation is based on his work, especially the convolution kernels.
CPU and OpenCL code paths are both available.
The demosaic module gets more parameters, so there is a version bump; one still unused float parameter has been reserved.

A "mini manual"

Capture sharpening (CS) tries to recover details lost due to in-camera blurring, which can be caused by diffraction,
the anti-aliasing filter or other sources of gaussian-type blur.
Prerequisites are
- good white balance parameters (same requirement as for highlights reconstruction or demosaic)
- no chromatic aberration, you might want to add the "raw chromatic aberration" module
- sensor noise will be amplified by CS

controls:
1. capture sharpen
   switches CS on if above zero and defines the strength of overall effect.
   CS works in an iterative process, this defines the number of iterations, mostly a setting of 10 will be enough.
2. radius
   defines the basic convolution gaussian sigma.
   This should not be chosen for creative effect but set to the blurring radius of the optical system and sensor;
   values that are too large will lead to artifacts like halos.
   A suitable radius can be calculated internally. This is done when you either
   a) click on the button beside the slider, or
   b) activate capture sharpen for the first time after resetting to demosaic defaults or developing old edits.
3. contrast threshold
   As sensor noise will be amplified by CS we take some care about this by a per pixel variance analysis
   and restrict CS to locations with higher variance. The default is good for low to medium ISO images.
4. corner boost
   Increase the radius in image corners. We assume a circle of 1/2 of image size to be "sharp" (only use main radius),
   locations outside this center circle get an increased convolution radius.
---
 data/kernels/capture.cl       | 285 ++++++++++++
 data/kernels/programs.conf    |   1 +
 src/iop/demosaic.c            | 293 ++++++++++--
 src/iop/demosaicing/capture.c | 832 ++++++++++++++++++++++++++++++++++
 4 files changed, 1371 insertions(+), 40 deletions(-)
 create mode 100644 data/kernels/capture.cl
 create mode 100644 src/iop/demosaicing/capture.c

diff --git a/data/kernels/capture.cl b/data/kernels/capture.cl
new file mode 100644
index 000000000000..5c311163dd31
--- /dev/null
+++ b/data/kernels/capture.cl
@@ -0,0 +1,285 @@
+/*
+    This file is part of darktable,
+    copyright (c) 2025 darktable developer.
+
+    darktable is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    darktable is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "common.h"
+
+#define CAPTURE_KERNEL_ALIGN 32
+#define CAPTURE_BLEND_EPS 0.01f
+#define CAPTURE_YMIN 0.001f
+#define CAPTURE_THRESHPOWER 0.15f
+
+static inline float sqrf(float a)
+{
+  return a * a; // square of the argument
+}
+
+__kernel void kernel_9x9_mul(global float *in,             // buffer the 9x9 kernel is applied to
+                             global float *out,            // multiplied in place by the convolution result
+                             global float *blend,          // per-pixel weight, pixels at or below CAPTURE_BLEND_EPS are skipped
+                             global float *kernels,        // coefficient sets, CAPTURE_KERNEL_ALIGN floats apart
+                             global unsigned char *table,  // per-pixel index of the coefficient set to use
+                             const int w1,                 // width, also used as row stride of all buffers
+                             const int height)
+{ // out[i] *= sum of kern[...] * in[...] over the 9x9 neighbourhood of pixel i
+  const int col = get_global_id(0);
+  const int row = get_global_id(1);
+  if(col >= w1 || row >= height) return;
+
+  const int i = mad24(row, w1, col);
+  const int w2 = 2 * w1; // offsets of rows 2, 3 and 4 lines away
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+  if(blend[i] <= CAPTURE_BLEND_EPS)
+     return; // out[i] is left untouched for skipped pixels
+
+  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
+  global float *d = in + i; // points at the centre pixel
+
+  float val = 0.0f;
+  if(col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4) // whole 9x9 window is inside the image
+  { // unrolled: one coefficient is shared by all mirrored/transposed offsets; offsets (3,4), (4,3), (4,4) are not read
+    val = kern[10+4] * (d[-w4-2]  + d[-w4+2]  + d[-w2-4]  + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+          kern[5 +4] * (d[-w4-1]  + d[-w4+1]  + d[-w1-4]  + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+          kern[4]    * (d[-w4  ]  + d[   -4]  + d[    4]  + d[ w4  ]) +
+          kern[15+3] * (d[-w3-3]  + d[-w3+3]  + d[ w3-3]  + d[ w3+3]) +
+          kern[10+3] * (d[-w3-2]  + d[-w3+2]  + d[-w2-3]  + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+          kern[ 5+3] * (d[-w3-1]  + d[-w3+1]  + d[-w1-3]  + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+          kern[   3] * (d[-w3  ]  + d[   -3]  + d[    3]  + d[ w3  ]) +
+          kern[10+2] * (d[-w2-2]  + d[-w2+2]  + d[ w2-2]  + d[ w2+2]) +
+          kern[ 5+2] * (d[-w2-1]  + d[-w2+1]  + d[-w1-2]  + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+          kern[   2] * (d[-w2  ]  + d[   -2]  + d[    2]  + d[ w2  ]) +
+          kern[ 5+1] * (d[-w1-1]  + d[-w1+1]  + d[ w1-1]  + d[ w1+1]) +
+          kern[   1] * (d[-w1  ]  + d[   -1]  + d[    1]  + d[ w1  ]) +
+          kern[   0] * (d[0]);
+  }
+  else
+  { // near the border: generic loop that only accumulates taps lying inside the image
+    for(int ir = -4; ir <= 4; ir++)
+    {
+      const int irow = row+ir;
+      if(irow >= 0 && irow < height)
+      {
+        for(int ic = -4; ic <= 4; ic++)
+        {
+          const int icol = col+ic;
+          if(icol >=0 && icol < w1) // NOTE(review): reads all 25 coefficients, presumably the ones skipped above are zero - confirm
+            val += kern[5 * abs(ir) + abs(ic)] * in[mad24(irow, w1, icol)];
+        }
+      }
+    }
+  }
+  out[i] *= val;
+}
+
+__kernel void kernel_9x9_div(global float *in,             // buffer the 9x9 kernel is applied to
+                             global float *out,            // receives divbuff / convolution
+                             global float *divbuff,        // numerator of the division
+                             global float *blend,          // per-pixel weight, pixels at or below CAPTURE_BLEND_EPS are skipped
+                             global float *kernels,        // coefficient sets, CAPTURE_KERNEL_ALIGN floats apart
+                             global unsigned char *table,  // per-pixel index of the coefficient set to use
+                             const int w1,                 // width, also used as row stride of all buffers
+                             const int height)
+{ // out[i] = divbuff[i] / (sum of kern[...] * in[...] over the 9x9 neighbourhood of pixel i)
+  const int col = get_global_id(0);
+  const int row = get_global_id(1);
+  if(col >= w1 || row >= height) return;
+
+  const int i = mad24(row, w1, col);
+  const int w2 = 2 * w1; // offsets of rows 2, 3 and 4 lines away
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+  if(blend[i] <= CAPTURE_BLEND_EPS)
+    return; // out[i] is not written for skipped pixels
+
+  global const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i];
+  global float *d = in + i; // points at the centre pixel
+
+  float val = 0.0f;
+  if(col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4) // whole 9x9 window is inside the image
+  { // unrolled: one coefficient is shared by all mirrored/transposed offsets; offsets (3,4), (4,3), (4,4) are not read
+    val = kern[10+4] * (d[-w4-2]  + d[-w4+2]  + d[-w2-4]  + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+          kern[5 +4] * (d[-w4-1]  + d[-w4+1]  + d[-w1-4]  + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+          kern[4]    * (d[-w4  ]  + d[   -4]  + d[    4]  + d[ w4  ]) +
+          kern[15+3] * (d[-w3-3]  + d[-w3+3]  + d[ w3-3]  + d[ w3+3]) +
+          kern[10+3] * (d[-w3-2]  + d[-w3+2]  + d[-w2-3]  + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+          kern[ 5+3] * (d[-w3-1]  + d[-w3+1]  + d[-w1-3]  + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+          kern[   3] * (d[-w3  ]  + d[   -3]  + d[    3]  + d[ w3  ]) +
+          kern[10+2] * (d[-w2-2]  + d[-w2+2]  + d[ w2-2]  + d[ w2+2]) +
+          kern[ 5+2] * (d[-w2-1]  + d[-w2+1]  + d[-w1-2]  + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+          kern[   2] * (d[-w2  ]  + d[   -2]  + d[    2]  + d[ w2  ]) +
+          kern[ 5+1] * (d[-w1-1]  + d[-w1+1]  + d[ w1-1]  + d[ w1+1]) +
+          kern[   1] * (d[-w1  ]  + d[   -1]  + d[    1]  + d[ w1  ]) +
+          kern[   0] * (d[0]);
+  }
+  else
+  { // near the border: generic loop that only accumulates taps lying inside the image
+    for(int ir = -4; ir <= 4; ir++)
+    {
+      const int irow = row+ir;
+      if(irow >= 0 && irow < height)
+      {
+        for(int ic = -4; ic <= 4; ic++)
+        {
+          const int icol = col+ic;
+          if(icol >=0 && icol < w1) // NOTE(review): reads all 25 coefficients, presumably the ones skipped above are zero - confirm
+            val += kern[5 * abs(ir) + abs(ic)] * in[mad24(irow, w1, icol)];
+        }
+      }
+    }
+  }
+  out[i] = divbuff[i] / fmax(val, 0.00001f); // divisor is floored to avoid a division by zero
+}
+
+__kernel void prefill_clip_mask(global float *mask,
+                                const int width,
+                                const int height)
+{
+  // every work-item inside the image sets its mask entry to 1.0
+  const int x = get_global_id(0);
+  const int y = get_global_id(1);
+
+  if(x < width && y < height)
+    mask[mad24(y, width, x)] = 1.0f;
+}
+
+__kernel void prepare_blend(__read_only image2d_t cfa,                      // raw mosaic data, only .x is read
+                            __read_only image2d_t dev_out,                  // RGB image the luminance is taken from
+                            const int filters,                              // CFA descriptor, 9 selects the xtrans lookup
+                            global const unsigned char (*const xtrans)[6],
+                            global float *mask,                             // zeroed around clipped or very dark pixels
+                            global float *Yold,                             // out: luminance per pixel
+                            global float *whites,                           // per CFA color threshold for the raw value
+                            const int w,
+                            const int height)
+{ // writes the luminance of dev_out to Yold and clears mask around unusable pixels
+  const int col = get_global_id(0);
+  const int row = get_global_id(1);
+  if(col >= w || row >= height) return;
+
+  const float4 rgb = read_imagef(dev_out, samplerA, (int2)(col, row));
+  const float Y = fmax(0.0f, 0.2626f * rgb.x + 0.7152f * rgb.y + 0.0722f * rgb.z); // NOTE(review): weights sum to 1.05, Rec.709 red weight is 0.2126 - confirm against the CPU path
+  const int k = mad24(row, w, col);
+  Yold[k] = Y;
+
+  if(row > 1 && col > 1 && row < height-2 && col < w -2) // at least two pixels away from every border
+  {
+    const int w2 = 2 * w;
+    const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
+    const float val = read_imagef(cfa, samplerA, (int2)(col, row)).x;
+    if(val > whites[color] || Y < CAPTURE_YMIN) // raw value above its threshold or luminance too low
+    { // clear the 5x5 neighbourhood without its corners (21 entries); neighbouring work-items may hit the same entry but all store 0.0f
+      mask[k-w2-1] = mask[k-w2]  = mask[k-w2+1] =
+      mask[k-w-2]  = mask[k-w-1] = mask[k-w ]   = mask[k-w+1] = mask[k-w+2] =
+      mask[k-2]    = mask[k-1]   = mask[k]      = mask[k+1]   = mask[k+2] =
+      mask[k+w-2]  = mask[k+w-1] = mask[k+w]    = mask[k+w+1] = mask[k+w+2] =
+      mask[k+w2-1] = mask[k+w2]  = mask[k+w2+1] = 0.0f;
+    }
+  }
+  else
+    mask[k] = 0.0f; // the two pixel wide border is always cleared
+}
+
+__kernel void modify_blend(global float *blend,      // in/out: weight, scaled by a local contrast factor in [0, 1]
+                           global float *Yold,       // luminance the local statistics are taken from
+                           global float *luminance,  // out: copy of Yold
+                           const float threshold,    // subtracted from the scaled standard deviation
+                           const int width,
+                           const int height)
+{ // scales blend[k] by a factor derived from the luminance deviation in a 21 sample window
+  const int icol = get_global_id(0);
+  const int irow = get_global_id(1);
+  if(icol >= width || irow >= height) return;
+
+  const int row = clamp(irow, 2, height-3); // window centre is moved inwards so all reads stay inside the image
+  const int col = clamp(icol, 2, width-3);
+
+  float av = 0.0f; // mean over the 5x5 window without its four corners: 3*5 + 2*3 = 21 samples
+  for(int y = row-1; y < row+2; y++)
+  {
+    for(int x = col-2; x < col+3; x++)
+      av += Yold[mad24(y, width, x)];
+  }
+  for(int x = col-1; x < col+2; x++)
+  {
+    av += Yold[mad24(row-2, width, x)];
+    av += Yold[mad24(row+2, width, x)];
+  }
+  av /= 21.0f;
+
+  float sv = 0.0f; // sum of squared deviations over exactly the same 21 samples
+  for(int y = row-1; y < row+2; y++)
+  {
+    for(int x = col-2; x < col+3; x++)
+      sv += sqrf(Yold[mad24(y, width, x)] - av);
+  }
+  for(int x = col-1; x < col+2; x++) // rows +-2 contribute 3 samples each, matching the mean and the divisor below
+  {
+    sv+= sqrf(Yold[mad24(row-2, width, x)] - av);
+    sv+= sqrf(Yold[mad24(row+2, width, x)] - av);
+  }
+  sv = dtcl_pow(fmax(0.0f, 5.0f * dtcl_sqrt(sv / 21.f) - threshold), CAPTURE_THRESHPOWER); // 5 * standard deviation above threshold, compressed by the power
+  const int k = mad24(irow, width, icol); // results are written at the unclamped position
+
+  blend[k] *= clamp(sv, 0.0f, 1.0f);
+  luminance[k] = Yold[k];
+}
+
+__kernel void show_blend_mask(__read_only image2d_t in,
+                              __write_only image2d_t out,
+                              global float *blend_mask,
+                              const int width,
+                              const int height)
+{
+  const int x = get_global_id(0);
+  const int y = get_global_id(1);
+  if(x >= width || y >= height) return;
+  const int2 pos = (int2)(x, y);
+  float4 rgba = read_imagef(in, samplerA, pos);
+  const float weight = blend_mask[mad24(y, width, x)];
+  rgba.w = (weight < CAPTURE_BLEND_EPS) ? 0.0f : weight; // weights below the epsilon are written as alpha 0
+  write_imagef(out, pos, rgba);
+}
+
+__kernel void capture_result( __read_only image2d_t in,
+                              __write_only image2d_t out,
+                              global float *blendmask,
+                              global float *luminance,
+                              global float *tmp,
+                              const int width,
+                              const int height)
+{
+  const int x = get_global_id(0);
+  const int y = get_global_id(1);
+  if(x >= width || y >= height) return;
+
+  const int2 pos = (int2)(x, y);
+  const int idx = mad24(y, width, x);
+  float4 rgba = read_imagef(in, samplerA, pos);
+  const float weight = blendmask[idx];
+  if(weight > CAPTURE_BLEND_EPS)
+  {
+    // both luminances are floored before the gain is formed, all four channels get the same gain
+    const float before = fmax(luminance[idx], 0.000001f);
+    const float after = fmax(tmp[idx], 0.0000001f);
+    const float blended = mix(before, after, clamp(weight, 0.0f, 1.0f));
+    rgba = rgba * (float4)(blended / before);
+  }
+  write_imagef(out, pos, rgba);
+}
+
+#undef CAPTURE_KERNEL_ALIGN
diff --git a/data/kernels/programs.conf b/data/kernels/programs.conf
index d2f38ad82157..dc1dc287ba46 100644
--- a/data/kernels/programs.conf
+++ b/data/kernels/programs.conf
@@ -38,3 +38,4 @@ blurs.cl                34
 bspline.cl              35
 sigmoid.cl              36
 colorequal.cl           37
+capture.cl              38
diff --git a/src/iop/demosaic.c b/src/iop/demosaic.c
index 96d50748c391..8ffe68d4f4e7 100644
--- a/src/iop/demosaic.c
+++ b/src/iop/demosaic.c
@@ -46,7 +46,7 @@
 #include <string.h>
 #include <time.h>
 
-DT_MODULE_INTROSPECTION(4, dt_iop_demosaic_params_t)
+DT_MODULE_INTROSPECTION(5, dt_iop_demosaic_params_t)
 
 #define DT_DEMOSAIC_XTRANS 1024 // masks for non-Bayer demosaic ops
 #define DT_DEMOSAIC_DUAL 2048   // masks for dual demosaicing methods
@@ -124,7 +124,12 @@ typedef struct dt_iop_demosaic_params_t
   dt_iop_demosaic_smooth_t color_smoothing;     // $DEFAULT: DT_DEMOSAIC_SMOOTH_OFF $DESCRIPTION: "color smoothing"
   dt_iop_demosaic_method_t demosaicing_method;  // $DEFAULT: DT_IOP_DEMOSAIC_RCD $DESCRIPTION: "method"
   dt_iop_demosaic_lmmse_t lmmse_refine;         // $DEFAULT: DT_LMMSE_REFINE_1 $DESCRIPTION: "LMMSE refine"
-  float dual_thrs;                              // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.20 $DESCRIPTION: "dual threshold"
+  float dual_thrs;                              // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.2 $DESCRIPTION: "dual threshold"
+  float cs_radius;                              // $MIN: 0.0 $MAX: 2.0 $DEFAULT: 0.0 $DESCRIPTION: "radius"
+  float cs_thrs;                                // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.35 $DESCRIPTION: "contrast threshold"
+  float cs_boost;                               // $MIN: 0.0 $MAX: 1.5 $DEFAULT: 0.0 $DESCRIPTION: "corner boost"
+  int cs_strength;                              // $MIN: 0 $MAX: 20 $DEFAULT: 0 $DESCRIPTION: "capture sharpen"
+  float reserved;
 } dt_iop_demosaic_params_t;
 
 typedef struct dt_iop_demosaic_gui_data_t
@@ -137,7 +142,13 @@ typedef struct dt_iop_demosaic_gui_data_t
   GtkWidget *demosaic_method_bayerfour;
   GtkWidget *dual_thrs;
   GtkWidget *lmmse_refine;
+  GtkWidget *cs_thrs;
+  GtkWidget *cs_radius;
+  GtkWidget *cs_boost;
+  GtkWidget *cs_strength;
+  gboolean cs_mask;
   gboolean dual_mask;
+  gboolean autoradius;
 } dt_iop_demosaic_gui_data_t;
 
 typedef struct dt_iop_demosaic_global_data_t
@@ -193,6 +204,14 @@ typedef struct dt_iop_demosaic_global_data_t
   int kernel_rcd_border_redblue;
   int kernel_rcd_border_green;
   int kernel_write_blended_dual;
+  int gaussian_9x9_mul;
+  int gaussian_9x9_div;
+  int prefill_clip_mask;
+  int prepare_blend;
+  int modify_blend;
+  int show_blend_mask;
+  int capture_result;
+  float *gauss_coeffs;
 } dt_iop_demosaic_global_data_t;
 
 typedef struct dt_iop_demosaic_data_t
@@ -204,6 +223,10 @@ typedef struct dt_iop_demosaic_data_t
   float median_thrs;
   double CAM_to_RGB[3][4];
   float dual_thrs;
+  float cs_radius;
+  float cs_thrs;
+  float cs_boost;
+  int cs_strength;
 } dt_iop_demosaic_data_t;
 
 static gboolean _get_thumb_quality(const int width, const int height)
@@ -282,6 +305,7 @@ void amaze_demosaic(dt_dev_pixelpipe_iop_t *piece,
 #include "iop/demosaicing/rcd.c"
 #include "iop/demosaicing/lmmse.c"
 #include "iop/demosaicing/dual.c"
+#include "iop/demosaicing/capture.c"
 
 const char *name()
 {
@@ -321,7 +345,7 @@ int legacy_params(dt_iop_module_t *self,
                   int32_t *new_params_size,
                   int *new_version)
 {
-  typedef struct dt_iop_demosaic_params_v4_t
+  typedef struct dt_iop_demosaic_params_v5_t
   {
     dt_iop_demosaic_greeneq_t green_eq;
     float median_thrs;
@@ -329,7 +353,12 @@ int legacy_params(dt_iop_module_t *self,
     dt_iop_demosaic_method_t demosaicing_method;
     dt_iop_demosaic_lmmse_t lmmse_refine;
     float dual_thrs;
-  } dt_iop_demosaic_params_v4_t;
+    float cs_radius;
+    float cs_thrs;
+    float cs_boost;
+    int cs_strength;
+    float reserved;
+  } dt_iop_demosaic_params_v5_t;
 
   if(old_version == 2)
   {
@@ -340,17 +369,22 @@ int legacy_params(dt_iop_module_t *self,
     } dt_iop_demosaic_params_v2_t;
 
     const dt_iop_demosaic_params_v2_t *o = (dt_iop_demosaic_params_v2_t *)old_params;
-    dt_iop_demosaic_params_v4_t *n = malloc(sizeof(dt_iop_demosaic_params_v4_t));
+    dt_iop_demosaic_params_v5_t *n = malloc(sizeof(dt_iop_demosaic_params_v5_t));
     n->green_eq = o->green_eq;
     n->median_thrs = o->median_thrs;
     n->color_smoothing = DT_DEMOSAIC_SMOOTH_OFF;
     n->demosaicing_method = DT_IOP_DEMOSAIC_PPG;
     n->lmmse_refine = DT_LMMSE_REFINE_1;
     n->dual_thrs = 0.20f;
+    n->cs_radius = 0.0f;
+    n->cs_thrs = 0.35f;
+    n->cs_boost = 0.0f;
+    n->cs_strength = 0;
+    n->reserved = 0.0f;
 
     *new_params = n;
-    *new_params_size = sizeof(dt_iop_demosaic_params_v4_t);
-    *new_version = 4;
+    *new_params_size = sizeof(dt_iop_demosaic_params_v5_t);
+    *new_version = 5;
     return 0;
   }
 
@@ -366,13 +400,45 @@ int legacy_params(dt_iop_module_t *self,
     } dt_iop_demosaic_params_v3_t;
 
     const dt_iop_demosaic_params_v3_t *o = (dt_iop_demosaic_params_v3_t *)old_params;
-    dt_iop_demosaic_params_v4_t *n = malloc(sizeof(dt_iop_demosaic_params_v4_t));
+    dt_iop_demosaic_params_v5_t *n = malloc(sizeof(dt_iop_demosaic_params_v5_t));
     memcpy(n, o, sizeof *o);
     n->dual_thrs = 0.20f;
+    n->cs_radius = 0.0f;
+    n->cs_thrs = 0.35f;
+    n->cs_boost = 0.0f;
+    n->cs_strength = 0;
+    n->reserved = 0.0f;
 
     *new_params = n;
-    *new_params_size = sizeof(dt_iop_demosaic_params_v4_t);
-    *new_version = 4;
+    *new_params_size = sizeof(dt_iop_demosaic_params_v5_t);
+    *new_version = 5;
+    return 0;
+  }
+
+  if(old_version == 4)
+  {
+    typedef struct dt_iop_demosaic_params_v4_t
+    {
+      dt_iop_demosaic_greeneq_t green_eq;
+      float median_thrs;
+      dt_iop_demosaic_smooth_t color_smoothing;
+      dt_iop_demosaic_method_t demosaicing_method;
+      dt_iop_demosaic_lmmse_t lmmse_refine;
+      float dual_thrs;
+    } dt_iop_demosaic_params_v4_t;
+
+    const dt_iop_demosaic_params_v4_t *o = (dt_iop_demosaic_params_v4_t *)old_params;
+    dt_iop_demosaic_params_v5_t *n = malloc(sizeof(dt_iop_demosaic_params_v5_t));
+    memcpy(n, o, sizeof *o);
+    n->cs_radius = 0.0f;
+    n->cs_thrs = 0.35f;
+    n->cs_boost = 0.0f;
+    n->cs_strength = 0;
+    n->reserved = 0.0f;
+
+    *new_params = n;
+    *new_params_size = sizeof(dt_iop_demosaic_params_v5_t);
+    *new_version = 5;
     return 0;
   }
 
@@ -577,11 +643,12 @@ void tiling_callback(dt_iop_module_t *self,
     tiling->overlap = 6;
   }
 
-  if(d->demosaicing_method & DT_DEMOSAIC_DUAL)
+  if((d->demosaicing_method & DT_DEMOSAIC_DUAL) || d->cs_strength)
   {
-    // make sure VNG4 is also possible
-    tiling->factor += 1.0f;
-    tiling->overlap = MAX(6, tiling->overlap);
+    // internals plus 2 output
+    tiling->factor = MAX(tiling->factor, 1.0f + 2.0f * ioratio);
+    // works for bayer and xtrans
+    tiling->overlap = MAX(d->cs_strength ? 18 : 6, tiling->overlap);
   }
   return;
 }
@@ -601,7 +668,6 @@ void process(dt_iop_module_t *self,
   const gboolean run_fast = pipe->type & DT_DEV_PIXELPIPE_FAST;
   const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
   const gboolean previewpipe = pipe->type & DT_DEV_PIXELPIPE_PREVIEW;
-
   const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])pipe->dsc.xtrans;
 
   const dt_iop_demosaic_data_t *d = piece->data;
@@ -622,17 +688,27 @@ void process(dt_iop_module_t *self,
           && demosaicing_method != DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR))
     demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
 
-  gboolean showmask = FALSE;
+  gboolean show_dualmask = FALSE;
+  gboolean show_capturemask = FALSE;
+  gboolean vng_linear = FALSE;
   if(self->dev->gui_attached && fullpipe)
   {
     if(g->dual_mask)
     {
-      showmask = TRUE;
+      show_dualmask = TRUE;
+      pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
+    }
+    if(g->cs_mask)
+    {
+      show_capturemask = TRUE;
       pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
     }
     // take care of passthru modes
     if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_PASSTHRU)
+    {
       demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
+      vng_linear = TRUE;
+    }
   }
 
   float *in  = (float *)i;
@@ -656,6 +732,16 @@ void process(dt_iop_module_t *self,
 
   const gboolean direct = roi_out->width == width && roi_out->height == height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
 
+  const gboolean passthru = demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME
+                         || demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR;
+
+  const gboolean do_capture = !passthru
+                          &&  !is_4bayer
+                          &&  !show_dualmask
+                          &&  !run_fast
+                          &&  !previewpipe
+                          &&  d->cs_strength;
+
   if(!direct)
     out = dt_alloc_align_float((size_t)4 * width * height);
 
@@ -696,13 +782,13 @@ void process(dt_iop_module_t *self,
     else if(base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN || base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3)
       xtrans_markesteijn_interpolate(out, in, roi_in, xtrans, passes);
     else
-      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, qual_flags & DT_DEMOSAIC_ONLY_VNG_LINEAR);
+      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, vng_linear);
   }
   else
   {
     if(demosaicing_method == DT_IOP_DEMOSAIC_VNG4 || is_4bayer)
     {
-      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, qual_flags & DT_DEMOSAIC_ONLY_VNG_LINEAR);
+      vng_interpolate(out, in, roi_in, pipe->dsc.filters, xtrans, vng_linear);
       if(is_4bayer)
       {
         dt_colorspaces_cygm_to_rgb(out, width * height, d->CAM_to_RGB);
@@ -722,8 +808,11 @@ void process(dt_iop_module_t *self,
   if(pipe->want_detail_mask)
     dt_dev_write_scharr_mask(piece, out, roi_in, TRUE);
 
-  if(dual)
-    dual_demosaic(piece, out, in, roi_in, pipe->dsc.filters, xtrans, showmask, d->dual_thrs);
+  if(do_capture)
+    _capture_sharpen(self, piece, in, out, roi_in, show_capturemask);
+
+  if(dual && !show_capturemask)
+    dual_demosaic(piece, out, in, roi_in, pipe->dsc.filters, xtrans, show_dualmask, d->dual_thrs);
 
   if((float *)i != in) dt_free_align(in);
 
@@ -755,6 +844,7 @@ int process_cl(dt_iop_module_t *self,
   const gboolean run_fast = pipe->type & DT_DEV_PIXELPIPE_FAST;
   const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
   const gboolean previewpipe = pipe->type & DT_DEV_PIXELPIPE_PREVIEW;
+
   const int qual_flags = demosaic_qual_flags(piece, img, roi_out);
   const gboolean fullscale = qual_flags & DT_DEMOSAIC_FULL_SCALE;
   const gboolean is_xtrans = pipe->dsc.filters == 9u;
@@ -782,17 +872,27 @@ int process_cl(dt_iop_module_t *self,
       && demosaicing_method != DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR))
     demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
 
-  gboolean showmask = FALSE;
+  gboolean show_dualmask = FALSE;
+  gboolean show_capturemask = FALSE;
+  gboolean vng_linear = FALSE;
   if(self->dev->gui_attached && fullpipe)
   {
     if(g->dual_mask)
     {
-      showmask = TRUE;
+      show_dualmask = TRUE;
+      pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
+    }
+    if(g->cs_mask)
+    {
+      show_capturemask = TRUE;
       pipe->mask_display = DT_DEV_PIXELPIPE_DISPLAY_MASK;
     }
     // take care of passthru modes
     if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_PASSTHRU)
+    {
       demosaicing_method = is_xtrans ? DT_IOP_DEMOSAIC_VNG : DT_IOP_DEMOSAIC_VNG4;
+      vng_linear = TRUE;
+    }
   }
 
   const int devid = pipe->devid;
@@ -829,6 +929,14 @@ int process_cl(dt_iop_module_t *self,
   const gboolean direct = roi_out->width == width && roi_out->height == height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
   const int base_demosaicing_method = demosaicing_method & ~DT_DEMOSAIC_DUAL;
   const gboolean dual = (demosaicing_method & DT_DEMOSAIC_DUAL) && !run_fast && !previewpipe;
+  const gboolean passthru = demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME
+                         || demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR;
+
+  const gboolean do_capture = !passthru
+                          &&  !run_fast
+                          &&  !show_dualmask
+                          &&  !previewpipe
+                          &&  d->cs_strength;
 
   cl_mem out_image = direct ? dev_out : dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
   cl_mem in_image = dev_in;
@@ -845,9 +953,7 @@ int process_cl(dt_iop_module_t *self,
     if(err != CL_SUCCESS) goto finish;
   }
 
-  if(demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME ||
-     demosaicing_method == DT_IOP_DEMOSAIC_PPG ||
-     demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR)
+  if(passthru || demosaicing_method == DT_IOP_DEMOSAIC_PPG)
   {
     err = process_default_cl(self, piece, in_image, out_image, roi_in, demosaicing_method);
     if(err != CL_SUCCESS) return err;
@@ -859,7 +965,7 @@ int process_cl(dt_iop_module_t *self,
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_VNG4 || demosaicing_method == DT_IOP_DEMOSAIC_VNG)
   {
-    err = process_vng_cl(self, piece, in_image, out_image, roi_in, qual_flags & DT_DEMOSAIC_ONLY_VNG_LINEAR);
+    err = process_vng_cl(self, piece, in_image, out_image, roi_in, vng_linear);
     if(err != CL_SUCCESS) goto finish;
   }
   else if(base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN || base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3)
@@ -880,7 +986,13 @@ int process_cl(dt_iop_module_t *self,
     if(err != CL_SUCCESS) goto finish;
   }
 
-  if(dual)
+  if(do_capture)
+  {
+    err = _capture_sharpen_cl(self, piece, dev_in, out_image, roi_in, show_capturemask);
+    if(err != CL_SUCCESS) goto finish;
+  }
+
+  if(dual && !show_capturemask)
   {
     err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
     cl_mem low_image = dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
@@ -895,7 +1007,7 @@ int process_cl(dt_iop_module_t *self,
       if(err == CL_SUCCESS)
         err = color_smoothing_cl(self, piece, low_image, low_image, roi_in, DT_DEMOSAIC_SMOOTH_2);
       if(err == CL_SUCCESS)
-        err = dual_demosaic_cl(self, piece, cp_image, low_image, out_image, roi_in, showmask);
+        err = dual_demosaic_cl(self, piece, cp_image, low_image, out_image, roi_in, show_dualmask);
       dt_opencl_release_mem_object(cp_image);
       dt_opencl_release_mem_object(low_image);
     }
@@ -991,6 +1103,19 @@ void init_global(dt_iop_module_so_t *self)
   gd->kernel_rcd_border_redblue = dt_opencl_create_kernel(rcd, "rcd_border_redblue");
   gd->kernel_rcd_border_green = dt_opencl_create_kernel(rcd, "rcd_border_green");
   gd->kernel_write_blended_dual  = dt_opencl_create_kernel(rcd, "write_blended_dual");
+
+  const int capt = 38; // capture.cl, from programs.conf
+  gd->gaussian_9x9_mul = dt_opencl_create_kernel(capt, "kernel_9x9_mul");
+  gd->gaussian_9x9_div = dt_opencl_create_kernel(capt, "kernel_9x9_div");
+  gd->prefill_clip_mask = dt_opencl_create_kernel(capt, "prefill_clip_mask");
+  gd->prepare_blend = dt_opencl_create_kernel(capt, "prepare_blend");
+  gd->modify_blend = dt_opencl_create_kernel(capt, "modify_blend");
+  gd->show_blend_mask = dt_opencl_create_kernel(capt, "show_blend_mask");
+  gd->capture_result = dt_opencl_create_kernel(capt, "capture_result");
+
+  gd->gauss_coeffs = dt_alloc_align_float(CAPTURE_KERNEL_ALIGN * (UCHAR_MAX+1));
+  for(int i = 0; i <= UCHAR_MAX; i++)
+    _calc_9x9_gauss_coeffs(&gd->gauss_coeffs[i * CAPTURE_KERNEL_ALIGN], MAX(1e-7f, (float)i * CAPTURE_GAUSS_FRACTION));
 }
 
 void cleanup_global(dt_iop_module_so_t *self)
@@ -1046,6 +1171,14 @@ void cleanup_global(dt_iop_module_so_t *self)
   dt_opencl_free_kernel(gd->kernel_rcd_border_redblue);
   dt_opencl_free_kernel(gd->kernel_rcd_border_green);
   dt_opencl_free_kernel(gd->kernel_write_blended_dual);
+  dt_opencl_free_kernel(gd->gaussian_9x9_mul);
+  dt_opencl_free_kernel(gd->gaussian_9x9_div);
+  dt_opencl_free_kernel(gd->prefill_clip_mask);
+  dt_opencl_free_kernel(gd->prepare_blend);
+  dt_opencl_free_kernel(gd->modify_blend);
+  dt_opencl_free_kernel(gd->show_blend_mask);
+  dt_opencl_free_kernel(gd->capture_result);
+  dt_free_align(gd->gauss_coeffs);
   free(self->data);
   self->data = NULL;
   _cleanup_lmmse_gamma();
@@ -1067,7 +1200,10 @@ void commit_params(dt_iop_module_t *self,
   d->dual_thrs = p->dual_thrs;
   d->lmmse_refine = p->lmmse_refine;
   dt_iop_demosaic_method_t use_method = p->demosaicing_method;
-
+  d->cs_radius = p->cs_radius;
+  d->cs_thrs = p->cs_thrs;
+  d->cs_boost = p->cs_boost;
+  d->cs_strength = p->cs_strength;
   const gboolean xmethod = use_method & DT_DEMOSAIC_XTRANS;
   const gboolean bayer4  = self->dev->image_storage.flags & DT_IMAGE_4BAYER;
   const gboolean bayer   = self->dev->image_storage.buf_dsc.filters != 9u && !bayer4;
@@ -1154,7 +1290,6 @@ void commit_params(dt_iop_module_t *self,
       piece->process_cl_ready = FALSE;
   }
 
-
   // green-equilibrate over full image excludes tiling
   // The details mask calculation required for dual demosaicing does not allow tiling.
   if(    d->green_eq == DT_IOP_GREEN_EQ_FULL
@@ -1244,10 +1379,18 @@ void gui_changed(dt_iop_module_t *self, GtkWidget *w, void *previous)
     || use_method == DT_IOP_DEMOSAIC_PASSTHR_MONOX
     || use_method == DT_IOP_DEMOSAIC_PASSTHR_COLORX;
 
+  const gboolean capture_support = !passing && !bayer4;
+  const gboolean do_capture = capture_support && p->cs_strength;
+
   gtk_widget_set_visible(g->demosaic_method_bayer, bayer);
   gtk_widget_set_visible(g->demosaic_method_bayerfour, bayer4);
   gtk_widget_set_visible(g->demosaic_method_xtrans, xtrans);
 
+  gtk_widget_set_sensitive(g->cs_radius, do_capture);
+  gtk_widget_set_sensitive(g->cs_thrs, do_capture);
+  gtk_widget_set_sensitive(g->cs_boost, do_capture);
+  gtk_widget_set_sensitive(g->cs_strength, capture_support);
+
   // we might have a wrong method dur to xtrans/bayer - mode mismatch
   if(bayer)
     dt_bauhaus_combobox_set_from_value(g->demosaic_method_bayer, use_method);
@@ -1286,6 +1429,11 @@ void gui_changed(dt_iop_module_t *self, GtkWidget *w, void *previous)
     dt_bauhaus_widget_set_quad_active(g->dual_thrs, FALSE);
     g->dual_mask = FALSE;
   }
+  if(!w || w != g->cs_thrs)
+  {
+    dt_bauhaus_widget_set_quad_active(g->cs_thrs, FALSE);
+    g->cs_mask = FALSE;
+  }
 
   // as the dual modes change behaviour for previous pipeline modules we do a reprocess
   if(isdual && (w == g->demosaic_method_bayer || w == g->demosaic_method_xtrans))
@@ -1296,26 +1444,64 @@ void gui_update(dt_iop_module_t *self)
 {
   gui_changed(self, NULL, NULL);
   gtk_stack_set_visible_child_name(GTK_STACK(self->widget), self->default_enabled ? "raw" : "non_raw");
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  g->autoradius = FALSE;
 }
 
-static void _visualize_callback(GtkWidget *quad, dt_iop_module_t *self)
+static void _dual_quad_callback(GtkWidget *quad, dt_iop_module_t *self) // quad-button handler of the dual_thrs slider: stores the toggle state in g->dual_mask
 {
   if(darktable.gui->reset) return;
   dt_iop_demosaic_gui_data_t *g = self->gui_data;
 
   g->dual_mask = dt_bauhaus_widget_get_quad_active(quad);
+
+  dt_bauhaus_widget_set_quad_active(g->cs_thrs, FALSE); // only one mask display at a time: switch the capture sharpen quad off
+  g->cs_mask = FALSE;
+  dt_dev_reprocess_center(self->dev); // ask for a reprocess of the centre view so the new mask state takes effect
+}
+
+static void _cs_quad_callback(GtkWidget *quad, dt_iop_module_t *self) // quad-button handler of the cs_thrs slider: stores the toggle state in g->cs_mask
+{
+  if(darktable.gui->reset) return; // ignore programmatic widget changes
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  g->cs_mask = dt_bauhaus_widget_get_quad_active(quad);
+
+  dt_bauhaus_widget_set_quad_active(g->dual_thrs, FALSE); // only one mask display at a time: switch the dual demosaic quad off
+  g->dual_mask = FALSE;
   dt_dev_reprocess_center(self->dev);
 }
 
+static void _cs_autoradius_callback(GtkWidget *quad, dt_iop_module_t *self) // quad-button handler of the cs_radius slider: requests a radius calculation
+{
+  if(darktable.gui->reset) return; // ignore programmatic widget changes
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  g->autoradius = TRUE; // flag is read by _capture_sharpen() / _capture_sharpen_cl() on the full pipe
+  dt_dev_reprocess_center(self->dev); // ask for a reprocess of the centre view so the calculation actually runs
+}
+
+static void _check_autoradius(gpointer instance, dt_iop_module_t *self) // handler registered in gui_init() for DT_SIGNAL_DEVELOP_UI_PIPE_FINISHED
+{
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+  if(g && g->autoradius) // set by the capture sharpen code when it stored a newly calculated radius in the params
+  {
+    dt_iop_demosaic_params_t *p = self->params;
+    g->autoradius = FALSE; // consume the request before touching the slider
+    dt_bauhaus_slider_set_val(g->cs_radius, p->cs_radius); // show the radius that the pipe wrote into p->cs_radius
+    dt_dev_add_history_item(darktable.develop, self, TRUE); // record the changed radius as a history item
+  }
+}
+
 void gui_focus(dt_iop_module_t *self, gboolean in)
 {
   dt_iop_demosaic_gui_data_t *g = self->gui_data;
   if(!in)
   {
-    const gboolean was_dualmask = g->dual_mask;
+    const gboolean was_masking = g->dual_mask || g->cs_mask; // remember whether either mask display was active before clearing both
     dt_bauhaus_widget_set_quad_active(g->dual_thrs, FALSE);
     g->dual_mask = FALSE;
-    if(was_dualmask) dt_dev_reprocess_center(self->dev);
+    dt_bauhaus_widget_set_quad_active(g->cs_thrs, FALSE); // the capture sharpen mask display is switched off as well when focus is lost
+    g->cs_mask = FALSE;
+    if(was_masking) dt_dev_reprocess_center(self->dev); // reprocess only if a mask display was actually switched off
   }
 }
 
@@ -1342,18 +1528,17 @@ void gui_init(dt_iop_module_t *self)
   for(int i=0;i<4;i++) dt_bauhaus_combobox_remove_at(g->demosaic_method_bayerfour, 1);
   gtk_widget_set_tooltip_text(g->demosaic_method_bayerfour, _("Bayer4 sensor demosaicing methods."));
 
-  g->median_thrs = dt_bauhaus_slider_from_params(self, "median_thrs");
-  dt_bauhaus_slider_set_digits(g->median_thrs, 3);
-  gtk_widget_set_tooltip_text(g->median_thrs, _("threshold for edge-aware median.\nset to 0.0 to switch off\n"
-                                                "set to 1.0 to ignore edges"));
-
   g->dual_thrs = dt_bauhaus_slider_from_params(self, "dual_thrs");
   dt_bauhaus_slider_set_digits(g->dual_thrs, 2);
   gtk_widget_set_tooltip_text(g->dual_thrs, _("contrast threshold for dual demosaic.\nset to 0.0 for high frequency content\n"
                                                 "set to 1.0 for flat content"));
-  dt_bauhaus_widget_set_quad(g->dual_thrs, self, dtgtk_cairo_paint_showmask, TRUE, _visualize_callback,
+  dt_bauhaus_widget_set_quad(g->dual_thrs, self, dtgtk_cairo_paint_showmask, TRUE, _dual_quad_callback,
                              _("toggle mask visualization"));
 
+  g->median_thrs = dt_bauhaus_slider_from_params(self, "median_thrs");
+  dt_bauhaus_slider_set_digits(g->median_thrs, 3);
+  gtk_widget_set_tooltip_text(g->median_thrs, _("threshold for edge-aware median.\nset to 0.0 to switch off\n"
+                                                "set to 1.0 to ignore edges"));
   g->lmmse_refine = dt_bauhaus_combobox_from_params(self, "lmmse_refine");
   gtk_widget_set_tooltip_text(g->lmmse_refine, _("LMMSE refinement steps. the median steps average the output,\nrefine adds some recalculation of red & blue channels"));
 
@@ -1363,6 +1548,33 @@ void gui_init(dt_iop_module_t *self)
   g->greeneq = dt_bauhaus_combobox_from_params(self, "green_eq");
   gtk_widget_set_tooltip_text(g->greeneq, _("green channels matching method"));
 
+  g->cs_strength = dt_bauhaus_slider_from_params(self, "cs_strength");
+  gtk_widget_set_tooltip_text(g->cs_strength, _("enable capture sharpening and set effect strength based on iterations"));
+
+  g->cs_radius = dt_bauhaus_slider_from_params(self, "cs_radius");
+  dt_bauhaus_slider_set_digits(g->cs_radius, 2);
+  dt_bauhaus_slider_set_format(g->cs_radius, _(_(" px")));
+  gtk_widget_set_tooltip_text(g->cs_radius, _("capture sharpen radius should reflect the gaussian type blur by camera\n"
+                                              "sensor, possibly the anti-aliasing filter and the lens.\n"
+                                              "increasing this too far will lead to artifacts like halos\n"
+                                              "especially at sharp transitions\n"));
+  dt_bauhaus_slider_set_hard_min(g->cs_radius, 0.01f);
+  dt_bauhaus_widget_set_quad(g->cs_radius, self, dtgtk_cairo_paint_reset, FALSE, _cs_autoradius_callback,
+    _("calculate the capture sharpen radius from sensor data"));
+  g->autoradius = FALSE;
+
+  g->cs_thrs = dt_bauhaus_slider_from_params(self, "cs_thrs");
+  dt_bauhaus_slider_set_format(g->cs_thrs, "%");
+  dt_bauhaus_slider_set_digits(g->cs_thrs, 0);
+  gtk_widget_set_tooltip_text(g->cs_thrs, _("adjust the threshold to restrict capture sharpening to parts with high contrast"));
+  dt_bauhaus_widget_set_quad(g->cs_thrs, self, dtgtk_cairo_paint_showmask, TRUE, _cs_quad_callback, _("visualize the restricting mask"));
+
+  g->cs_boost = dt_bauhaus_slider_from_params(self, "cs_boost");
+  dt_bauhaus_slider_set_digits(g->cs_boost, 2);
+  dt_bauhaus_slider_set_format(g->cs_boost, _(_(" px")));
+  gtk_widget_set_tooltip_text(g->cs_boost, _("further increase radius at image corners.\n"
+                                             "the centre half of the image will not be affected"));
+
   // start building top level widget
   self->widget = gtk_stack_new();
   gtk_stack_set_homogeneous(GTK_STACK(self->widget), FALSE);
@@ -1372,6 +1584,7 @@ void gui_init(dt_iop_module_t *self)
 
   gtk_stack_add_named(GTK_STACK(self->widget), label_non_raw, "non_raw");
   gtk_stack_add_named(GTK_STACK(self->widget), box_raw, "raw");
+  DT_CONTROL_SIGNAL_HANDLE(DT_SIGNAL_DEVELOP_UI_PIPE_FINISHED, _check_autoradius);
 }
 
 // clang-format off
diff --git a/src/iop/demosaicing/capture.c b/src/iop/demosaicing/capture.c
new file mode 100644
index 000000000000..d23993676a0f
--- /dev/null
+++ b/src/iop/demosaicing/capture.c
@@ -0,0 +1,832 @@
+/*
+    This file is part of darktable,
+    Copyright (C) 2025 darktable developers.
+
+    darktable is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    darktable is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* remarks:
+    Credits go to: Ingo Weyrich (heckflosse67@gmx.de), he implemented the original algorithm for rawtherapee.
+
+   1) - The gaussian convolution filters take the coeffs from precalculated data in gd->gauss_coeffs,
+        we have CHAR_MAX kernels with a sigma step of CAPTURE_GAUSS_FRACTION.
+      - The chosen kernel is selected per pixel via an index map, this is derived from cs_radius, cs_boost
+        and distance from image centre.
+      - using the index map improves performance and allows runtime modification of the used per pixel
+        gaussian sigma.
+      - Note: this is similar to the per-tile sigma in the RT implementation.
+   2) It's currently not planned to increase the maximum sigma so we can stay with the 9x9 kernels.
+   3) Reminders and possibly left to do:
+      - halo suppression at very strong gradients?
+      - automatic noise detection or reduction?
+      - can we auto-stop? per pixel?
+      - Internal CPU code tiling for performance? tile size would be the same as for rcd
+   4) Notes
+      - If the demosaicer downscales, effects are less visible so it seems safe to reduce the
+        number of iterations for performance.
+*/
+
+#ifdef __GNUC__
+  #pragma GCC push_options
+  #pragma GCC optimize ("fast-math", "fp-contract=fast", "finite-math-only", "no-math-errno")
+#endif
+
+#define CAPTURE_KERNEL_ALIGN 32 // stride, counted in floats, between two consecutive kernels in the coefficient table
+#define CAPTURE_BLEND_EPS 0.01f // pixels whose blend mask value is not above this are left untouched
+#define CAPTURE_GAUSS_FRACTION 0.01f // sigma step between two consecutive kernel indices
+#define CAPTURE_YMIN 0.001f // luminance below this removes the pixel's neighbourhood from the blend mask
+#define CAPTURE_THRESHPOWER 0.15f // exponent applied to the thresholded local deviation in _modify_blend()
+#define CAPTURE_CFACLIP 0.9f // scale applied to the per-channel level above which a CFA value counts as clipped
+
+static inline void _calc_9x9_gauss_coeffs(float *coeffs, const float sigma) // writes 25 floats: one quadrant of a normalized, circularly truncated 9x9 gaussian
+{
+  float kernel[9][9];
+  const float range = 4.5f * 4.5f; // squared cut-off distance; taps farther away from the centre are set to zero
+  const float temp = -2.0f * sigma * sigma; // NOTE(review): becomes zero for sigma == 0, presumably callers pass sigma > 0 - confirm
+  float sum = 0.0;
+  for(int k = -4; k < 5; k++)
+  {
+    for(int j = -4; j < 5; j++)
+    {
+      const float rad = (float)(k*k + j*j); // squared distance from the kernel centre
+      if(rad <= range)
+      {
+        kernel[k + 4][j + 4] = expf(rad / temp); // exp(-r^2 / (2 sigma^2))
+        sum += kernel[k + 4][j + 4];
+      }
+      else
+        kernel[k + 4][j + 4] = 0.0f;
+    }
+  }
+
+  for(int k = 0; k < 5; k++) // only the quadrant with non-negative offsets is stored, the kernel is symmetric
+    for(int j = 0; j < 5; j++)
+      coeffs[5*k+j] = kernel[k+4][j+4] / sum; // normalized by the sum over the full 9x9 kernel; index is 5*dy + dx
+}
+
+// provide a per-pixel kernel index map (one byte per roi pixel); returns NULL if the allocation fails, the caller releases it with dt_free_align()
+static unsigned char *_cs_precalc_gauss_idx(dt_iop_module_t *self,
+                                   const dt_iop_roi_t *const roi,
+                                   const float isigma, // base sigma used where no boost applies
+                                   const float boost)  // additional sigma, scaled by the distance term below
+{
+  const dt_image_t *img = &self->dev->image_storage;
+  const int rwidth = img->p_width / 2; // half of the image dimensions, used as the centre position
+  const int rheight = img->p_height / 2;
+  const float mdim = MIN(rwidth, rheight); // distances are expressed relative to the smaller half-dimension
+  const int width = roi->width;
+  const int height = roi->height;
+  const int dy = roi->y;
+  const int dx = roi->x;
+  unsigned char *table = dt_alloc_aligned((size_t)height * width);
+  if(!table) return NULL;
+
+  DT_OMP_FOR()
+  for(int row = 0; row < height; row++)
+  {
+    const float frow = row + dy - rheight; // roi row offset by roi->y, relative to the centre
+    for(int col = 0; col < width; col++)
+    {
+      const float fcol = col + dx - rwidth;
+      const float sc = sqrtf(frow * frow + fcol * fcol) / mdim;
+      const float corr = MAX(0.0f, sc - 0.5f); // no boost while the relative distance is at most 0.5
+      float sigma = isigma + boost * powf(corr, 1.5f);
+
+      // special care for the image borders
+      if(col < 8)               sigma *= (float)col * 0.125f; // sigma is scaled down in eighths near an edge; only the first matching edge applies
+      else if(row < 8)          sigma *= (float)row * 0.125f;
+      else if(col > width - 8)  sigma *= (float)(width - col) * 0.125f;
+      else if(row > height - 8) sigma *= (float)(height - row) * 0.125f;
+
+      const int kern = sigma / CAPTURE_GAUSS_FRACTION; // truncates to the kernel index for this sigma
+      table[row * width + col] = CLAMP(kern, 0, UCHAR_MAX); // limited to what one byte can hold
+    }
+  }
+  return table;
+}
+
+#define RAWEPS 0.005f // CFA values not above this are ignored by the radius estimation
+static float _calcRadiusBayer(const float *in, // single-channel CFA data, 'width' floats per row
+                              const int width,
+                              const int height,
+                              const float lowerLimit, // the larger value of a pair must exceed this to be considered
+                              const float upperLimit, // neighbours at or above this mark the pair as clipped
+                              const uint32_t filters)
+{
+  const unsigned int fc[2] = {FC(0, 0, filters), FC(1, 0, filters)}; // CFA colour of the first pixel in even and odd rows
+  float maxRatio = 1.f;
+  DT_OMP_FOR(reduction(max: maxRatio))
+  for(int row = 4; row < height - 4; ++row)
+  {
+    for(int col = 5 + (fc[row & 1] & 1); col < width - 4; col += 2) // every second pixel; start parity depends on the row's first CFA colour
+    {
+      const float *cfa = in + row*width + col;
+      const float val00 = cfa[0];
+      if(val00 > RAWEPS)
+      {
+        const float val1m1 = cfa[width-1]; // neighbour one row down, one column left
+        const float val1p1 = cfa[width+1]; // neighbour one row down, one column right
+        const float maxVal0 = MAX(val00, val1m1);
+        if(val1m1 > RAWEPS && maxVal0 > lowerLimit)
+        {
+          const float minVal = MIN(val00, val1m1);
+          if(maxVal0 > maxRatio * minVal) // ratio of this pair exceeds the current maximum
+          {
+            gboolean clipped = FALSE;
+            if(maxVal0 == val00)
+            { // check for influence by clipped green in neighborhood
+              if(MAX(MAX(cfa[-width-1], cfa[-width+1]), val1p1) >= upperLimit)
+                clipped = TRUE;
+            }
+            else
+            { // check for influence by clipped green in neighborhood
+              if(MAX(MAX(MAX(cfa[-2], val00), cfa[2*width-2]), cfa[2*width]) >= upperLimit)
+                clipped = TRUE;
+            }
+            if(!clipped)
+              maxRatio = maxVal0 / minVal;
+          }
+        }
+
+        const float maxVal1 = MAX(val00, val1p1);
+        if(val1p1 > RAWEPS && maxVal1 > lowerLimit)
+        {
+          const float minVal = MIN(val00, val1p1);
+          if(maxVal1 > maxRatio * minVal)
+          {
+            if(maxVal1 == val00)
+            { // check for influence by clipped green in neighborhood
+              if(MAX(MAX(cfa[-width-1], cfa[-width+1]), val1p1) >= upperLimit)
+                continue; // clipped: nothing else is tested for this pixel
+             }
+            else
+            { // check for influence by clipped green in neighborhood
+              if(MAX(MAX(MAX(val00, cfa[2]), cfa[2*width]), cfa[2*width+2]) >= upperLimit)
+                continue; // clipped: nothing else is tested for this pixel
+             }
+            maxRatio = maxVal1 / minVal;
+          }
+        }
+      }
+    }
+  }
+  return sqrtf(1.0f / logf(maxRatio)); // NOTE(review): maxRatio is still 1 if no pair qualified, logf(1) is 0 and the result is not finite - callers do not check
+}
+
+static float _calcRadiusXtrans(const float *in, // single-channel CFA data, roi->width floats per row
+                               const float lowerLimit, // the larger value of a pair must exceed this to be considered
+                               const float upperLimit, // values at or above this are treated as clipped
+                               const dt_iop_roi_t *const roi,
+                               const uint8_t(*const xtrans)[6])
+{
+  const int width = roi->width;
+  const int height = roi->height;
+
+  int startx, starty;
+  gboolean found = FALSE;
+  for(starty = 6; starty < 12 && !found; starty++) // search rows/columns 6..11 for a reference position in the CFA pattern
+  {
+    for(startx = 6; startx < 12 && !found; startx++)
+    {
+      if(FCxtrans(starty, startx, roi, xtrans) == 1) // colour 1 at the candidate position
+      {
+        if(FCxtrans(starty, startx - 1, roi, xtrans) != FCxtrans(starty, startx + 1, roi, xtrans)) // left and right neighbours differ
+        {
+          if(FCxtrans(starty -1, startx, roi, xtrans) != 1) // upper neighbour is not colour 1
+          {
+            if(FCxtrans(starty, startx -1, roi, xtrans) != 1) // left neighbour is not colour 1
+            {
+              found = TRUE;
+              break; // NOTE(review): the outer loop's starty++ still runs before !found is tested, so starty ends one past the matching row - confirm intended
+            }
+          }
+        }
+      }
+    }
+  }
+
+  float maxRatio = 1.0f;
+  DT_OMP_FOR(reduction(max: maxRatio))
+  for(int row = starty + 2; row < height - 4; row += 3) // step through the image in 3x3 steps from the reference position
+  {
+    for(int col = startx + 2; col < width - 4; col += 3)
+    {
+      const float *cfa = in + row*width + col;
+      const float valp1p1 = cfa[width+1];
+      const gboolean squareClipped = MAX(MAX(MAX(valp1p1, cfa[width+2]), cfa[2*width+1]), cfa[2*width+2]) >= upperLimit; // any of the 2x2 values below/right clipped
+      const float greenSolitary = cfa[0];
+      if(greenSolitary > RAWEPS && MAX(cfa[-width-1], cfa[-width+1]) < upperLimit)
+      {
+        if(greenSolitary < upperLimit)
+        {
+          const float valp1m1 = cfa[width-1];
+          if(valp1m1 > RAWEPS && MAX(MAX(MAX(cfa[width-2], valp1m1), cfa[2*width-2]), cfa[width-1]) < upperLimit) // NOTE(review): cfa[width-1] is valp1m1, so it is tested twice - confirm the intended offset
+          {
+            const float maxVal = MAX(greenSolitary, valp1m1);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(greenSolitary, valp1m1);
+              if(maxVal > maxRatio * minVal)
+                maxRatio = maxVal / minVal;
+            }
+          }
+          if(valp1p1 > RAWEPS && !squareClipped)
+          {
+            const float maxVal = MAX(greenSolitary, valp1p1);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(greenSolitary, valp1p1);
+              if(maxVal > maxRatio * minVal)
+                maxRatio = maxVal / minVal;
+            }
+          }
+        }
+      }
+
+      if(!squareClipped) // the remaining pairs all involve values of the 2x2 square
+      {
+        const float valp2p2 = cfa[2*width+2];
+        if(valp2p2 > RAWEPS)
+        {
+          if(valp1p1 > RAWEPS)
+          {
+            const float maxVal = MAX(valp1p1, valp2p2);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(valp1p1, valp2p2);
+              if(maxVal > maxRatio * minVal)
+                 maxRatio = maxVal / minVal;
+            }
+          }
+          const float greenSolitaryRight = cfa[3*width+3];
+          if(MAX(MAX(greenSolitaryRight, cfa[4*width+2]), cfa[4*width+4]) < upperLimit)
+          {
+            if(greenSolitaryRight > RAWEPS)
+            {
+              const float maxVal = MAX(greenSolitaryRight, valp2p2);
+              if(maxVal > lowerLimit)
+              {
+                const float minVal = MIN(greenSolitaryRight, valp2p2);
+                if(maxVal > maxRatio * minVal)
+                  maxRatio = maxVal / minVal;
+              }
+            }
+          }
+        }
+        const float valp1p2 = cfa[width+2];
+        const float valp2p1 = cfa[2*width+1];
+        if(valp2p1 > RAWEPS)
+        {
+          if(valp1p2 > RAWEPS)
+          {
+            const float maxVal = MAX(valp1p2, valp2p1);
+            if(maxVal > lowerLimit)
+            {
+              const float minVal = MIN(valp1p2, valp2p1);
+              if(maxVal > maxRatio * minVal)
+                maxRatio = maxVal / minVal;
+            }
+          }
+          const float greenSolitaryLeft = cfa[3*width];
+          if(MAX(MAX(greenSolitaryLeft, cfa[4*width-1]), cfa[4*width+1]) < upperLimit)
+          {
+            if(greenSolitaryLeft > RAWEPS)
+            {
+              const float maxVal = MAX(greenSolitaryLeft, valp2p1);
+              if(maxVal > lowerLimit)
+              {
+                const float minVal = MIN(greenSolitaryLeft, valp2p1);
+                if(maxVal > maxRatio * minVal)
+                  maxRatio = maxVal / minVal;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return sqrtf(1.0f / logf(maxRatio)); // NOTE(review): maxRatio is still 1 if no pair qualified, logf(1) is 0 and the result is not finite - callers do not check
+}
+#undef RAWEPS
+
+DT_OMP_DECLARE_SIMD(aligned(in, out, kernels:64))
+static inline void _blur_mul(const float *const in, // buffer that gets convolved
+                             float *out,            // multiplied in place by the convolution result
+                             const float *blend,    // only pixels with blend > CAPTURE_BLEND_EPS are processed
+                             const float *const kernels, // table of 5x5 quadrant coefficients, CAPTURE_KERNEL_ALIGN floats apart
+                             const unsigned char *const table, // per-pixel kernel index
+                             const int w1,          // row length in floats
+                             const int height)
+{
+  const int w2 = 2 * w1; // row offsets for 2, 3 and 4 rows
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+
+  DT_OMP_FOR()
+  for(int row = 0; row < height; row++)
+  {
+    for(int col = 0; col < w1; col++)
+    {
+      const size_t i = (size_t)row * w1 + col;
+      if(blend[i] > CAPTURE_BLEND_EPS)
+      {
+        const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i]; // coefficients are addressed as kern[5*|dy| + |dx|]
+        float val = 0.0f;
+        if(col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4) // full 9x9 window available: unrolled sum grouping taps that share a coefficient
+        {
+          const float *d = in + i;
+          val =
+              kern[10+4] * (d[-w4-2] + d[-w4+2] + d[-w2-4] + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+              kern[5 +4] * (d[-w4-1] + d[-w4+1] + d[-w1-4] + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+              kern[4]    * (d[-w4  ] + d[   -4] + d[    4] + d[ w4  ]) +
+              kern[15+3] * (d[-w3-3] + d[-w3+3] + d[ w3-3] + d[ w3+3]) +
+              kern[10+3] * (d[-w3-2] + d[-w3+2] + d[-w2-3] + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+              kern[ 5+3] * (d[-w3-1] + d[-w3+1] + d[-w1-3] + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+              kern[   3] * (d[-w3  ] + d[   -3] + d[    3] + d[ w3  ]) +
+              kern[10+2] * (d[-w2-2] + d[-w2+2] + d[ w2-2] + d[ w2+2]) +
+              kern[ 5+2] * (d[-w2-1] + d[-w2+1] + d[-w1-2] + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+              kern[   2] * (d[-w2  ] + d[   -2] + d[    2] + d[ w2  ]) +
+              kern[ 5+1] * (d[-w1-1] + d[-w1+1] + d[ w1-1] + d[ w1+1]) +
+              kern[   1] * (d[-w1  ] + d[   -1] + d[    1] + d[ w1  ]) +
+              kern[   0] * (d[0]); // taps at offsets (3,4), (4,3) and (4,4) are not part of the unrolled sum
+        }
+        else
+        {
+          for(int ir = -4; ir <= 4; ir++) // near the border: generic loop, taps outside the buffer are skipped without renormalizing
+          {
+            const int irow = row+ir;
+            if(irow >= 0 && irow < height)
+            {
+              for(int ic = -4; ic <= 4; ic++)
+              {
+                const int icol = col+ic;
+                if(icol >=0 && icol < w1)
+                  val += kern[5 * ABS(ir) + ABS(ic)] * in[(size_t)irow * w1 + icol];
+              }
+            }
+          }
+        }
+        out[i] *= val;
+      }
+      // if blend value is too low we don't have to copy data as we also didn't in _blur_div
+      // and we just keep the original
+    }
+  }
+}
+
+DT_OMP_DECLARE_SIMD(aligned(in, out, divbuff, kernels :64))
+static inline void _blur_div(const float *const in, // buffer that gets convolved
+                             float *out,            // receives divbuff / convolution result
+                             const float *const divbuff, // numerator of the division
+                             const float *blend,    // only pixels with blend > CAPTURE_BLEND_EPS are processed
+                             const float *const kernels, // table of 5x5 quadrant coefficients, CAPTURE_KERNEL_ALIGN floats apart
+                             const unsigned char *const table, // per-pixel kernel index
+                             const int w1,          // row length in floats
+                             const int height)
+{
+  const int w2 = 2 * w1; // row offsets for 2, 3 and 4 rows
+  const int w3 = 3 * w1;
+  const int w4 = 4 * w1;
+
+  DT_OMP_FOR()
+  for(int row = 0; row < height; row++)
+  {
+    for(int col = 0; col < w1; col++)
+    {
+      const size_t i = (size_t)row * w1 + col;
+      if(blend[i] > CAPTURE_BLEND_EPS) // other pixels of 'out' are not written at all
+      {
+        const float *kern = kernels + CAPTURE_KERNEL_ALIGN * table[i]; // coefficients are addressed as kern[5*|dy| + |dx|]
+        float val = 0.0f;
+        if(col >= 4 && row >= 4 && col < w1 - 4 && row < height - 4) // full 9x9 window available: unrolled sum grouping taps that share a coefficient
+        {
+          const float *d = in + i;
+          val =
+              kern[10+4] * (d[-w4-2] + d[-w4+2] + d[-w2-4] + d[-w2+4] + d[w2-4] + d[w2+4] + d[w4-2] + d[w4+2]) +
+              kern[5 +4] * (d[-w4-1] + d[-w4+1] + d[-w1-4] + d[-w1+4] + d[w1-4] + d[w1+4] + d[w4-1] + d[w4+1]) +
+              kern[4]    * (d[-w4  ] + d[   -4] + d[    4] + d[ w4  ]) +
+              kern[15+3] * (d[-w3-3] + d[-w3+3] + d[ w3-3] + d[ w3+3]) +
+              kern[10+3] * (d[-w3-2] + d[-w3+2] + d[-w2-3] + d[-w2+3] + d[w2-3] + d[w2+3] + d[w3-2] + d[w3+2]) +
+              kern[ 5+3] * (d[-w3-1] + d[-w3+1] + d[-w1-3] + d[-w1+3] + d[w1-3] + d[w1+3] + d[w3-1] + d[w3+1]) +
+              kern[   3] * (d[-w3  ] + d[   -3] + d[    3] + d[ w3  ]) +
+              kern[10+2] * (d[-w2-2] + d[-w2+2] + d[ w2-2] + d[ w2+2]) +
+              kern[ 5+2] * (d[-w2-1] + d[-w2+1] + d[-w1-2] + d[-w1+2] + d[w1-2] + d[w1+2] + d[w2-1] + d[w2+1]) +
+              kern[   2] * (d[-w2  ] + d[   -2] + d[    2] + d[ w2  ]) +
+              kern[ 5+1] * (d[-w1-1] + d[-w1+1] + d[ w1-1] + d[ w1+1]) +
+              kern[   1] * (d[-w1  ] + d[   -1] + d[    1] + d[ w1  ]) +
+              kern[   0] * (d[0]); // taps at offsets (3,4), (4,3) and (4,4) are not part of the unrolled sum
+        }
+        else
+        {
+          for(int ir = -4; ir <= 4; ir++) // near the border: generic loop, taps outside the buffer are skipped without renormalizing
+          {
+            const int irow = row+ir;
+            if(irow >= 0 && irow < height)
+            {
+              for(int ic = -4; ic <= 4; ic++)
+              {
+                const int icol = col+ic;
+                if(icol >=0 && icol < w1)
+                  val += kern[5 * ABS(ir) + ABS(ic)] * in[(size_t)irow * w1 + icol];
+              }
+            }
+          }
+        }
+        out[i] = divbuff[i] / MAX(val, 0.00001f); // the lower bound on the divisor avoids a division by zero
+      }
+    }
+  }
+}
+
+static void _prepare_blend(const float *cfa, // single-channel CFA data, one float per pixel
+                           const float *rgb, // demosaiced data, four floats per pixel
+                           const uint32_t filters,
+                           const uint8_t (*const xtrans)[6],
+                           const dt_iop_roi_t *const roi,
+                           float *mask, // out: 1.0 where sharpening is allowed, 0.0 around clipped/dark pixels and at the border
+                           float *Yold, // out: luminance of every pixel
+                           const float *whites, // per-colour CFA level above which a pixel counts as clipped
+                           const int w1, // row length in pixels
+                           const int height)
+{
+  dt_iop_image_fill(mask, 1.0f, w1, height, 1); // start with everything enabled
+  const int w2 = 2 * w1;
+  DT_OMP_FOR(collapse(2))
+  for(size_t row = 0; row < height; row++)
+  {
+    for(size_t col = 0; col < w1; col++)
+    {
+      const size_t k = row * w1 + col;
+      // Photometric/digital ITU BT.709
+      const float Y = MAX(0.0f, 0.2126f*rgb[k*4] + 0.7152f*rgb[k*4+1] + 0.0722f*rgb[k*4+2]);
+      Yold[k] = Y;
+      if(row > 1 && col > 1 && row < height-2 && col < w1-2) // at least two pixels away from every edge
+      {
+        const int color = (filters == 9u) ? FCxtrans(row, col, roi, xtrans) : FC(row, col, filters); // filters == 9u selects the xtrans lookup
+        if(cfa[k] > whites[color] || Y < CAPTURE_YMIN)
+        {
+          mask[k-w2-1] = mask[k-w2]   = mask[k-w2+1] = // clear a 5x5 neighbourhood without its four corners (21 pixels)
+          mask[k-w1-2] = mask[k-w1-1] = mask[k-w1]   = mask[k-w1+1] = mask[k-w1+2] =
+          mask[k-2]    = mask[k-1]    = mask[k]      = mask[k+1]    = mask[k+2] =
+          mask[k+w1-2] = mask[k+w1-1] = mask[k+w1]   = mask[k+w1+1] = mask[k+w1+2] =
+          mask[k+w2-1] = mask[k+w2]   = mask[k+w2+1] = 0.0f; // NOTE(review): writes entries handled by other loop iterations; every store inside the loop is 0.0f
+        }
+      }
+      else
+        mask[k] = 0.0f; // the two outermost rows/columns are never sharpened
+    }
+  }
+}
+
+static void _modify_blend(float *blend, // in/out: mask, multiplied by a weight derived from the local luminance deviation
+                          float *Yold,  // luminance input
+                          float *luminance, // out: copy of Yold
+                          const float threshold, // subtracted from the scaled deviation before the power function
+                          const int width,
+                          const int height)
+{
+  DT_OMP_FOR()
+  for(int irow = 0; irow < height; irow++)
+  {
+    const int row = CLAMP(irow, 2, height-3); // window centre kept two pixels inside; NOTE(review): assumes height >= 5 - confirm
+    for(int icol = 0; icol < width; icol++)
+    {
+      const int col = CLAMP(icol, 2, width-3); // NOTE(review): assumes width >= 5 - confirm
+      const size_t k = (size_t)irow * width + icol; // the result is still written at the unclamped position
+      float av = 0.0f;
+      for(int y = row-1; y < row+2; y++) // three middle rows, five pixels each
+      {
+        for(int x = col-2; x < col+3; x++)
+          av += Yold[(size_t)y*width + x];
+      }
+      for(int x = col-1; x < col+2; x++) // top and bottom row, three pixels each
+      {
+        av += Yold[(size_t)(row-2)*width + x];
+        av += Yold[(size_t)(row+2)*width + x];
+      }
+      av /= 21.0f; // mean over the 21 pixel window
+
+      float sv = 0.0f;
+      for(int y = row-1; y < row+2; y++) // sum of squared differences to the mean over the same window
+      {
+        for(int x = col-2; x < col+3; x++)
+          sv += sqrf(Yold[(size_t)y*width + x] - av);
+      }
+      for(int x = col-1; x < col+2; x++)
+      {
+        sv += sqrf(Yold[(size_t)(row-2)*width + x] - av);
+        sv += sqrf(Yold[(size_t)(row+2)*width + x] - av);
+      }
+      sv = powf(MAX(0.0f, 5.0f * sqrtf(sv / 21.f) - threshold), CAPTURE_THRESHPOWER); // zero where the scaled deviation does not exceed the threshold
+      blend[k] *= CLIP(sv);
+      luminance[k] = Yold[k];
+    }
+  }
+}
+
+static inline float _get_threshold(const float threshold) // maps the cs_thrs parameter to the value subtracted in _modify_blend()
+{
+  return 0.06f * sqrf(threshold); // quadratic mapping
+}
+
+void _capture_sharpen(dt_iop_module_t *self, // CPU path of capture sharpening: modifies 'out' in place
+                      dt_dev_pixelpipe_iop_t *piece,
+                      float *in,  // single-channel CFA data, used for clip detection and radius calculation
+                      float *out, // demosaiced data, four floats per pixel
+                      const dt_iop_roi_t *const roi,
+                      const gboolean showmask) // if set, only write the blend mask into the fourth channel of 'out'
+{
+  dt_dev_pixelpipe_t *pipe = piece->pipe;
+
+  const size_t width = roi->width;
+  const size_t height = roi->height;
+  const size_t pixels = width * height;
+  const dt_iop_demosaic_data_t *d = piece->data;
+  const dt_iop_demosaic_global_data_t *gd = self->global_data;
+  dt_iop_demosaic_gui_data_t *g = self->gui_data; // may be NULL, every use below checks for that
+
+  if(pipe->type & DT_DEV_PIXELPIPE_THUMBNAIL)
+  {
+    const gboolean hqthumb = _get_thumb_quality(pipe->final_width, pipe->final_height);
+    if(!hqthumb) return; // thumbnails are only sharpened if _get_thumb_quality() says so
+  }
+
+  const int iterations = powf((float)d->cs_strength, 1.3f); // truncated to an integer iteration count
+  if(iterations < 1 && !showmask) return;
+
+  const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])pipe->dsc.xtrans;
+  const uint32_t filters = pipe->dsc.filters;
+  const dt_iop_buffer_dsc_t *dsc = &pipe->dsc;
+  const gboolean wbon = dsc->temperature.enabled;
+  const dt_aligned_pixel_t icoeffs = { wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[0] : CAPTURE_CFACLIP, // per-colour clip levels passed to _prepare_blend() as 'whites'
+                                       wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[1] : CAPTURE_CFACLIP,
+                                       wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[2] : CAPTURE_CFACLIP,
+                                       0.0f };
+  const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
+  const gboolean autoradius = fullpipe && g && g->autoradius; // a GUI request is only honoured on the full pipe
+  const float old_radius = d->cs_radius;
+  float radius = old_radius;
+  if(autoradius || radius < 0.005f) // calculate the radius on request or if the committed radius is practically zero
+  {
+    radius = filters != 9u
+              ? _calcRadiusBayer(in, width, height, 0.01f, 1.0f, filters)
+              : _calcRadiusXtrans(in, 0.01f, 1.0f, roi, xtrans);
+
+    dt_print_pipe(DT_DEBUG_PIPE, filters != 9u ? "bayer autoradius" : "xtrans autoradius",
+      pipe, self, DT_DEVICE_CPU, roi, NULL, "autoradius=%.2f", radius);
+
+    if(!feqf(radius, old_radius, 0.002f))
+    {
+      if(fullpipe)
+      {
+        if(g)
+        {
+          dt_control_log(_("calculated radius: %.2f"), radius);
+          g->autoradius = TRUE; // tells _check_autoradius() to update the slider from the params
+        }
+        dt_iop_demosaic_params_t *p = self->params;
+        p->cs_radius = radius; // the calculated radius is written directly into the module params
+      }
+    }
+    else if(g) g->autoradius = FALSE; // radius unchanged: nothing to update in the GUI
+  }
+
+  unsigned char *gauss_idx = NULL;
+  gboolean error = TRUE; // cleared only on the paths that completed
+
+  float *luminance = dt_alloc_align_float(pixels);
+  float *tmp2 = dt_alloc_align_float(pixels);
+  float *tmp1 = dt_alloc_align_float(pixels);
+  float *blendmask = dt_alloc_align_float(pixels);
+  if(!luminance || !tmp2 || !tmp1 || !blendmask)
+    goto finalize;
+
+  const float threshold = _get_threshold(d->cs_thrs);
+
+  // tmp2 will hold the temporary clipmask, tmp1 holds Y data
+  _prepare_blend(in, out, filters, xtrans, roi, tmp2, tmp1, icoeffs, width, height);
+  // modify clipmask in tmp2 according to Y variance, also write L to luminance
+  _modify_blend(tmp2, tmp1, luminance, threshold, width, height);
+  dt_gaussian_fast_blur(tmp2, blendmask, width, height, 2.0f, 0.0f, 1.0f, 1); // blurred version of the mask in tmp2 goes to blendmask
+
+  if(showmask)
+  {
+    DT_OMP_FOR()
+    for(size_t k = 0; k < pixels*4; k +=4)
+    {
+      const float blend = blendmask[k/4];
+      out[k+3] = blend < CAPTURE_BLEND_EPS ? 0.0f : blend; // mask goes into the fourth channel, the first three stay untouched
+    }
+    error = FALSE;
+    goto finalize;
+  }
+
+  gauss_idx = _cs_precalc_gauss_idx(self, roi, radius, d->cs_boost);
+  if(!gauss_idx) goto finalize;
+
+  for(int iter = 0; iter < iterations && !dt_pipe_shutdown(pipe); iter++) // stops early if dt_pipe_shutdown() reports a shutdown
+  {
+    _blur_div(tmp1, tmp2, luminance, blendmask, gd->gauss_coeffs, gauss_idx, width, height); // tmp2 = luminance / blur(tmp1)
+    _blur_mul(tmp2, tmp1, blendmask, gd->gauss_coeffs, gauss_idx, width, height); // tmp1 *= blur(tmp2)
+  }
+
+  DT_OMP_FOR()
+  for(size_t k = 0; k < (size_t)width * height; k++)
+  {
+    if(blendmask[k] > CAPTURE_BLEND_EPS)
+    {
+      const float luminance_new = interpolatef(blendmask[k], tmp1[k], luminance[k]); // mix of sharpened and original luminance controlled by the mask
+      const float factor = luminance_new / MAX(luminance[k], 0.00001f);
+      for_three_channels(c) out[k*4+c] *= factor; // scale the colour channels by the luminance ratio
+    }
+  }
+
+  error = FALSE;
+
+  finalize:
+  if(error) // reached with error still set only after a failed allocation
+    dt_print_pipe(DT_DEBUG_ALWAYS, "capture sharpen failed", pipe, self, DT_DEVICE_CPU, NULL, NULL,
+      "unable to allocate memory");
+
+  dt_free_align(gauss_idx);
+  dt_free_align(tmp2);
+  dt_free_align(tmp1);
+  dt_free_align(luminance);
+  dt_free_align(blendmask);
+}
+
+// revert aggressive optimizing
+#ifdef __GNUC__
+  #pragma GCC pop_options
+#endif
+
+#if HAVE_OPENCL
+
+int _capture_sharpen_cl(dt_iop_module_t *self,
+                        dt_dev_pixelpipe_iop_t *piece,
+                        cl_mem dev_in,
+                        cl_mem dev_out,
+                        const dt_iop_roi_t *const roi,
+                        const gboolean showmask)
+{
+  dt_dev_pixelpipe_t *pipe = piece->pipe;
+
+  const int width = roi->width;
+  const int height = roi->height;
+  const int bsize = sizeof(float) * width * height;
+  const int devid = piece->pipe->devid;
+
+  const dt_iop_demosaic_data_t *d = piece->data;
+  dt_iop_demosaic_global_data_t *const gd = self->global_data;
+  dt_iop_demosaic_gui_data_t *g = self->gui_data;
+
+  if(pipe->type & DT_DEV_PIXELPIPE_THUMBNAIL)
+  {
+    const gboolean hqthumb = _get_thumb_quality(pipe->final_width, pipe->final_height);
+    if(!hqthumb) return CL_SUCCESS;
+  }
+
+  const int iterations = powf((float)d->cs_strength, 1.3f);
+  if(iterations < 1 && !showmask) return CL_SUCCESS;
+
+  const uint32_t filters = pipe->dsc.filters;
+  const dt_iop_buffer_dsc_t *dsc = &pipe->dsc;
+  const gboolean wbon = dsc->temperature.enabled;
+  dt_aligned_pixel_t icoeffs = { wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[0] : CAPTURE_CFACLIP,
+                                 wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[1] : CAPTURE_CFACLIP,
+                                 wbon ? CAPTURE_CFACLIP * dsc->temperature.coeffs[2] : CAPTURE_CFACLIP,
+                                 0.0f };
+
+  const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
+  const gboolean autoradius = fullpipe && g && g->autoradius;
+  const float old_radius = d->cs_radius;
+  float radius = old_radius;
+  if(autoradius || radius < 0.005f)
+  {
+    float *in = dt_alloc_align_float((size_t)width * height);
+    if(in)
+    {
+      if(dt_opencl_copy_device_to_host(devid, in, dev_in, width, height, sizeof(float)) == CL_SUCCESS)
+      {
+        radius = filters != 9u
+                ? _calcRadiusBayer(in, width, height, 0.01f, 1.0f, filters)
+                : _calcRadiusXtrans(in, 0.01f, 1.0f, roi, (const uint8_t(*const)[6])pipe->dsc.xtrans);
+        dt_print_pipe(DT_DEBUG_PIPE, filters != 9u ? "bayer autoradius" : "xtrans autoradius",
+            pipe, self, devid, roi, NULL, "autoradius=%.2f", radius);
+
+        if(!feqf(radius, old_radius, 0.002f))
+        {
+          if(fullpipe)
+          {
+            if(g)
+            {
+              dt_control_log(_("calculated radius: %.2f"), radius);
+              g->autoradius = TRUE;
+            }
+            dt_iop_demosaic_params_t *p = self->params;
+            p->cs_radius = radius;
+          }
+        }
+        else if(g) g->autoradius = FALSE;
+      }
+      dt_free_align(in);
+    }
+  }
+
+  const float threshold = _get_threshold(d->cs_thrs);
+
+  cl_mem gcoeffs = NULL;
+  cl_mem gauss_idx = NULL;
+
+  cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+  cl_mem blendmask = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem luminance = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem tmp2 = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem tmp1 = dt_opencl_alloc_device_buffer(devid, bsize);
+  cl_mem xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(pipe->dsc.xtrans), pipe->dsc.xtrans);
+  cl_mem whites = dt_opencl_copy_host_to_device_constant(devid, 4 * sizeof(float), icoeffs);
+  cl_mem dev_rgb = dt_opencl_duplicate_image(devid, dev_out);
+
+  if(!blendmask || !luminance || !tmp2 || !tmp1 || !xtrans || !whites || !dev_rgb) goto finish;
+
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->prefill_clip_mask, width, height,
+          CLARG(tmp2), CLARG(width), CLARG(height));
+  if(err != CL_SUCCESS) goto finish;
+
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->prepare_blend, width, height,
+          CLARG(dev_in), CLARG(dev_out), CLARG(filters), CLARG(xtrans), CLARG(tmp2), CLARG(tmp1),
+          CLARG(whites), CLARG(width), CLARG(height));
+  if(err != CL_SUCCESS) goto finish;
+
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->modify_blend, width, height,
+          CLARG(tmp2), CLARG(tmp1), CLARG(luminance), CLARG(threshold), CLARG(width), CLARG(height));
+  if(err != CL_SUCCESS) goto finish;
+
+  err = dt_gaussian_fast_blur_cl_buffer(devid, tmp2, blendmask, width, height, 2.0f, 1, 0.0f, 1.0f);
+  if(err != CL_SUCCESS) goto finish;
+
+  if(showmask)
+  {
+    err = dt_opencl_enqueue_kernel_2d_args(devid, gd->show_blend_mask, width, height,
+          CLARG(dev_rgb), CLARG(dev_out), CLARG(blendmask), CLARG(width), CLARG(height));
+    goto finish;
+  }
+
+  unsigned char *f_gauss_idx = _cs_precalc_gauss_idx(self, roi, radius, d->cs_boost);
+  if(f_gauss_idx)
+  {
+    gcoeffs = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * (UCHAR_MAX+1) * CAPTURE_KERNEL_ALIGN, gd->gauss_coeffs);
+    gauss_idx = dt_opencl_copy_host_to_device_constant(devid, sizeof(unsigned char) * height * width, f_gauss_idx);
+  }
+  dt_free_align(f_gauss_idx);
+
+  err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+  if(!gcoeffs || !gauss_idx) goto finish;
+
+  for(int iter = 0; iter < iterations && !dt_pipe_shutdown(pipe); iter++)
+  {
+    err = dt_opencl_enqueue_kernel_2d_args(devid, gd->gaussian_9x9_div, width, height,
+      CLARG(tmp1), CLARG(tmp2), CLARG(luminance), CLARG(blendmask),
+      CLARG(gcoeffs), CLARG(gauss_idx), CLARG(width), CLARG(height));
+    if(err != CL_SUCCESS) goto finish;
+
+    err = dt_opencl_enqueue_kernel_2d_args(devid, gd->gaussian_9x9_mul, width, height,
+      CLARG(tmp2), CLARG(tmp1), CLARG(blendmask),
+      CLARG(gcoeffs), CLARG(gauss_idx), CLARG(width), CLARG(height));
+    if(err != CL_SUCCESS) goto finish;
+  }
+
+  err = dt_opencl_enqueue_kernel_2d_args(devid, gd->capture_result, width, height,
+    CLARG(dev_rgb), CLARG(dev_out), CLARG(blendmask), CLARG(luminance), CLARG(tmp1),
+    CLARG(width), CLARG(height));
+
+  finish:
+  if(err != CL_SUCCESS)
+    dt_print_pipe(DT_DEBUG_ALWAYS, "capture sharpen failed",
+      pipe, self, devid, NULL, NULL,
+      "Error: %s", cl_errstr(err));
+
+  dt_opencl_release_mem_object(gauss_idx);
+  dt_opencl_release_mem_object(gcoeffs);
+  dt_opencl_release_mem_object(blendmask);
+  dt_opencl_release_mem_object(dev_rgb);
+  dt_opencl_release_mem_object(tmp2);
+  dt_opencl_release_mem_object(tmp1);
+  dt_opencl_release_mem_object(luminance);
+  dt_opencl_release_mem_object(xtrans);
+  dt_opencl_release_mem_object(whites);
+
+  return err;
+}
+#endif // OpenCL

From 0859bb47c01799d373c594b9ff80eb16c0daa41a Mon Sep 17 00:00:00 2001
From: Hanno Schwalm <hanno@schwalm-bremen.de>
Date: Thu, 19 Jun 2025 16:35:31 +0200
Subject: [PATCH 2/3] Some demosaicer maintenance

1. Remove code that hinted we could snap to other locations than upper/left corner of sensor pattern.
   Not a good idea at all due to what we provide in rawprepare.
2. Remove special snapper cases for passthru modes to avoid exposing jumps when changing demosaicers
   and it's simply not worth trying to do so.
3. Amaze tiling should happen with a slightly larger overlap
4. As we demosaic at a snap position in all cases the color smoothing and green averaging for bayer
   sensors don't require roi x/yshifts.
5. If we use the VNG demosaicer for some good reason like maze patterns we will very likely want good
   quality output as from all other demosaicers for details mask or capture sharpening so let's always
   use the "quality phase". This is **not** required / wanted for
   a) mask passthru mode
   b) low-frequency content of dual demosaicers
6. Avoid one copy cl buffer step for vng linear only
---
 data/kernels/demosaic_ppg.cl | 39 ++++++++++++++++++---------
 src/iop/demosaic.c           | 52 +++++++-----------------------------
 src/iop/demosaicing/basics.c | 20 ++++++--------
 src/iop/demosaicing/vng.c    | 24 +++++------------
 4 files changed, 50 insertions(+), 85 deletions(-)

diff --git a/data/kernels/demosaic_ppg.cl b/data/kernels/demosaic_ppg.cl
index 8498700782a9..0c499f5d626e 100644
--- a/data/kernels/demosaic_ppg.cl
+++ b/data/kernels/demosaic_ppg.cl
@@ -36,8 +36,13 @@ backtransformf (float2 p, const int r_x, const int r_y, const int r_wd, const in
 }
 
 kernel void
-green_equilibration_lavg(read_only image2d_t in, write_only image2d_t out, const int width, const int height, const unsigned int filters,
-                         const int r_x, const int r_y, const float thr, local float *buffer)
+green_equilibration_lavg(read_only image2d_t in,
+                         write_only image2d_t out,
+                         const int width,
+                         const int height,
+                         const unsigned int filters,
+                         const float thr,
+                         local float *buffer)
 {
   const int x = get_global_id(0);
   const int y = get_global_id(1);
@@ -79,11 +84,11 @@ green_equilibration_lavg(read_only image2d_t in, write_only image2d_t out, const
 
   if(x >= width || y >= height) return;
 
-  const int c = FC(y + r_y, x + r_x, filters);
+  const int c = FC(y, x, filters);
   const float maximum = 1.0f;
   float o = buffer[0];
 
-  if(c == 1 && ((y + r_y) & 1))
+  if(c == 1 && (y & 1))
   {
     const float o1_1 = buffer[-1 * stride - 1];
     const float o1_2 = buffer[-1 * stride + 1];
@@ -112,8 +117,12 @@ green_equilibration_lavg(read_only image2d_t in, write_only image2d_t out, const
 
 
 kernel void
-green_equilibration_favg_reduce_first(read_only image2d_t in, const int width, const int height,
-                                      global float2 *accu, const unsigned int filters, const int r_x, const int r_y, local float2 *buffer)
+green_equilibration_favg_reduce_first(read_only image2d_t in,
+                                      const int width,
+                                      const int height,
+                                      global float2 *accu,
+                                      const unsigned int filters,
+                                      local float2 *buffer)
 {
   const int x = get_global_id(0);
   const int y = get_global_id(1);
@@ -124,11 +133,11 @@ green_equilibration_favg_reduce_first(read_only image2d_t in, const int width, c
 
   const int l = mad24(ylid, xlsz, xlid);
 
-  const int c = FC(y + r_y, x + r_x, filters);
+  const int c = FC(y, x, filters);
 
   const int isinimage = (x < 2 * (width / 2) && y < 2 * (height / 2));
-  const int isgreen1 = (c == 1 && !((y + r_y) & 1));
-  const int isgreen2 = (c == 1 && ((y + r_y) & 1));
+  const int isgreen1 = (c == 1 && !(y & 1));
+  const int isgreen2 = (c == 1 && (y & 1));
 
   float pixel = read_imagef(in, sampleri, (int2)(x, y)).x;
 
@@ -194,8 +203,12 @@ green_equilibration_favg_reduce_second(const global float2* input, global float2
 
 
 kernel void
-green_equilibration_favg_apply(read_only image2d_t in, write_only image2d_t out, const int width, const int height, const unsigned int filters,
-                               const int r_x, const int r_y, const float gr_ratio)
+green_equilibration_favg_apply(read_only image2d_t in,
+                               write_only image2d_t out,
+                               const int width,
+                               const int height,
+                               const unsigned int filters,
+                               const float gr_ratio)
 {
   const int x = get_global_id(0);
   const int y = get_global_id(1);
@@ -204,9 +217,9 @@ green_equilibration_favg_apply(read_only image2d_t in, write_only image2d_t out,
 
   float pixel = read_imagef(in, sampleri, (int2)(x, y)).x;
 
-  const int c = FC(y + r_y, x + r_x, filters);
+  const int c = FC(y, x, filters);
 
-  const int isgreen1 = (c == 1 && !((y + r_y) & 1));
+  const int isgreen1 = (c == 1 && !(y & 1));
 
   pixel *= (isgreen1 ? gr_ratio : 1.0f);
 
diff --git a/src/iop/demosaic.c b/src/iop/demosaic.c
index 8ffe68d4f4e7..ccd881dfcbc0 100644
--- a/src/iop/demosaic.c
+++ b/src/iop/demosaic.c
@@ -95,7 +95,6 @@ typedef enum dt_iop_demosaic_qual_flags_t
   // or third scale interpolation instead
   DT_DEMOSAIC_DEFAULT                 = 0,
   DT_DEMOSAIC_FULL_SCALE              = 1 << 0,
-  DT_DEMOSAIC_ONLY_VNG_LINEAR         = 1 << 1,
 } dt_iop_demosaic_qual_flags_t;
 
 typedef enum dt_iop_demosaic_smooth_t
@@ -281,12 +280,6 @@ static dt_iop_demosaic_qual_flags_t demosaic_qual_flags(const dt_dev_pixelpipe_i
   if(img->flags & DT_IMAGE_4BAYER)
     flags |= DT_DEMOSAIC_FULL_SCALE;
 
-  // we check if we can stop at the linear interpolation step in VNG
-  // instead of going the full way
-  if(((flags & DT_DEMOSAIC_FULL_SCALE) && (roi_out->scale < (is_xtrans ? 0.5f : 0.667f)))
-    || piece->pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_PASSTHRU)
-    flags |= DT_DEMOSAIC_ONLY_VNG_LINEAR;
-
   return flags;
 }
 
@@ -498,37 +491,10 @@ void modify_roi_in(dt_iop_module_t *self,
   roi_in->height /= roi_out->scale;
   roi_in->scale = 1.0f;
 
-  dt_iop_demosaic_data_t *d = piece->data;
-  const dt_iop_demosaic_method_t method = d->demosaicing_method;
-  const gboolean passthrough = method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME ||
-                               method == DT_IOP_DEMOSAIC_PASSTHR_MONOX ||
-                               method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR ||
-                               method == DT_IOP_DEMOSAIC_PASSTHR_COLORX;
-  // set position to closest top/left sensor pattern snap
-  if(!passthrough)
-  {
-    const int aligner = (piece->pipe->dsc.filters != 9u) ? DT_BAYER_SNAPPER : DT_XTRANS_SNAPPER;
-    const int dx = roi_in->x % aligner;
-    const int dy = roi_in->y % aligner;
-
-/*
-    // This implements snapping to closest position, meant for optimized xtrans position
-    // but with problems at extreme zoom levels
-    const int shift_x = (dx > aligner / 2) ? aligner - dx : -dx;
-    const int shift_y = (dy > aligner / 2) ? aligner - dy : -dy;
-
-    roi_in->x += shift_x;
-    roi_in->y += shift_y;
-*/
-
-    // currently we always snap to left & upper
-    roi_in->x -= dx;
-    roi_in->y -= dy;
-  }
-
-  // clamp to full buffer fixing numeric inaccuracies
-  roi_in->x = MAX(0, roi_in->x);
-  roi_in->y = MAX(0, roi_in->y);
+  // always set position to closest top/left sensor pattern snap
+  const int snap = (piece->pipe->dsc.filters != 9u) ? DT_BAYER_SNAPPER : DT_XTRANS_SNAPPER;
+  roi_in->x = MAX(0, (roi_in->x / snap) * snap);
+  roi_in->y = MAX(0, (roi_in->y / snap) * snap);
   roi_in->width = MIN(roi_in->width, piece->buf_in.width);
   roi_in->height = MIN(roi_in->height, piece->buf_in.height);
 }
@@ -577,7 +543,7 @@ void tiling_callback(dt_iop_module_t *self,
       tiling->factor += smooth;                        // + smooth
 
     tiling->overhead = 0;
-    tiling->overlap = 5; // take care of border handling
+    tiling->overlap = demosaicing_method == DT_IOP_DEMOSAIC_AMAZE ? 8 : 5; // take care of border handling
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN ||
           demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3 ||
@@ -754,15 +720,15 @@ void process(dt_iop_module_t *self,
     switch(d->green_eq)
     {
       case DT_IOP_GREEN_EQ_FULL:
-        green_equilibration_favg(in, (float *)i, width, height, pipe->dsc.filters, roi_in->x, roi_in->y);
+        green_equilibration_favg(in, (float *)i, width, height, pipe->dsc.filters);
         break;
       case DT_IOP_GREEN_EQ_LOCAL:
-        green_equilibration_lavg(in, (float *)i, width, height, pipe->dsc.filters, roi_in->x, roi_in->y, threshold);
+        green_equilibration_lavg(in, (float *)i, width, height, pipe->dsc.filters, threshold);
         break;
       case DT_IOP_GREEN_EQ_BOTH:
         aux = dt_alloc_align_float((size_t)height * width);
-        green_equilibration_favg(aux, (float *)i, width, height, pipe->dsc.filters, roi_in->x, roi_in->y);
-        green_equilibration_lavg(in, aux, width, height, pipe->dsc.filters, roi_in->x, roi_in->y, threshold);
+        green_equilibration_favg(aux, (float *)i, width, height, pipe->dsc.filters);
+        green_equilibration_lavg(in, aux, width, height, pipe->dsc.filters, threshold);
         dt_free_align(aux);
         break;
       default:
diff --git a/src/iop/demosaicing/basics.c b/src/iop/demosaicing/basics.c
index 95e632b0d465..abf8836b9646 100644
--- a/src/iop/demosaicing/basics.c
+++ b/src/iop/demosaicing/basics.c
@@ -151,16 +151,14 @@ static void green_equilibration_lavg(float *out,
                                      const int width,
                                      const int height,
                                      const uint32_t filters,
-                                     const int x,
-                                     const int y,
                                      const float thr)
 {
   const float maximum = 1.0f;
 
   int oj = 2, oi = 2;
-  if(FC(oj + y, oi + x, filters) != 1) oj++;
-  if(FC(oj + y, oi + x, filters) != 1) oi++;
-  if(FC(oj + y, oi + x, filters) != 1) oj--;
+  if(FC(oj, oi, filters) != 1) oj++;
+  if(FC(oj, oi, filters) != 1) oi++;
+  if(FC(oj, oi, filters) != 1) oj--;
 
   dt_iop_image_copy_by_size(out, in, width, height, 1);
 
@@ -203,15 +201,13 @@ static void green_equilibration_favg(float *out,
                                      const float *const in,
                                      const int width,
                                      const int height,
-                                     const uint32_t filters,
-                                     const int x,
-                                     const int y)
+                                     const uint32_t filters)
 {
   int oj = 0, oi = 0;
   // const float ratio_max = 1.1f;
   double sum1 = 0.0, sum2 = 0.0, gr_ratio;
 
-  if((FC(oj + y, oi + x, filters) & 1) != 1) oi++;
+  if((FC(oj, oi, filters) & 1) != 1) oi++;
   const int g2_offset = oi ? -1 : 1;
   dt_iop_image_copy_by_size(out, in, width, height, 1);
   DT_OMP_FOR(reduction(+ : sum1, sum2) collapse(2))
@@ -387,7 +383,7 @@ static int green_equilibration_cl(const dt_iop_module_t *self,
     size_t flocal[3] = { flocopt.sizex, flocopt.sizey, 1 };
     dt_opencl_set_kernel_args(devid, gd->kernel_green_eq_favg_reduce_first, 0,
       CLARG(dev_in1), CLARG(width),
-      CLARG(height), CLARG(dev_m), CLARG(piece->pipe->dsc.filters), CLARG(roi_in->x), CLARG(roi_in->y),
+      CLARG(height), CLARG(dev_m), CLARG(piece->pipe->dsc.filters),
       CLLOCAL(sizeof(float) * 2 * flocopt.sizex * flocopt.sizey));
     err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_green_eq_favg_reduce_first, fsizes, flocal);
     if(err != CL_SUCCESS) goto error;
@@ -442,7 +438,7 @@ static int green_equilibration_cl(const dt_iop_module_t *self,
 
     err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_green_eq_favg_apply, width, height,
       CLARG(dev_in1), CLARG(dev_out1), CLARG(width), CLARG(height), CLARG(piece->pipe->dsc.filters),
-      CLARG(roi_in->x), CLARG(roi_in->y), CLARG(gr_ratio));
+      CLARG(gr_ratio));
     if(err != CL_SUCCESS) goto error;
   }
 
@@ -466,7 +462,7 @@ static int green_equilibration_cl(const dt_iop_module_t *self,
     size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
     dt_opencl_set_kernel_args(devid, gd->kernel_green_eq_lavg, 0,
       CLARG(dev_in2), CLARG(dev_out2),
-      CLARG(width), CLARG(height), CLARG(piece->pipe->dsc.filters), CLARG(roi_in->x), CLARG(roi_in->y),
+      CLARG(width), CLARG(height), CLARG(piece->pipe->dsc.filters),
       CLARG(threshold), CLLOCAL(sizeof(float) * (locopt.sizex + 4) * (locopt.sizey + 4)));
     err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_green_eq_lavg, sizes, local);
     if(err != CL_SUCCESS) goto error;
diff --git a/src/iop/demosaicing/vng.c b/src/iop/demosaicing/vng.c
index d4f02ca02874..10970b9504f6 100644
--- a/src/iop/demosaicing/vng.c
+++ b/src/iop/demosaicing/vng.c
@@ -342,10 +342,14 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
   const int prow = (filters4 == 9u) ? 6 : 8;
   const int pcol = (filters4 == 9u) ? 6 : 2;
   const int devid = piece->pipe->devid;
+  const int width = roi_in->width;
+  const int height = roi_in->height;
 
   int *ips = NULL;
 
-  cl_mem dev_tmp = NULL;
+  cl_mem dev_tmp = only_vng_linear ? dev_out : dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
+  if(dev_tmp == NULL) dev_tmp = dev_out;
+
   cl_mem dev_xtrans = NULL;
   cl_mem dev_lookup = NULL;
   cl_mem dev_code = NULL;
@@ -486,15 +490,6 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
     dev_ips = dt_opencl_copy_host_to_device_constant(devid, ips_size, ips);
     if(dev_ips == NULL) goto finish;
 
-    int width = roi_in->width;
-    int height = roi_in->height;
-
-    // need to reserve scaled auxiliary buffer or use dev_out
-    err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
-
-    dev_tmp = dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
-    if(dev_tmp == NULL) goto finish;
-
     // manage borders for linear interpolation part
     int border = 1;
     err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_vng_border_interpolate, width, height,
@@ -524,13 +519,8 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
       if(err != CL_SUCCESS) goto finish;
     }
 
-
-    if(only_vng_linear)
+    if(dev_tmp == dev_out)
     {
-      // leave it at linear interpolation and skip VNG
-      size_t origin[] = { 0, 0, 0 };
-      size_t region[] = { width, height, 1 };
-      err = dt_opencl_enqueue_copy_image(devid, dev_tmp, dev_out, origin, origin, region);
       goto finish;
     }
     else
@@ -577,7 +567,7 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
     }
 
 finish:
-  dt_opencl_release_mem_object(dev_tmp);
+  if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
   dt_opencl_release_mem_object(dev_xtrans);
   dt_opencl_release_mem_object(dev_lookup);
   free(lookup);

From a009566351c0458b5af842ecb8df983125422c9d Mon Sep 17 00:00:00 2001
From: Hanno Schwalm <hanno@schwalm-bremen.de>
Date: Wed, 18 Jun 2025 06:43:02 +0200
Subject: [PATCH 3/3] Redesign pixelpipe input scaling

Analysis: With current pixelpipe and cache design we must reprocess the full pipe whenever we zoom in/out or
drag around, as we select another area and thus the hash won't match.

Solution:
1. Decouple crop&scale from demosaic module and introduce a "pipescale" module that presents the currently
   chosen roi.
2. Make sure modules before pipescale always process full image data.
3. By doing so we will have a 100% cache hit rate for the pipescale module if there are no changed parameters
   in any of the modules before.
   As those modules before pipescale could include very performance costly algorithms the UI will now be far more
   responsive.
4. The default pipescale position is now right after demosaicing, the current implementation allows dragging
   its position further up the pipe so it could also include costly stuff like denoising or even notorious
   modules like dehaze as that loves to be processed with full image data.
5. As we now always have full image data available before and including demosaicing all raw-only modules can
   use simplified code reducing code and complexity.
   It will also be possible to provide details mask while demosaic tiling.
6. The scale module is also available for non-raw files allowing crop&scale to be done **after** colorin.
   (One problem here so far: the preview pipe scaling is not fixed yet)
7. The low quality demosaicers are gone as we always have full data to be processed.
---
 src/common/iop_order.c     |   6 +
 src/develop/pixelpipe_hb.c |  18 +--
 src/iop/CMakeLists.txt     |   1 +
 src/iop/cacorrect.c        |  31 ----
 src/iop/demosaic.c         | 282 +++++--------------------------------
 src/iop/pipescale.c        | 200 ++++++++++++++++++++++++++
 6 files changed, 245 insertions(+), 293 deletions(-)
 create mode 100644 src/iop/pipescale.c

diff --git a/src/common/iop_order.c b/src/common/iop_order.c
index 8ff8e3d1dd2d..fa7f60d761cc 100644
--- a/src/common/iop_order.c
+++ b/src/common/iop_order.c
@@ -88,6 +88,7 @@ const dt_iop_order_entry_t legacy_order[] = {
   { { 6.0f }, "hotpixels", 0},
   { { 7.0f }, "rawdenoise", 0},
   { { 8.0f }, "demosaic", 0},
+  { { 8.5f }, "pipescale", 0},
   { { 9.0f }, "mask_manager", 0},
   { {10.0f }, "denoiseprofile", 0},
   { {11.0f }, "tonemap", 0},
@@ -184,6 +185,7 @@ const dt_iop_order_entry_t v30_order[] = {
   { { 6.0f }, "hotpixels", 0},
   { { 7.0f }, "rawdenoise", 0},
   { { 8.0f }, "demosaic", 0},
+  { { 8.5f }, "pipescale", 0},
   { { 9.0f }, "denoiseprofile", 0},
   { {10.0f }, "bilateral", 0},
   { {11.0f }, "rotatepixels", 0},
@@ -301,6 +303,7 @@ const dt_iop_order_entry_t v50_order[] = {
   { { 6.0f }, "hotpixels", 0},
   { { 7.0f }, "rawdenoise", 0},
   { { 8.0f }, "demosaic", 0},
+  { { 8.5f }, "pipescale", 0},
   { { 9.0f }, "denoiseprofile", 0},
   { {10.0f }, "bilateral", 0},
   { {11.0f }, "rotatepixels", 0},
@@ -420,6 +423,7 @@ const dt_iop_order_entry_t v30_jpg_order[] = {
   { { 6.0f }, "hotpixels", 0 },
   { { 7.0f }, "rawdenoise", 0 },
   { { 8.0f }, "demosaic", 0 },
+  { { 8.5f }, "pipescale", 0},
   // all the modules between [8; 28] expect linear RGB, so they need to be moved after colorin
   { { 28.0f }, "colorin", 0 },
   // moved modules : (copy-pasted in the same order)
@@ -540,6 +544,7 @@ const dt_iop_order_entry_t v50_jpg_order[] = {
   { { 6.0f }, "hotpixels", 0 },
   { { 7.0f }, "rawdenoise", 0 },
   { { 8.0f }, "demosaic", 0 },
+  { { 8.5f }, "pipescale", 0},
   // all the modules between [8; 28] expect linear RGB, so they need to be moved after colorin
   { { 28.0f }, "colorin", 0 },
   // moved modules : (copy-pasted in the same order)
@@ -1179,6 +1184,7 @@ GList *dt_ioppr_get_iop_order_list(const dt_imgid_t imgid,
           _insert_before(iop_order_list, "filmicrgb", "sigmoid");
           _insert_before(iop_order_list, "colorbalancergb", "colorequal");
           _insert_before(iop_order_list, "highlights", "rasterfile");
+          _insert_before(iop_order_list, "denoiseprofile", "pipescale"); // ????
         }
       }
       else if(version >= DT_IOP_ORDER_LEGACY
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index 991cb4ae25b4..b5f6f91a3abe 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -1737,9 +1737,6 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
   // 3b) recurse and obtain output array in &input
 
   // get region of interest which is needed in input
-  if(dt_pipe_shutdown(pipe))
-    return TRUE;
-
   module->modify_roi_in(module, piece, roi_out, &roi_in);
   if((darktable.unmuted & DT_DEBUG_PIPE) && memcmp(roi_out, &roi_in, sizeof(dt_iop_roi_t)))
   {
@@ -1775,18 +1772,11 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
   const size_t out_bpp = dt_iop_buffer_dsc_to_bpp(*out_format);
 
   // reserve new cache line: output
-  if(dt_pipe_shutdown(pipe))
-    return TRUE;
-
   const gboolean important = module
       && (pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_NONE)
-      && (((pipe->type & DT_DEV_PIXELPIPE_PREVIEW)
-           && dt_iop_module_is(module->so, "colorout"))
-       || ((pipe->type & DT_DEV_PIXELPIPE_FULL)
-           && dt_iop_module_is(module->so, "gamma")));
+      && dt_iop_module_is(module->so, "pipescale");
 
-  dt_dev_pixelpipe_cache_get(pipe, hash, bufsize,
-                             output, out_format, module, important);
+  dt_dev_pixelpipe_cache_get(pipe, hash, bufsize, output, out_format, module, important);
 
   if(dt_pipe_shutdown(pipe))
     return TRUE;
@@ -2546,8 +2536,8 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
            && dev->gui_attached
            && ((module == dt_dev_gui_module())
                 || darktable.develop->history_last_module == module
-                || dt_iop_module_is(module->so, "colorout")
-                || dt_iop_module_is(module->so, "finalscale"));
+                || dt_iop_module_is(module->so, "finalscale")
+                || dt_iop_module_is(module->so, "pipescale"));
 
         if(important_cl)
         {
diff --git a/src/iop/CMakeLists.txt b/src/iop/CMakeLists.txt
index 89a6e7453e1b..7eb71fd384cb 100644
--- a/src/iop/CMakeLists.txt
+++ b/src/iop/CMakeLists.txt
@@ -155,6 +155,7 @@ add_iop(sigmoid "sigmoid.c")
 add_iop(primaries "primaries.c")
 add_iop(colorequal "colorequal.c")
 add_iop(rasterfile "rasterfile.c")
+add_iop(pipescale "pipescale.c")
 
 if(Rsvg2_FOUND)
   add_iop(watermark "watermark.c")
diff --git a/src/iop/cacorrect.c b/src/iop/cacorrect.c
index 269c7ad81e86..2b780043103e 100644
--- a/src/iop/cacorrect.c
+++ b/src/iop/cacorrect.c
@@ -1215,37 +1215,6 @@ DT_OMP_PRAGMA(barrier)
 /*==================================================================================
  * end raw therapee code
  *==================================================================================*/
-void modify_roi_out(dt_iop_module_t *self,
-                    dt_dev_pixelpipe_iop_t *piece,
-                    dt_iop_roi_t *roi_out,
-                    const dt_iop_roi_t *const roi_in)
-{
-  *roi_out = *roi_in;
-  roi_out->x = MAX(0, roi_in->x);
-  roi_out->y = MAX(0, roi_in->y);
-}
-void modify_roi_in(dt_iop_module_t *self,
-                   dt_dev_pixelpipe_iop_t *piece,
-                   const dt_iop_roi_t *const roi_out,
-                   dt_iop_roi_t *roi_in)
-{
-  *roi_in = *roi_out;
-  roi_in->x = 0;
-  roi_in->y = 0;
-  roi_in->width = piece->buf_in.width;
-  roi_in->height = piece->buf_in.height;
-  roi_in->scale = 1.0f;
-}
-
-void distort_mask(dt_iop_module_t *self,
-                  dt_dev_pixelpipe_iop_t *piece,
-                  const float *const in,
-                  float *const out,
-                  const dt_iop_roi_t *const roi_in,
-                  const dt_iop_roi_t *const roi_out)
-{
-  dt_iop_copy_image_roi(out, in, 1, roi_in, roi_out);
-}
 
 void reload_defaults(dt_iop_module_t *self)
 {
diff --git a/src/iop/demosaic.c b/src/iop/demosaic.c
index ccd881dfcbc0..c1bc7016d592 100644
--- a/src/iop/demosaic.c
+++ b/src/iop/demosaic.c
@@ -89,14 +89,6 @@ typedef enum dt_iop_demosaic_greeneq_t
   DT_IOP_GREEN_EQ_BOTH = 3   // $DESCRIPTION: "full and local average"
 } dt_iop_demosaic_greeneq_t;
 
-typedef enum dt_iop_demosaic_qual_flags_t
-{
-  // either perform full scale demosaicing or choose simple half scale
-  // or third scale interpolation instead
-  DT_DEMOSAIC_DEFAULT                 = 0,
-  DT_DEMOSAIC_FULL_SCALE              = 1 << 0,
-} dt_iop_demosaic_qual_flags_t;
-
 typedef enum dt_iop_demosaic_smooth_t
 {
   DT_DEMOSAIC_SMOOTH_OFF = 0, // $DESCRIPTION: "disabled"
@@ -162,14 +154,10 @@ typedef struct dt_iop_demosaic_global_data_t
   int kernel_passthrough_color;
   int kernel_ppg_green;
   int kernel_ppg_redblue;
-  int kernel_zoom_half_size;
-  int kernel_downsample;
   int kernel_border_interpolate;
   int kernel_color_smoothing;
-  int kernel_zoom_passthrough_monochrome;
   int kernel_vng_border_interpolate;
   int kernel_vng_lin_interpolate;
-  int kernel_zoom_third_size;
   int kernel_vng_green_equilibrate;
   int kernel_vng_interpolate;
   int kernel_markesteijn_initial_copy;
@@ -238,51 +226,6 @@ static gboolean _get_thumb_quality(const int width, const int height)
   return (level >= min_s);
 }
 
-// set flags for demosaic quality based on factors besides demosaic
-// method (e.g. config, scale, pixelpipe type)
-static dt_iop_demosaic_qual_flags_t demosaic_qual_flags(const dt_dev_pixelpipe_iop_t *const piece,
-                                                        const dt_image_t *const img,
-                                                        const dt_iop_roi_t *const roi_out)
-{
-  const uint32_t filters = piece->pipe->dsc.filters;
-  const gboolean is_xtrans = filters == 9u;
-
-  dt_iop_demosaic_qual_flags_t flags = DT_DEMOSAIC_DEFAULT;
-  switch(piece->pipe->type & DT_DEV_PIXELPIPE_ANY)
-  {
-    case DT_DEV_PIXELPIPE_PREVIEW2:
-      flags |= DT_DEMOSAIC_FULL_SCALE;
-      break;
-    case DT_DEV_PIXELPIPE_FULL:
-      flags |= DT_DEMOSAIC_FULL_SCALE;
-      break;
-    case DT_DEV_PIXELPIPE_EXPORT:
-      flags |= DT_DEMOSAIC_FULL_SCALE;
-      break;
-    case DT_DEV_PIXELPIPE_THUMBNAIL:
-      flags |= (piece->pipe->want_detail_mask || _get_thumb_quality(roi_out->width, roi_out->height))
-                  ? DT_DEMOSAIC_FULL_SCALE
-                  : DT_DEMOSAIC_DEFAULT;
-      break;
-    default: // make C not complain about missing enum members
-      break;
-  }
-
-  // For sufficiently small scaling, one or more repetitition of the
-  // CFA pattern can be merged into a single pixel, hence it is
-  // possible to skip the full demosaic and perform a quick downscale.
-  // Note even though the X-Trans CFA is 6x6, for this purposes we can
-  // see each 6x6 tile as four fairly similar 3x3 tiles
-  if(roi_out->scale > (is_xtrans ? 0.667f : 0.5f))
-    flags |= DT_DEMOSAIC_FULL_SCALE;
-
-  // half_size_f doesn't support 4bayer images
-  if(img->flags & DT_IMAGE_4BAYER)
-    flags |= DT_DEMOSAIC_FULL_SCALE;
-
-  return flags;
-}
-
 // Implemented in demosaicing/amaze.cc
 void amaze_demosaic(dt_dev_pixelpipe_iop_t *piece,
                     const float *const in,
@@ -331,6 +274,18 @@ dt_iop_colorspace_type_t default_colorspace(dt_iop_module_t *self,
   return IOP_CS_RAW;
 }
 
+// Request the same roi as the output, but with its origin snapped to the upper/left sensor pattern corner
+void modify_roi_in(dt_iop_module_t *self,
+                   dt_dev_pixelpipe_iop_t *piece,
+                   const dt_iop_roi_t *roi_out,
+                   dt_iop_roi_t *roi_in)
+{
+  *roi_in = *roi_out; // start from the requested output roi; only x and y are changed below
+  const int snap = (piece->pipe->dsc.filters != 9u) ? DT_BAYER_SNAPPER : DT_XTRANS_SNAPPER; // filters == 9u is treated as X-Trans in this file
+  roi_in->x = MAX(0, (roi_in->x / snap) * snap); // integer division drops the remainder -> multiple of snap, clamped at 0
+  roi_in->y = MAX(0, (roi_in->y / snap) * snap); // same for the vertical origin
+}
+
 int legacy_params(dt_iop_module_t *self,
                   const void *const old_params,
                   const int old_version,
@@ -452,53 +407,6 @@ dt_iop_colorspace_type_t output_colorspace(dt_iop_module_t *self,
   return IOP_CS_RGB;
 }
 
-void distort_mask(dt_iop_module_t *self,
-                  dt_dev_pixelpipe_iop_t *piece,
-                  const float *const in,
-                  float *const out,
-                  const dt_iop_roi_t *const roi_in,
-                  const dt_iop_roi_t *const roi_out)
-{
-  if(roi_out->scale != roi_in->scale)
-  {
-    const dt_interpolation_t *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF_WARP);
-    dt_interpolation_resample_roi_1c(itor, out, roi_out, in, roi_in);
-  }
-  else
-    dt_iop_copy_image_roi(out, in, 1, roi_in, roi_out);
-}
-
-void modify_roi_out(dt_iop_module_t *self,
-                    dt_dev_pixelpipe_iop_t *piece,
-                    dt_iop_roi_t *roi_out,
-                    const dt_iop_roi_t *const roi_in)
-{
-  *roi_out = *roi_in;
-  roi_out->x = 0;
-  roi_out->y = 0;
-}
-
-void modify_roi_in(dt_iop_module_t *self,
-                   dt_dev_pixelpipe_iop_t *piece,
-                   const dt_iop_roi_t *roi_out,
-                   dt_iop_roi_t *roi_in)
-{
-  *roi_in = *roi_out;
-  // need 1:1, demosaic and then sub-sample. or directly sample half-size
-  roi_in->x /= roi_out->scale;
-  roi_in->y /= roi_out->scale;
-  roi_in->width /= roi_out->scale;
-  roi_in->height /= roi_out->scale;
-  roi_in->scale = 1.0f;
-
-  // always set position to closest top/left sensor pattern snap
-  const int snap = (piece->pipe->dsc.filters != 9u) ? DT_BAYER_SNAPPER : DT_XTRANS_SNAPPER;
-  roi_in->x = MAX(0, (roi_in->x / snap) * snap);
-  roi_in->y = MAX(0, (roi_in->y / snap) * snap);
-  roi_in->width = MIN(roi_in->width, piece->buf_in.width);
-  roi_in->height = MIN(roi_in->height, piece->buf_in.height);
-}
-
 void tiling_callback(dt_iop_module_t *self,
                      dt_dev_pixelpipe_iop_t *piece,
                      const dt_iop_roi_t *roi_in,
@@ -507,18 +415,11 @@ void tiling_callback(dt_iop_module_t *self,
 {
   dt_iop_demosaic_data_t *d = piece->data;
 
-  const float ioratio = (float)roi_out->width * roi_out->height / ((float)roi_in->width * roi_in->height);
-  const float smooth = d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF ? ioratio : 0.0f;
+  const float smooth = d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF ? 1.0f : 0.0f;
   const gboolean is_xtrans = piece->pipe->dsc.filters == 9u;
   const float greeneq = (!is_xtrans && (d->green_eq != DT_IOP_GREEN_EQ_NO)) ? 0.25f : 0.0f;
   const dt_iop_demosaic_method_t demosaicing_method = d->demosaicing_method & ~DT_DEMOSAIC_DUAL;
 
-  const int qual_flags = demosaic_qual_flags(piece, &self->dev->image_storage, roi_out);
-  const int full_scale = qual_flags & DT_DEMOSAIC_FULL_SCALE;
-
-  // check if output buffer has same dimension as input buffer (thus avoiding one
-  // additional temporary buffer)
-  const gboolean unscaled = roi_out->width == roi_in->width && roi_out->height == roi_in->height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
   const gboolean is_opencl = piece->pipe->devid > DT_DEVICE_CPU;
   // define aligners
   tiling->xalign = is_xtrans ? DT_XTRANS_SNAPPER : DT_BAYER_SNAPPER;
@@ -533,17 +434,10 @@ void tiling_callback(dt_iop_module_t *self,
      demosaicing_method == DT_IOP_DEMOSAIC_AMAZE)
   {
     // Bayer pattern with PPG, Passthrough or Amaze
-    tiling->factor = 1.0f + ioratio;         // in + out
-
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + geeneq | + smooth
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);  // + tmp + aux + greeneq | + smooth
-    else
-      tiling->factor += smooth;                        // + smooth
-
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + greeneq | + smooth
     tiling->overhead = 0;
-    tiling->overlap = demosaicing_method == DT_IOP_DEMOSAIC_AMAZE ? 8 : 5; // take care of border handling
+    tiling->overlap = 5; // take care of border handling
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN ||
           demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3 ||
@@ -553,66 +447,41 @@ void tiling_callback(dt_iop_module_t *self,
     const int ndir = demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3 ? 8 : 4;
     const int overlap = demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3 ? 18 : 12;
 
-    tiling->factor = 1.0f + ioratio;
+    tiling->factor = 2.0f;
     tiling->factor += ndir * 1.0f      // rgb
                       + ndir * 0.25f   // drv
                       + ndir * 0.125f  // homo + homosum
                       + 1.0f;          // aux
-
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);
-    else
-      tiling->factor += smooth;
-
+    tiling->factor += MAX(1.0f + greeneq, smooth);
     tiling->overlap = overlap;
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_RCD)
   {
-    tiling->factor = 1.0f + ioratio;
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + geeneq | + smooth
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);  // + tmp + aux + greeneq | + smooth
-    else
-      tiling->factor += smooth;                        // + smooth
-
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + greeneq | + smooth
     tiling->overhead = is_opencl ? 0 : sizeof(float) * DT_RCD_TILESIZE * DT_RCD_TILESIZE * 8 * dt_get_num_threads();
     tiling->overlap = 10;
     tiling->factor_cl = tiling->factor + 3.0f;
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_LMMSE)
   {
-    tiling->factor = 1.0f + ioratio;
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + geeneq | + smooth
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);  // + tmp + aux + greeneq | + smooth
-    else
-      tiling->factor += smooth;                        // + smooth
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);  // + tmp + greeneq | + smooth
     tiling->overhead = sizeof(float) * DT_LMMSE_TILESIZE * DT_LMMSE_TILESIZE * 6 * dt_get_num_threads();
     tiling->overlap = 10;
   }
   else
   {
     // VNG
-    tiling->factor = 1.0f + ioratio;
-
-    if(full_scale && unscaled)
-      tiling->factor += MAX(1.0f + greeneq, smooth);
-    else if(full_scale)
-      tiling->factor += MAX(2.0f + greeneq, smooth);
-    else
-      tiling->factor += smooth;
-
+    tiling->factor = 2.0f;
+    tiling->factor += MAX(1.0f + greeneq, smooth);
     tiling->overlap = 6;
   }
 
   if((d->demosaicing_method & DT_DEMOSAIC_DUAL) || d->cs_strength)
   {
     // internals plus 2 output
-    tiling->factor = MAX(tiling->factor, 1.0f + 2.0f * ioratio);
+    tiling->factor += 1.0f;
     // works for bayer and xtrans
     tiling->overlap = MAX(d->cs_strength ? 18 : 6, tiling->overlap);
   }
@@ -639,8 +508,6 @@ void process(dt_iop_module_t *self,
   const dt_iop_demosaic_data_t *d = piece->data;
   const dt_iop_demosaic_gui_data_t *g = self->gui_data;
 
-  const int qual_flags = demosaic_qual_flags(piece, img, roi_out);
-  const gboolean fullscale = qual_flags & DT_DEMOSAIC_FULL_SCALE;
   const gboolean is_xtrans = pipe->dsc.filters == 9u;
   const gboolean is_4bayer = img->flags & DT_IMAGE_4BAYER;
   const gboolean is_bayer = !is_xtrans && pipe->dsc.filters != 0;
@@ -680,24 +547,9 @@ void process(dt_iop_module_t *self,
   float *in  = (float *)i;
   float *out = (float *)o;
 
-  if(!fullscale)
-  {
-    dt_print_pipe(DT_DEBUG_PIPE, "demosaic approx zoom", pipe, self, DT_DEVICE_CPU, roi_in, roi_out);
-    if(demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME || demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR)
-      dt_iop_clip_and_zoom_demosaic_passthrough_monochrome_f(out, in, roi_out, roi_in, roi_out->width, width);
-    else if(is_xtrans)
-      dt_iop_clip_and_zoom_demosaic_third_size_xtrans_f(out, in, roi_out, roi_in, roi_out->width, width, xtrans);
-    else
-      dt_iop_clip_and_zoom_demosaic_half_size_f(out, in, roi_out, roi_in, roi_out->width, width, pipe->dsc.filters);
-
-    return;
-  }
-
   const int base_demosaicing_method = demosaicing_method & ~DT_DEMOSAIC_DUAL;
   const gboolean dual = (demosaicing_method & DT_DEMOSAIC_DUAL) && !run_fast && !previewpipe;
 
-  const gboolean direct = roi_out->width == width && roi_out->height == height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
-
   const gboolean passthru = demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME
                          || demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR;
 
@@ -708,9 +560,6 @@ void process(dt_iop_module_t *self,
                           &&  !previewpipe
                           &&  d->cs_strength;
 
-  if(!direct)
-    out = dt_alloc_align_float((size_t)4 * width * height);
-
   if(is_bayer && d->green_eq != DT_IOP_GREEN_EQ_NO)
   {
     const float threshold = 0.0001f * img->exif_iso;
@@ -784,17 +633,6 @@ void process(dt_iop_module_t *self,
 
   if(d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF)
     color_smoothing(out, roi_in, d->color_smoothing);
-
-  dt_print_pipe(DT_DEBUG_VERBOSE, direct ? "demosaic inplace" : "demosaic clip_and_zoom", pipe, self, DT_DEVICE_CPU, roi_in, roi_out);
-  if(!direct)
-  {
-    dt_iop_roi_t roo = *roi_out;
-    roo.width = width;
-    roo.height = height;
-    roo.scale = 1.0f;
-    dt_iop_clip_and_zoom_roi((float *)o, out, roi_out, &roo);
-    dt_free_align(out);
-  }
 }
 
 #ifdef HAVE_OPENCL
@@ -805,14 +643,10 @@ int process_cl(dt_iop_module_t *self,
                const dt_iop_roi_t *const roi_in,
                const dt_iop_roi_t *const roi_out)
 {
-  const dt_image_t *img = &self->dev->image_storage;
   dt_dev_pixelpipe_t *const pipe = piece->pipe;
   const gboolean run_fast = pipe->type & DT_DEV_PIXELPIPE_FAST;
   const gboolean fullpipe = pipe->type & DT_DEV_PIXELPIPE_FULL;
   const gboolean previewpipe = pipe->type & DT_DEV_PIXELPIPE_PREVIEW;
-
-  const int qual_flags = demosaic_qual_flags(piece, img, roi_out);
-  const gboolean fullscale = qual_flags & DT_DEMOSAIC_FULL_SCALE;
   const gboolean is_xtrans = pipe->dsc.filters == 9u;
   const gboolean is_bayer = !is_xtrans && pipe->dsc.filters != 0;
 
@@ -820,7 +654,6 @@ int process_cl(dt_iop_module_t *self,
 
   const dt_iop_demosaic_data_t *d = piece->data;
   const dt_iop_demosaic_gui_data_t *g = self->gui_data;
-  const dt_iop_demosaic_global_data_t *gd = self->global_data;
 
   int demosaicing_method = d->demosaicing_method;
 
@@ -867,32 +700,6 @@ int process_cl(dt_iop_module_t *self,
 
   if(dev_in  == NULL || dev_out == NULL) return err;
 
-  if(!fullscale)
-  {
-    dt_print_pipe(DT_DEBUG_PIPE, "demosaic approx zoom", pipe, self, devid, roi_in, roi_out);
-    const int zero = 0;
-    if(is_xtrans)
-    {
-      cl_mem dev_xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(pipe->dsc.xtrans), pipe->dsc.xtrans);
-      if(dev_xtrans == NULL) return err;
-      // sample third-size image
-      err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_zoom_third_size, roi_out->width, roi_out->height,
-          CLARG(dev_in), CLARG(dev_out), CLARG(roi_out->width), CLARG(roi_out->height), CLARG(roi_in->x), CLARG(roi_in->y),
-          CLARG(width), CLARG(height), CLARG(roi_out->scale), CLARG(dev_xtrans));
-      dt_opencl_release_mem_object(dev_xtrans);
-      return err;
-    }
-    else if(demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME)
-      return dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_zoom_passthrough_monochrome, roi_out->width, roi_out->height,
-          CLARG(dev_in), CLARG(dev_out), CLARG(roi_out->width), CLARG(roi_out->height), CLARG(zero), CLARG(zero), CLARG(width),
-          CLARG(height), CLARG(roi_out->scale), CLARG(pipe->dsc.filters));
-    else // bayer
-      return dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_zoom_half_size, roi_out->width, roi_out->height,
-          CLARG(dev_in), CLARG(dev_out), CLARG(roi_out->width), CLARG(roi_out->height), CLARG(zero), CLARG(zero), CLARG(width),
-          CLARG(height), CLARG(roi_out->scale), CLARG(pipe->dsc.filters));
-  }
-
-  const gboolean direct = roi_out->width == width && roi_out->height == height && feqf(roi_in->scale, roi_out->scale, 1e-8f);
   const int base_demosaicing_method = demosaicing_method & ~DT_DEMOSAIC_DUAL;
   const gboolean dual = (demosaicing_method & DT_DEMOSAIC_DUAL) && !run_fast && !previewpipe;
   const gboolean passthru = demosaicing_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME
@@ -904,12 +711,8 @@ int process_cl(dt_iop_module_t *self,
                           &&  !previewpipe
                           &&  d->cs_strength;
 
-  cl_mem out_image = direct ? dev_out : dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
   cl_mem in_image = dev_in;
 
-  if(out_image == NULL)
-    goto finish;
-
   if(is_bayer && d->green_eq != DT_IOP_GREEN_EQ_NO)
   {
     in_image = dt_opencl_alloc_device(devid, width, height, sizeof(float));
@@ -921,22 +724,22 @@ int process_cl(dt_iop_module_t *self,
 
   if(passthru || demosaicing_method == DT_IOP_DEMOSAIC_PPG)
   {
-    err = process_default_cl(self, piece, in_image, out_image, roi_in, demosaicing_method);
+    err = process_default_cl(self, piece, in_image, dev_out, roi_in, demosaicing_method);
     if(err != CL_SUCCESS) return err;
   }
   else if(base_demosaicing_method == DT_IOP_DEMOSAIC_RCD)
   {
-    err = process_rcd_cl(self, piece, in_image, out_image, roi_in);
+    err = process_rcd_cl(self, piece, in_image, dev_out, roi_in);
     if(err != CL_SUCCESS) goto finish;
   }
   else if(demosaicing_method == DT_IOP_DEMOSAIC_VNG4 || demosaicing_method == DT_IOP_DEMOSAIC_VNG)
   {
-    err = process_vng_cl(self, piece, in_image, out_image, roi_in, vng_linear);
+    err = process_vng_cl(self, piece, in_image, dev_out, roi_in, vng_linear);
     if(err != CL_SUCCESS) goto finish;
   }
   else if(base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN || base_demosaicing_method == DT_IOP_DEMOSAIC_MARKESTEIJN_3)
   {
-    err = process_markesteijn_cl(self, piece, in_image, out_image, roi_in);
+    err = process_markesteijn_cl(self, piece, in_image, dev_out, roi_in);
     if(err != CL_SUCCESS) goto finish;
   }
   else
@@ -948,13 +751,13 @@ int process_cl(dt_iop_module_t *self,
 
   if(pipe->want_detail_mask)
   {
-    err = dt_dev_write_scharr_mask_cl(piece, out_image, roi_in, TRUE);
+    err = dt_dev_write_scharr_mask_cl(piece, dev_out, roi_in, TRUE);
     if(err != CL_SUCCESS) goto finish;
   }
 
   if(do_capture)
   {
-    err = _capture_sharpen_cl(self, piece, dev_in, out_image, roi_in, show_capturemask);
+    err = _capture_sharpen_cl(self, piece, dev_in, dev_out, roi_in, show_capturemask);
     if(err != CL_SUCCESS) goto finish;
   }
 
@@ -967,13 +770,13 @@ int process_cl(dt_iop_module_t *self,
     {
       size_t origin[] = { 0, 0, 0 };
       size_t region[] = { width, height, 1 };
-      err = dt_opencl_enqueue_copy_image(devid, out_image, cp_image, origin, origin, region);
+      err = dt_opencl_enqueue_copy_image(devid, dev_out, cp_image, origin, origin, region);
       if(err == CL_SUCCESS)
         err = process_vng_cl(self, piece, in_image, low_image, roi_in, TRUE);
       if(err == CL_SUCCESS)
         err = color_smoothing_cl(self, piece, low_image, low_image, roi_in, DT_DEMOSAIC_SMOOTH_2);
       if(err == CL_SUCCESS)
-        err = dual_demosaic_cl(self, piece, cp_image, low_image, out_image, roi_in, show_dualmask);
+        err = dual_demosaic_cl(self, piece, cp_image, low_image, dev_out, roi_in, show_dualmask);
       dt_opencl_release_mem_object(cp_image);
       dt_opencl_release_mem_object(low_image);
     }
@@ -987,20 +790,11 @@ int process_cl(dt_iop_module_t *self,
   }
 
   if(d->color_smoothing != DT_DEMOSAIC_SMOOTH_OFF)
-  {
-    err = color_smoothing_cl(self, piece, out_image, out_image, roi_in, d->color_smoothing);
-    if(err != CL_SUCCESS) goto finish;
-  }
-
-  dt_print_pipe(DT_DEBUG_VERBOSE, direct ? "demosaic inplace" : "demosaic clip_and_zoom", pipe, self, devid, roi_in, roi_out);
-  if(!direct)
-    err = dt_iop_clip_and_zoom_roi_cl(devid, dev_out, out_image, roi_out, roi_in);
+    err = color_smoothing_cl(self, piece, dev_out, dev_out, roi_in, d->color_smoothing);
 
 finish:
 
   if(in_image != dev_in) dt_opencl_release_mem_object(in_image);
-  if(out_image != dev_out) dt_opencl_release_mem_object(out_image);
-
   return err;
 }
 #endif
@@ -1011,7 +805,6 @@ void init_global(dt_iop_module_so_t *self)
   dt_iop_demosaic_global_data_t *gd = malloc(sizeof(dt_iop_demosaic_global_data_t));
   self->data = gd;
 
-  gd->kernel_zoom_half_size = dt_opencl_create_kernel(program, "clip_and_zoom_demosaic_half_size");
   gd->kernel_ppg_green = dt_opencl_create_kernel(program, "ppg_demosaic_green");
   gd->kernel_green_eq_lavg = dt_opencl_create_kernel(program, "green_equilibration_lavg");
   gd->kernel_green_eq_favg_reduce_first = dt_opencl_create_kernel(program, "green_equilibration_favg_reduce_first");
@@ -1019,19 +812,16 @@ void init_global(dt_iop_module_so_t *self)
   gd->kernel_green_eq_favg_apply = dt_opencl_create_kernel(program, "green_equilibration_favg_apply");
   gd->kernel_pre_median = dt_opencl_create_kernel(program, "pre_median");
   gd->kernel_ppg_redblue = dt_opencl_create_kernel(program, "ppg_demosaic_redblue");
-  gd->kernel_downsample = dt_opencl_create_kernel(program, "clip_and_zoom");
   gd->kernel_border_interpolate = dt_opencl_create_kernel(program, "border_interpolate");
   gd->kernel_color_smoothing = dt_opencl_create_kernel(program, "color_smoothing");
 
   const int other = 14; // from programs.conf
   gd->kernel_passthrough_monochrome = dt_opencl_create_kernel(other, "passthrough_monochrome");
   gd->kernel_passthrough_color = dt_opencl_create_kernel(other, "passthrough_color");
-  gd->kernel_zoom_passthrough_monochrome = dt_opencl_create_kernel(other, "clip_and_zoom_demosaic_passthrough_monochrome");
 
   const int vng = 15; // from programs.conf
   gd->kernel_vng_border_interpolate = dt_opencl_create_kernel(vng, "vng_border_interpolate");
   gd->kernel_vng_lin_interpolate = dt_opencl_create_kernel(vng, "vng_lin_interpolate");
-  gd->kernel_zoom_third_size = dt_opencl_create_kernel(vng, "clip_and_zoom_demosaic_third_size_xtrans");
   gd->kernel_vng_green_equilibrate = dt_opencl_create_kernel(vng, "vng_green_equilibrate");
   gd->kernel_vng_interpolate = dt_opencl_create_kernel(vng, "vng_interpolate");
 
@@ -1087,7 +877,6 @@ void init_global(dt_iop_module_so_t *self)
 void cleanup_global(dt_iop_module_so_t *self)
 {
   dt_iop_demosaic_global_data_t *gd = self->data;
-  dt_opencl_free_kernel(gd->kernel_zoom_half_size);
   dt_opencl_free_kernel(gd->kernel_ppg_green);
   dt_opencl_free_kernel(gd->kernel_pre_median);
   dt_opencl_free_kernel(gd->kernel_green_eq_lavg);
@@ -1095,15 +884,12 @@ void cleanup_global(dt_iop_module_so_t *self)
   dt_opencl_free_kernel(gd->kernel_green_eq_favg_reduce_second);
   dt_opencl_free_kernel(gd->kernel_green_eq_favg_apply);
   dt_opencl_free_kernel(gd->kernel_ppg_redblue);
-  dt_opencl_free_kernel(gd->kernel_downsample);
   dt_opencl_free_kernel(gd->kernel_border_interpolate);
   dt_opencl_free_kernel(gd->kernel_color_smoothing);
   dt_opencl_free_kernel(gd->kernel_passthrough_monochrome);
   dt_opencl_free_kernel(gd->kernel_passthrough_color);
-  dt_opencl_free_kernel(gd->kernel_zoom_passthrough_monochrome);
   dt_opencl_free_kernel(gd->kernel_vng_border_interpolate);
   dt_opencl_free_kernel(gd->kernel_vng_lin_interpolate);
-  dt_opencl_free_kernel(gd->kernel_zoom_third_size);
   dt_opencl_free_kernel(gd->kernel_vng_green_equilibrate);
   dt_opencl_free_kernel(gd->kernel_vng_interpolate);
   dt_opencl_free_kernel(gd->kernel_markesteijn_initial_copy);
diff --git a/src/iop/pipescale.c b/src/iop/pipescale.c
new file mode 100644
index 000000000000..77daa6dd7c51
--- /dev/null
+++ b/src/iop/pipescale.c
@@ -0,0 +1,200 @@
+/*
+    This file is part of darktable,
+    Copyright (C) 2025 darktable developers.
+
+    darktable is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    darktable is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "bauhaus/bauhaus.h"
+#include "common/interpolation.h"
+#include "common/opencl.h"
+#include "common/imagebuf.h"
+#include "develop/imageop.h"
+#include "develop/imageop_math.h"
+#include "develop/tiling.h"
+#include "iop/iop_api.h"
+
+DT_MODULE_INTROSPECTION(1, dt_iop_pipescale_params_t)
+
+typedef struct dt_iop_pipescale_params_t // module parameters: a single int that nothing in this file reads
+{
+  int dummy;
+} dt_iop_pipescale_params_t;
+
+typedef dt_iop_pipescale_params_t dt_iop_pipescale_data_t; // per-piece data reuses the params layout (allocated in init_pipe)
+
+typedef struct dt_iop_pipescale_gui_data_t // gui data: likewise only a placeholder member
+{
+  int dummy;
+} dt_iop_pipescale_gui_data_t;
+
+const char *name() // returns the name string of this module
+{
+  return _("pipe scale"); // NOTE(review): _() is presumably the gettext translation macro - confirm
+}
+
+const char **description(dt_iop_module_t *self) // returns whatever dt_iop_set_description() builds from the texts below
+{
+  return dt_iop_set_description(self, _("crop and scale sensor data to current region of interest"),
+                                      _("mandatory"),
+                                      _("linear, RGB, scene-referred"),
+                                      _("linear, RGB"),
+                                      _("linear, RGB, scene-referred")); // NOTE(review): the four short texts are presumably purpose / input / process / output - confirm
+}
+
+int flags() // returns the OR-ed IOP_FLAGS_* bitmask for this module
+{
+  return IOP_FLAGS_ALLOW_TILING | IOP_FLAGS_TILING_FULL_ROI
+    | IOP_FLAGS_ONE_INSTANCE | IOP_FLAGS_NO_HISTORY_STACK;
+}
+
+int default_group() // returns the group constant this module reports
+{
+  return IOP_GROUP_BASIC;
+}
+
+
+dt_iop_colorspace_type_t default_colorspace(dt_iop_module_t *self, // colorspace reported for this module
+                                            dt_dev_pixelpipe_t *pipe,
+                                            dt_dev_pixelpipe_iop_t *piece)
+{
+  return IOP_CS_RGB; // constant: none of the arguments is inspected
+}
+
+void modify_roi_out(dt_iop_module_t *self, // derive the output roi from the input roi
+                    dt_dev_pixelpipe_iop_t *piece,
+                    dt_iop_roi_t *roi_out,
+                    const dt_iop_roi_t *const roi_in)
+{
+  *roi_out = *roi_in; // same size and scale as the input ...
+  roi_out->x = 0;     // ... but with the origin reset to (0, 0)
+  roi_out->y = 0;
+}
+
+void modify_roi_in(dt_iop_module_t *self, // always request the complete input buffer at scale 1.0
+                   dt_dev_pixelpipe_iop_t *piece,
+                   const dt_iop_roi_t *const roi_out,
+                   dt_iop_roi_t *roi_in)
+{
+  *roi_in = *roi_out; // copy first, then overwrite scale, origin and size below
+  roi_in->scale = 1.0f;
+  roi_in->x = 0;
+  roi_in->y = 0;
+  roi_in->width = piece->buf_in.width; // dimensions come from piece->buf_in, not from roi_out
+  roi_in->height = piece->buf_in.height;
+}
+
+void tiling_callback(dt_iop_module_t *self, // fills *tiling with the memory factors, overlap and alignment of this module
+                     dt_dev_pixelpipe_iop_t *piece,
+                     const dt_iop_roi_t *roi_in,
+                     const dt_iop_roi_t *roi_out,
+                     struct dt_develop_tiling_t *tiling)
+{
+  // output/input pixel-count ratio; convert to float before multiplying so large rois cannot overflow int
+  const float ioratio = (float)roi_out->width * roi_out->height / ((float)roi_in->width * roi_in->height);
+
+  tiling->factor = 1.0f + ioratio; // input buffer plus the proportionally sized output buffer
+  tiling->factor += ioratio != 1.0f ? 0.5f : 0.0f; // approximate extra requirements for interpolation
+  tiling->factor_cl = tiling->factor; // the _cl variants simply mirror the values set for the CPU fields
+  tiling->maxbuf = 1.0f;
+  tiling->maxbuf_cl = tiling->maxbuf;
+  tiling->overhead = 0;
+
+  tiling->overlap = 4;
+  tiling->xalign = 1; // alignment of 1 in both directions
+  tiling->yalign = 1;
+}
+
+void distort_mask(dt_iop_module_t *self, // bring a mask from the input roi to the output roi
+                  dt_dev_pixelpipe_iop_t *piece,
+                  const float *const in,
+                  float *const out,
+                  const dt_iop_roi_t *const roi_in,
+                  const dt_iop_roi_t *const roi_out)
+{
+  if(roi_out->scale != roi_in->scale) // exact float comparison: any scale difference takes the resampling path
+  {
+    const dt_interpolation_t *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF_WARP);
+    dt_interpolation_resample_1c(itor, out, roi_out, in, roi_in); // NOTE(review): "_1c" presumably means one float per pixel - confirm
+  }
+  else
+    dt_iop_copy_image_roi(out, in, 1, roi_in, roi_out); // same scale: plain copy; the 1 is presumably the channel count (process() passes 4) - confirm
+}
+
+#ifdef HAVE_OPENCL
+int process_cl(dt_iop_module_t *self, // OpenCL path: copy or rescale dev_in into dev_out; returns the status of the call it dispatches to
+               dt_dev_pixelpipe_iop_t *piece,
+               cl_mem dev_in, cl_mem dev_out,
+               const dt_iop_roi_t *const roi, // input roi
+               const dt_iop_roi_t *const roo) // output roi
+{
+  const int devid = piece->pipe->devid;
+  if(roo->width == roi->width && roo->height == roi->height && roi->scale == roo->scale) // identical size and scale: no resampling
+  {
+    size_t origin[] = { 0, 0, 0 };
+    size_t region[] = { roo->width, roo->height, 1 };
+    return dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region); // same origin is used for source and destination
+  }
+  else
+    return dt_iop_clip_and_zoom_cl(devid, dev_out, dev_in, roo, roi); // note the order: output image and roi first, then input
+}
+#endif
+
+void process(dt_iop_module_t *self, // CPU path: copy or rescale ivoid into ovoid
+             dt_dev_pixelpipe_iop_t *piece,
+             const void *const ivoid,
+             void *const ovoid,
+             const dt_iop_roi_t *const roi, // input roi
+             const dt_iop_roi_t *const roo) // output roi
+{
+  if(roo->width == roi->width && roo->height == roi->height && roi->scale == roo->scale) // identical size and scale: no resampling
+    dt_iop_copy_image_roi((float *)ovoid, (float *)ivoid, 4, roi, roo); // buffers are handled as float; the 4 is presumably floats per pixel - confirm
+  else
+    dt_iop_clip_and_zoom((float *)ovoid, (float *)ivoid, roo, roi); // note the order: output buffer and roi first, then input
+}
+
+void init_pipe(dt_iop_module_t *self, // allocate the zero-initialised per-piece data
+               dt_dev_pixelpipe_t *pipe,
+               dt_dev_pixelpipe_iop_t *piece)
+{
+  piece->data = calloc(1, sizeof(dt_iop_pipescale_data_t)); // result is not checked here; released in cleanup_pipe()
+}
+
+void cleanup_pipe(dt_iop_module_t *self, // release the per-piece data allocated in init_pipe()
+                  dt_dev_pixelpipe_t *pipe,
+                  dt_dev_pixelpipe_iop_t *piece)
+{
+  free(piece->data);
+  piece->data = NULL; // leave no dangling pointer behind
+}
+
+void init(dt_iop_module_t *self) // set up zeroed params/default_params and the module's fixed settings
+{
+  self->params = calloc(1, sizeof(dt_iop_pipescale_params_t)); // allocation results are not checked here
+  self->default_params = calloc(1, sizeof(dt_iop_pipescale_params_t));
+  self->default_enabled = TRUE; // enabled by default ...
+  self->hide_enable_button = TRUE; // ... and the enable button is flagged as hidden
+  self->params_size = sizeof(dt_iop_pipescale_params_t);
+}
+
+void gui_init(dt_iop_module_t *self) // minimal gui: gui data plus an empty label as the module widget
+{
+  IOP_GUI_ALLOC(pipescale); // NOTE(review): presumably allocates dt_iop_pipescale_gui_data_t for this module - confirm
+  self->widget = dt_ui_label_new("");
+}
+
+// clang-format off
+// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
+// vim: shiftwidth=2 expandtab tabstop=2 cindent
+// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
+// clang-format on