Git commit a0492cc9486ee296fd51ea913686ee0cdc3c3270 by Vlad Zagorodniy. Committed on 14/09/2018 at 19:06. Pushed by vladz into branch 'zzag/box-shadow-helper-box-blur'. [libbreezecommon] Use box blur instead of FFT approach M +0 -2 CMakeLists.txt D +0 -20 cmake/Modules/FindFFTW.cmake M +1 -7 libbreezecommon/CMakeLists.txt M +103 -190 libbreezecommon/breezeboxshadowhelper.cpp https://commits.kde.org/breeze/a0492cc9486ee296fd51ea913686ee0cdc3c3270 diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b307444..8aca0b09 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,6 @@ include(GenerateExportHeader) include(WriteBasicConfigVersionFile) include(FeatureSummary) = -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modu= les") - if(USE_KDE4) find_package(KDE4 REQUIRED) include(KDE4Defaults) diff --git a/cmake/Modules/FindFFTW.cmake b/cmake/Modules/FindFFTW.cmake deleted file mode 100644 index c3214373..00000000 --- a/cmake/Modules/FindFFTW.cmake +++ /dev/null @@ -1,20 +0,0 @@ -# Find the FFTW library -# -# Usage: -# find_package(FFTW [REQUIRED]) -# -# It sets the following variables: -# FFTW_FOUND -# FFTW_INCLUDES -# FFTW_LIBRARIES - - -find_path(FFTW_INCLUDES fftw3.h) - -find_library(FFTW_LIBRARIES NAMES fftw3) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(FFTW DEFAULT_MSG - FFTW_INCLUDES FFTW_LIBRARIES) - -mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES) diff --git a/libbreezecommon/CMakeLists.txt b/libbreezecommon/CMakeLists.txt index 92d924b0..571923ac 100644 --- a/libbreezecommon/CMakeLists.txt +++ b/libbreezecommon/CMakeLists.txt @@ -11,9 +11,6 @@ if (BREEZE_COMMON_USE_KDE4) endif () = ################# dependencies ################# -### FFTW -find_package(FFTW REQUIRED) - ### Qt/KDE if (NOT BREEZE_COMMON_USE_KDE4) find_package(Qt5 REQUIRED CONFIG COMPONENTS Widgets) @@ -35,7 +32,6 @@ if (BREEZE_COMMON_USE_KDE4) EXPORT_FILE_NAME breezecommon_export.h) = target_link_libraries(breezecommon4 ${KDE4_KDEUI_LIBS}) - target_link_libraries(breezecommon4 ${FFTW_LIBRARIES}) = set_target_properties(breezecommon4 PROPERTIES VERSION ${PROJECT_VERSION} @@ -52,9 +48,7 @@ else () target_link_libraries(breezecommon5 PUBLIC Qt5::Core - Qt5::Gui - PRIVATE - ${FFTW_LIBRARIES}) + Qt5::Gui) = set_target_properties(breezecommon5 PROPERTIES VERSION ${PROJECT_VERSION} diff --git a/libbreezecommon/breezeboxshadowhelper.cpp b/libbreezecommon/br= eezeboxshadowhelper.cpp index 1f345e3a..e8503e57 100644 --- a/libbreezecommon/breezeboxshadowhelper.cpp +++ b/libbreezecommon/breezeboxshadowhelper.cpp @@ -23,8 +23,6 @@ = #include = -#include - #include = = @@ -32,227 +30,146 @@ namespace Breeze { namespace BoxShadowHelper { = namespace { - // FFT approach outperforms naive blur method when blur radius >=3D 64. - // (was discovered after doing a lot of benchmarks) - const int FFT_BLUR_RADIUS_THRESHOLD =3D 64; - - // According to the CSS Level 3 spec, standard deviation must be equal= to - // half of the blur radius. https://www.w3.org/TR/css-backgrounds-3/#s= hadow-blur - // Current window size is too small for sigma equal to half of the blu= r radius. - // As a workaround, sigma blur scale is lowered. With the lowered sigma - // blur scale, area under the kernel equals to 0.98, which is pretty e= nough. - // Maybe, it should be changed in the future. - const double SIGMA_BLUR_SCALE =3D 0.4375; +// According to the CSS Level 3 spec, standard deviation must be equal to +// half of the blur radius. https://www.w3.org/TR/css-backgrounds-3/#shado= w-blur +// Current window size is too small for sigma equal to half of the blur ra= dius. +// As a workaround, sigma blur scale is lowered. With the lowered sigma +// blur scale, area under the kernel equals to 0.98, which is pretty enoug= h. +// Maybe, it should be changed in the future. +const qreal BLUR_SIGMA_SCALE =3D 0.4375; } = -inline int kernelSizeToRadius(int kernelSize) +inline qreal radiusToSigma(qreal radius) { - return (kernelSize - 1) / 2; + return radius * BLUR_SIGMA_SCALE; } = -inline int radiusToKernelSize(int radius) +inline int boxSizeToRadius(int boxSize) { - return radius * 2 + 1; + return (boxSize - 1) / 2; } = -QVector computeGaussianKernel(int radius) +class BoxBlurProfile { - QVector kernel; - const int kernelSize =3D radiusToKernelSize(radius); - kernel.reserve(kernelSize); - - const double sigma =3D SIGMA_BLUR_SCALE * radius; - const double den =3D std::sqrt(2.0) * sigma; - double kernelNorm =3D 0.0; - double lastInt =3D 0.5 * std::erf((-radius - 0.5) / den); - - for (int i =3D 0; i < kernelSize; i++) { - const double currInt =3D 0.5 * std::erf((i - radius + 0.5) / den); - const double w =3D currInt - lastInt; - kernel << w; - kernelNorm +=3D w; - lastInt =3D currInt; - } +public: + BoxBlurProfile(int radius, int passes =3D 3); = - for (auto &w : kernel) { - w /=3D kernelNorm; - } + int padding() const; + QVector boxSizes() const; = - return kernel; -} +private: + int m_padding; + QVector m_boxSizes; +}; = -// Do horizontal pass of the Gaussian filter. Please notice that the result -// is transposed. So, the dst image should have proper size, e.g. if the s= rc -// image have (wxh) size then the dst image should have (hxw) size. The -// result is transposed so we read memory in linear order. -void blurAlphaNaivePass(const QImage &src, QImage &dst, const QVector &kernel) +BoxBlurProfile::BoxBlurProfile(int radius, int passes) { - const int alphaOffset =3D QSysInfo::ByteOrder =3D=3D QSysInfo::BigEndi= an ? 0 : 3; - const int alphaStride =3D src.depth() >> 3; - const int radius =3D kernelSizeToRadius(kernel.size()); - - for (int y =3D 0; y < src.height(); y++) { - const uchar *in =3D src.scanLine(y) + alphaOffset; - uchar *out =3D dst.scanLine(0) + alphaOffset + y * alphaStride; - - for (int x =3D 0; x < radius; x++) { - const uchar *window =3D in; - double alpha =3D 0.0; - for (int k =3D radius - x; k < kernel.size(); k++) { - alpha +=3D *window * kernel[k]; - window +=3D alphaStride; - } - *out =3D static_cast(alpha); - out +=3D dst.width() * alphaStride; - } - - for (int x =3D radius; x < src.width() - radius; x++) { - const uchar *window =3D in + (x - radius) * alphaStride; - double alpha =3D 0.0; - for (int k =3D 0; k < kernel.size(); k++) { - alpha +=3D *window * kernel[k]; - window +=3D alphaStride; - } - *out =3D static_cast(alpha); - out +=3D dst.width() * alphaStride; - } + const qreal sigma =3D radiusToSigma(radius); = - for (int x =3D src.width() - radius; x < src.width(); x++) { - const uchar *window =3D in + (x - radius - 1) * alphaStride; - double alpha =3D 0.0; - const int outside =3D x + radius - src.width(); - for (int k =3D 0; k < kernel.size() - outside; k++) { - alpha +=3D *window * kernel[k]; - window +=3D alphaStride; - } - *out =3D static_cast(alpha); - out +=3D dst.width() * alphaStride; - } + // Box sizes are computed according to the "Fast Almost-Gaussian Filte= ring" + // paper, see http://www.peterkovesi.com/papers/FastGaussianSmoothing.= pdf + int lower =3D std::floor(std::sqrt(12 * std::pow(sigma, 2) / passes + = 1)); + if (lower % 2 =3D=3D 0) { + lower--; + } + const int upper =3D lower + 2; + + const int threshold =3D std::round( + (12 * std::pow(sigma, 2) + - passes * std::pow(lower, 2) + - 4 * passes * lower + - 3 * passes) + / (-4 * lower - 4)); + + m_padding =3D radius; + for (int i =3D 0; i < passes; ++i) { + m_boxSizes.append(i < threshold ? lower : upper); } } = -// Blur alpha channel of the given image using separable convolution -// gaussian kernel. Not very efficient with big blur radii. -void blurAlphaNaive(QImage &img, int radius) +int BoxBlurProfile::padding() const { - const QVector kernel =3D computeGaussianKernel(radius); - QImage tmp(img.height(), img.width(), img.format()); + return m_padding; +} = - blurAlphaNaivePass(img, tmp, kernel); // horizontal pass - blurAlphaNaivePass(tmp, img, kernel); // vertical pass +QVector BoxBlurProfile::boxSizes() const +{ + return m_boxSizes; } = -// Blur alpha channel of the given image using Fourier Transform. -// It's somewhat efficient with big blur radii. -// -// It works as follows: -// - do FFT on given input image(it is expected, that the -// input image was padded before) -// - compute Gaussian kernel, pad it to the size of the input -// image, and do FFT on it -// - multiply the two in the frequency domain(element-wise) -// - transform the result back to "time domain" -// -void blurAlphaFFT(QImage &img, int radius) +void boxBlurPass(const QImage &src, QImage &dst, int boxSize) { + const int alphaStride =3D src.depth() >> 3; const int alphaOffset =3D QSysInfo::ByteOrder =3D=3D QSysInfo::BigEndi= an ? 0 : 3; - const int alphaStride =3D img.depth() >> 3; - const int size =3D img.width() * img.height(); - - // Use FFTW's malloc function so the returned pointer obeys any - // special alignment restrictions. (e.g. for SIMD acceleration, etc) - // See http://www.fftw.org/fftw3_doc/MekernelSizeToRadius(mory-Allocat= ion.html - fftw_complex *imageIn =3D fftw_alloc_complex(size); - fftw_complex *imageOut =3D fftw_alloc_complex(size); - - uchar *data =3D img.scanLine(0) + alphaOffset; - for (int i =3D 0; i < size; i++) { - imageIn[i][0] =3D *data; - imageIn[i][1] =3D 0.0; - data +=3D alphaStride; - } - - fftw_plan imageFFT =3D fftw_plan_dft_2d( - img.height(), img.width(), - imageIn, imageOut, - FFTW_FORWARD, FFTW_ESTIMATE); - - fftw_plan imageIFFT =3D fftw_plan_dft_2d( - img.height(), img.width(), - imageOut, imageIn, - FFTW_BACKWARD, FFTW_ESTIMATE); - - // The computed Gaussian kernel has to have the same size as the input= image. - // Please note that the center of the computed Gaussian kernel is plac= ed - // at the top-left corner and the whole kernel is wrapped around so we= read - // result in linear order. - // Note: the kernel is computed by taking a product of two 1-D Gaussia= n kernels. - QVector kernel(size, 0); - const QVector kernel_ =3D computeGaussianKernel(radius); - for (int y =3D 0; y < kernel_.size(); y++) { - const int i =3D (img.height() + y - radius) % img.height(); - for (int x =3D 0; x < kernel_.size(); x++) { - const int j =3D (img.width() + x - radius) % img.width(); - kernel[j + i * img.width()] =3D kernel_[x] * kernel_[y]; - } - } = - fftw_complex *kernelIn =3D fftw_alloc_complex(kernel.size()); - fftw_complex *kernelOut =3D fftw_alloc_complex(kernel.size()); + const int radius =3D boxSizeToRadius(boxSize); + const qreal invSize =3D 1.0 / boxSize; = - for (int i =3D 0; i < size; i++) { - kernelIn[i][0] =3D kernel[i]; - kernelIn[i][1] =3D 0.0; - } + const int dstStride =3D dst.width() * alphaStride; = - fftw_plan kernelFFT =3D fftw_plan_dft_2d( - img.height(), img.width(), - kernelIn, kernelOut, - FFTW_FORWARD, FFTW_ESTIMATE); + for (int y =3D 0; y < src.height(); ++y) { + const uchar *srcAlpha =3D src.scanLine(y); + uchar *dstAlpha =3D dst.scanLine(0); = - // Do actual FFT. - fftw_execute(imageFFT); - fftw_execute(kernelFFT); + srcAlpha +=3D alphaOffset; + dstAlpha +=3D alphaOffset + y * alphaStride; = - for (int i =3D 0; i < size; i++) { - const double re =3D imageOut[i][0] * kernelOut[i][0] - imageOut[i]= [1] * kernelOut[i][1]; - const double im =3D imageOut[i][0] * kernelOut[i][1] + imageOut[i]= [1] * kernelOut[i][0]; - imageOut[i][0] =3D re; - imageOut[i][1] =3D im; - } + const uchar *left =3D srcAlpha; + const uchar *right =3D left + alphaStride * radius; = - fftw_execute(imageIFFT); - - // Copy result back. Please note, result is scaled by `width x height`= so we need to scale it down. - const double invSize =3D 1.0 / size; - data =3D img.scanLine(0) + alphaOffset; - for (int i =3D 0; i < size; i++) { - *data =3D imageIn[i][0] * invSize; - data +=3D alphaStride; - } + int window =3D 0; + for (int x =3D 0; x < radius; ++x) { + window +=3D *srcAlpha; + srcAlpha +=3D alphaStride; + } = - fftw_destroy_plan(kernelFFT); - fftw_destroy_plan(imageFFT); - fftw_destroy_plan(imageIFFT); + for (int x =3D 0; x <=3D radius; ++x) { + window +=3D *right; + right +=3D alphaStride; + *dstAlpha =3D static_cast(window * invSize); + dstAlpha +=3D dstStride; + } = - fftw_free(kernelIn); - fftw_free(kernelOut); + for (int x =3D radius + 1; x < src.width() - radius; ++x) { + window +=3D *right - *left; + left +=3D alphaStride; + right +=3D alphaStride; + *dstAlpha =3D static_cast(window * invSize); + dstAlpha +=3D dstStride; + } = - fftw_free(imageIn); - fftw_free(imageOut); + for (int x =3D src.width() - radius; x < src.width(); ++x) { + window -=3D *left; + left +=3D alphaStride; + *dstAlpha =3D static_cast(window * invSize); + dstAlpha +=3D dstStride; + } + } } = -void boxShadow(QPainter *p, const QRect &box, const QPoint &offset, int ra= dius, const QColor &color) +void boxBlurAlpha(QImage &image, const BoxBlurProfile &profile) { - const QSize size =3D box.size() + 2 * QSize(radius, radius); + // Temporary buffer is transposed so we always read memory + // in linear order. + QImage tmp(image.height(), image.width(), image.format()); + + const auto boxSizes =3D profile.boxSizes(); + for (const int &boxSize : boxSizes) { + boxBlurPass(image, tmp, boxSize); // horizontal pass + boxBlurPass(tmp, image, boxSize); // vertical pass + } +} = +void boxShadow(QPainter *p, const QRect &box, const QPoint &offset, + int radius, const QColor &color) +{ #if BREEZE_COMMON_USE_KDE4 const qreal dpr =3D 1.0; #else const qreal dpr =3D p->device()->devicePixelRatioF(); #endif - - QPainter painter; + const BoxBlurProfile profile(radius * dpr, 3); + const QSize size =3D box.size() + 2 * QSize(profile.padding(), profile= .padding()); = QImage shadow(size * dpr, QImage::Format_ARGB32_Premultiplied); #if !BREEZE_COMMON_USE_KDE4 @@ -260,18 +177,14 @@ void boxShadow(QPainter *p, const QRect &box, const Q= Point &offset, int radius, #endif shadow.fill(Qt::transparent); = + QPainter painter; painter.begin(&shadow); - painter.fillRect(QRect(QPoint(radius, radius), box.size()), Qt::black); + painter.fillRect(QRect(QPoint(profile.padding(), profile.padding()), b= ox.size()), Qt::black); painter.end(); = // There is no need to blur RGB channels. Blur the alpha - // channel and then give the shadow a tint of the desired color. - const int radius_ =3D radius * dpr; - if (radius_ < FFT_BLUR_RADIUS_THRESHOLD) { - blurAlphaNaive(shadow, radius_); - } else { - blurAlphaFFT(shadow, radius_); - } + // channel and do compositing stuff later. + boxBlurAlpha(shadow, profile); = painter.begin(&shadow); painter.setCompositionMode(QPainter::CompositionMode_SourceIn);