: 0: 55 push %rbp 1: 48 89 e5 mov %rsp,%rbp 4: 48 83 ec 10 sub $0x10,%rsp 8: c7 45 f8 28 00 00 00 movl $0x28,-0x8(%rbp) f: c7 45 f4 14 00 00 00 movl $0x14,-0xc(%rbp) 16: 8b 55 f4 mov -0xc(%rbp),%edx 19: 8b 4d f8 mov -0x8(%rbp),%ecx 1c: 48 8d 45 ff lea -0x1(%rbp),%rax 20: 89 ce mov %ecx,%esi 22: 48 89 c7 mov %rax,%rdi 25: e8 00 00 00 00 callq 2a 2a: b8 00 00 00 00 mov $0x0,%eax 2f: c9 leaveq 30: c3 retq Disassembly of section .text._Z5bfAddIiET_S0_S0_: 0000000000000000 <_Z5bfAddIiET_S0_S0_>: 0: 55 push %rbp 1: 48 89 e5 mov %rsp,%rbp 4: 89 7d fc mov %edi,-0x4(%rbp) 7: 89 75 f8 mov %esi,-0x8(%rbp) a: 8b 45 f8 mov -0x8(%rbp),%eax d: 8b 55 fc mov -0x4(%rbp),%edx 10: 8d 04 02 lea (%rdx,%rax,1),%eax 13: c9 leaveq 14: c3 retq Disassembly of section .text._ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii: 0000000000000000 <_ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii>: 0: 55 push %rbp 1: 48 89 e5 mov %rsp,%rbp 4: 48 83 ec 20 sub $0x20,%rsp 8: 48 89 7d e8 mov %rdi,-0x18(%rbp) c: 89 75 e4 mov %esi,-0x1c(%rbp) f: 89 55 e0 mov %edx,-0x20(%rbp) 12: 8b 55 e0 mov -0x20(%rbp),%edx 15: 8b 45 e4 mov -0x1c(%rbp),%eax 18: 89 d6 mov %edx,%esi 1a: 89 c7 mov %eax,%edi 1c: e8 00 00 00 00 callq 21 <_ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii+0x21> 21: f2 0f 2a c0 cvtsi2sd %eax,%xmm0 25: f2 0f 11 45 f8 movsd %xmm0,-0x8(%rbp) 2a: f2 0f 10 45 f8 movsd -0x8(%rbp),%xmm0 2f: bf 00 00 00 00 mov $0x0,%edi 34: b8 01 00 00 00 mov $0x1,%eax 39: e8 00 00 00 00 callq 3e <_ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii+0x3e> 3e: c9 leaveq 3f: c3 retq _Optimized code compiled and disassembled with:_ g++ -c -O1 -g3 main.cpp objdump -d -s main.o 0000000000000000

: 0: 48 83 ec 08 sub $0x8,%rsp 4: f2 0f 10 05 00 00 00 movsd 0x0(%rip),%xmm0 # c b: 00 c: be 00 00 00 00 mov $0x0,%esi 11: bf 01 00 00 00 mov $0x1,%edi 16: b8 01 00 00 00 mov $0x1,%eax 1b: e8 00 00 00 00 callq 20 20: b8 00 00 00 00 mov $0x0,%eax 25: 48 83 c4 08 add $0x8,%rsp 29: c3 retq --------------020803010605030901080709 Content-Type: text/html; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit On 02/06/2011 09:46 AM, Cyrille Berger Skott wrote:

Hi,

Here is some code review:

* I did some more testing and it is indeed a regression for this bug 
https://bugs.kde.org/show_bug.cgi?id=176536 , since I am assuming that you 
wanted it that way, having transparent pixel being erased.

For instance look at the result of the following file:

http://cyrille.diwi.org/tmp/krita/colored_leaves.kra
Before patch:
http://cyrille.diwi.org/tmp/krita/colored_leaves_before.png
After patch:
http://cyrille.diwi.org/tmp/krita/colored_leaves_after.png

Since I am not convinced that "alpha lock" should be used for that, since it 
is there to prevent editing of the alpha channel, if the having transparent 
pixel erased is a wanted behaviour we should probably introduce a "keep 
transparency when compositing" and have it on by default

Oh, yes you are right. This is a problem for layer compositing.
I don't know how easily it can be done but I would like to have a button, to keep
transparency when compositing layers, for every layer next to the alpha lock button
and of course turned on by default if you want it that way.
I have the rough guess that the layer compositing simply works with the CompositeOps alpha
locking turned off by default. So turning it on by default and adding an option to turn
it off (per layer) should hopefully solve the problem :D if you agree.

* the optimized integers operations do provide a significant boost in speed, 
about 10% on the composite operations, that translate to about 5% when 
drawing. I don't know at what size of brush you tried, but even on high-end 
hardware it makes a noticeable difference when using large size brushes. On 
the other hand, it does provide an unnoticeable difference in the results. 
This is why those optimizwd integers operations are used in Gimp, imagemagick 
and many other graphics applications. So I am against dropping them. If it is 
really a problem for you, I would suggest the use of a compile switch, 
ENABLE_EXACT_PRECISION_COMPOSITE_OPS , defaulted to off.

Ok, my biggest brush only has a diameter of 300px.
I didn't meant to drop those optimized functions. I just had some problems with the
result and wanted to get everything working right before i start optimizing anything :D

* KoCompositeOpFunction.h should not duplicate the content of 
KoColorSpaceMath.h (especially lerp, mul) and the functions that does not 
exist in KoColorSpaceMath should be added (like inv) Similary use "clamp" from 
KoColorSpaceMath, since your version clamp values for floating points as well, 
we should not happen.

Would you agree if I add those functions as aliases to KoColorSpaceMath.h
because i think in my opinion it makes the code more readable to have a
simple function mul(..., ...) then to specify every time KoColorSpaceMaths<...>::multiply(..., ...).
And I would like to have two versions of the multiplication functions (optimized and unoptimized) like:

// fast version
template<class T>
inline T mul(T a, T b) { T(KoColorSpaceMaths<T>::multiply(a, b)); }

// accurate version
template<class T>
inline T mulac(T a, T b) {
typedef typename KoColorSpaceMathsTraits<T>::compositetype composite_type;
return T(composite_type(a) * b / KoColorSpaceMathsTraits<T>::unitValue);
}

So i can add the fast version everywhere where it works and the accurate version everywhere else.

About the clamp function. I thought even the FP versions need to be clamped to the range
between 0.0 and 1.0 (I used the camp function only where the values could exceed this bound e.g. after divisions)
because without clamping the FP versions would behave different to the integer versions or do
I understand something wrong here?

Ah, yes something else. I've got a question about the composite type for float data types.
The KoColorSpaceMathsTraits<float>::compositetype is double. is this necessary?
wouldn't float be sufficient here?

And the KoColorSpaceMaths::divide method takes values of type T and returns a value of type T.
But i think it should return a value of type T->compositetype. For example dividing 1.0 by 0.5
will result in 2.0 what would be 510 in 8bit per channel mode and we have a data loss here because it will be
truncated to T.

* have you cheked if compositeFunc is inlined inside KoCompositeOpGenericSC ?

Well, yes I'm pretty sure. I compiled a little test program:

#include <cstdio> template<class T> inline T bfAdd(T src, T dst) { return src + dst; } template<class T, T blendFunc(T,T)> struct CompositingOp {     inline void doSomething(T src, T dst)     {         double result = double(blendFunc(src, dst));         std::printf("%f\n", result);     } }; int main() {     CompositingOp<int, &bfAdd> opAdd;     int src = 40;     int dst = 20;         opAdd.doSomething(src, dst);     return 0; }

The ASM result should speak for itself :D

Unoptimized code compiled and disassembled with:
g++ -c -O0 -g3 main.cpp
objdump -d -s main.o

0000000000000000 <main>:    0:    55                       push   %rbp    1:    48 89 e5                 mov    %rsp,%rbp    4:    48 83 ec 10              sub    $0x10,%rsp    8:    c7 45 f8 28 00 00 00     movl   $0x28,-0x8(%rbp)    f:    c7 45 f4 14 00 00 00     movl   $0x14,-0xc(%rbp) 16:    8b 55 f4                 mov    -0xc(%rbp),%edx 19:    8b 4d f8                 mov    -0x8(%rbp),%ecx 1c:    48 8d 45 ff              lea    -0x1(%rbp),%rax 20:    89 ce                    mov    %ecx,%esi 22:    48 89 c7                 mov    %rax,%rdi 25:    e8 00 00 00 00           callq 2a <main+0x2a> 2a:    b8 00 00 00 00           mov    $0x0,%eax 2f:    c9                       leaveq 30:    c3                       retq   Disassembly of section .text._Z5bfAddIiET_S0_S0_: 0000000000000000 <_Z5bfAddIiET_S0_S0_>:    0:    55                       push   %rbp    1:    48 89 e5                 mov    %rsp,%rbp    4:    89 7d fc                 mov    %edi,-0x4(%rbp)    7:    89 75 f8                 mov    %esi,-0x8(%rbp)    a:    8b 45 f8                 mov    -0x8(%rbp),%eax    d:    8b 55 fc                 mov    -0x4(%rbp),%edx 10:    8d 04 02                 lea    (%rdx,%rax,1),%eax 13:    c9                       leaveq 14:    c3                       retq   Disassembly of section .text._ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii: 0000000000000000 <_ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii>:    0:    55                       push   %rbp    1:    48 89 e5                 mov    %rsp,%rbp    4:    48 83 ec 20              sub    $0x20,%rsp    8:    48 89 7d e8              mov    %rdi,-0x18(%rbp)    c:    89 75 e4                 mov    %esi,-0x1c(%rbp)    f:    89 55 e0                 mov    %edx,-0x20(%rbp) 12:    8b 55 e0                 mov    -0x20(%rbp),%edx 15:    8b 45 e4                 mov    -0x1c(%rbp),%eax 18:    89 d6                    mov    %edx,%esi 1a:    89 c7                    mov    %eax,%edi 1c:    e8 00 00 00 00           callq 21 <_ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii+0x21> 21:    f2 0f 2a c0              cvtsi2sd %eax,%xmm0 25:    f2 0f 11 45 f8           movsd %xmm0,-0x8(%rbp) 2a:    f2 0f 10 45 f8           movsd -0x8(%rbp),%xmm0 2f:    bf 00 00 00 00           mov    $0x0,%edi 34:    b8 01 00 00 00           mov    $0x1,%eax 39:    e8 00 00 00 00           callq 3e <_ZN13CompositingOpIiXadL_Z5bfAddIiET_S1_S1_EEE11doSomethingEii+0x3e> 3e:    c9                       leaveq 3f:    c3                       retq
Optimized code compiled and disassembled with:
g++ -c -O1 -g3 main.cpp
objdump -d -s main.o

0000000000000000 <main>:    0:    48 83 ec 08              sub    $0x8,%rsp    4:    f2 0f 10 05 00 00 00     movsd 0x0(%rip),%xmm0        # c <main+0xc>    b:    00    c:    be 00 00 00 00           mov    $0x0,%esi 11:    bf 01 00 00 00           mov    $0x1,%edi 16:    b8 01 00 00 00           mov    $0x1,%eax 1b:    e8 00 00 00 00           callq 20 <main+0x20> 20:    b8 00 00 00 00           mov    $0x0,%eax 25:    48 83 c4 08              add    $0x8,%rsp 29:    c3                       retq

--------------020803010605030901080709-- --===============0818458834== Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ kimageshop mailing list kimageshop@kde.org https://mail.kde.org/mailman/listinfo/kimageshop --===============0818458834==--