Quantcast
Channel: Intel® C++ Compiler
Viewing all articles
Browse latest Browse all 1616

RGB to ARGB and xor faster

$
0
0

What's the fastest way to convert RGB to ARGB and, optionally, XOR at the same time? This is what I currently use:

/**
 * Expands packed 3-byte pixels into 32-bit pixels whose top byte is forced to 0xFF.
 *
 * For every complete 3-byte group in `a`, source byte 0 goes to bits 0-7,
 * byte 1 to bits 8-15 and byte 2 to bits 16-23 of the output word. This is the
 * value the previous 4-byte load produced on little-endian x86 once the top
 * byte was overwritten by the OR with 0xFF000000.
 *
 * @param a        Source buffer of packed 3-byte pixels; only `size` bytes are read.
 * @param b        Destination buffer with room for `size / 3` 32-bit pixels.
 *                 Must not overlap `a` (both pointers are `__restrict`).
 * @param size     Number of source BYTES; trailing bytes that do not form a
 *                 whole pixel are ignored. Zero or negative converts nothing.
 * @param applyXor When true, the low 24 bits of each output pixel are the
 *                 source pixel XORed with the value already stored in `b`;
 *                 when false, `b` is simply overwritten.
 * @return Always true.
 */
bool DrawImage(const unsigned char* __restrict a, unsigned int* __restrict b, const int size, const bool applyXor){
	// The flag used to be named `xor`, which is a reserved alternative token in
	// ISO C++ and only compiles in MSVC-compatible modes.
	const int pixels = size / 3;
	if (applyXor){
#pragma simd
		for (int i = 0; i < pixels; ++i){
			// Build the pixel from its three bytes instead of casting the byte
			// pointer to unsigned int*: a 4-byte load reads one byte past the
			// end of the buffer on the last pixel, is misaligned, and breaks
			// strict aliasing.
			const unsigned char* px = a + i * 3;
			const unsigned int rgb = static_cast<unsigned int>(px[0])
			                       | (static_cast<unsigned int>(px[1]) << 8)
			                       | (static_cast<unsigned int>(px[2]) << 16);
			// Indexing keeps the read and the write of the destination in
			// separate, well-ordered steps (the old `*b++ = ... ^ *b` both
			// modified and read `b` in one expression).
			b[i] = (rgb ^ b[i]) | 0xFF000000u;
		}
	}else{
#pragma ivdep
#pragma simd
		for (int i = 0; i < pixels; ++i){
			const unsigned char* px = a + i * 3;
			b[i] = static_cast<unsigned int>(px[0])
			     | (static_cast<unsigned int>(px[1]) << 8)
			     | (static_cast<unsigned int>(px[2]) << 16)
			     | 0xFF000000u;
		}
	}
	return true;
}

These functions are performance-critical, and I want to convert them to inline assembly. Can the code below be optimized further?

; Loop-body fragment, 32-bit x86 with SSE2, Intel operand order (destination first).
; Each pass combines four 32-bit values loaded 3 bytes apart with 16 bytes at
; [ebx+esi*4] and stores the result back to that same address.
; NOTE(review): register setup is outside this fragment. edi presumably holds the
; source pointer, ebx the destination pointer, eax the element count, and xmm0
; 0xFF000000 in every lane - confirm against the full function.
; ecx = esi * 3: byte offset matching element index esi.
lea         ecx,[esi+esi*2]
; Four overlapping 4-byte loads at byte offsets +9, +3, +6 and +0.
; The load at +9 reads bytes +9..+12, one byte beyond the 12 bytes these four
; 3-byte groups occupy.
movd        xmm1,dword ptr [edi+ecx+9]
movd        xmm3,dword ptr [edi+ecx+3]
movd        xmm2,dword ptr [edi+ecx+6]
movd        xmm4,dword ptr [edi+ecx]
; Interleave low dwords: xmm3 = {[+3],[+9],..}, xmm4 = {[+0],[+6],..},
; then xmm4 = {[+0],[+3],[+6],[+9]} (lowest lane first).
punpckldq   xmm3,xmm1
punpckldq   xmm4,xmm2
punpckldq   xmm4,xmm3
; XOR with the 16 bytes currently stored at ebx+esi*4.
pxor        xmm4,xmmword ptr [ebx+esi*4]
; OR in the contents of xmm0.
por         xmm4,xmm0
; movdqa is the aligned form: ebx+esi*4 must be 16-byte aligned.
movdqa      xmmword ptr [ebx+esi*4],xmm4
; Advance by four elements; jb is an unsigned compare, so loop while esi < eax.
add         esi,4
cmp         esi,eax
jb          DrawImage+0A4h (0F8F96F4h)  
; The same 14 instructions appear a second time in the post, unchanged.
lea         ecx,[esi+esi*2]
movd        xmm1,dword ptr [edi+ecx+9]
movd        xmm3,dword ptr [edi+ecx+3]
movd        xmm2,dword ptr [edi+ecx+6]
movd        xmm4,dword ptr [edi+ecx]
punpckldq   xmm3,xmm1
punpckldq   xmm4,xmm2
punpckldq   xmm4,xmm3
pxor        xmm4,xmmword ptr [ebx+esi*4]
por         xmm4,xmm0
movdqa      xmmword ptr [ebx+esi*4],xmm4
add         esi,4
cmp         esi,eax
jb          DrawImage+0A4h (0F8F96F4h)  

 


Viewing all articles
Browse latest Browse all 1616

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>