I'm just starting out with GPGPU programming using the Intel compiler, so there's probably a lesson to be learned here. Can someone look at this code and tell me why I get this output when I run it? Please note that the function isn't called in this code example: I've commented out the call and am executing the same code inline within the cilk_for loop instead.
Here is the code:
// IntelTestBed2.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include<gfx/gfx_rt.h> #include "cilk/cilk.h" #include <iostream> #define WIN32_LEAN_AND_MEAN #include <Windows.h> struct rgb { unsigned char red; unsigned char green; unsigned char blue; }; __declspec (target(gfx)) void process_image(rgb &originalImage, rgb &modifiedImage) { float temp; temp = (0.393f * originalImage.red) + (0.769f * originalImage.green) + (0.189f * originalImage.blue); modifiedImage.red = (temp > 255.f) ? 255.f : temp; temp = (0.349f * originalImage.red) + (0.686f * originalImage.green) + (0.168f * originalImage.blue); modifiedImage.green = (temp > 255.f) ? 255.f : temp; temp = (0.272f * originalImage.red) + (0.534f * originalImage.green) + (0.131f * originalImage.blue); modifiedImage.blue = (temp > 255.f) ? 255.f : temp; } int main() { rgb* originalImage = new rgb[320 * 240]; rgb* modifiedImage = new rgb[320 * 240]; #pragma offload target(gfx) pin(originalImage,modifiedImage:length(320*240*sizeof(rgb))) cilk_for(int c = 0; c < 320 * 240; c++) { //process_image(originalImage[c], modifiedImage[c]); float temp; temp = (0.393f * originalImage[c].red) + (0.769f * originalImage[c].green) + (0.189f * originalImage[c].blue); modifiedImage[c].red = (temp > 255.f) ? 255.f : temp; temp = (0.349f * originalImage[c].red) + (0.686f * originalImage[c].green) + (0.168f * originalImage[c].blue); modifiedImage[c].green = (temp > 255.f) ? 255.f : temp; temp = (0.272f * originalImage[c].red) + (0.534f * originalImage[c].green) + (0.131f * originalImage[c].blue); modifiedImage[c].blue = (temp > 255.f) ? 255.f : temp; } getchar(); return 0; }
Here is the output (note the error near the end of the log: "WARNING: surface creation failed" followed by "FATAL ERROR: could not create surface"):
GFX(10:39:45): Linked with dynamic gfx-runtime library
GFX(10:39:45): Library loaded C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2016\windows\redist\intel64\compiler\libgfxoffload.dll
GFX(10:39:45): Library version 13.0.0.0
GFX(10:39:45): Found OS: Windows 8
GFX(10:39:45):
GFX(10:39:45): Environment vars:
GFX(10:39:45): GFX_SLM_LIMIT = 65536
GFX(10:39:45): GFX_L3_CONFIG = -2
GFX(10:39:45): GFX_NOTIFY_DEBUGGER = 0
GFX(10:39:45): GFX_THREAD_SPACE_HEIGHT = 0
GFX(10:39:45): GFX_THREAD_SPACE_WIDTH = 0
GFX(10:39:45): GFX_THREAD_GROUP_HEIGHT = 8
GFX(10:39:45): GFX_THREAD_GROUP_WIDTH = 1
GFX(10:39:45): GFX_STACK_SIZE = 33554432
GFX(10:39:45): GFX_SPECIAL_EXIT = 0
GFX(10:39:45): GFX_DUMP_CISA = 0
GFX(10:39:45): GFX_DUMP_RESOURCE = 0
GFX(10:39:45): GFX_OFFLOAD_DISABLE = 0
GFX(10:39:45): GFX_SIM_MODE = 0
GFX(10:39:45): GFX_DX_MODE = UNDEFINED
GFX(10:39:45): GFX_PROG_OPTIONS =
GFX(10:39:45): GFX_UNION_PTRS = 8
GFX(10:39:45): GFX_ALIGN_PINS = 1
GFX(10:39:45): * GFX_DEBUG = 1
GFX(10:39:45): GFX_CPU_BACKUP = 2
GFX(10:39:45): GFX_USE_MEDIA_WALKER = 0
GFX(10:39:45): GFX_CACHE_KERNELS = 0
GFX(10:39:45): GFX_LOOP_MAPPING = 2
GFX(10:39:45): GFX_OFFLOAD_TIMEOUT_FUNC = -1
GFX(10:39:45): GFX_OFFLOAD_TIMEOUT = 60
GFX(10:39:45): GFX_USE_BUFFER_UP = 1
GFX(10:39:45): * GFX_SHOW_TIME = 1
GFX(10:39:45): * GFX_PRINT_DIAG = 1
GFX(10:39:45): GFX_MAX_THREAD_COUNT = -1
GFX(10:39:45): * GFX_LOG_OFFLOAD = 1
GFX(10:39:45): Library loaded C:\WINDOWS\SYSTEM32\igfx11cmrt64.dll
GFX(10:39:46): Library version CmRT(5.0.0.1133), CmJIT(5.0.0.1133)
GFX(10:39:46): Selected DirectX mode: DX11
GFX(10:39:46): Simulator mode: OFF
GFX(10:39:46): TDR info: Graphics timeout set to default (2 seconds)
GFX(10:39:46):
GFX(10:39:46): Device capabilites:
GFX(10:39:46): CAP_KERNEL_COUNT_PER_TASK = 16
GFX(10:39:46): CAP_KERNEL_BINARY_SIZE = 65536
GFX(10:39:46): CAP_SAMPLER_COUNT = 64
GFX(10:39:46): CAP_SAMPLER_COUNT_PER_KERNEL = 16
GFX(10:39:46): CAP_BUFFER_COUNT = 256
GFX(10:39:46): CAP_SURFACE2D_COUNT = 256
GFX(10:39:46): CAP_SURFACE3D_COUNT = 64
GFX(10:39:46): CAP_SURFACE_COUNT_PER_KERNEL = 255
GFX(10:39:46): CAP_ARG_COUNT_PER_KERNEL = 255
GFX(10:39:46): CAP_ARG_SIZE_PER_KERNEL = 2016
GFX(10:39:46): CAP_USER_DEFINED_THREAD_COUNT_PER_TASK = 262144
GFX(10:39:46): CAP_HW_THREAD_COUNT = 161
GFX(10:39:46): CAP_SURFACE2D_FORMAT_COUNT = 23
GFX(10:39:46): CAP_SURFACE3D_FORMAT_COUNT = 2
GFX(10:39:46): CAP_VME_STATE_G6_COUNT = 8
GFX(10:39:46): CAP_GPU_PLATFORM = <future platform1>
GFX(10:39:46): CAP_GT_PLATFORM = GT2
GFX(10:39:46): CAP_MIN_FREQUENCY = 350
GFX(10:39:46): CAP_MAX_FREQUENCY = 1200
GFX(10:39:46): CAP_GPU_CURRENT_FREQUENCY = 0
GFX(10:39:46): The number of available hardware threads: 161
GFX(10:39:46):
GFX(10:39:46): GFX image loaded from executable: resource name 2500414GFX6.gfx
GFX(10:39:46): Program created
GFX(10:39:46): GFX image size: 36232
GFX(10:39:46): CISA count: 1
GFX(10:39:46):
GFX(10:39:46): CISA #0:
GFX(10:39:46): version 3.1
GFX(10:39:46): num_kernels 1
GFX(10:39:46): kernel #0: 'L_main_IntelTestBed2_cpp_35_35__par_region0_2'
GFX(10:39:46): num_binaries 0
GFX(10:39:46): num_functions 1
GFX(10:39:46): function #0: '_process_image__YAXAEAUrgb__0_Z'
GFX(10:39:46):
GFX(10:39:46): LoadProgram (cisa #0)...
GFX(10:39:46): LoadProgram (cisa #0)...success
GFX(10:39:46): WARNING: surface creation failed
GFX(10:39:46): FATAL ERROR: could not create surface
GFX performance timers with non-zero value (milliseconds, activation counter):
Offload Total = 13.83, 1
Device Creation = 36.70, 1
Kernel Creation = 10.82, 1
Buffer Creation = 0.06, 1
Iteration Space Splitting = 0.01, 1
Argument Setup = 0.01, 1
ELF Parsing = 2.02, 1
Program Loading = 11.80, 1
Press any key to continue . . .