#include #include #include #include #include #include void check_error(cl_int error) { if (error != CL_SUCCESS) { printf("OpenCL call failed with error %d\n", (int)error); exit(error); } } void load_kernel_from_file(char *file, char **src) { long fsize; FILE *f = fopen(file, "rb"); fseek(f, 0, SEEK_END); fsize = ftell(f); fseek(f, 0, SEEK_SET); //same as rewind(f); *src = (char *)malloc(fsize + 1); fread(*src, fsize, 1, f); fclose(f); (*src)[fsize] = 0; } cl_program create_program(char *src, cl_context context) { cl_program program; size_t lengths[1]; cl_int error = 0; lengths[0] = strlen(src); printf("strlen: %u\n", (unsigned int)lengths[0]); program = clCreateProgramWithSource(context, 1, (const char **)&src, lengths, &error); check_error(error); return program; } float *test_a, *test_b; static void prepare_data() { int i; test_a = malloc(sizeof(float)*1000000000); test_b = malloc(sizeof(float)*1000000000); for (i = 0; i < 1000000000; i++) { test_a[i] = (float)i; test_b[i] = 0.0f; } } float mult = 1.0f; int main(void) { int i; cl_int error; cl_int ret; cl_platform_id *platform_ids = NULL; cl_device_id *device_ids = NULL; cl_uint platform_id_count = 0; cl_uint device_id_count = 0; cl_context context; cl_program program; cl_kernel kernel; cl_command_queue queue; cl_mem a_buff, b_buff; char *source_code = NULL; char temp_buff[100]; size_t data_size = 1000000000; size_t work_size = 64; prepare_data(); clGetPlatformIDs(0, NULL, &platform_id_count); if (platform_id_count == 0) return -1; platform_ids = (cl_platform_id *)malloc(platform_id_count * sizeof(cl_platform_id)); if (platform_ids == NULL) return -1; clGetPlatformIDs(platform_id_count, platform_ids, NULL); printf("Platforms available: %u\n", (unsigned int)platform_id_count); for (i = 0; i < platform_id_count; i++) { temp_buff[0] = '\0'; clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(temp_buff), temp_buff, NULL); printf("Platform ID %d: %s\n", i, temp_buff); } clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, 0, NULL, &device_id_count); printf("Device in Platform 0 count: %u\n", (unsigned int)device_id_count); if (device_id_count == 0) return -1; device_ids = (cl_device_id *) malloc(sizeof(cl_device_id) *device_id_count); clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, device_id_count, device_ids, NULL); for (i = 0; i < device_id_count; i++) { temp_buff[0] = '\0'; clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(temp_buff), temp_buff, NULL); printf("Device ID %d: %s\n", i, temp_buff); } context = clCreateContext(NULL, 1, &device_ids[0], NULL, NULL, &error); check_error(error); queue = clCreateCommandQueue(context, device_ids[0], 0, &error); /* Create memory buffers for data */ a_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(test_a[0])*data_size, test_a, &error); check_error(error); b_buff = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(test_b[0])*data_size, test_b, &error); check_error(error); load_kernel_from_file("cl_kernels/saxpy.cl", &source_code); program = create_program(source_code, context); /* Compile the source code */ check_error(clBuildProgram(program, device_id_count, device_ids, NULL, NULL, NULL)); kernel = clCreateKernel(program, "SAXPY", &error); /* Setting the arguments */ clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_buff); clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_buff); clSetKernelArg(kernel, 2, sizeof(float), (void *)&mult); ret = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &data_size, &work_size, 0, NULL, NULL); check_error(ret); clEnqueueReadBuffer(queue, b_buff, CL_TRUE, 0, 1000000000*sizeof(float), test_b, 0, NULL, NULL); ret = clFlush(queue); ret = clFinish(queue); ret = clReleaseKernel(kernel); ret = clReleaseProgram(program); ret = clReleaseMemObject(a_buff); ret = clReleaseMemObject(b_buff); ret = clReleaseCommandQueue(queue); ret = clReleaseContext(context); free(test_a); free(test_b); free(platform_ids); free(device_ids); return 0; }