Become a MacRumors Supporter for $50/year with no ads, ability to filter front page stories, and private forums.

stereomatching

macrumors newbie
Original poster
Apr 17, 2013
2
0
I bought a Mac mini four months ago (2012/11).
The graphics card is an Intel HD Graphics 4000, and
the CPU is a 2.5 GHz Intel Core i5.

The OpenCL framework is present on my Mac mini, but
I can't find any Intel SDK designed for the Mac, and the executable
always complains "can't find any device".

Is there any way for me to learn how to program with OpenCL
(i.e. make the code run) on my Mac mini? Is it possible to
program with OpenCL if I choose the CPU as the device rather than
the GPU? :confused: Or do I need to change the graphics card? :confused:

I am stuck at chapter 1 because I can't find a way to make the program
find a proper device.

The code and makefile (from chapter 1 of the book):

matvec.c
Code:
#ifndef CH1EX00_HPP
#define CH1EX00_HPP

#include <cstdio>
#include <cstdlib>
#include <sys/types.h>

#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

namespace
{

/* Path of the OpenCL C source file that is read and compiled at run time. */
const char *PROGRAM_FILE =
    "../Qt/program/experiment_apps_and_libs/openclTest/source_code_mac/Ch1/matvec/matvec.cl";

/* Name of the __kernel function looked up in the built program. */
const char *KERNEL_FUNC = "matvec_mult";

}

inline void ch1_test_00()
{       
    /* Host/device data structures */
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_int i, err;

    /* Program/kernel data structures */
    cl_program program;
    FILE *program_handle;
    char *program_buffer, *program_log;
    size_t program_size, log_size;
    cl_kernel kernel;

    /* Data and buffers */
    float mat[16], vec[4], result[4];
    float correct[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    cl_mem mat_buff, vec_buff, res_buff;
    size_t work_units_per_kernel;

    /* Initialize data to be processed by the kernel */
    for(i=0; i<16; i++) {
       mat[i] = i * 2.0f;
    }
    for(i=0; i<4; i++) {
       vec[i] = i * 3.0f;
       correct[0] += mat[i]    * vec[i];
       correct[1] += mat[i+4]  * vec[i];
       correct[2] += mat[i+8]  * vec[i];
       correct[3] += mat[i+12] * vec[i];
    }

    /* Identify a platform */
    err = clGetPlatformIDs(1, &platform, NULL);
    if(err < 0) {
       perror("Couldn't find any platforms");
       exit(1);
    }

    /* Access a device */
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if(err < 0) {
       perror("Couldn't find any devices");
       exit(1);
    }

    /* Create the context */
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    if(err < 0) {
       perror("Couldn't create a context");
       exit(1);
    }

    /* Read program file and place content into buffer */
    program_handle = fopen(PROGRAM_FILE, "r");
    if(program_handle == NULL) {
       perror("Couldn't find the program file");
       exit(1);
    }
    fseek(program_handle, 0, SEEK_END);
    program_size = ftell(program_handle);
    rewind(program_handle);
    program_buffer = (char*)malloc(program_size + 1);
    program_buffer[program_size] = '\0';
    fread(program_buffer, sizeof(char), program_size, program_handle);
    fclose(program_handle);

    /* Create program from file */
    program = clCreateProgramWithSource(context, 1,
       (const char**)&program_buffer, &program_size, &err);
    if(err < 0) {
       perror("Couldn't create the program");
       exit(1);
    }
    free(program_buffer);

    /* Build program */
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if(err < 0) {

       /* Find size of log and print to std output */
       clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
             0, NULL, &log_size);
       program_log = (char*) malloc(log_size + 1);
       program_log[log_size] = '\0';
       clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
             log_size + 1, program_log, NULL);
       printf("%s\n", program_log);
       free(program_log);
       exit(1);
    }

    /* Create kernel for the mat_vec_mult function */
    kernel = clCreateKernel(program, KERNEL_FUNC, &err);
    if(err < 0) {
       perror("Couldn't create the kernel");
       exit(1);
    }

    /* Create CL buffers to hold input and output data */
    mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY |
       CL_MEM_COPY_HOST_PTR, sizeof(float)*16, mat, &err);
    if(err < 0) {
       perror("Couldn't create a buffer object");
       exit(1);
    }
    vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY |
       CL_MEM_COPY_HOST_PTR, sizeof(float)*4, vec, NULL);
    res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
       sizeof(float)*4, NULL, NULL);

    /* Create kernel arguments from the CL buffers */
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mat_buff);
    if(err < 0) {
       perror("Couldn't set the kernel argument");
       exit(1);
    }
    clSetKernelArg(kernel, 1, sizeof(cl_mem), &vec_buff);
    clSetKernelArg(kernel, 2, sizeof(cl_mem), &res_buff);

    /* Create a CL command queue for the device*/
    queue = clCreateCommandQueue(context, device, 0, &err);
    if(err < 0) {
       perror("Couldn't create the command queue");
       exit(1);
    }

    /* Enqueue the command queue to the device */
    work_units_per_kernel = 4; /* 4 work-units per kernel */
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &work_units_per_kernel,
       NULL, 0, NULL, NULL);
    if(err < 0) {
       perror("Couldn't enqueue the kernel execution command");
       exit(1);
    }

    /* Read the result */
    err = clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, sizeof(float)*4,
       result, 0, NULL, NULL);
    if(err < 0) {
       perror("Couldn't enqueue the read buffer command");
       exit(1);
    }

    /* Test the result */
    if((result[0] == correct[0]) && (result[1] == correct[1])
       && (result[2] == correct[2]) && (result[3] == correct[3])) {
       printf("Matrix-vector multiplication successful.\n");
    }
    else {
       printf("Matrix-vector multiplication unsuccessful.\n");
    }

    /* Deallocate resources */
    clReleaseMemObject(mat_buff);
    clReleaseMemObject(vec_buff);
    clReleaseMemObject(res_buff);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue);
    clReleaseProgram(program);
    clReleaseContext(context);
}

#endif // CH1EX00_HPP

matvec.cl
Code:
/* Each work-item computes one element of the result: the dot product of
 * the float4 at matrix[id] with the float4 at vector[0], where id is the
 * work-item's global id in dimension 0. */
__kernel void matvec_mult(__global float4* matrix,
                          __global float4* vector,
                          __global float* result) {

   int row = get_global_id(0);
   float4 row_values = matrix[row];
   float4 vec_values = vector[0];
   result[row] = dot(row_values, vec_values);
}

make file
Code:
# Builds $(PROJ) from $(PROJ).c and links it against OpenCL.
# The OpenCL link flags and the 32/64-bit switches are chosen from the
# output of `uname`; on Linux, include/library directories are taken from
# AMDAPPSDKROOT or NVSDKCOMPUTE_ROOT when those variables are defined.
PROJ=matvec

CC=gcc

CFLAGS=-std=c99 -Wall -DUNIX -g -DDEBUG

# Check for 32-bit vs 64-bit (empty when `uname -m` does not contain "64")
PROC_TYPE = $(strip $(shell uname -m | grep 64))

# Check for Mac OS
# The character classes are quoted so the shell cannot glob-expand them.
OS = $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
DARWIN = $(strip $(findstring DARWIN, $(OS)))

# Conditional bodies are indented with spaces, not tabs, so make never
# mistakes them for recipe lines.

# MacOS System
ifneq ($(DARWIN),)
    CFLAGS += -DMAC
    LIBS=-framework OpenCL

    ifeq ($(PROC_TYPE),)
        CFLAGS+=-arch i386
    else
        CFLAGS+=-arch x86_64
    endif
else

# Linux OS
LIBS=-lOpenCL
ifeq ($(PROC_TYPE),)
    CFLAGS+=-m32
else
    CFLAGS+=-m64
endif

# Check for Linux-AMD
ifdef AMDAPPSDKROOT
    INC_DIRS=. $(AMDAPPSDKROOT)/include
    ifeq ($(PROC_TYPE),)
        LIB_DIRS=$(AMDAPPSDKROOT)/lib/x86
    else
        LIB_DIRS=$(AMDAPPSDKROOT)/lib/x86_64
    endif
else

# Check for Linux-Nvidia
ifdef NVSDKCOMPUTE_ROOT
    INC_DIRS=. $(NVSDKCOMPUTE_ROOT)/OpenCL/common/inc
endif

endif
endif

$(PROJ): $(PROJ).c
	$(CC) $(CFLAGS) -o $@ $^ $(INC_DIRS:%=-I%) $(LIB_DIRS:%=-L%) $(LIBS)

.PHONY: clean

# -f: do not fail when the binary has not been built yet
clean:
	rm -f $(PROJ)

If this is not the right place to ask this kind of question, please point me to where I should go. Thanks.
 
Last edited:

stereomatching

macrumors newbie
Original poster
Apr 17, 2013
2
0
I changed the line
Code:
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

to

Code:
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);

The program now does the job without a problem.
Does Intel have any plans to support an SDK for the Mac in the
future? I hope I can leverage the power of the GPU as soon as possible.
 

MajorPain

macrumors newbie
May 17, 2011
16
0
NYC
Google "intel hd 4000 opencl" and several relevant pages will come up. I did not read them just now, but I skimmed them over the past few weeks, and my impression is that the HD 4000 is capable of 140 GFLOPS single precision, and that it will be up to Apple to implement OpenCL for the HD 4000. You may want to contact Apple
about this. It could come in a future OS X 10.8 update.
 
Register on MacRumors! This sidebar will go away, and you'll see fewer ads.