这里做的就是使用OpenCL把图像旋转90度,算是一个比较入门级别的程序。希望对大家有所帮助,这些代码多看几遍也就熟悉了。
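图像旋转是指把给定的图像绕某一点以逆时针或顺时针方向旋转一定的角度,通常是指绕图像的中心以逆时针方向旋转。假定图像的左上角为(left, top),右下角为(right, bottom),则图像上任意点(x0, y0) 绕其中心(xcenter, ycenter) 逆时针旋转angle 角度后,新的坐标位置(x′, y′) 的计算公式为(其中 $\theta$ 即 angle):
$$x' = (x_0 - x_{center})\cos\theta - (y_0 - y_{center})\sin\theta + x_{center}$$
$$y' = (x_0 - x_{center})\sin\theta + (y_0 - y_{center})\cos\theta + y_{center}$$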
需要对图像进行处理,那么在这里介绍一个库给大家:FreeImage。
不熟悉的请看:请点这里。
使用这个库的方法:(通用方法,极有效)
属性->C/C++->常规->附加包含目录:添加.h的路径。
链接器->常规->附加库目录: 添加lib路径。
链接器->输入->附加依赖项: 添加需要的lib名称。
将dll文件放入exe路径下。
// NOTE: the former "#pragma OPENCL EXTENSION cl_amd_printf : enable" was removed.
// This kernel never calls printf, and enabling a vendor extension can make the
// program fail to build on platforms that do not provide it.

// Rotates an 8-bit, single-channel W x H image about its centre (W/2, H/2).
//
// One work-item handles one SOURCE pixel (ix, iy) = (get_global_id(0),
// get_global_id(1)) and forward-maps it to
//     xpos = (ix - xc) * cosTheta - (iy - yc) * sinTheta + xc
//     ypos = (ix - xc) * sinTheta + (iy - yc) * cosTheta + yc
// (float result truncated to int). The pixel is copied only if the target lies
// inside the image; dest_data elements that no source pixel maps to are never
// written by this kernel.
//
// src_data  : source pixels, row-major, indexed as iy * W + ix
// dest_data : destination pixels, same layout
// W, H      : image width and height in pixels
// sinTheta, cosTheta : sine and cosine of the rotation angle
__kernel void image_rotate(
    __global uchar * src_data,
    __global uchar * dest_data,   // Data in global memory
    int W,
    int H,                        // Image dimensions
    float sinTheta,
    float cosTheta )              // Rotation parameters
{
    // Work-item position within the 2D index space
    const int ix = get_global_id(0);
    const int iy = get_global_id(1);

    // The NDRange may be larger than the image (e.g. rounded up to a multiple
    // of the work-group size); such work-items have no source pixel to read.
    if (ix >= W || iy >= H)
    {
        return;
    }

    const int xc = W / 2;
    const int yc = H / 2;

    const int xpos = (ix - xc) * cosTheta - (iy - yc) * sinTheta + xc;
    const int ypos = (ix - xc) * sinTheta + (iy - yc) * cosTheta + yc;

    // Drop pixels that rotate outside the destination image
    if ((xpos >= 0) && (xpos < W) && (ypos >= 0) && (ypos < H))
    {
        dest_data[ypos * W + xpos] = src_data[iy * W + ix];
    }
}
我们把这个kernel和CPU串行处理的版本比较一下,CPU版本如下:
// Serial CPU rotation of a w x h 8-bit image about its centre (w/2, h/2).
// Every source pixel (col, row) is forward-mapped to
//     xpos = (col - cx) * cosTheta - (row - cy) * sinTheta + cx
//     ypos = (col - cx) * sinTheta + (row - cy) * cosTheta + cy
// (float result truncated to int) and copied to outbuf[ypos*w + xpos].
// Pixels that land outside the image are skipped; outbuf elements that no
// source pixel maps to are left untouched.
void cpu_rotate(unsigned char* inbuf, unsigned char* outbuf, int w, int h,float sinTheta, float cosTheta)
{
    const int cx = w/2;
    const int cy = h/2;

    for (int row = 0; row < h; ++row)
    {
        const unsigned char* srcRow = inbuf + row*w;
        for (int col = 0; col < w; ++col)
        {
            const int xpos = (col-cx)*cosTheta - (row-cy)*sinTheta + cx;
            const int ypos = (col-cx)*sinTheta + (row-cy)*cosTheta + cy;

            const bool outside = (xpos < 0) || (ypos < 0) || (xpos >= w) || (ypos >= h);
            if (outside)
                continue;

            outbuf[ypos*w + xpos] = srcRow[col];
        }
    }
}
对比以后可以发现,用OpenCL写kernel的时候两层循环没有了,取而代之的是通过get_global_id得到当前work-item要处理的像素坐标。
这里还涉及到一些图片的操作,具体请看FreeImage的使用。
#include "stdafx.h"
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fstream>
#include <iostream>
#include <string>
#include "gFreeImage.h"
using namespace std;
#define NWITEMS 4
#pragma comment (lib,"OpenCL.lib")
#pragma comment (lib,"FreeImage.lib")
// Reads the whole file `filename` (opened in binary mode) into `s`.
// The host program uses this to load the OpenCL kernel source that is later
// handed to the device for compilation.
//
// filename : path of the file to read
// s        : receives the file contents on success; left unchanged on failure
// returns  : 0 on success, 1 if the file cannot be opened or read
int convertToString(const char *filename, std::string& s)
{
    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
    if(!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Determine the file size by seeking to the end; tellg() yields -1 on error.
    f.seekg(0, std::fstream::end);
    const std::streamoff length = f.tellg();
    f.seekg(0, std::fstream::beg);
    if(length < 0)
    {
        printf("Error: Failed to read file %s\n", filename);
        return 1;
    }

    // Read straight into a string of exactly the right size: no manual
    // new[]/delete[] and no dependence on a terminator byte.
    std::string contents(static_cast<std::string::size_type>(length), '\0');
    if(length > 0)
    {
        f.read(&contents[0], static_cast<std::streamsize>(length));
    }
    if(!f)
    {
        printf("Error: Failed to read file %s\n", filename);
        return 1;
    }

    s.swap(contents);
    return 0;
}
// Serial CPU rotation of a w x h 8-bit image about its centre (w/2, h/2).
// Every source pixel (col, row) is forward-mapped to
//     xpos = (col - cx) * cosTheta - (row - cy) * sinTheta + cx
//     ypos = (col - cx) * sinTheta + (row - cy) * cosTheta + cy
// (float result truncated to int) and copied to outbuf[ypos*w + xpos].
// Pixels that land outside the image are skipped; outbuf elements that no
// source pixel maps to are left untouched.
void cpu_rotate(unsigned char* inbuf, unsigned char* outbuf, int w, int h,float sinTheta, float cosTheta)
{
    const int cx = w/2;
    const int cy = h/2;

    for (int row = 0; row < h; ++row)
    {
        const unsigned char* srcRow = inbuf + row*w;
        for (int col = 0; col < w; ++col)
        {
            const int xpos = (col-cx)*cosTheta - (row-cy)*sinTheta + cx;
            const int ypos = (col-cx)*sinTheta + (row-cy)*cosTheta + cy;

            const bool outside = (xpos < 0) || (ypos < 0) || (xpos >= w) || (ypos >= h);
            if (outside)
                continue;

            outbuf[ypos*w + xpos] = srcRow[col];
        }
    }
}
int main(int argc, char* argv[])
{
//装入图象
unsigned char *src_image=0;
unsigned char *cpu_image=0;
int W, H;
gFreeImage img;
if(!img.LoadImageGrey("lenna.jpg"))
{
printf("装入lenna.jpg失败
");
exit(0);
}
else
src_image = img.getImageDataGrey(W, H);
size_t mem_size = W*H;
cpu_image = (unsigned char*)malloc(mem_size);
cl_uint status;
cl_platform_id platform;
//创建平台对象
status = clGetPlatformIDs( 1, &platform, NULL );
cl_device_id device;
//创建GPU装备
clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU,
1,
&device,
NULL);
//创建context
cl_context context = clCreateContext( NULL,
1,
&device,
NULL, NULL, NULL);
//创建命令队列
cl_command_queue queue = clCreateCommandQueue( context,
device,
CL_QUEUE_PROFILING_ENABLE, NULL );
//创建3个OpenCL内存对象,并把buf1的内容通过隐式拷贝的方式
//拷贝到clbuf1,buf2的内容通过显示拷贝的方式拷贝到clbuf2
cl_mem d_ip = clCreateBuffer(
context, CL_MEM_READ_ONLY,
mem_size,
NULL, NULL);
cl_mem d_op = clCreateBuffer(
context, CL_MEM_WRITE_ONLY,
mem_size,
NULL, NULL);
status = clEnqueueWriteBuffer (
queue , d_ip, CL_TRUE,
0, mem_size, (void *)src_image,
0, NULL, NULL);
const char * filename = "rotate.cl";
std::string sourceStr;
status = convertToString(filename, sourceStr);
const char * source = sourceStr.c_str();
size_t sourceSize[] = { strlen(source) };
//创建程序对象
cl_program program = clCreateProgramWithSource(
context,
1,
&source,
sourceSize,
NULL);
//编译程序对象
status = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
if(status != 0)
{
printf("clBuild failed:%d
", status);
char tbuf[0x10000];
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);
printf("
%s
", tbuf);
return -1;
}
//创建Kernel对象
//Use the “image_rotate” function as the kernel
//创建Kernel对象
cl_kernel kernel = clCreateKernel( program, "image_rotate", NULL );
//设置Kernel参数
float sintheta = 1, costheta = 0;
clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_ip);
clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_op);
clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&W);
clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&H);
clSetKernelArg(kernel, 4, sizeof(cl_float), (void *)&sintheta);
clSetKernelArg(kernel, 5, sizeof(cl_float), (void *)&costheta);
//Set local and global workgroup sizes
size_t localws[2] = {16,16} ;
size_t globalws[2] = {W, H};//Assume divisible by 16
cl_event ev;
//履行kernel
clEnqueueNDRangeKernel(
queue ,kernel,
2, 0, globalws, localws,
0, NULL, &ev);
clFinish( queue );
//计算kerenl履行时间
cl_ulong startTime, endTime;
clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &startTime, NULL);
clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &endTime, NULL);
cl_ulong kernelExecTimeNs = endTime-startTime;
printf("kernal exec time :%8.6f ms
", kernelExecTimeNs*1e⑹ );
//数据拷回host内存
// copy results from device back to host
unsigned char *op_data=0;
op_data = (cl_uchar *) clEnqueueMapBuffer( queue,
d_op,
CL_TRUE,
CL_MAP_READ,
0,
mem_size,
0, NULL, NULL, NULL );
int i;
cpu_rotate(src_image,cpu_image, W, H, 1, 0);
for(i = 0; i < mem_size; i++)
{
src_image[i] =cpu_image[i];
}
img.SaveImage("cpu_lenna_rotate.jpg");
for(i = 0; i < mem_size; i++)
{
src_image[i] =op_data[i];
}
img.SaveImage("lenna_rotate.jpg");
if(cpu_image)
free(cpu_image);
//删除OpenCL资源对象
clReleaseMemObject(d_ip);
clReleaseMemObject(d_op);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
最初的图片:
用OpenCL处理以后:用了灰度图。
用CPU处理以后:用了灰度图。
FreeImage下载,请点击这里。
参考代码,请点击这里。