/* myopencl.cc */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
#include <vector>
#define CL_TARGET_OPENCL_VERSION 120
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <vector_types.h>
#include "myopencl.h"
#include "kernel.h"
#include "edelgas.h"

#define VERBOSE  //print the build log while compiling the kernels
#define PTX      //write an assembler-like listing of the built program (tmp.ptx)
//#define DEBUG

// Forward declarations of helper routines defined further down in this file:
void opencl_init();
int filelaenge(const char *name);
void datei_einlesen(const char *name,char *ziel);
void print_TPB();
// Rounds a up to the next value that is evenly divisible by b.
int rup(int a,int b)
{
 const int chunks=(a+b-1)/b; //number of b-sized chunks needed to hold a
 return chunks*b;
}

// File-wide OpenCL objects shared by all routines below:
cl::Device default_device;
cl::Context context;
cl::CommandQueue queue;
cl::Kernel kernel_hello,kernel_fsum,kernel_vpos,kernel_sort;
cl::Buffer d_px,d_py,d_pz,d_vx,d_vy,d_vz; //device-side arrays (filled from px..vz in copy_to_device)
cl::Buffer d_mass,d_radi; //device-side arrays (filled from mass/radi in copy_to_device)
int size1=0,size2=0,size3=0; //sizes of the arrays in bytes (double, float and int arrays)
cl::Buffer d_list; //device-side int array used for sorting
cl::Buffer d_paartab; //(short2 *) table used for optimisation
cl::Buffer d_kraft; //force table for optimisation 3, (double3 *)
int size4=0; //size of the force table in bytes

/**
 * Initialises OpenCL (opencl_init), allocates the device buffers, creates the
 * command queue and uploads the given host arrays with blocking writes.
 *
 * Every double array is read for size1 bytes and every float array for size2
 * bytes, i.e. for rup(natoms,8) elements. The host arrays therefore have to
 * be at least that long (padded to a multiple of 8 entries).
 *
 * @param px,py,pz  host arrays uploaded to d_px, d_py, d_pz
 * @param vx,vy,vz  host arrays uploaded to d_vx, d_vy, d_vz
 * @param mass,radi host arrays uploaded to d_mass, d_radi
 * @param natoms    number of entries; a mismatch with NATOMS is only reported
 * @return true on success; false if an allocation, the queue creation or a
 *         transfer failed. After a failure size1..size4 are reset to 0 so
 *         that the "size1==0" checks of the other routines stay meaningful.
 */
extern
bool copy_to_device(const double *px,const double *py,const double *pz,
		    const double *vx,const double *vy,const double *vz,
		    const float *mass,const float *radi,const int natoms)
{
 opencl_init();
 if(natoms!=NATOMS) printf("Fehler: natoms!=NATOMS\n");//test
 // So that float8 arithmetic is possible as well:
 // if not divisible by 8, round the element count up accordingly.
 size1 = rup(natoms,8)*sizeof(double);
 size2 = rup(natoms,8)*sizeof(float);
 size3 = rup(natoms,8)*sizeof(int);
 //size4 = NATOMS*NATOMS*sizeof(double3); //approx. 6.3MB for 512 atoms
 size4 = NATOMS*NATOMS*4*sizeof(double); //test
 printf("size4=%d\n",size4);//test
 try {
  d_px = cl::Buffer(context,CL_MEM_READ_WRITE,size1);
 } catch(cl::Error &error) {
  printf("Fehler1: konnte keinen Speicher reservieren\n");
  std::cout << error.what() << "(" << error.err() << ")" << std::endl;
  size1=size2=size3=size4=0; //nothing usable was reserved
  return false; //do not continue with an unallocated buffer
 }
 try {
  d_py = cl::Buffer(context,CL_MEM_READ_WRITE,size1);
  d_pz = cl::Buffer(context,CL_MEM_READ_WRITE,size1);
  d_vx = cl::Buffer(context,CL_MEM_READ_WRITE,size1);
  d_vy = cl::Buffer(context,CL_MEM_READ_WRITE,size1);
  d_vz = cl::Buffer(context,CL_MEM_READ_WRITE,size1);
  d_mass = cl::Buffer(context,CL_MEM_READ_WRITE,size2);
  d_radi = cl::Buffer(context,CL_MEM_READ_WRITE,size2);
  d_list = cl::Buffer(context,CL_MEM_READ_WRITE,size3);
  d_kraft = cl::Buffer(context,CL_MEM_READ_WRITE,size4);
 } catch(cl::Error &error) {
  printf("Fehler1: konnte nicht genug Speicher reservieren\n");
  std::cout << error.what() << "(" << error.err() << ")" << std::endl;
  size1=size2=size3=size4=0;
  return false;
 }
 try {
  // The queue constructor throws as well, so it belongs inside the try block.
  queue=cl::CommandQueue(context,default_device);
  queue.enqueueWriteBuffer(d_px,CL_TRUE,0,size1,px);
  queue.enqueueWriteBuffer(d_py,CL_TRUE,0,size1,py);
  queue.enqueueWriteBuffer(d_pz,CL_TRUE,0,size1,pz);
  queue.enqueueWriteBuffer(d_vx,CL_TRUE,0,size1,vx);
  queue.enqueueWriteBuffer(d_vy,CL_TRUE,0,size1,vy);
  queue.enqueueWriteBuffer(d_vz,CL_TRUE,0,size1,vz);
  queue.enqueueWriteBuffer(d_mass,CL_TRUE,0,size2,mass);
  queue.enqueueWriteBuffer(d_radi,CL_TRUE,0,size2,radi);
  return true;
 } catch(cl::Error &error) {
  printf("Fehler2: copy_to_device():\n");
  std::cout << error.what() << "(" << error.err() << ")" << std::endl;
  size1=size2=size3=size4=0;
  return false;
 }
}

/**
 * Same as copy_to_device() for NATOMS entries, and additionally uploads the
 * pair table into d_paartab.
 *
 * @param paartab host table with (NATOMS-1)*NATOMS/2 short2 entries
 * @return true on success; false if copy_to_device() failed or if allocating
 *         or writing d_paartab failed
 */
extern
bool copy_to_device_opt(const double *px,const double *py,const double *pz,
		    const double *vx,const double *vy,const double *vz,
		    const float *mass,const float *radi,const short2 *paartab)
{
 // Without a valid context/queue the upload below cannot work, so stop here.
 if(!copy_to_device(px,py,pz,vx,vy,vz,mass,radi,NATOMS)) return false;
 int size=(NATOMS-1)*NATOMS/2*sizeof(short2);
 try {
  d_paartab = cl::Buffer(context,CL_MEM_READ_WRITE,size);
  queue.enqueueWriteBuffer(d_paartab,CL_TRUE,0,size,paartab);
  return true;
 } catch(cl::Error &error) {
  printf("Fehler7: copy_to_device_opt():\n");
  std::cout << error.what() << "(" << error.err() << ")" << std::endl;
  return false;
 }
}

extern bool copy_from_device(double *px,double *py,double *pz, int n)
{
 try {
  int size=n*sizeof(double);
  queue.enqueueReadBuffer(d_px,CL_TRUE,0,size,px);
  queue.enqueueReadBuffer(d_py,CL_TRUE,0,size,py);
  queue.enqueueReadBuffer(d_pz,CL_TRUE,0,size,pz);
#ifdef DEBUG
  print_TPB();//test
#endif
  return true;
 } catch(cl::Error &error) {
  printf("Fehler3: copy_from_device():\n");
  std::cout << error.what() << "(" << error.err() << ")" << std::endl;
  return false;
 }
}

#ifdef DEBUG
/**
 * Debug helper: reads the whole force table d_kraft (size4 bytes) into kraft
 * with a blocking read.
 *
 * @return true on success, false if the read threw cl::Error
 */
extern bool debug_copy_kraft(double *kraft)
{
 bool ok=true;
 try {
  queue.enqueueReadBuffer(d_kraft,CL_TRUE,0,size4,kraft);
 } catch(cl::Error &error) {
  printf("Fehler3: debug_copy_kraft():\n");
  std::cout << error.what() << "(" << error.err() << ")" << std::endl;
  ok=false;
 }
 return ok;
}
#endif

extern bool kernel_compilieren(const char *kernel_name,int maxreg)
{
 if(size1==0)
  {printf("Fehler5: kein Speicher auf dem Device reserviert.\n"); return false;}
 char *quellcode;
 int len=filelaenge(kernel_name)+1;
 if(len<=0)
  {printf("Fehler6: \"%s\" nicht gefunden\n",kernel_name); return false;}
 quellcode=new char[len+2000];
 datei_einlesen(kernel_name,quellcode);
#ifdef DEBUG
 printf("-------------\neingelesene Datei:\n%s\n-------------\n",
	quellcode); //test: eingelesenen Kernel anzeigen
#endif
 std::string code=quellcode;
 delete[] quellcode;
 cl::Program::Sources sources;
 sources.push_back({code.c_str(),code.length()});
 cl::Program program(context,sources);
 try {
#ifdef DEBUG
 printf("program.build()\n");//test
#endif
 int err;
#ifdef VERBOSE
 char optionen[80]="-cl-nv-verbose";
#else
 char optionen[80]="";
#endif
 if(maxreg!=0)
  {
#ifdef VERBOSE
   const char *optstr="-cl-nv-verbose -cl-nv-maxrregcount=%d";
#else
   const char *optstr="-cl-nv-maxrregcount=%d";
#endif
   sprintf(optionen,optstr,maxreg);
  }
 err=program.build({default_device},optionen);
#ifdef VERBOSE
 std::string name     = default_device.getInfo<CL_DEVICE_NAME>();
 std::string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device);
 std::cerr << "\nBuild log for " << name << ":" << std::endl
	   << buildlog << std::endl;
#endif
 if(err!=CL_SUCCESS) {printf("Fehler8\n"); return false;}
 
#ifdef PTX
 size_t bin_sz;
 program.getInfo(CL_PROGRAM_BINARY_SIZES,&bin_sz);
 char *bin=new char[bin_sz];
 program.getInfo(CL_PROGRAM_BINARIES,&bin);
 FILE *fp=fopen("tmp.ptx","w");
 fprintf(fp,"%s\n",bin);
 fclose(fp);
 delete[] bin;
#endif

#ifdef DEBUG
 printf("program.build() erfolgreich\n");//test
#endif
 } catch(cl::Error &error) {
  printf("Fehler9: ");
  std::cout << error.what() << "(" << error.err() << ") " << getErrorString(error.err()) << std::endl;
  
  std::string name     = default_device.getInfo<CL_DEVICE_NAME>();
  std::string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device);
  std::cerr << "\nBuild log for " << name << ":" << std::endl
	    << buildlog << std::endl;
  
  return false;
 }
 kernel_hello=cl::Kernel(program,"hello");
#if(OPTIMIERUNG==3)
 kernel_fsum=cl::Kernel(program,"fsum");
#endif
 kernel_vpos=cl::Kernel(program,"vpos");
 kernel_sort=cl::Kernel(program,"sort");
#ifdef DEBUG
 printf("kernel_compilieren() erfolgreich\n");//test
#endif
 return true;
}

static int TPB=1024; //threads per block, default 1024 (stated to be the maximum value)
static int tpbx=32, tpby=32; //work-group extents for 2D kernels

// Copies the current settings TPB, tpbx and tpby into *t, *x and *y.
extern void get_tpb(int *t,int *x,int *y)
{
 *t=TPB; *x=tpbx; *y=tpby;
}

// Prints the current settings to stdout.
void print_TPB()
{
 printf("TPB=%d tpbx=%d tpby=%d\n",TPB,tpbx,tpby);
}

// Sets the threads-per-block value. Values <= 0 are rejected and the previous
// setting is kept: TPB is used as a divisor and as work-group size later on.
extern void set_TPB(int n)
{
 if(n<=0)
  {printf("Fehler: TPB=%d ungueltig, bleibt %d\n",n,TPB); return;}
 TPB=n;
}

// Sets the 2D work-group extents. A value of 0 leaves the respective extent
// unchanged; negative values are rejected as well because they cannot be a
// valid work-group size.
extern void set_TPBxy(int x,int y)
{
 if(x<0 || y<0)
  printf("Fehler: tpbx=%d tpby=%d ungueltig, negative Werte ignoriert\n",x,y);
 if(x>0) tpbx=x;
 if(y>0) tpby=y;
}

extern bool kernel_aufruf_sort(int *list)
{
 try {
 kernel_sort.setArg(0,d_pz);
 kernel_sort.setArg(1,d_list);
 const int N=NATOMS/2;
 if(N>1024) printf("Fehler: Anzahl Atome auf 2048 beschraenkt\n");//test
 queue.enqueueNDRangeKernel(kernel_sort,cl::NullRange,
			    cl::NDRange(N), cl::NDRange(N));
 queue.finish();
 queue.enqueueReadBuffer(d_list,CL_TRUE,0,NATOMS*sizeof(int),list);
 } catch(cl::Error &error) {
  printf("Fehler13: Kernelaufruf Sortierung misslungen:\n  ");
  std::cout << error.what() << "(" << error.err() << ") " << getErrorString(error.err()) << std::endl;
  //TODO: genauere Fehlermeldung?
  return false;
 }
#ifdef DEBUG
 printf("bool kernel_aufruf_sort() erfolgreich\n");//test
#endif
 return true;
}

/**
 * Runs one step on the device: sets the arguments of kernel_hello and
 * enqueues it (as a 2D range when OPTIMIERUNG==3), then - only when
 * OPTIMIERUNG==3 - runs kernel_fsum, and finally runs kernel_vpos. After each
 * kernel the queue is drained with queue.finish().
 *
 * @param dt  passed unchanged as the last argument of every kernel
 * @param zwi only used when OPTIMIERUNG==3: if not NULL, zwi[0] and zwi[1]
 *            receive stoppuhr_read() after the first and second kernel
 * @return false if size1==0 (copy_to_device() not done yet) or if any OpenCL
 *         call threw cl::Error; true otherwise
 */
extern bool kernel_aufrufen(double dt,int *zwi)
{
 //an "opt" argument is no longer needed, it is now OPTIMIERUNG in kernel.h
 if(size1==0)
  {printf("Fehler4: zuerst copy_to_device() machen\n"); return false;}
#ifdef DEBUG
 printf("bool kernel_aufrufen()\n");//test
#endif

 // Compute distances and forces between the atoms:
 int tpb = (NATOMS<TPB) ? NATOMS : TPB; //Threads Per Block
 int ntr = NATOMS; //total number of threads (must be evenly divisible by TPB)
 if(ntr%tpb!=0) //test
  printf("Fehler: Anzahl Threads nicht durch TPB teilbar (nrt=%d tpb=%d)\n",ntr,tpb);
 //if(tpb<16) {tpb=16; if(ntr<tpb) ntr=tpb;} //minimal work-group size
#if(OPTIMIERUNG==3)
 //int tpbx=32; //defined further up. vary for tuning?
 //int tpby=32; //defined further up. vary for tuning?
 int nx=NATOMS,ny=NATOMS;
 if(nx<tpbx) nx=tpbx;
 if(ny<tpby) ny=tpby;
#endif
 try {
 int k=0; //running argument index; the argument list depends on OPTIMIERUNG
 kernel_hello.setArg(k++,d_px);
 kernel_hello.setArg(k++,d_py);
 kernel_hello.setArg(k++,d_pz);
#if(OPTIMIERUNG!=3)
 kernel_hello.setArg(k++,d_vx);
 kernel_hello.setArg(k++,d_vy);
 kernel_hello.setArg(k++,d_vz);
#endif
 kernel_hello.setArg(k++,d_mass);
 kernel_hello.setArg(k++,d_radi);

#if(OPTIMIERUNG==1)
   kernel_hello.setArg(k++,d_paartab);
#elif(OPTIMIERUNG==3)
   kernel_hello.setArg(k++,d_kraft);
#endif

 kernel_hello.setArg(k++,dt);

#if(OPTIMIERUNG==3)
 queue.enqueueNDRangeKernel(kernel_hello,cl::NullRange,
			    cl::NDRange(nx,ny), cl::NDRange(tpbx,tpby));
#else

#if(OPTIMIERUNG==1)
 // with OPTIMIERUNG==1 only half as many work items are launched
 ntr = NATOMS/2;
 tpb = (ntr<TPB) ? ntr : TPB; //Threads Per Block
#endif
 queue.enqueueNDRangeKernel(kernel_hello,cl::NullRange,
			    cl::NDRange(ntr), cl::NDRange(tpb));
#endif
 queue.finish();
 } catch(cl::Error &error) {
  printf("Fehler10: Kernelaufruf hallo() misslungen:\n  ");
  std::cout << error.what() << "(" << error.err() << ") " << getErrorString(error.err()) << std::endl;
  //TODO: more precise error message?
  return false;
 }
 
#if(OPTIMIERUNG==3)
 if(zwi!=NULL) zwi[0]=stoppuhr_read();//first split time
 // Sum up the forces and compute the new velocities:
 try {
 int k=0;
 kernel_fsum.setArg(k++,d_vx);
 kernel_fsum.setArg(k++,d_vy);
 kernel_fsum.setArg(k++,d_vz);
 kernel_fsum.setArg(k++,d_mass);
 kernel_fsum.setArg(k++,d_kraft);
 kernel_fsum.setArg(k++,dt);
 int nx = (NATOMS<tpb) ? tpb : NATOMS; //global size is at least one work group
 queue.enqueueNDRangeKernel(kernel_fsum,cl::NullRange,
			    cl::NDRange(nx), cl::NDRange(tpb));
 queue.finish();
 if(zwi!=NULL) zwi[1]=stoppuhr_read();//second split time
 } catch(cl::Error &error) {
  printf("Fehler11: Kernelaufruf fsum() misslungen:\n  ");
  std::cout << error.what() << "(" << error.err() << ") " << getErrorString(error.err()) << std::endl;
  //TODO: more precise error message?
  return false;
 }
#endif
 
 // Compute the new positions from the velocities:
 try {
 int k=0;
 kernel_vpos.setArg(k++,d_px);
 kernel_vpos.setArg(k++,d_py);
 kernel_vpos.setArg(k++,d_pz);
 kernel_vpos.setArg(k++,d_vx);
 kernel_vpos.setArg(k++,d_vy);
 kernel_vpos.setArg(k++,d_vz);
 kernel_vpos.setArg(k++,d_radi);
 kernel_vpos.setArg(k++,dt);
 int nx = (NATOMS<tpb) ? tpb : NATOMS; //global size is at least one work group
 queue.enqueueNDRangeKernel(kernel_vpos,cl::NullRange,
			    cl::NDRange(nx), cl::NDRange(tpb));
 queue.finish();
 } catch(cl::Error &error) {
  printf("Fehler12: Kernelaufruf vpos() misslungen:\n  ");
  std::cout << error.what() << "(" << error.err() << ") " << getErrorString(error.err()) << std::endl;
  //TODO: more precise error message?
  return false;
 }
#ifdef DEBUG
 printf("bool kernel_aufrufen() erfolgreich\n");//test
#endif
 return true;
}

extern void device_speicher_freigeben()
{
#ifdef DEBUG
 printf("device_speicher_freigeben()\n");//test
#endif
 //clReleaseMemObject(d_ma); //geht nicht, wie machen? oder unnoetig?
 queue.finish();
}

void opencl_init()
{
 //get all platforms (drivers)
 std::vector<cl::Platform> all_platforms;
 cl::Platform::get(&all_platforms);
 if(all_platforms.size()==0)
  {std::cout<<" No platforms found. Check OpenCL installation!\n"; exit(1);}
 cl::Platform default_platform=all_platforms[0];
 //get default device of the default platform
 std::vector<cl::Device> all_devices;
 default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
 std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<"\n";
 if(all_platforms.size() > 1)
  {
   printf("Weitere platforms:\n");
   for(unsigned int i=1;i<all_platforms.size();i++)
    std::cout << ' ' << i << ": "<<all_platforms[i].getInfo<CL_PLATFORM_NAME>()<<"\n";
  }
 if(all_devices.size()==0)
  {std::cout<<" No devices found. Check OpenCL installation!\n"; exit(1);}
 default_device=all_devices[0];
 std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<"\n";
 if(all_devices.size() > 1)
  {
   printf("Weitere devices:\n");
   for(unsigned int i=1;i<all_devices.size();i++)
    std::cout << ' ' << i << ": "<<all_devices[i].getInfo<CL_DEVICE_NAME>()<<"\n";
  }
 context=cl::Context({default_device});
}

/*************************** kleinkram ***************************/
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
// Selectors for filelaenge(name,flag): which field of struct stat to return.
const int BLOCKS=0, BYTES=1, BSIZE=2;

/**
 * Returns one size property of the regular file name.
 *
 * @param name path of the file (examined with lstat, so symbolic links are
 *             not followed and count as "not a regular file")
 * @param flag BLOCKS: st_blocks, BSIZE: st_blksize (preferred I/O block size,
 *             NOT the file length), anything else (BYTES): st_size
 * @return the selected value, -1 if lstat fails, -2 if name is not a regular file
 */
int filelaenge(const char *name,int flag=BSIZE)
{
 struct stat buf;
 int n;
 if(lstat(name,&buf)<0) return -1;
 if(!S_ISREG(buf.st_mode)) return -2;
//for the contents of buf see "man lstat"
 if(flag==BLOCKS) n=buf.st_blocks;
 else if(flag==BSIZE) n=buf.st_blksize;
 else n=buf.st_size;
 return n;
}

/**
 * Returns the length of the file in bytes (-1 / -2 on error, see above).
 * This used to return st_blksize, which is unrelated to the file length and
 * made callers size their buffers independently of the actual file.
 */
int filelaenge(const char *name) {return filelaenge(name,BYTES);}

/* Searches for the string s2 inside s1 and returns the position of the
   first occurrence, or -1 if s2 does not occur. An empty s2 is always
   contained and yields 0. */
int index(const char *s1,const char *s2)
{
 if(s2[0]=='\0') return 0;
 for(int pos=0;s1[pos]!='\0';pos++)
  {
   int k=0;
   /* compare s2 against s1 starting at pos; stops at the first mismatch,
      which includes reaching the end of s1 */
   while(s2[k]!='\0' && s1[pos+k]==s2[k]) k++;
   if(s2[k]=='\0') return pos; /* all of s2 matched */
  }
 return -1;
}

/**
 * Reads one text line, or at most lim-1 characters, from fp into s and
 * terminates it with 0; the line separator itself is not stored.
 *
 * @return true if a line (possibly empty) was read; false only when the end
 *         of the file was reached without reading any character, or when
 *         lim<=0 (no room for even the terminator, nothing is written).
 *         A last line that lacks a trailing '\n' is therefore still
 *         delivered, and the following call returns false.
 */
bool getline(FILE *fp,char *s,int lim)
{
 if(lim<=0) return false;
 int c=0;
 int count=0; /* characters stored so far */
 while(--lim && (c=getc(fp))!=EOF && c!='\n')
  {
   *s++ = c;
   count++;
  }
 *s='\0';
 return (c!=EOF || count>0);
}

void datei_einlesen(const char *name,char *ziel)
{
 char zeile[200];
 const char *s;
 int c;
 FILE *fp=fopen(name,"r");
 if(fp==NULL) {printf("konnte \"%s\" nicht oeffnen\n",name); *ziel=0; return;}
 while(getline(fp,zeile,200))
  {
   for(s=zeile;*s==' ' || *s=='\t';s++) {}//Leerzeichen ueberlesen
   if(s[0]==0) continue; //Leerzeilen ueberlesen
   if(strncmp(s,"//",2)==0) continue; //Kommentare ueberlesen
   if(index(s,"//nur zum mit gcc testen") > 0) continue;//testzeilen ueberlesen
   if(strncmp(s,"#include \"",10)==0)
    { //#include "name.h" --> name.h einfuegen
     char *t;
     for(t= &zeile[10];*t!='"' && *t!=0;t++) {} //Endzeichen " suchen
     *t=0; //Endzeichen durch 0 ersetzen
     datei_einlesen(&zeile[10],ziel);
     while(*ziel!=0) ziel++;
     continue;
    }
   for(int i=0;(c=zeile[i])!=0 && (c!='/' || zeile[i+1]!='/');i++)
     *ziel++ = c; //Zeile kopieren, aber Kommentare weglassen
   *ziel++ = '\n';
  }
 *ziel=0;
 fclose(fp);
}

/**
 * Translates an OpenCL error code into the name of the corresponding
 * constant. Codes that are not in the table yield "Unknown OpenCL error".
 */
const char *getErrorString(int error)
{
 struct Entry {int code; const char *name;};
 static const Entry table[]={
    // run-time and JIT compiler errors
    {0,"CL_SUCCESS"},
    {-1,"CL_DEVICE_NOT_FOUND"},
    {-2,"CL_DEVICE_NOT_AVAILABLE"},
    {-3,"CL_COMPILER_NOT_AVAILABLE"},
    {-4,"CL_MEM_OBJECT_ALLOCATION_FAILURE"},
    {-5,"CL_OUT_OF_RESOURCES"},
    {-6,"CL_OUT_OF_HOST_MEMORY"},
    {-7,"CL_PROFILING_INFO_NOT_AVAILABLE"},
    {-8,"CL_MEM_COPY_OVERLAP"},
    {-9,"CL_IMAGE_FORMAT_MISMATCH"},
    {-10,"CL_IMAGE_FORMAT_NOT_SUPPORTED"},
    {-11,"CL_BUILD_PROGRAM_FAILURE"},
    {-12,"CL_MAP_FAILURE"},
    {-13,"CL_MISALIGNED_SUB_BUFFER_OFFSET"},
    {-14,"CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"},
    {-15,"CL_COMPILE_PROGRAM_FAILURE"},
    {-16,"CL_LINKER_NOT_AVAILABLE"},
    {-17,"CL_LINK_PROGRAM_FAILURE"},
    {-18,"CL_DEVICE_PARTITION_FAILED"},
    {-19,"CL_KERNEL_ARG_INFO_NOT_AVAILABLE"},

    // compile-time errors
    {-30,"CL_INVALID_VALUE"},
    {-31,"CL_INVALID_DEVICE_TYPE"},
    {-32,"CL_INVALID_PLATFORM"},
    {-33,"CL_INVALID_DEVICE"},
    {-34,"CL_INVALID_CONTEXT"},
    {-35,"CL_INVALID_QUEUE_PROPERTIES"},
    {-36,"CL_INVALID_COMMAND_QUEUE"},
    {-37,"CL_INVALID_HOST_PTR"},
    {-38,"CL_INVALID_MEM_OBJECT"},
    {-39,"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
    {-40,"CL_INVALID_IMAGE_SIZE"},
    {-41,"CL_INVALID_SAMPLER"},
    {-42,"CL_INVALID_BINARY"},
    {-43,"CL_INVALID_BUILD_OPTIONS"},
    {-44,"CL_INVALID_PROGRAM"},
    {-45,"CL_INVALID_PROGRAM_EXECUTABLE"},
    {-46,"CL_INVALID_KERNEL_NAME"},
    {-47,"CL_INVALID_KERNEL_DEFINITION"},
    {-48,"CL_INVALID_KERNEL"},
    {-49,"CL_INVALID_ARG_INDEX"},
    {-50,"CL_INVALID_ARG_VALUE"},
    {-51,"CL_INVALID_ARG_SIZE"},
    {-52,"CL_INVALID_KERNEL_ARGS"},
    {-53,"CL_INVALID_WORK_DIMENSION"},
    {-54,"CL_INVALID_WORK_GROUP_SIZE"},
    {-55,"CL_INVALID_WORK_ITEM_SIZE"},
    {-56,"CL_INVALID_GLOBAL_OFFSET"},
    {-57,"CL_INVALID_EVENT_WAIT_LIST"},
    {-58,"CL_INVALID_EVENT"},
    {-59,"CL_INVALID_OPERATION"},
    {-60,"CL_INVALID_GL_OBJECT"},
    {-61,"CL_INVALID_BUFFER_SIZE"},
    {-62,"CL_INVALID_MIP_LEVEL"},
    {-63,"CL_INVALID_GLOBAL_WORK_SIZE"},
    {-64,"CL_INVALID_PROPERTY"},
    {-65,"CL_INVALID_IMAGE_DESCRIPTOR"},
    {-66,"CL_INVALID_COMPILER_OPTIONS"},
    {-67,"CL_INVALID_LINKER_OPTIONS"},
    {-68,"CL_INVALID_DEVICE_PARTITION_COUNT"},

    // extension errors
    {-1000,"CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"},
    {-1001,"CL_PLATFORM_NOT_FOUND_KHR"},
    {-1002,"CL_INVALID_D3D10_DEVICE_KHR"},
    {-1003,"CL_INVALID_D3D10_RESOURCE_KHR"},
    {-1004,"CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"},
    {-1005,"CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"}
 };
 for(const Entry &entry : table)
  if(entry.code==error) return entry.name;
 return "Unknown OpenCL error";
}
/************************ ende kleinkram *************************/
