
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
#include <vector_types.h>

// Number of GPU threads launched; also the number of vectors, since it sizes
// both the device buffer in run() and the host array in main().
const int nb_threads = 27; 

/*
 * Initialise the CUDA driver API.
 *
 * Prints a confirmation on stdout when cuInit() succeeds. On any failure the
 * driver error code is reported on stderr and the process exits with status 1,
 * so callers can assume the driver is usable when this function returns.
 */
void init()
{
	const CUresult result = cuInit(0);

	// Every non-success code is fatal here; there is no per-code handling.
	if (result != CUDA_SUCCESS)
	{
		fprintf(stderr,
		        "Une erreur s'est produite a l'initialisation de CUDA (code %d)\n",
		        (int)result);
		exit(1);
	}

	puts("Librairie CUDA initialisee correctement");
}


/*
 * Kernel: each thread writes its own 3D thread index into one element of v.
 *
 * Launch layout: only threadIdx is used (blockIdx is ignored), so the kernel
 * is meant to be launched with a single block. The block may be 1D, 2D or 3D.
 *
 * Preconditions: v must point to device memory holding at least
 * blockDim.x * blockDim.y * blockDim.z elements; there is no bounds check
 * because the kernel receives no element count.
 *
 * v: output array; element [x + y*blockDim.x + z*blockDim.x*blockDim.y]
 *    receives (x, y, z) of the thread that owns it.
 */
__global__ void setVec(int3 *v)
{
	const int i = threadIdx.x;
	const int j = threadIdx.y;
	const int k = threadIdx.z;

	// Flatten the 3D thread index using the actual block extents rather than
	// hard-coded sizes, so any block shape yields a dense, collision-free
	// mapping. For a 3x3x3 block this is i + j*3 + k*9.
	const int stride_y = (int)blockDim.x;
	const int stride_z = (int)(blockDim.x * blockDim.y);
	const int index = i + j * stride_y + k * stride_z;

	// Indexing with v[i] alone would be wrong: threads sharing the same x
	// coordinate would all overwrite the same element.
	v[index].x = i;
	v[index].y = j;
	v[index].z = k;

	#ifdef __DEVICE_EMULATION__
		printf("Thread: %d, %d, %d -- %d\n", i, j, k, index);
	#endif
}

// Report a failed CUDA runtime call on stderr and terminate the process.
static void run_check(cudaError_t status, const char *what)
{
	if (status != cudaSuccess)
	{
		fprintf(stderr, "Erreur CUDA (%s) : %s\n", what, cudaGetErrorString(status));
		exit(1);
	}
}

/*
 * Fill v_array on the GPU and copy the result back to the host.
 *
 * Allocates a device buffer of nb_threads int3 elements, sets every byte to
 * 0xFF (so untouched elements read back as -1), launches setVec with a single
 * 3x3x3 block and copies the buffer into v_array.
 *
 * v_array: host array with room for at least nb_threads elements.
 *
 * Any CUDA runtime failure (allocation, memset, launch, copy, free) is
 * reported on stderr and ends the process with status 1, instead of silently
 * leaving v_array with unspecified contents.
 */
void run(int3 *v_array)
{
	int3 *gpu_v_array = NULL;

	// 3 * 3 * 3 threads: must stay equal to nb_threads, which sizes the buffers.
	const dim3 dimBlock(3, 3, 3);
	const size_t size = sizeof(int3) * nb_threads;

	run_check(cudaMalloc((void**)&gpu_v_array, size), "cudaMalloc");
	run_check(cudaMemset((void *)gpu_v_array, -1, size), "cudaMemset");

	setVec<<<1, dimBlock>>>(gpu_v_array);
	// A kernel launch returns no status; a bad configuration shows up here.
	run_check(cudaGetLastError(), "setVec");

	// Blocking copy: also waits for the kernel and surfaces execution errors.
	run_check(cudaMemcpy(v_array, gpu_v_array, size, cudaMemcpyDeviceToHost), "cudaMemcpy");

	run_check(cudaFree(gpu_v_array), "cudaFree");
}


// Write the three components of the vector pointed to by v on one stdout line.
void print_vector(int3 *v)
{
	const int3 &vec = *v;
	printf("x: %d, y: %d, z: %d\n", vec.x, vec.y, vec.z);
}

/*
 * Entry point: prints a banner, initialises CUDA, has the GPU fill a host
 * array of nb_threads vectors, then prints each vector in index order.
 * Command-line arguments are not used.
 */
int main(int argc, char** argv)
{
	int3 v_array[nb_threads];

	puts("Cuda DemoBlock -- Programmez!");

	init();
	// The array decays to a pointer to its first element.
	run(v_array);

	const int3 *const end = v_array + nb_threads;
	for (int3 *cur = v_array; cur != end; ++cur)
	{
		print_vector(cur);
	}

	return 0;
}
