#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <vector_types.h>

// Number of threads launched in the single block; it is also the number of
// int3 vectors held in the host and device arrays.
const int nb_threads = 9; 

// Initialise the CUDA driver API.
//
// Prints a confirmation on stdout when cuInit succeeds. On any failure the
// numeric CUresult is reported on stderr and the process exits with status 1,
// so callers may assume CUDA is usable once this function returns.
void init()
{
	const CUresult result = cuInit(0);

	if (result != CUDA_SUCCESS)
	{
		// Every non-success code is fatal here; include the raw code so the
		// cause (no device, invalid value, ...) can be looked up.
		fprintf(stderr,
		        "Une erreur s'est produite a l'initialisation de CUDA (code %d)\n",
		        (int)result);
		exit(1);
	}

	puts("Librairie CUDA initialisee correctement");
}



// Kernel: each thread stores its own threadIdx (x, y, z) into the element of
// v selected by threadIdx.x.
//
// No bounds check is performed, so v must hold at least blockDim.x elements.
// blockIdx is not part of the index: every block would write the same slots.
__global__ void setVec(int3 *v)
{
	int3 &slot = v[threadIdx.x];

	slot.x = threadIdx.x;
	slot.y = threadIdx.y;
	slot.z = threadIdx.z;
}

// Abort with a diagnostic on stderr if a CUDA runtime call did not succeed.
// `what` names the failing operation in the message.
static void check_cuda(cudaError_t status, const char *what)
{
	if (status != cudaSuccess)
	{
		fprintf(stderr, "Erreur CUDA (%s): %s\n", what, cudaGetErrorString(status));
		exit(1);
	}
}

// Run the setVec kernel on one block of nb_threads threads and copy the
// result back to the host.
//
// v_array: host buffer with room for nb_threads int3 elements; overwritten
//          with the values written by the kernel.
//
// Any CUDA runtime failure (allocation, memset, launch, copy, free) is
// reported on stderr and terminates the process with status 1.
void run(int3 *v_array)
{
	int3 *gpu_v_array = NULL;
	const size_t size = sizeof(int3) * nb_threads;

	check_cuda(cudaMalloc((void**)&gpu_v_array, size), "cudaMalloc");

	// cudaMemset fills byte by byte: 0xFF in every byte makes each int read
	// back as -1, so any slot the kernel failed to write is easy to spot.
	check_cuda(cudaMemset((void *)gpu_v_array, -1, size), "cudaMemset");

	// Launch shorthand ("syntactic sugar"): 1 block of nb_threads threads.
	setVec<<<1, nb_threads>>>(gpu_v_array);
	// A kernel launch returns nothing; configuration errors are only
	// visible through cudaGetLastError().
	check_cuda(cudaGetLastError(), "lancement de setVec");

	// Equivalent explicit C++ form:
	//   dim3 le_block(9, 1, 1);
	//   dim3 la_grille(1, 1, 1);
	//   setVec<<<la_grille, le_block>>>(gpu_v_array);
	//
	// Equivalent C-style form (fields assigned one by one):
	//   dim3 le_block;
	//   dim3 la_grille;
	//   le_block.x = 9;
	//   le_block.y = 1;
	//   le_block.z = 1;
	//   la_grille.x = la_grille.y = la_grille.z = 1;
	//   setVec<<<la_grille, le_block>>>(gpu_v_array);

	// Blocking copy: it also surfaces any error raised while the kernel ran.
	check_cuda(cudaMemcpy(v_array, gpu_v_array, size, cudaMemcpyDeviceToHost),
	           "cudaMemcpy");

	check_cuda(cudaFree(gpu_v_array), "cudaFree");
}


// Print the three components of *v on one line of stdout.
void print_vector(int3 *v)
{
	const int3 &vec = *v;

	printf("x: %d, y: %d, z: %d\n",
	       vec.x,
	       vec.y,
	       vec.z);
}

// Entry point: initialise CUDA, let the GPU fill one int3 per thread, then
// print every vector. Command-line arguments are not used.
int main(int argc, char** argv)
{
	// Zero-initialised so the printing loop never reads indeterminate
	// values, even if the GPU step leaves the buffer untouched.
	int3 v_array[nb_threads] = {};

	puts("Cude DemoIdThreads -- Programmez!");

	init();

	// The array decays to int3* on its own; no cast is needed.
	run(v_array);

	for (int i = 0; i < nb_threads; i++)
		print_vector(&v_array[i]);

	return 0;
}


