c++ - Segmentation fault on Cuda -
i'm writing cuda program processing images. got segmentation fault problem , i’ve no no idea why. i'm miss minor thing, after hours of trying correct myself couldn’t make running.
i'm setting correct grid,block , shred memory values( @ least think so), according devicequery on hardware(geforce310m total shmem:16384,max threads per block:512 , max block dim 521). here output before segmentation fault:
cols , rows: 256 384 total:98304 // rows*cols block size:512 grid size:192 shared mem:2048
below kernel code
__global__ void reduce_min(float *minvar,float* d_logluminance) { extern __shared__ float s_data[]; //shared memeory unsigned int tid = threadidx.x; unsigned int global_id = blockidx.x*blockdim.x + tid; //copy shared mem s_data[tid] = d_logluminance[global_id]; __syncthreads(); for(unsigned int = 1;i<blockdim.x;i*=2) { if(tid%(2*i) == 0) { s_data[tid] = min(s_data[tid],s_data[tid+i]); } __syncthreads(); } if(tid == 0) d_logluminance[blockidx.x] = s_data[0]; }
and part allocate memory , invoke kernel
size_t size = 512; size_t sizeofgrid = (numrows*numcols)/size; const dim3 blocksize(size,1,1); const dim3 gridsize(sizeofgrid,1,1); unsigned int sharedmem =(sizeof(float)*size); //shared memory per block printf("%s\n%d\n%d\n%s%d\n","cols , rows:",numcols,numrows,"total:",numrows*numcols); printf("%s%d\n%s%d\n%s%d\n","block size:",size,"grid size:",sizeofgrid,"shared mem:",sharedmem); float* d_lum; float* outdata; checkcudaerrors(cudamalloc(&d_lum,sizeof(float)*numrows*numcols)); checkcudaerrors(cudamemcpy(d_lum,d_logluminance,sizeof(float)*numrows*numcols,cudamemcpydevicetodevice)); checkcudaerrors(cudamalloc(&outdata,sizeof(float)*numrows*numcols)); reduce_min<<<gridsize,blocksize,sharedmem>>>(outdata,d_lum); cudadevicesynchronize(); checkcudaerrors(cudagetlasterror()); checkcudaerrors(cudamemcpy(&min_loglum,outdata,sizeof(float)*numrows*numcols,cudamemcpydevicetohost));
thanks in advance help.
checkcudaerrors(cudamemcpy(&min_loglum,outdata,sizeof(float)*numrows*numcols,cudamemcpydevicetohost));
function generating segmentation fault, beacause fogrot mention(and missed when writing code) min_loglum float, not float* of size numrows*numcols. in end trying copy more memory had allocated.
Comments
Post a Comment