alignedTypes               testKernel                       --gridDim=64           --blockDim=256
asyncAPI                   increment_kernel                 --gridDim=[32768,1,1]  --blockDim=[512,1,1]
bilateralFilter            d_bilateral_filter               --gridDim=[40,30,1]    --blockDim=[16,16,1]
bindlessTexture            d_render                         --gridDim=[32,32,1]    --blockDim=[16,16,1]
binomialOptions            binomialOptionsKernel            --gridDim=512          --blockDim=256
BlackScholes               BlackScholesGPU                  --gridDim=480          --blockDim=128
clock                      timedReduction                   --gridDim=64           --blockDim=256
concurrentKernels          clock_block                      --gridDim=1            --blockDim=1
concurrentKernels          sum                              --gridDim=1            --blockDim=32
convolutionFFT2D           modulateAndNormalize_kernel      --gridDim=8320         --blockDim=256
convolutionFFT2D           padDataClampToBorder_kernel      --gridDim=[64,256,1]   --blockDim=[32,8,1]
convolutionFFT2D           padKernel_kernel                 --gridDim=[1,1,1]      --blockDim=[32,8,1]
convolutionFFT2D           spPostprocess2D_kernel           --gridDim=4096         --blockDim=256
convolutionFFT2D           spPreprocess2D_kernel            --gridDim=4096         --blockDim=256
convolutionFFT2D           modulateAndNormalize_kernel      --gridDim=8200         --blockDim=256
convolutionFFT2D           spPostprocess2D_kernel           --gridDim=4096         --blockDim=256
convolutionFFT2D           spProcess2D_kernel               --gridDim=4096         --blockDim=256
convolutionSeparable       convolutionColumnsKernel         --gridDim=[192,48,1]   --blockDim=[16,8,1]
convolutionSeparable       convolutionRowsKernel            --gridDim=[24,768,1]   --blockDim=[16,4,1]
convolutionTexture         convolutionColumnsKernel         --gridDim=[192,128,1]  --blockDim=[16,12,1]
convolutionTexture         convolutionRowsKernel            --gridDim=[192,128,1]  --blockDim=[16,12,1]
cppIntegration             kernel                           --gridDim=[1,1,1]      --blockDim=[4,1,1]
cppIntegration             kernel2                          --gridDim=[1,1,1]      --blockDim=[16,1,1]
cudaOpenMP                 kernelAddConstant                --gridDim=[64,1,1]     --blockDim=[128,1,1]
dct8x8                     CUDAkernel1DCT                   --gridDim=[64,64,1]    --blockDim=[8,8,1]
dct8x8                     CUDAkernel1IDCT                  --gridDim=[64,64,1]    --blockDim=[8,8,1]
dct8x8                     CUDAkernel2DCT                   --gridDim=[16,32,1]    --blockDim=[8,4,2]
dct8x8                     CUDAkernel2IDCT                  --gridDim=[16,32,1]    --blockDim=[8,4,2]
dct8x8                     CUDAkernelQuantizationFloat      --gridDim=[64,64,1]    --blockDim=[8,8,1]
dct8x8                     CUDAkernelQuantizationShort      --gridDim=[64,64,1]    --blockDim=[8,8,1]
dct8x8                     CUDAkernelShortIDCT              --gridDim=[16,16,1]    --blockDim=[8,4,4]
dct8x8                     CUDAkernel1DCT                   --gridDim=[64,64,1]    --blockDim=[8,8,1]
dct8x8                     CUDAkernel2DCT                   --gridDim=[16,32,1]    --blockDim=[8,4,2]
dct8x8                     CUDAkernelShortDCT               --gridDim=[16,16,1]    --blockDim=[8,4,4]
dwtHaar1D                  dwtHaar1D                        --gridDim=[1,1,1]      --blockDim=[2,1,1]
dwtHaar1D                  dwtHaar1D                        --gridDim=[4,1,1]      --blockDim=[512,1,1]
dwtHaar1D                  initValue                        --gridDim=[4,1,1]      --blockDim=[512,1,1]
dxtc                       compress                         --gridDim=6400         --blockDim=64
dxtc                       compress                         --gridDim=9984         --blockDim=64
eigenvalues                bisectKernelLarge                --gridDim=[1,1,1]      --blockDim=[256,1,1]
eigenvalues                bisectKernelLarge_MultIntervals  --gridDim=[11,1,1]     --blockDim=[256,1,1]
fastWalshTransform         fwtBatch1Kernel                  --gridDim=4096         --blockDim=512
fastWalshTransform         fwtBatch2Kernel                  --gridDim=[8192,1,1]   --blockDim=256
fastWalshTransform         modulateKernel                   --gridDim=128          --blockDim=256
FDTD3d                     FiniteDifferencesKernel          --gridDim=[12,24,1]    --blockDim=[32,16,1]
grabcutNPP                 ApplyMatteKernel                 --gridDim=[19,15,1]    --blockDim=[32,8,1]
grabcutNPP                 downscaleKernel                  --gridDim=[10,8,1]     --blockDim=[32,8,1]
grabcutNPP                 downscaleKernel                  --gridDim=[5,4,1]      --blockDim=[32,8,1]
grabcutNPP                 EdgeCuesKernel                   --gridDim=[19,15,1]    --blockDim=[32,4,1]
grabcutNPP                 EdgeCuesKernel                   --gridDim=[5,4,1]      --blockDim=[32,4,1]
grabcutNPP                 GMMcommonTerm                    --gridDim=1            --blockDim=[32,2,1]
grabcutNPP                 GMMDataTermKernel                --gridDim=[19,57,1]    --blockDim=[32,8,1]
grabcutNPP                 GMMDataTermKernel                --gridDim=[5,15,1]     --blockDim=[32,8,1]
grabcutNPP                 GMMDoSplit                       --gridDim=[19,15,1]    --blockDim=[32,4,1]
grabcutNPP                 GMMDoSplit                       --gridDim=[5,4,1]      --blockDim=[32,4,1]
grabcutNPP                 GMMFinalizeKernel                --gridDim=2            --blockDim=32
grabcutNPP                 GMMFinalizeKernel                --gridDim=4            --blockDim=32
grabcutNPP                 GMMFinalizeKernel                --gridDim=6            --blockDim=32
grabcutNPP                 GMMFinalizeKernel                --gridDim=8            --blockDim=32
grabcutNPP                 GMMFindSplit                     --gridDim=1            --blockDim=[32,2,1]
grabcutNPP                 GMMReductionKernel               --gridDim=[19,15,1]    --blockDim=[32,4,1]
grabcutNPP                 GMMReductionKernel               --gridDim=[5,4,1]      --blockDim=[32,4,1]
grabcutNPP                 MeanEdgeStrengthFinalKernel      --gridDim=1            --blockDim=[32,4,1]
grabcutNPP                 MeanEdgeStrengthReductionKernel  --gridDim=[19,15,1]    --blockDim=[32,8,1]
grabcutNPP                 MeanEdgeStrengthReductionKernel  --gridDim=[5,4,1]      --blockDim=[32,8,1]
grabcutNPP                 SegmentationChangedKernel        --gridDim=[19,15,1]    --blockDim=[32,8,1]
grabcutNPP                 TrimapFromRectKernel             --gridDim=[5,15,1]     --blockDim=[32,8,1]
grabcutNPP                 upsampleAlphaKernel              --gridDim=[5,15,1]     --blockDim=[32,8,1]
histogram                  histogram256Kernel               --gridDim=240          --blockDim=192
histogram                  histogram64Kernel                --gridDim=4370         --blockDim=64
histogram                  mergeHistogram256Kernel          --gridDim=256          --blockDim=256
histogram                  mergeHistogram64Kernel           --gridDim=64           --blockDim=256
HSOpticalFlow              AddKernel                        --gridDim=[1200,1,1]   --blockDim=[256,1,1]
HSOpticalFlow              AddKernel                        --gridDim=[23,1,1]     --blockDim=[256,1,1]
HSOpticalFlow              AddKernel                        --gridDim=[300,1,1]    --blockDim=[256,1,1]
HSOpticalFlow              AddKernel                        --gridDim=[75,1,1]     --blockDim=[256,1,1]
HSOpticalFlow              AddKernel                        --gridDim=[8,1,1]      --blockDim=[256,1,1]
HSOpticalFlow              ComputeDerivativesKernel         --gridDim=[10,40,1]    --blockDim=[32,6,1]
HSOpticalFlow              ComputeDerivativesKernel         --gridDim=[20,80,1]    --blockDim=[32,6,1]
HSOpticalFlow              ComputeDerivativesKernel         --gridDim=[2,5,1]      --blockDim=[32,6,1]
HSOpticalFlow              ComputeDerivativesKernel         --gridDim=[3,10,1]     --blockDim=[32,6,1]
HSOpticalFlow              ComputeDerivativesKernel         --gridDim=[5,20,1]     --blockDim=[32,6,1]
HSOpticalFlow              DownscaleKernel                  --gridDim=[10,30,1]    --blockDim=[32,8,1]
HSOpticalFlow              DownscaleKernel                  --gridDim=[2,4,1]      --blockDim=[32,8,1]
HSOpticalFlow              DownscaleKernel                  --gridDim=[3,8,1]      --blockDim=[32,8,1]
HSOpticalFlow              DownscaleKernel                  --gridDim=[5,15,1]     --blockDim=[32,8,1]
HSOpticalFlow              JacobiIteration                  --gridDim=[10,40,1]    --blockDim=[32,6,1]
HSOpticalFlow              JacobiIteration                  --gridDim=[20,80,1]    --blockDim=[32,6,1]
HSOpticalFlow              JacobiIteration                  --gridDim=[2,5,1]      --blockDim=[32,6,1]
HSOpticalFlow              JacobiIteration                  --gridDim=[3,10,1]     --blockDim=[32,6,1]
HSOpticalFlow              JacobiIteration                  --gridDim=[5,20,1]     --blockDim=[32,6,1]
HSOpticalFlow              UpscaleKernel                    --gridDim=[10,30,1]    --blockDim=[32,8,1]
HSOpticalFlow              UpscaleKernel                    --gridDim=[20,60,1]    --blockDim=[32,8,1]
HSOpticalFlow              UpscaleKernel                    --gridDim=[3,8,1]      --blockDim=[32,8,1]
HSOpticalFlow              UpscaleKernel                    --gridDim=[5,15,1]     --blockDim=[32,8,1]
HSOpticalFlow              WarpingKernel                    --gridDim=[10,40,1]    --blockDim=[32,6,1]
HSOpticalFlow              WarpingKernel                    --gridDim=[20,80,1]    --blockDim=[32,6,1]
HSOpticalFlow              WarpingKernel                    --gridDim=[2,5,1]      --blockDim=[32,6,1]
HSOpticalFlow              WarpingKernel                    --gridDim=[3,10,1]     --blockDim=[32,6,1]
HSOpticalFlow              WarpingKernel                    --gridDim=[5,20,1]     --blockDim=[32,6,1]
inlinePTX                  sequence_gpu                     --gridDim=[4,1,1]      --blockDim=[256,1,1]
interval                   test_interval_newton             --gridDim=1024         --blockDim=64
lineOfSight                computeAngles_kernel             --gridDim=[40,1,1]     --blockDim=[256,1,1]
lineOfSight                computeVisibilities_kernel       --gridDim=[40,1,1]     --blockDim=[256,1,1]
Mandelbrot                 Mandelbrot0                      --gridDim=14           --blockDim=[32,32,1]
matrixMul                  matrixMulCUDA                    --gridDim=[20,10,1]    --blockDim=[32,32,1]
MC_EstimatePiInlineQ       computeValue                     --gridDim=[195,1,1]    --blockDim=[128,1,1]
MC_EstimatePiInlineQ       initRNG                          --gridDim=[195,1,1]    --blockDim=[128,1,1]
MC_EstimatePiP             computeValue                     --gridDim=[195,1,1]    --blockDim=[128,1,1]
MC_EstimatePiQ             computeValue                     --gridDim=[195,1,1]    --blockDim=[128,1,1]
MC_SingleAsianOptionP      computeValue                     --gridDim=[195,1,1]    --blockDim=[128,1,1]
MC_SingleAsianOptionP      generatePaths                    --gridDim=[195,1,1]    --blockDim=[128,1,1]
MC_SingleAsianOptionP      initRNG                          --gridDim=[195,1,1]    --blockDim=[128,1,1]
mergeSort                  generateSampleRanksKernel        --gridDim=64           --blockDim=256
mergeSort                  mergeElementaryIntervalsKernel   --gridDim=32768        --blockDim=128
mergeSort                  mergeRanksAndIndicesKernel       --gridDim=64           --blockDim=256
mergeSort                  mergeSortSharedKernel            --gridDim=4096         --blockDim=512
MonteCarloMultiGPU         MonteCarloOneBlockPerOption      --gridDim=256          --blockDim=256
MonteCarloMultiGPU         rngSetupStates                   --gridDim=256          --blockDim=256
newdelete                  stackCreate                      --gridDim=1            --blockDim=1
newdelete                  vectorCreate                     --gridDim=1            --blockDim=1
newdelete                  placementNew                     --gridDim=1            --blockDim=1024
newdelete                  complexVector                    --gridDim=1            --blockDim=1024
newdelete                  containerConsume                 --gridDim=128          --blockDim=128
newdelete                  containerDelete                  --gridDim=1            --blockDim=1
newdelete                  containerFill                    --gridDim=128          --blockDim=128
quasirandomGenerator       inverseCNDKernel                 --gridDim=128          --blockDim=128
quasirandomGenerator       quasirandomGeneratorKernel       --gridDim=128          --blockDim=[128,3,1]
reduction                  reduce6                          --gridDim=[1,1,1]      --blockDim=[32,1,1]
reduction                  reduce6                          --gridDim=[64,1,1]     --blockDim=[256,1,1]
scalarProd                 scalarProdGPU                    --gridDim=128          --blockDim=256
scan                       scanExclusiveShared2             --gridDim=26           --blockDim=256
segmentationTreeThrust     markSegments                     --gridDim=[4800,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[11377,1,1]  --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[1767,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[180,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[19183,1,1]  --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[3492,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[391,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[46,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[58,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[6575,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     addScalar                        --gridDim=[880,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[11377,1,1]  --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[1767,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[180,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[19183,1,1]  --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[3492,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[391,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[58,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[6575,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     calculateEdgesInfo               --gridDim=[880,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[1,1,1]      --blockDim=[150,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[1,1,1]      --blockDim=[3,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[1,1,1]      --blockDim=[34,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[1,1,1]      --blockDim=[7,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[1322,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[14,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[311,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[3,1,1]      --blockDim=[256,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[4800,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     getRepresentatives               --gridDim=[66,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[1,1,1]      --blockDim=[150,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[1,1,1]      --blockDim=[3,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[1,1,1]      --blockDim=[34,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[1,1,1]      --blockDim=[7,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[1322,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[14,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[311,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[3,1,1]      --blockDim=[256,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[4800,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     getSuccessors                    --gridDim=[66,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[1,1,1]      --blockDim=[150,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[1,1,1]      --blockDim=[34,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[1,1,1]      --blockDim=[7,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[1322,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[14,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[311,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[3,1,1]      --blockDim=[256,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[4800,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     getVerticesMapping               --gridDim=[66,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[11377,1,1]  --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[1767,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[180,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[19183,1,1]  --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[3492,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[391,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[58,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[6575,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     invalidateLoops                  --gridDim=[880,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[11377,1,1]  --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[1767,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[180,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[3492,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[391,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[46,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[58,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[6575,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     makeNewEdges                     --gridDim=[880,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[1,1,1]      --blockDim=[150,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[1,1,1]      --blockDim=[3,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[1,1,1]      --blockDim=[34,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[1,1,1]      --blockDim=[7,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[1322,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[14,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[311,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[3,1,1]      --blockDim=[256,1,1]
segmentationTreeThrust     markSegments                     --gridDim=[66,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[1,1,1]      --blockDim=[150,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[1,1,1]      --blockDim=[3,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[1,1,1]      --blockDim=[34,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[1,1,1]      --blockDim=[7,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[1322,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[14,1,1]     --blockDim=[256,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[311,1,1]    --blockDim=[256,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[3,1,1]      --blockDim=[256,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[4800,1,1]   --blockDim=[256,1,1]
segmentationTreeThrust     removeCycles                     --gridDim=[66,1,1]     --blockDim=[256,1,1]
shfl_scan                  shfl_intimage_rows               --gridDim=1080         --blockDim=120
shfl_scan                  shfl_scan_test                   --gridDim=1            --blockDim=256
shfl_scan                  shfl_scan_test                   --gridDim=256          --blockDim=256
shfl_scan                  shfl_vertical_shfl               --gridDim=[60,1,1]     --blockDim=[32,8,1]
shfl_scan                  uniform_add                      --gridDim=255          --blockDim=256
simpleAssert               testKernel                       --gridDim=[2,1,1]      --blockDim=[32,1,1]
simpleAtomicIntrinsics     testKernel                       --gridDim=64           --blockDim=256
simpleCallback             incKernel                        --gridDim=[196,1,1]    --blockDim=[512,1,1]
simpleCubemapTexture       transformKernel                  --gridDim=[8,8,1]      --blockDim=[8,8,1]
simpleCUFFT                ComplexPointwiseMulAndScale      --gridDim=32           --blockDim=256
simpleHyperQ               kernel_A                         --gridDim=1            --blockDim=1
simpleHyperQ               kernel_B                         --gridDim=1            --blockDim=1
simpleHyperQ               sum                              --gridDim=1            --blockDim=32
simpleLayeredTexture       transformKernel                  --gridDim=[64,64,1]    --blockDim=[8,8,1]
simpleMPI                  simpleMPIKernel                  --gridDim=10000        --blockDim=256
simpleMultiCopy            incKernel                        --gridDim=[8192,1,1]   --blockDim=[512,1,1]
simpleMultiGPU             reduceKernel                     --gridDim=32           --blockDim=256
simplePitchLinearTexture   shiftArray                       --gridDim=[128,128,1]  --blockDim=[16,16,1]
simplePitchLinearTexture   shiftPitchLinear                 --gridDim=[128,128,1]  --blockDim=[16,16,1]
simplePrintf               testKernel                       --gridDim=[2,2,1]      --blockDim=[2,2,2]
simpleP2P                  SimpleKernel                     --gridDim=[32768,1,1]  --blockDim=[512,1,1]
simpleSeparateCompilation  transformVector                  --gridDim=[1,1,1]      --blockDim=[1024,1,1]
simpleStreams              init_array                       --gridDim=[32768,1,1]  --blockDim=[512,1,1]
simpleStreams              init_array                       --gridDim=[8192,1,1]   --blockDim=[512,1,1]
simpleSurfaceWrite         surfaceWriteKernel               --gridDim=[64,64,1]    --blockDim=[8,8,1]
simpleSurfaceWrite         transformKernel                  --gridDim=[64,64,1]    --blockDim=[8,8,1]
simpleTemplates            testKernel                       --gridDim=[1,1,1]      --blockDim=[32,1,1]
simpleTemplates            testKernel                       --gridDim=[1,1,1]      --blockDim=[64,1,1]
simpleTexture3D            d_render                         --gridDim=[32,32,1]    --blockDim=[16,16,1]
simpleTexture              transformKernel                  --gridDim=[64,64,1]    --blockDim=[8,8,1]
simpleVoteIntrinsics       VoteAllKernel2                   --gridDim=[1,1,1]      --blockDim=[128,1,1]
simpleVoteIntrinsics       VoteAnyKernel1                   --gridDim=[1,1,1]      --blockDim=[128,1,1]
simpleVoteIntrinsics       VoteAnyKernel3                   --gridDim=1            --blockDim=32
simpleZeroCopy             vectorAddGPU                     --gridDim=[4096,1,1]   --blockDim=[256,1,1]
SobelFilter                SobelCopyImage                   --gridDim=512          --blockDim=384
sortingNetworks            bitonicMergeShared               --gridDim=1024         --blockDim=512
sortingNetworks            bitonicSortShared                --gridDim=1024         --blockDim=512
sortingNetworks            bitonicSortShared1               --gridDim=1024         --blockDim=512
stereoDisparity            stereoDisparityKernel            --gridDim=[20,67,1]    --blockDim=[32,8,1]
template                   testKernel                       --gridDim=[1,1,1]      --blockDim=[32,1,1]
template_runtime           sequence_gpu                     --gridDim=[4,1,1]      --blockDim=[32,1,1]
threadFenceReduction       reduceMultiPass                  --gridDim=[64,1,1]     --blockDim=[128,1,1]
threadFenceReduction       reduceSinglePass                 --gridDim=[64,1,1]     --blockDim=[128,1,1]
transpose                  kernel                           --gridDim=[64,64,1]    --blockDim=[16,16,1]
vectorAdd                  vectorAdd                        --gridDim=196          --blockDim=256
volumeFiltering            d_filter_surface3d               --gridDim=[1,1,32]     --blockDim=[32,32,1]
volumeFiltering            d_integrate_trapezoidal          --gridDim=[32,1,1]     --blockDim=[32,1,1]
volumeFiltering            d_preintegrate                   --gridDim=[64,64,1]    --blockDim=[16,16,1]
volumeRender               d_render                         --gridDim=[32,32,1]    --blockDim=[16,16,1]
