Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

quick & dirty but working support for VK_NV_cuda_kernel_launch! #9

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added shaders/cache.ptx.bin
Binary file not shown.
1 change: 1 addition & 0 deletions shaders/hazptx.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
nvcc shader.cu --ptx -gencode arch=compute_89,code=sm_89
183 changes: 183 additions & 0 deletions shaders/shader.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
// Reference implementation of float2/float3 helper math (presumably what the
// operators further down in this file were modeled on -- TODO confirm):
//https://github.com/NVIDIA/cuda-samples/blob/master/Common/helper_math.h
// Output image size in pixels. The main2 kernel uses these for its bounds
// check, for normalizing pixel coordinates and as the row stride of the
// output buffer.
#define WIDTH 3200
#define HEIGHT 2400
// NOTE(review): not referenced anywhere in the visible source; presumably the
// block edge length the host side launches with -- verify against the caller.
#define WORKGROUP_SIZE 32


// Short vector type names mapped onto the CUDA built-in vector types.
// These are plain text-substitution macros, not typedefs.
#define vec2 float2
#define vec4 float4
#define vec3 float3

/*__device__ float2(const float a, const float b) {

return make_float2(a, b);

}*/
#if 0
#define float2(a) (make_float2(a,a))
#define float2(a,b) (make_float2(a,b))
#define float3(a,b,c) (make_float3(a,b,c))

#else
// Splat constructor: returns a float2 whose x and y are both `a`.
__device__ float2 vec2a(float a)
{
    const float2 splat = make_float2(a, a);
    return splat;
}
// Component constructor: returns the float2 (a, b).
__device__ float2 vec2a(float a, float b) {
    float2 v;
    v.x = a;
    v.y = b;
    return v;
}
// Component constructor: returns the float3 (a, b, c).
__device__ float3 vec3a(float a, float b, float c)
{
    float3 v;
    v.x = a;
    v.y = b;
    v.z = c;
    return v;
}

// Extends a float3 to a float4: xyz are taken from `a`, w is set to `b`.
__device__ float4 vec4a(float3 a, float b)
{
    float4 v;
    v.x = a.x;
    v.y = a.y;
    v.z = a.z;
    v.w = b;
    return v;
}


// Builds a float3 from three doubles; each argument is converted to float
// before being stored.
__device__ float3 vec3d(double a, double b, double c)
{
    const float fx = static_cast<float>(a);
    const float fy = static_cast<float>(b);
    const float fz = static_cast<float>(c);
    return make_float3(fx, fy, fz);
}

#endif

// Vector minus scalar: subtracts `b` from each component of `a`.
__device__ float2 operator-(const float2& a, const float &b)
{
    float2 diff;
    diff.x = a.x - b;
    diff.y = a.y - b;
    return diff;
}


// Component-wise sum of two float2 values.
__device__ float2 operator+(const float2& a, const float2& b)
{
    float2 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    return sum;
}

// 2D dot product: a.x * b.x + a.y * b.y.
__device__ float dot(const float2& a, const float2& b)
{
    const float result = a.x * b.x + a.y * b.y;
    return result;
}


// Component-wise sum of two float3 values.
__device__ float3 operator+(const float3& a, const float3& b)
{
    float3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}

// Vector times scalar: multiplies each component of `a` by `b`.
__device__ float2 operator*(const float2& a, const float& b)
{
    float2 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    return scaled;
}


// Component-wise (Hadamard) product of two float3 values.
__device__ float3 operator*(const float3& a, const float3& b)
{
    float3 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    return prod;
}

// Vector times scalar: multiplies each component of `a` by `b`.
__device__ float3 operator*(const float3& a, const float& b)
{
    float3 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;
    return scaled;
}

// Scalar times vector: multiplies each component of `b` by `a`.
__device__ float3 operator*(const float& a, const float3& b)
{
    float3 scaled;
    scaled.x = a * b.x;
    scaled.y = a * b.y;
    scaled.z = a * b.z;
    return scaled;
}

// Component-wise cosine of a float3.
__device__ float3 mycos(const float3& a)
{
    float3 c;
    c.x = cos(a.x);
    c.y = cos(a.y);
    c.z = cos(a.z);
    return c;
}

// Compile-time switch: with USE_ARG defined the kernel takes the output buffer
// as its only parameter; without it the kernel takes no parameters and the
// computed color is discarded.
#define USE_ARG

/*
 * main2 - computes one pixel of a WIDTH x HEIGHT Mandelbrot image per thread.
 *
 * Launch layout: the pixel coordinate is derived from both the x and y
 * components of blockIdx/blockDim/threadIdx, so a 2D launch covering at least
 * WIDTH x HEIGHT threads is expected. Threads that fall outside the image
 * return immediately, so the grid may overshoot the image size.
 *
 * imageData (USE_ARG only): output buffer, one float4 per pixel, written at
 *   index WIDTH * y + x. It is written for every in-range pixel, so it must
 *   hold at least WIDTH * HEIGHT elements.
 */
#ifdef USE_ARG
__global__ void main2(float4* imageData) {
#else
__global__ void main2(/*float4* imageData*/) {
#endif

    // In order to fit the work into whole blocks, some unnecessary threads are
    // launched. We terminate those threads here.
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    const int idy = blockDim.y * blockIdx.y + threadIdx.y;
    if (idx >= WIDTH || idy >= HEIGHT)
        return;

    // Pixel position normalized by the image size.
    const float x = float(idx) / float(WIDTH);
    const float y = float(idy) / float(HEIGHT);

    // Mandelbrot iteration z <- z^2 + c, where c is derived from the pixel
    // position. All literals carry the 'f' suffix so no double-precision
    // values are implicitly narrowed to float.
    const vec2 uv = vec2a(x, y);
    const vec2 c = vec2a(-0.445f, 0.0f) + (uv - 0.5f) * (2.0f + 1.7f * 0.2f);
    vec2 z = vec2a(0.0f);
    const int M = 128;   // maximum number of iterations
    float n = 0.0f;      // number of iterations completed before escaping
    for (int i = 0; i < M; i++)
    {
        z = vec2a(z.x * z.x - z.y * z.y, 2.0f * z.x * z.y) + c;
        // NOTE(review): the conventional bailout is |z| > 2, i.e.
        // dot(z, z) > 4. The threshold is left at 2 so the rendered image
        // does not change -- confirm which one is intended.
        if (dot(z, z) > 2.0f) break;
        n++;
    }

    // We use a simple cosine palette to determine color
    // (6.28318 approximates 2*pi):
    // http://iquilezles.org/www/articles/palettes/palettes.htm
    const float t = n / float(M);
    const vec3 d = vec3a(0.3f, 0.3f, 0.5f);
    const vec3 e = vec3a(-0.2f, -0.3f, -0.5f);
    const vec3 f = vec3a(2.1f, 2.0f, 3.0f);
    const vec3 g = vec3a(0.0f, 0.1f, 0.0f);
    const vec4 color = vec4a(d + e * mycos(6.28318f * (f * t + g)), 1.0f);

#ifdef USE_ARG
    // Store the rendered pixel into the output buffer; alpha is always 1.
    imageData[WIDTH * idy + idx] = make_float4(color.x, color.y, color.z, 1.0f);
#else
    // No output buffer in this configuration; silence the unused-variable
    // warning.
    (void)color;
#endif
}
Loading