relu.comp 579 Bytes
#version 450
// Number of invocations per workgroup along X; used by the local_size layout below.
#define LOCAL_SZ_X 32
// Per-dispatch parameters delivered as push constants.
layout(push_constant) uniform pushBlock {
      // Exclusive upper bound on the element indices processed by main().
      int total;
      // Factor applied by main() to input values that are below zero.
      float slope;
} p;

// Source elements; declared readonly, so this shader only loads from it.
layout(binding = 0) readonly buffer inbuf{
    float in_buffer[];
};

// Destination elements; declared writeonly, so this shader only stores to it.
layout(binding = 1) writeonly buffer outbuf{
    float out_buffer[];
};

// One-dimensional workgroup: LOCAL_SZ_X x 1 x 1 invocations.
layout(local_size_x = LOCAL_SZ_X, local_size_y = 1, local_size_z = 1) in;
// Element-wise activation: values >= 0 are copied through unchanged, all other
// values are multiplied by p.slope. Each invocation starts at its global X
// index and advances by the total number of X invocations in the dispatch,
// so indices up to p.total are covered regardless of how many workgroups run.
void main()
{
    // Total invocations along X across the whole dispatch.
    int stride = int(gl_NumWorkGroups.x * gl_WorkGroupSize.x);
    int idx = int(gl_GlobalInvocationID.x);

    while (idx < p.total)
    {
        float x = in_buffer[idx];
        if (x >= 0.f)
        {
            out_buffer[idx] = x;
        }
        else
        {
            // Taken for negative inputs (and for NaN, since the comparison fails).
            out_buffer[idx] = p.slope * x;
        }
        idx += stride;
    }
}