Skip to content
Commits on Source (38)
19.0.0-rc5
19.0.0-rc6
......@@ -1922,7 +1922,7 @@ if test x"$enable_dri3" = xyes; then
dri3_modifier_modules="xcb-dri3 >= $XCBDRI3_MODIFIERS_REQUIRED xcb-present >= $XCBPRESENT_MODIFIERS_REQUIRED"
PKG_CHECK_MODULES([XCB_DRI3_MODIFIERS], [$dri3_modifier_modules], [have_dri3_modifiers=yes], [have_dri3_modifiers=no])
if test "x$have_dri3_modifiers" == xyes; then
if test "x$have_dri3_modifiers" = xyes; then
DEFINES="$DEFINES -DHAVE_DRI3_MODIFIERS"
fi
fi
......@@ -2949,7 +2949,7 @@ if test "x$enable_llvm" = xyes; then
dnl the LLVM library propagated in the Libs.private of the respective .pc
dnl file which ensures complete dependency information when statically
dnl linking.
if test "x$enable_glx" == xgallium-xlib; then
if test "x$enable_glx" = xgallium-xlib; then
GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $LLVM_LIBS"
fi
if test "x$enable_gallium_osmesa" = xyes; then
......
......@@ -171,6 +171,7 @@ CHIPSET(0x3185, glk_2x6, "Intel(R) UHD Graphics 600 (Geminilake 2x6)")
CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
CHIPSET(0x3E93, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
CHIPSET(0x3E99, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
CHIPSET(0x3E9C, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
CHIPSET(0x3E91, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)")
CHIPSET(0x3E92, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)")
CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
......@@ -203,6 +204,10 @@ CHIPSET(0x5A54, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
CHIPSET(0x8A50, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
CHIPSET(0x8A51, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
CHIPSET(0x8A52, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
CHIPSET(0x8A56, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
CHIPSET(0x8A57, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
CHIPSET(0x8A58, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
CHIPSET(0x8A59, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
CHIPSET(0x8A5A, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
CHIPSET(0x8A5B, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
CHIPSET(0x8A5C, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
......
......@@ -61,11 +61,11 @@ endif
dri_drivers_path = get_option('dri-drivers-path')
if dri_drivers_path == ''
dri_drivers_path = join_paths(get_option('libdir'), 'dri')
dri_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'dri')
endif
dri_search_path = get_option('dri-search-path')
if dri_search_path == ''
dri_search_path = join_paths(get_option('prefix'), dri_drivers_path)
dri_search_path = dri_drivers_path
endif
with_gles1 = get_option('gles1')
......
......@@ -905,6 +905,37 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}
LLVMValueRef
ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
LLVMValueRef llvm_chan,
LLVMValueRef attr_number,
LLVMValueRef params,
LLVMValueRef i,
LLVMValueRef j)
{
LLVMValueRef args[6];
LLVMValueRef p1;
args[0] = i;
args[1] = llvm_chan;
args[2] = attr_number;
args[3] = ctx->i1false;
args[4] = params;
p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
args[0] = p1;
args[1] = j;
args[2] = llvm_chan;
args[3] = attr_number;
args[4] = ctx->i1false;
args[5] = params;
return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
}
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef parameter,
......
......@@ -216,6 +216,14 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
LLVMValueRef i,
LLVMValueRef j);
LLVMValueRef
ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
LLVMValueRef llvm_chan,
LLVMValueRef attr_number,
LLVMValueRef params,
LLVMValueRef i,
LLVMValueRef j);
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef parameter,
......
......@@ -1896,14 +1896,18 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
if (var) {
bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
var->data.mode == nir_var_shader_in;
if (var->data.compact)
stride = 1;
idx = var->data.driver_location;
comp = var->data.location_frac;
mode = var->data.mode;
get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL,
&const_index, &indir_index);
if (var->data.compact) {
stride = 1;
const_index += comp;
comp = 0;
}
}
if (instr->dest.ssa.bit_size == 64)
......@@ -2022,6 +2026,11 @@ visit_store_var(struct ac_nir_context *ctx,
NULL, NULL, &const_index, &indir_index);
idx = var->data.driver_location;
comp = var->data.location_frac;
if (var->data.compact) {
const_index += comp;
comp = 0;
}
}
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
......
......@@ -969,9 +969,11 @@ void radv_update_descriptor_sets(
}
src_ptr += src_binding_layout->size / 4;
dst_ptr += dst_binding_layout->size / 4;
if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
/* Sampler descriptors don't have a buffer list. */
dst_buffer_list[j] = src_buffer_list[j];
++src_buffer_list;
++dst_buffer_list;
}
}
}
}
......
......@@ -739,8 +739,7 @@ void radv_GetPhysicalDeviceFeatures(
.alphaToOne = true,
.multiViewport = true,
.samplerAnisotropy = true,
.textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 ||
pdevice->rad_info.family == CHIP_STONEY,
.textureCompressionETC2 = radv_device_supports_etc(pdevice),
.textureCompressionASTC_LDR = false,
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
......@@ -807,7 +806,7 @@ void radv_GetPhysicalDeviceFeatures2(
features->storageBuffer16BitAccess = enabled;
features->uniformAndStorageBuffer16BitAccess = enabled;
features->storagePushConstant16 = enabled;
features->storageInputOutput16 = enabled;
features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
......
......@@ -595,6 +595,14 @@ static bool radv_is_filter_minmax_format_supported(VkFormat format)
}
}
bool
radv_device_supports_etc(struct radv_physical_device *physical_device)
{
return physical_device->rad_info.family == CHIP_VEGA10 ||
physical_device->rad_info.family == CHIP_RAVEN ||
physical_device->rad_info.family == CHIP_STONEY;
}
static void
radv_physical_device_get_format_properties(struct radv_physical_device *physical_device,
VkFormat format,
......@@ -612,9 +620,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
}
if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
physical_device->rad_info.family != CHIP_VEGA10 &&
physical_device->rad_info.family != CHIP_RAVEN &&
physical_device->rad_info.family != CHIP_STONEY) {
!radv_device_supports_etc(physical_device)) {
out_properties->linearTilingFeatures = linear;
out_properties->optimalTilingFeatures = tiled;
out_properties->bufferFeatures = buffer;
......
......@@ -370,14 +370,29 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const uint32_t subpass_att = clear_att->colorAttachment;
const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const uint32_t samples = iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
unsigned fs_key = radv_format_meta_fs_key(iview->vk_format);
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
uint32_t samples, samples_log2;
VkFormat format;
unsigned fs_key;
VkClearColorValue clear_value = clear_att->clearValue.color;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
VkPipeline pipeline;
/* When a framebuffer is bound to the current command buffer, get the
* number of samples from it. Otherwise, get the number of samples from
* the render pass because it's likely a secondary command buffer.
*/
if (iview) {
samples = iview->image->info.samples;
format = iview->vk_format;
} else {
samples = cmd_buffer->state.pass->attachments[pass_att].samples;
format = cmd_buffer->state.pass->attachments[pass_att].format;
}
samples_log2 = ffs(samples) - 1;
fs_key = radv_format_meta_fs_key(format);
if (fs_key == -1) {
radv_finishme("color clears incomplete");
return;
......@@ -617,6 +632,9 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
const VkClearRect *clear_rect,
VkClearDepthStencilValue clear_value)
{
if (!iview)
return false;
uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
cmd_buffer->queue_family_index,
cmd_buffer->queue_family_index);
......@@ -705,11 +723,22 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
VkImageAspectFlags aspects = clear_att->aspectMask;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const uint32_t samples = iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
uint32_t samples, samples_log2;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
/* When a framebuffer is bound to the current command buffer, get the
* number of samples from it. Otherwise, get the number of samples from
* the render pass because it's likely a secondary command buffer.
*/
if (iview) {
samples = iview->image->info.samples;
} else {
samples = cmd_buffer->state.pass->attachments[pass_att].samples;
}
samples_log2 = ffs(samples) - 1;
assert(pass_att != VK_ATTACHMENT_UNUSED);
if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
......@@ -915,7 +944,11 @@ static bool
radv_image_view_can_fast_clear(struct radv_device *device,
const struct radv_image_view *iview)
{
struct radv_image *image = iview->image;
struct radv_image *image;
if (!iview)
return false;
image = iview->image;
/* Only fast clear if the image itself can be fast cleared. */
if (!radv_image_can_fast_clear(device, image))
......@@ -1523,7 +1556,7 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
const uint32_t subpass_att = clear_att->colorAttachment;
const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
VkClearColorValue clear_value = clear_att->clearValue.color;
if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout,
......@@ -1536,8 +1569,11 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
}
} else {
const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
if (pass_att == VK_ATTACHMENT_UNUSED)
return;
VkImageLayout image_layout = subpass->depth_stencil_attachment.layout;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
......
......@@ -92,6 +92,7 @@ struct radv_shader_context {
gl_shader_stage stage;
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
uint64_t float16_shaded_mask;
uint64_t input_mask;
uint64_t output_mask;
......@@ -1441,7 +1442,7 @@ store_tcs_output(struct ac_shader_abi *abi,
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
const unsigned location = var->data.location;
const unsigned component = var->data.location_frac;
unsigned component = var->data.location_frac;
const bool is_patch = var->data.patch;
const bool is_compact = var->data.compact;
LLVMValueRef dw_addr;
......@@ -1459,11 +1460,15 @@ store_tcs_output(struct ac_shader_abi *abi,
}
param = shader_io_get_unique_index(location);
if (location == VARYING_SLOT_CLIP_DIST0 &&
is_compact && const_index > 3) {
const_index -= 3;
if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) {
const_index += component;
component = 0;
if (const_index >= 4) {
const_index -= 4;
param++;
}
}
if (!is_patch) {
stride = get_tcs_out_vertex_stride(ctx);
......@@ -1529,10 +1534,14 @@ load_tes_input(struct ac_shader_abi *abi,
LLVMValueRef result;
unsigned param = shader_io_get_unique_index(location);
if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) {
const_index -= 3;
if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) {
const_index += component;
component = 0;
if (const_index >= 4) {
const_index -= 4;
param++;
}
}
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
is_compact, vertex_index, param_index);
......@@ -2051,6 +2060,7 @@ static void interp_fs_input(struct radv_shader_context *ctx,
unsigned attr,
LLVMValueRef interp_param,
LLVMValueRef prim_mask,
bool float16,
LLVMValueRef result[4])
{
LLVMValueRef attr_number;
......@@ -2083,7 +2093,12 @@ static void interp_fs_input(struct radv_shader_context *ctx,
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
if (interp) {
if (interp && float16) {
result[chan] = ac_build_fs_interp_f16(&ctx->ac,
llvm_chan,
attr_number,
prim_mask, i, j);
} else if (interp) {
result[chan] = ac_build_fs_interp(&ctx->ac,
llvm_chan,
attr_number,
......@@ -2095,7 +2110,30 @@ static void interp_fs_input(struct radv_shader_context *ctx,
attr_number,
prim_mask);
result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), "");
result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], float16 ? ctx->ac.i16 : ctx->ac.i32, "");
}
}
}
static void mark_16bit_fs_input(struct radv_shader_context *ctx,
const struct glsl_type *type,
int location)
{
if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
unsigned attrib_count = glsl_count_attribute_slots(type, false);
if (glsl_type_is_16bit(type)) {
ctx->float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
}
} else if (glsl_type_is_array(type)) {
unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
for (unsigned i = 0; i < glsl_get_length(type); ++i) {
mark_16bit_fs_input(ctx, glsl_get_array_element(type), location + i * stride);
}
} else {
assert(glsl_type_is_struct(type));
for (unsigned i = 0; i < glsl_get_length(type); i++) {
mark_16bit_fs_input(ctx, glsl_get_struct_field(type, i), location);
location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
}
}
}
......@@ -2110,9 +2148,17 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
uint64_t mask;
variable->data.driver_location = idx * 4;
if (variable->data.compact) {
unsigned component_count = variable->data.location_frac +
glsl_get_length(variable->type);
attrib_count = (component_count + 3) / 4;
} else
mark_16bit_fs_input(ctx, variable->type, idx);
mask = ((1ull << attrib_count) - 1) << variable->data.location;
if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
unsigned interp_type;
if (variable->data.sample)
interp_type = INTERP_SAMPLE;
......@@ -2122,23 +2168,13 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
interp_type = INTERP_CENTER;
interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
}
bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
if (interp == NULL)
interp = LLVMGetUndef(type);
interp = LLVMGetUndef(ctx->ac.i32);
for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
if (idx == VARYING_SLOT_CLIP_DIST0) {
/* Do not account for the number of components inside the array
* of clip/cull distances because this might wrongly set other
* bits like primitive ID or layer.
*/
mask = 1ull << VARYING_SLOT_CLIP_DIST0;
}
ctx->input_mask |= mask;
}
......@@ -2200,11 +2236,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
interp_param = *inputs;
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
bool float16 = (ctx->float16_shaded_mask >> i) & 1;
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, float16,
inputs);
if (LLVMIsUndef(interp_param))
ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
if (float16)
ctx->shader_info->fs.float16_shaded_mask |= 1u << index;
if (i >= VARYING_SLOT_VAR0)
ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
++index;
......@@ -2216,7 +2255,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
interp_param = *inputs;
interp_fs_input(ctx, index, interp_param,
ctx->abi.prim_mask, inputs);
ctx->abi.prim_mask, false, inputs);
++index;
}
} else if (i == VARYING_SLOT_POS) {
......@@ -2250,6 +2289,12 @@ scan_shader_output_decl(struct radv_shader_context *ctx,
if (stage == MESA_SHADER_TESS_CTRL)
return;
if (variable->data.compact) {
unsigned component_count = variable->data.location_frac +
glsl_get_length(variable->type);
attrib_count = (component_count + 3) / 4;
}
mask_attribs = ((1ull << attrib_count) - 1) << idx;
if (stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_EVAL ||
......@@ -2265,8 +2310,6 @@ scan_shader_output_decl(struct radv_shader_context *ctx,
ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size;
}
mask_attribs = 1ull << idx;
}
}
......@@ -2611,51 +2654,41 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(outinfo->vs_output_param_offset));
if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
for(unsigned location = VARYING_SLOT_CLIP_DIST0; location <= VARYING_SLOT_CLIP_DIST1; ++location) {
if (ctx->output_mask & (1ull << location)) {
unsigned output_usage_mask, length;
LLVMValueRef slots[8];
LLVMValueRef slots[4];
unsigned j;
if (ctx->stage == MESA_SHADER_VERTEX &&
!ctx->is_gs_copy_shader) {
output_usage_mask =
ctx->shader_info->info.vs.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
ctx->shader_info->info.vs.output_usage_mask[location];
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
output_usage_mask =
ctx->shader_info->info.tes.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
ctx->shader_info->info.tes.output_usage_mask[location];
} else {
assert(ctx->is_gs_copy_shader);
output_usage_mask =
ctx->shader_info->info.gs.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
ctx->shader_info->info.gs.output_usage_mask[location];
}
length = util_last_bit(output_usage_mask);
i = VARYING_SLOT_CLIP_DIST0;
for (j = 0; j < length; j++)
slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, location, j));
for (i = length; i < 8; i++)
for (i = length; i < 4; i++)
slots[i] = LLVMGetUndef(ctx->ac.f32);
if (length > 4) {
target = V_008DFC_SQ_EXP_POS + 3;
si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args);
memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
&args, sizeof(args));
}
target = V_008DFC_SQ_EXP_POS + 2;
target = V_008DFC_SQ_EXP_POS + 2 + (location - VARYING_SLOT_CLIP_DIST0);
si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args);
memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
&args, sizeof(args));
/* Export the clip/cull distances values to the next stage. */
radv_export_param(ctx, param_count, &slots[0], 0xf);
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++;
if (length > 4) {
radv_export_param(ctx, param_count, &slots[4], 0xf);
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
outinfo->vs_output_param_offset[location] = param_count++;
}
}
......@@ -2816,28 +2849,14 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
LLVMValueRef lds_base = NULL;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask;
int param_index;
int length = 4;
if (!(ctx->output_mask & (1ull << i)))
continue;
if (ctx->stage == MESA_SHADER_VERTEX) {
output_usage_mask =
ctx->shader_info->info.vs.output_usage_mask[i];
} else {
assert(ctx->stage == MESA_SHADER_TESS_EVAL);
output_usage_mask =
ctx->shader_info->info.tes.output_usage_mask[i];
}
if (i == VARYING_SLOT_CLIP_DIST0)
length = util_last_bit(output_usage_mask);
param_index = shader_io_get_unique_index(i);
max_output_written = MAX2(param_index + (length > 4), max_output_written);
max_output_written = MAX2(param_index, max_output_written);
}
outinfo->esgs_itemsize = (max_output_written + 1) * 16;
......@@ -2858,7 +2877,6 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
unsigned output_usage_mask;
int param_index;
int length = 4;
if (!(ctx->output_mask & (1ull << i)))
continue;
......@@ -2872,9 +2890,6 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
ctx->shader_info->info.tes.output_usage_mask[i];
}
if (i == VARYING_SLOT_CLIP_DIST0)
length = util_last_bit(output_usage_mask);
param_index = shader_io_get_unique_index(i);
if (lds_base) {
......@@ -2883,7 +2898,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
"");
}
for (j = 0; j < length; j++) {
for (j = 0; j < 4; j++) {
if (!(output_usage_mask & (1 << j)))
continue;
......@@ -2920,22 +2935,16 @@ handle_ls_outputs_post(struct radv_shader_context *ctx)
vertex_dw_stride, "");
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask =
ctx->shader_info->info.vs.output_usage_mask[i];
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
int length = 4;
if (!(ctx->output_mask & (1ull << i)))
continue;
if (i == VARYING_SLOT_CLIP_DIST0)
length = util_last_bit(output_usage_mask);
int param = shader_io_get_unique_index(i);
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4, false),
"");
for (unsigned j = 0; j < length; j++) {
for (unsigned j = 0; j < 4; j++) {
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
value = ac_to_integer(&ctx->ac, value);
value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
......
......@@ -3088,13 +3088,17 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
}
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16)
{
uint32_t ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
ps_input_cntl = S_028644_OFFSET(offset);
if (flat_shade)
ps_input_cntl |= S_028644_FLAT_SHADE(1);
if (float16) {
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
S_028644_ATTR0_VALID(1);
}
} else {
/* The input is a DEFAULT_VAL constant. */
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
......@@ -3119,7 +3123,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
if (ps->info.info.ps.prim_id_input) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
++ps_offset;
}
}
......@@ -3129,9 +3133,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
ps->info.info.needs_multiview_view_index) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
if (vs_offset != AC_EXP_PARAM_UNDEFINED)
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
else
ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
++ps_offset;
}
......@@ -3147,14 +3151,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
ps->info.info.ps.num_input_clips_culls > 4) {
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
}
......@@ -3162,6 +3166,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
unsigned vs_offset;
bool flat_shade;
bool float16;
if (!(ps->info.fs.input_mask & (1u << i)))
continue;
......@@ -3173,8 +3178,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
}
flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
++ps_offset;
}
......
......@@ -1465,6 +1465,7 @@ bool radv_format_pack_clear_color(VkFormat format,
bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable);
bool radv_dcc_formats_compatible(VkFormat format1,
VkFormat format2);
bool radv_device_supports_etc(struct radv_physical_device *physical_device);
struct radv_fmask_info {
uint64_t offset;
......
......@@ -257,6 +257,7 @@ struct radv_shader_variant_info {
unsigned num_interp;
uint32_t input_mask;
uint32_t flat_shaded_mask;
uint32_t float16_shaded_mask;
bool can_discard;
bool early_fragment_test;
} fs;
......@@ -401,6 +402,8 @@ static inline unsigned shader_io_get_unique_index(gl_varying_slot slot)
return 1;
if (slot == VARYING_SLOT_CLIP_DIST0)
return 2;
if (slot == VARYING_SLOT_CLIP_DIST1)
return 3;
/* 3 is reserved for clip dist as well */
if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
return 4 + (slot - VARYING_SLOT_VAR0);
......
......@@ -129,11 +129,9 @@ set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
get_deref_offset(deref_instr, &const_offset);
if (idx == VARYING_SLOT_CLIP_DIST0) {
/* Special case for clip/cull distances because there are
* combined into a single array that contains both.
*/
output_usage_mask[idx] |= 1 << const_offset;
if (var->data.compact) {
const_offset += comp;
output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4);
return;
}
......@@ -174,13 +172,9 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
type = glsl_get_array_element(var->type);
unsigned slots =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4)
: glsl_count_attribute_slots(type, false);
if (idx == VARYING_SLOT_CLIP_DIST0)
slots = (nir->info.clip_distance_array_size +
nir->info.cull_distance_array_size > 4) ? 2 : 1;
mark_tess_output(info, var->data.patch, param, slots);
break;
}
......@@ -374,7 +368,8 @@ gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
info->ps.layer_input = true;
break;
case VARYING_SLOT_CLIP_DIST0:
info->ps.num_input_clips_culls = attrib_count;
case VARYING_SLOT_CLIP_DIST1:
info->ps.num_input_clips_culls += attrib_count;
break;
default:
break;
......@@ -409,8 +404,8 @@ gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var,
int idx = var->data.location;
unsigned param = shader_io_get_unique_index(idx);
int num_slots = glsl_count_attribute_slots(var->type, false);
if (idx == VARYING_SLOT_CLIP_DIST0)
num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 2 : 1;
if (var->data.compact)
num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4);
mark_ls_output(info, param, num_slots);
}
......
......@@ -353,6 +353,12 @@ nir_visitor::visit(ir_variable *ir)
ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
var->data.compact = ir->type->without_array()->is_scalar();
}
if (shader->info.stage > MESA_SHADER_VERTEX &&
ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
ir->data.location <= VARYING_SLOT_CULL_DIST1) {
var->data.compact = ir->type->without_array()->is_scalar();
}
}
break;
......@@ -363,6 +369,12 @@ nir_visitor::visit(ir_variable *ir)
ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
var->data.compact = ir->type->without_array()->is_scalar();
}
if (shader->info.stage <= MESA_SHADER_GEOMETRY &&
ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
ir->data.location <= VARYING_SLOT_CULL_DIST1) {
var->data.compact = ir->type->without_array()->is_scalar();
}
break;
case ir_var_uniform:
......
......@@ -260,6 +260,22 @@ glsl_type::contains_double() const
}
}
bool
glsl_type::contains_64bit() const
{
if (this->is_array()) {
return this->fields.array->contains_64bit();
} else if (this->is_record() || this->is_interface()) {
for (unsigned int i = 0; i < this->length; i++) {
if (this->fields.structure[i].type->contains_64bit())
return true;
}
return false;
} else {
return this->is_64bit();
}
}
bool
glsl_type::contains_opaque() const {
switch (base_type) {
......
......@@ -544,6 +544,12 @@ public:
*/
bool contains_double() const;
/**
* Query whether or not type is a 64-bit type, or for struct, interface and
* array types, contains a double type.
*/
bool contains_64bit() const;
/**
* Query whether or not a type is a float type
*/
......
......@@ -33,7 +33,11 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
unsigned *offset,
const struct glsl_type *type)
{
if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
/* If this type contains a 64-bit value, align to 8 bytes */
if (glsl_type_contains_64bit(type))
*offset = ALIGN_POT(*offset, 8);
if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
unsigned length = glsl_get_length(type);
const struct glsl_type *child_type = glsl_get_array_element(type);
for (unsigned i = 0; i < length; i++)
......@@ -58,32 +62,43 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
assert(var->data.stream < NIR_MAX_XFB_STREAMS);
xfb->streams_written |= (1 << var->data.stream);
unsigned comp_slots = glsl_get_component_slots(type);
unsigned comp_slots;
if (var->data.compact) {
/* This only happens for clip/cull which are float arrays */
assert(glsl_without_array(type) == glsl_float_type());
assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
var->data.location == VARYING_SLOT_CLIP_DIST1);
comp_slots = glsl_get_length(type);
} else {
comp_slots = glsl_get_component_slots(type);
unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
assert(attrib_slots == glsl_count_attribute_slots(type, false));
/* Ensure that we don't have, for instance, a dvec2 with a location_frac
* of 2 which would make it crass a location boundary even though it
* fits in a single slot. However, you can have a dvec3 which crosses
* the slot boundary with a location_frac of 2.
/* Ensure that we don't have, for instance, a dvec2 with a
* location_frac of 2 which would make it crass a location boundary
* even though it fits in a single slot. However, you can have a
* dvec3 which crosses the slot boundary with a location_frac of 2.
*/
assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots);
assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
attrib_slots);
}
assert(var->data.location_frac + comp_slots <= 8);
uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
assert(attrib_slots <= 2);
for (unsigned s = 0; s < attrib_slots; s++) {
while (comp_mask) {
nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
output->buffer = buffer;
output->offset = *offset + s * 16;
output->offset = *offset;
output->location = *location;
output->component_mask = (comp_mask >> (s * 4)) & 0xf;
output->component_mask = comp_mask & 0xf;
*offset += util_bitcount(output->component_mask) * 4;
(*location)++;
comp_mask >>= 4;
}
*offset += comp_slots * 4;
}
}
......