Skip to content
Commits on Source (96)
......@@ -36,3 +36,19 @@ cbd1ad6165f0aea7fb7c6fd1b36ad5317dd65cb7 st/mesa: require RGBA2, RGB4, and RGBA4
# stable The commit addresses functionality not present in branch
1b8983c25be19073c02fe9630e949be55f8280fa radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
# stable Explicit 19.0 only nominations, require work which did not land in branch
8ab95b849e66f3221d80a67eef2ec6e3730901a8 anv: Refactor descriptor pushing a bit
5c30fffeec1732c21d600c036f95f8cdb1bb5487 anv: Take references to push descriptor set layouts
# sha Commit references invalid sha - an offender did not land in branch.
85ee157283c667372baf7c03259cba08853f0067 gitlab-ci: autotools needs to be told which llvm version to use
# revert The commit was reverted shortly after it was applied on master
47fc359822494935852de1e70e4d840b2fe6a25c anv: release memory allocated by glsl types during spirv_to_nir
# stable Explicitly 19.0 nomination, seemingly a performance patch
5ef2b8f1f2ebcdb4ffe5c98b3f4f48e584cb4b22 nir: Add a pass for lowering IO back to vector when possible
# fixes Temporary block perf. fix depending on previous opt. pass
6d5d89d25a0a4299dbfcbfeca71b6c7e65ef3d45 intel/nir: Vectorize all IO
......@@ -35,7 +35,11 @@ def main():
args = parser.parse_args()
if os.path.isabs(args.libdir):
to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
destdir = os.environ.get('DESTDIR')
if destdir:
to = os.path.join(destdir, args.libdir[1:])
else:
to = args.libdir
else:
to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)
......@@ -45,7 +49,6 @@ def main():
if os.path.lexists(to):
os.unlink(to)
os.makedirs(to)
shutil.copy(args.megadriver, master)
for driver in args.drivers:
abs_driver = os.path.join(to, driver)
......
......@@ -31,7 +31,8 @@ Compatibility contexts may report a lower version depending on each driver.
<h2>SHA256 checksums</h2>
<pre>
TBD
e22e6fe4c3aca80fe872a0a7285b6c5523e0cfc0bfb57ffcc3b3d66d292593e4 mesa-18.3.4.tar.gz
32314da4365d37f80d84f599bd9625b00161c273c39600ba63b45002d500bb07 mesa-18.3.4.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 18.3.5 Release Notes / March 18, 2019</h1>
<p>
Mesa 18.3.5 is a bug fix release which fixes bugs found since the 18.3.4 release.
</p>
<p>
Mesa 18.3.5 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
<h2>SHA256 checksums</h2>
<pre>
5f40a336cb2af9b1d66fa243bb03c2c8a3f9b3f067aab6aaaad4316d1bc0e58b mesa-18.3.5.tar.gz
4027aea82cc63240b3fcf60eec9eea882955f098c989b29357b01d1695747953 mesa-18.3.5.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104297">Bug 104297</a> - [i965] Downward causes GPU hangs and misrendering on Haswell</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104602">Bug 104602</a> - [apitrace] Graphical artifacts in Civilization VI on RX Vega</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107052">Bug 107052</a> - [Regression][bisected]. Crookz - The Big Heist Demo can't be launched despite the &quot;true&quot; flag in &quot;drirc&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107563">Bug 107563</a> - [RADV] Broken rendering in Unity demos</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108457">Bug 108457</a> - [OpenGL CTS] KHR-GL46.tessellation_shader.single.xfb_captures_data_from_correct_stage fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108999">Bug 108999</a> - Calculating the scissors fields when the y is flipped (0 on top) can generate negative numbers that will cause assertion failure later on.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109328">Bug 109328</a> - [BSW BXT GLK] dEQP-VK.subgroups.arithmetic.subgroup regressions</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109443">Bug 109443</a> - Build failure with MSVC when using Scons &gt;= 3.0.2</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109451">Bug 109451</a> - [IVB,SNB] LINE_STRIPs following a TRIANGLE_FAN fail to use primitive restart</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109594">Bug 109594</a> - totem assert failure: totem: src/intel/genxml/gen9_pack.h:72: __gen_uint: La declaración `v &lt;= max' no se cumple.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109597">Bug 109597</a> - wreckfest issues with transparent objects &amp; skybox</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109601">Bug 109601</a> - [Regression] RuneLite GPU rendering broken on 18.3.x</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109698">Bug 109698</a> - dri.pc contents invalid when built with meson</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109735">Bug 109735</a> - [Regression] broken font with mesa_vulkan_overlay</li>
</ul>
<h2>Changes</h2>
<p>Alok Hota (1):</p>
<ul>
<li>swr/rast: bypass size limit for non-sampled textures</li>
</ul>
<p>Andrii Simiklit (1):</p>
<ul>
<li>i965: re-emit index buffer state on a reset option change.</li>
</ul>
<p>Axel Davy (2):</p>
<ul>
<li>st/nine: Ignore window size if error</li>
<li>st/nine: Ignore multisample quality level if no ms</li>
</ul>
<p>Bas Nieuwenhuizen (4):</p>
<ul>
<li>radv: Sync ETC2 whitelisted devices.</li>
<li>radv: Fix float16 interpolation set up.</li>
<li>radv: Allow interpolation on non-float types.</li>
<li>radv: Interpolate less aggressively.</li>
</ul>
<p>Carlos Garnacho (1):</p>
<ul>
<li>wayland/egl: Ensure EGL surface is resized on DRI update_buffers()</li>
</ul>
<p>Danylo Piliaiev (1):</p>
<ul>
<li>glsl/linker: Fix unmatched TCS outputs being reduced to local variable</li>
</ul>
<p>David Shao (1):</p>
<ul>
<li>meson: ensure that xmlpool_options.h is generated for gallium targets that need it</li>
</ul>
<p>Eleni Maria Stea (1):</p>
<ul>
<li>i965: fixed clamping in set_scissor_bits when the y is flipped</li>
</ul>
<p>Emil Velikov (7):</p>
<ul>
<li>docs: add sha256 checksums for 18.3.4</li>
<li>meson: egl: correctly manage loader/xmlconfig</li>
<li>cherry-ignore: add 19.0 only anv/push buffer nominations</li>
<li>cherry-ignore: add gitlab-ci fixup commit</li>
<li>cherry-ignore: ignore glsl_types memory cleanup patch</li>
<li>cherry-ignore: add explicit 19.0 performance optimisations</li>
<li>Update version to 18.3.5</li>
</ul>
<p>Eric Engestrom (1):</p>
<ul>
<li>egl: fix libdrm-less builds</li>
</ul>
<p>Francisco Jerez (1):</p>
<ul>
<li>intel/fs: Implement extended strides greater than 4 for IR source regions.</li>
</ul>
<p>Ian Romanick (2):</p>
<ul>
<li>intel/fs: nir_op_extract_i8 extracts a byte, not a word</li>
<li>intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>glsl: fix recording of variables for XFB in TCS shaders</li>
</ul>
<p>Jason Ekstrand (10):</p>
<ul>
<li>intel/fs: Bail in optimize_extract_to_float if we have modifiers</li>
<li>compiler/types: Add a contains_64bit helper</li>
<li>nir/xfb: Properly align 64-bit values</li>
<li>nir/xfb: Work in terms of components rather than slots</li>
<li>nir/xfb: Handle compact arrays in gather_xfb_info</li>
<li>anv: Count surfaces for non-YCbCr images in GetDescriptorSetLayoutSupport</li>
<li>spirv: OpImageQueryLod requires a sampler</li>
<li>spirv: Pull offset/stride from the pointer for OpArrayLength</li>
<li>glsl/list: Add a list variant of insert_after</li>
<li>glsl/lower_vector_derefs: Don't use a temporary for TCS outputs</li>
</ul>
<p>Jose Maria Casanova Crespo (1):</p>
<ul>
<li>glsl: TCS outputs can not be transform feedback candidates on GLES</li>
</ul>
<p>José Fonseca (1):</p>
<ul>
<li>scons: Workaround failures with MSVC when using SCons 3.0.[2-4].</li>
</ul>
<p>Juan A. Suarez Romero (3):</p>
<ul>
<li>genxml: add missing field values for 3DSTATE_SF</li>
<li>anv: advertise 8 subpixel precision bits</li>
<li>anv: destroy descriptor sets when pool gets reset</li>
</ul>
<p>Kenneth Graunke (1):</p>
<ul>
<li>intel/fs: Fix opt_peephole_csel to not throw away saturates.</li>
</ul>
<p>Kevin Strasser (1):</p>
<ul>
<li>egl/dri: Avoid out of bounds array access</li>
</ul>
<p>Lionel Landwerlin (1):</p>
<ul>
<li>intel: fix urb size for CFL GT1</li>
</ul>
<p>Marek Olšák (5):</p>
<ul>
<li>radeonsi: add driconf option radeonsi_enable_nir</li>
<li>radeonsi: always enable NIR for Civilization 6 to fix corruption</li>
<li>driconf: add Civ6Sub executable for Civilization 6</li>
<li>tgsi: don't set tgsi_info::uses_bindless_images for constbufs and hw atomics</li>
<li>radeonsi: compile clear and copy buffer compute shaders on demand</li>
</ul>
<p>Mauro Rossi (2):</p>
<ul>
<li>android: anv: fix generated files depedencies (v2)</li>
<li>android: anv: fix libexpat shared dependency</li>
</ul>
<p>Ray Zhang (1):</p>
<ul>
<li>glx: fix shared memory leak in X11</li>
</ul>
<p>Rhys Perry (2):</p>
<ul>
<li>radv: bitcast 16-bit outputs to integers</li>
<li>radv: ensure export arguments are always float</li>
</ul>
<p>Samuel Pitoiset (8):</p>
<ul>
<li>radv: write the alpha channel of MRT0 when alpha coverage is enabled</li>
<li>radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled</li>
<li>radv: fix clearing attachments in secondary command buffers</li>
<li>radv: fix out-of-bounds access when copying descriptors BO list</li>
<li>radv: don't copy buffer descriptors list for samplers</li>
<li>radv: properly align the fence and EOP bug VA on GFX9</li>
<li>radv: fix pointSizeRange limits</li>
<li>radv: always initialize HTILE when the src layout is UNDEFINED</li>
</ul>
<p>Sergii Romantsov (2):</p>
<ul>
<li>dri: meson: do not prefix user provided dri-drivers-path</li>
<li>d3d: meson: do not prefix user provided d3d-drivers-path</li>
</ul>
<p>Tapani Pälli (3):</p>
<ul>
<li>nir: initialize value in copy_prop_vars_block</li>
<li>anv: retain the is_array state in create_plane_tex_instr_implicit</li>
<li>anv: destroy descriptor sets when pool gets destroyed</li>
</ul>
<p>Timothy Arceri (1):</p>
<ul>
<li>glsl: fix shader cache for packed param list</li>
</ul>
<p>Yevhenii Kolesnikov (1):</p>
<ul>
<li>i965: Fix allow_higher_compat_version workaround limited by OpenGL 3.0</li>
</ul>
<p>pal1000 (1):</p>
<ul>
<li>scons: Compatibility with Scons development version string</li>
</ul>
</div>
</body>
</html>
......@@ -64,11 +64,11 @@ endif
dri_drivers_path = get_option('dri-drivers-path')
if dri_drivers_path == ''
dri_drivers_path = join_paths(get_option('libdir'), 'dri')
dri_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'dri')
endif
dri_search_path = get_option('dri-search-path')
if dri_search_path == ''
dri_search_path = join_paths(get_option('prefix'), dri_drivers_path)
dri_search_path = dri_drivers_path
endif
with_gles1 = get_option('gles1')
......@@ -615,7 +615,7 @@ with_gallium_xa = _xa != 'false'
d3d_drivers_path = get_option('d3d-drivers-path')
if d3d_drivers_path == ''
d3d_drivers_path = join_paths(get_option('libdir'), 'd3d')
d3d_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'd3d')
endif
with_gallium_st_nine = get_option('gallium-nine')
......
......@@ -48,7 +48,12 @@ import source_list
# a path directly. We want to support both, so we need to detect the SCons version,
# for which no API is provided by SCons 8-P
scons_version = tuple(map(int, SCons.__version__.split('.')))
# The SCons version string has consistently had this format:
# MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
# so keeping only the first three components covers every release type
# (stable, alpha or beta).
# For simplicity the alpha and beta suffixes are dropped.
scons_version = tuple(map(int, SCons.__version__.split('.')[:3]))
def quietCommandLines(env):
# Quiet command lines
......
......@@ -308,6 +308,19 @@ def generate(env):
if env.GetOption('num_jobs') <= 1:
env.SetOption('num_jobs', num_jobs())
# Speed up dependency checking. See
# - https://github.com/SCons/scons/wiki/GoFastButton
# - https://bugs.freedesktop.org/show_bug.cgi?id=109443
# The SCons version string has consistently had this format:
# MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
# so keeping only the first three components covers every release type
# (stable, alpha or beta).
# For simplicity the alpha and beta suffixes are dropped.
scons_version = distutils.version.StrictVersion('.'.join(SCons.__version__.split('.')[:3]))
if scons_version < distutils.version.StrictVersion('3.0.2') or \
scons_version > distutils.version.StrictVersion('3.0.4'):
env.Decider('MD5-timestamp')
env.SetOption('max_drift', 60)
......
......@@ -896,6 +896,37 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}
/**
 * Emit the two-step 16-bit fragment shader attribute interpolation.
 *
 * The first step calls llvm.amdgcn.interp.p1.f16 with \p i and produces an
 * f32 intermediate; the second step feeds that intermediate together with
 * \p j into llvm.amdgcn.interp.p2.f16, whose result type is ctx->f16.
 *
 * \param ctx          LLVM build context (supplies the types and i1false).
 * \param llvm_chan    channel operand forwarded to both intrinsics.
 * \param attr_number  attribute operand forwarded to both intrinsics.
 * \param params       passed as the last operand of both intrinsics.
 * \param i            first interpolation operand (used by the p1 step).
 * \param j            second interpolation operand (used by the p2 step).
 * \return the value returned by the llvm.amdgcn.interp.p2.f16 call.
 */
LLVMValueRef
ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
		       LLVMValueRef llvm_chan,
		       LLVMValueRef attr_number,
		       LLVMValueRef params,
		       LLVMValueRef i,
		       LLVMValueRef j)
{
	/* Step one: five operands, f32 result. */
	LLVMValueRef p1_args[5] = {
		i, llvm_chan, attr_number, ctx->i1false, params,
	};
	LLVMValueRef p1_result =
		ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
				   ctx->f32, p1_args, 5,
				   AC_FUNC_ATTR_READNONE);

	/* Step two: the p1 result goes first, followed by j and the same
	 * channel/attribute/flag/params operands as above.
	 */
	LLVMValueRef p2_args[6] = {
		p1_result, j, llvm_chan, attr_number, ctx->i1false, params,
	};

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
				  ctx->f16, p2_args, 6,
				  AC_FUNC_ATTR_READNONE);
}
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef parameter,
......
......@@ -217,6 +217,14 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
LLVMValueRef i,
LLVMValueRef j);
/* Build a 16-bit fragment shader input interpolation.
 *
 * The implementation issues llvm.amdgcn.interp.p1.f16 (taking i) followed by
 * llvm.amdgcn.interp.p2.f16 (taking the p1 result and j) and returns the
 * result of the second call, which is requested with the f16 type.
 * llvm_chan, attr_number and params are forwarded to both intrinsic calls.
 */
LLVMValueRef
ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
LLVMValueRef llvm_chan,
LLVMValueRef attr_number,
LLVMValueRef params,
LLVMValueRef i,
LLVMValueRef j);
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef parameter,
......
......@@ -3032,7 +3032,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
ctx->abi->frag_pos[2],
ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
};
result = ac_build_gather_values(&ctx->ac, values, 4);
result = ac_to_integer(&ctx->ac,
ac_build_gather_values(&ctx->ac, values, 4));
break;
}
case nir_intrinsic_load_front_face:
......
......@@ -338,13 +338,13 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
unsigned eop_bug_offset;
void *fence_ptr;
radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8,
&cmd_buffer->gfx9_fence_offset,
&fence_ptr);
cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
/* Allocate a buffer for the EOP bug on GFX9. */
radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
&eop_bug_offset, &fence_ptr);
cmd_buffer->gfx9_eop_bug_va =
radv_buffer_get_va(cmd_buffer->upload.upload_bo);
......@@ -414,6 +414,8 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
unsigned *out_offset,
void **ptr)
{
assert(util_is_power_of_two_nonzero(alignment));
uint64_t offset = align(cmd_buffer->upload.offset, alignment);
if (offset + size > cmd_buffer->upload.size) {
if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
......@@ -4243,10 +4245,15 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
if (!radv_image_has_htile(image))
return;
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
/* TODO: merge with the clear if applicable */
radv_initialize_htile(cmd_buffer, image, range, 0);
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
if (radv_layout_is_htile_compressed(image, dst_layout,
dst_queue_mask)) {
clear_value = 0;
}
radv_initialize_htile(cmd_buffer, image, range, clear_value);
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
......@@ -4709,7 +4716,7 @@ void radv_CmdBindTransformFeedbackBuffersEXT(
enabled_mask |= 1 << idx;
}
cmd_buffer->state.streamout.enabled_mask = enabled_mask;
cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
......
......@@ -965,9 +965,11 @@ void radv_update_descriptor_sets(
}
src_ptr += src_binding_layout->size / 4;
dst_ptr += dst_binding_layout->size / 4;
if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
/* Sampler descriptors don't have a buffer list. */
dst_buffer_list[j] = src_buffer_list[j];
++src_buffer_list;
++dst_buffer_list;
}
}
}
}
......
......@@ -329,7 +329,7 @@ radv_physical_device_init(struct radv_physical_device *device,
device->rad_info.chip_class > GFX9)
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
radv_get_driver_uuid(&device->device_uuid);
radv_get_driver_uuid(&device->driver_uuid);
radv_get_device_uuid(&device->rad_info, &device->device_uuid);
if (device->rad_info.family == CHIP_STONEY ||
......@@ -726,8 +726,7 @@ void radv_GetPhysicalDeviceFeatures(
.alphaToOne = true,
.multiViewport = true,
.samplerAnisotropy = true,
.textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 ||
pdevice->rad_info.family == CHIP_STONEY,
.textureCompressionETC2 = radv_device_supports_etc(pdevice),
.textureCompressionASTC_LDR = false,
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
......@@ -794,7 +793,7 @@ void radv_GetPhysicalDeviceFeatures2(
features->storageBuffer16BitAccess = enabled;
features->uniformAndStorageBuffer16BitAccess = enabled;
features->storagePushConstant16 = enabled;
features->storageInputOutput16 = enabled;
features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
......@@ -978,7 +977,7 @@ void radv_GetPhysicalDeviceProperties(
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.discreteQueuePriorities = 2,
.pointSizeRange = { 0.125, 255.875 },
.pointSizeRange = { 0.0, 8192.0 },
.lineWidthRange = { 0.0, 7.9921875 },
.pointSizeGranularity = (1.0 / 8.0),
.lineWidthGranularity = (1.0 / 128.0),
......
......@@ -595,6 +595,14 @@ static bool radv_is_filter_minmax_format_supported(VkFormat format)
}
}
/**
 * Tell whether the given physical device is on the list of chip families
 * for which ETC formats are reported as supported.
 *
 * \return true for CHIP_VEGA10, CHIP_RAVEN and CHIP_STONEY, false for every
 *         other family.
 */
bool
radv_device_supports_etc(struct radv_physical_device *physical_device)
{
	switch (physical_device->rad_info.family) {
	case CHIP_VEGA10:
	case CHIP_RAVEN:
	case CHIP_STONEY:
		return true;
	default:
		return false;
	}
}
static void
radv_physical_device_get_format_properties(struct radv_physical_device *physical_device,
VkFormat format,
......@@ -612,9 +620,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
}
if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
physical_device->rad_info.family != CHIP_VEGA10 &&
physical_device->rad_info.family != CHIP_RAVEN &&
physical_device->rad_info.family != CHIP_STONEY) {
!radv_device_supports_etc(physical_device)) {
out_properties->linearTilingFeatures = linear;
out_properties->optimalTilingFeatures = tiled;
out_properties->bufferFeatures = buffer;
......
......@@ -352,14 +352,29 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const uint32_t subpass_att = clear_att->colorAttachment;
const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const uint32_t samples = iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
unsigned fs_key = radv_format_meta_fs_key(iview->vk_format);
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
uint32_t samples, samples_log2;
VkFormat format;
unsigned fs_key;
VkClearColorValue clear_value = clear_att->clearValue.color;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
VkPipeline pipeline;
/* When a framebuffer is bound to the current command buffer, get the
* number of samples from it. Otherwise, get the number of samples from
* the render pass because it's likely a secondary command buffer.
*/
if (iview) {
samples = iview->image->info.samples;
format = iview->vk_format;
} else {
samples = cmd_buffer->state.pass->attachments[pass_att].samples;
format = cmd_buffer->state.pass->attachments[pass_att].format;
}
samples_log2 = ffs(samples) - 1;
fs_key = radv_format_meta_fs_key(format);
if (fs_key == -1) {
radv_finishme("color clears incomplete");
return;
......@@ -599,6 +614,9 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
const VkClearRect *clear_rect,
VkClearDepthStencilValue clear_value)
{
if (!iview)
return false;
uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
cmd_buffer->queue_family_index,
cmd_buffer->queue_family_index);
......@@ -615,7 +633,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
iview->base_mip == 0 &&
iview->base_layer == 0 &&
radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
!radv_image_extent_compare(iview->image, &iview->extent))
radv_image_extent_compare(iview->image, &iview->extent))
return true;
return false;
}
......@@ -686,11 +704,22 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
VkImageAspectFlags aspects = clear_att->aspectMask;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const uint32_t samples = iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
uint32_t samples, samples_log2;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
/* When a framebuffer is bound to the current command buffer, get the
* number of samples from it. Otherwise, get the number of samples from
* the render pass because it's likely a secondary command buffer.
*/
if (iview) {
samples = iview->image->info.samples;
} else {
samples = cmd_buffer->state.pass->attachments[pass_att].samples;
}
samples_log2 = ffs(samples) - 1;
assert(pass_att != VK_ATTACHMENT_UNUSED);
if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
......@@ -757,11 +786,14 @@ emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer,
const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
VkImageLayout image_layout = subpass->depth_stencil_attachment.layout;
const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
VkImageAspectFlags aspects = clear_att->aspectMask;
uint32_t clear_word, flush_bits;
if (!iview)
return false;
if (!radv_image_has_htile(iview->image))
return false;
......@@ -1059,12 +1091,15 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
VkClearColorValue clear_value = clear_att->clearValue.color;
uint32_t clear_color[2], flush_bits = 0;
uint32_t cmask_clear_value;
bool ret;
if (!iview)
return false;
if (!radv_image_has_cmask(iview->image) && !radv_image_has_dcc(iview->image))
return false;
......
......@@ -94,6 +94,7 @@ struct radv_shader_context {
gl_shader_stage stage;
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
uint64_t float16_shaded_mask;
uint64_t input_mask;
uint64_t output_mask;
......@@ -2097,6 +2098,7 @@ static void interp_fs_input(struct radv_shader_context *ctx,
unsigned attr,
LLVMValueRef interp_param,
LLVMValueRef prim_mask,
bool float16,
LLVMValueRef result[4])
{
LLVMValueRef attr_number;
......@@ -2129,7 +2131,12 @@ static void interp_fs_input(struct radv_shader_context *ctx,
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
if (interp) {
if (interp && float16) {
result[chan] = ac_build_fs_interp_f16(&ctx->ac,
llvm_chan,
attr_number,
prim_mask, i, j);
} else if (interp) {
result[chan] = ac_build_fs_interp(&ctx->ac,
llvm_chan,
attr_number,
......@@ -2141,7 +2148,30 @@ static void interp_fs_input(struct radv_shader_context *ctx,
attr_number,
prim_mask);
result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), "");
result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], float16 ? ctx->ac.i16 : ctx->ac.i32, "");
}
}
}
/**
 * Record which fragment shader input slots hold 16-bit data.
 *
 * Walks \p type recursively. For every scalar, vector or matrix that is
 * 16-bit, one bit per attribute slot it occupies is set in
 * ctx->float16_shaded_mask, starting at bit \p location. Arrays and structs
 * are descended into element by element / field by field, advancing the
 * location by the slot count of each member.
 *
 * \param ctx       shader context whose float16_shaded_mask is updated.
 * \param type      GLSL type of the input (or of a nested member).
 * \param location  slot index of the first attribute slot of \p type.
 */
static void mark_16bit_fs_input(struct radv_shader_context *ctx,
				const struct glsl_type *type,
				int location)
{
	/* Leaf case: a plain numeric type occupies a contiguous run of slots. */
	if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
		unsigned slot_count = glsl_count_attribute_slots(type, false);

		if (glsl_type_is_16bit(type))
			ctx->float16_shaded_mask |= ((1ull << slot_count) - 1) << location;
		return;
	}

	/* Arrays: every element has the same type and slot stride. */
	if (glsl_type_is_array(type)) {
		const struct glsl_type *elem_type = glsl_get_array_element(type);
		unsigned stride = glsl_count_attribute_slots(elem_type, false);
		unsigned i = 0;

		while (i < glsl_get_length(type)) {
			mark_16bit_fs_input(ctx, elem_type, location + i * stride);
			++i;
		}
		return;
	}

	/* Anything left must be a struct: fields are laid out back to back. */
	assert(glsl_type_is_struct(type));
	for (unsigned field = 0; field < glsl_get_length(type); field++) {
		const struct glsl_type *field_type = glsl_get_struct_field(type, field);

		mark_16bit_fs_input(ctx, field_type, location);
		location += glsl_count_attribute_slots(field_type, false);
	}
}
......@@ -2156,9 +2186,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
uint64_t mask;
variable->data.driver_location = idx * 4;
if (!variable->data.compact)
mark_16bit_fs_input(ctx, variable->type, idx);
mask = ((1ull << attrib_count) - 1) << variable->data.location;
if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT ||
glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT16 ||
glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_STRUCT) {
unsigned interp_type;
if (variable->data.sample)
interp_type = INTERP_SAMPLE;
......@@ -2169,10 +2205,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
}
bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
if (interp == NULL)
interp = LLVMGetUndef(type);
interp = LLVMGetUndef(ctx->ac.i32);
for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
......@@ -2246,11 +2280,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
interp_param = *inputs;
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
bool float16 = (ctx->float16_shaded_mask >> i) & 1;
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, float16,
inputs);
if (LLVMIsUndef(interp_param))
ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
if (float16)
ctx->shader_info->fs.float16_shaded_mask |= 1u << index;
if (i >= VARYING_SLOT_VAR0)
ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
++index;
......@@ -2262,7 +2299,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
interp_param = *inputs;
interp_fs_input(ctx, index, interp_param,
ctx->abi.prim_mask, inputs);
ctx->abi.prim_mask, false, inputs);
++index;
}
} else if (i == VARYING_SLOT_POS) {
......@@ -2411,7 +2448,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
if (is_16bit) {
for (unsigned chan = 0; chan < 4; chan++)
values[chan] = LLVMBuildZExt(ctx->ac.builder,
values[chan],
ac_to_integer(&ctx->ac, values[chan]),
ctx->ac.i32, "");
}
break;
......@@ -2422,7 +2459,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
if (is_16bit) {
for (unsigned chan = 0; chan < 4; chan++)
values[chan] = LLVMBuildSExt(ctx->ac.builder,
values[chan],
ac_to_integer(&ctx->ac, values[chan]),
ctx->ac.i32, "");
}
break;
......@@ -2475,13 +2512,9 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
} else
memcpy(&args->out[0], values, sizeof(values[0]) * 4);
for (unsigned i = 0; i < 4; ++i) {
if (!(args->enabled_channels & (1 << i)))
continue;
for (unsigned i = 0; i < 4; ++i)
args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
}
}
static void
radv_export_param(struct radv_shader_context *ctx, unsigned index,
......
......@@ -524,6 +524,14 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
col_format |= cf << (4 * i);
}
if (!col_format && blend->need_src_alpha & (1 << 0)) {
/* When a subpass doesn't have any color attachments, write the
* alpha channel of MRT0 when alpha coverage is enabled because
* the depth attachment needs it.
*/
col_format |= V_028714_SPI_SHADER_32_ABGR;
}
/* If the i-th target format is set, all previous target formats must
* be non-zero to avoid hangs.
*/
......@@ -688,6 +696,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
if (vkms && vkms->alphaToCoverageEnable) {
blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
blend.need_src_alpha |= 0x1;
}
blend.cb_target_mask = 0;
......@@ -3066,13 +3075,17 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs,
radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader);
}
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16)
{
uint32_t ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
ps_input_cntl = S_028644_OFFSET(offset);
if (flat_shade)
ps_input_cntl |= S_028644_FLAT_SHADE(1);
if (float16) {
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
S_028644_ATTR0_VALID(1);
}
} else {
/* The input is a DEFAULT_VAL constant. */
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
......@@ -3097,7 +3110,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
if (ps->info.info.ps.prim_id_input) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
++ps_offset;
}
}
......@@ -3107,9 +3120,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
ps->info.info.needs_multiview_view_index) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
if (vs_offset != AC_EXP_PARAM_UNDEFINED)
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
else
ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
++ps_offset;
}
......@@ -3125,14 +3138,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
ps->info.info.ps.num_input_clips_culls > 4) {
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
}
......@@ -3140,6 +3153,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
unsigned vs_offset;
bool flat_shade;
bool float16;
if (!(ps->info.fs.input_mask & (1u << i)))
continue;
......@@ -3151,8 +3165,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
}
flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
++ps_offset;
}
......
......@@ -1447,6 +1447,7 @@ bool radv_format_pack_clear_color(VkFormat format,
bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable);
bool radv_dcc_formats_compatible(VkFormat format1,
VkFormat format2);
bool radv_device_supports_etc(struct radv_physical_device *physical_device);
struct radv_fmask_info {
uint64_t offset;
......
......@@ -258,6 +258,7 @@ struct radv_shader_variant_info {
unsigned num_interp;
uint32_t input_mask;
uint32_t flat_shaded_mask;
uint32_t float16_shaded_mask;
bool can_discard;
bool early_fragment_test;
} fs;
......