......@@ -3,3 +3,22 @@ ab0809e5529725bd0af6f7b6ce06415020b9d32e meson: fix strtof locale support check
# fixes: The commit addresses Meson which is explicitly disabled for 17.3
44fbbd6fd07e5784b05e08e762e54b6c71f95ab1 util: add mesa-sha1 test to meson
# stable: The commit is causing a regression
# (https://bugs.freedesktop.org/show_bug.cgi?id=103626)
18fde36ced4279f2577097a1a7d31b55f2f5f141 intel/fs: Use the original destination region for int MUL lowering
# stable: The commit addresses earlier commit 6132992cdb which did not land in
# branch
3d2b157e23c9d66df97d59be6efd1098878cc110 i965/fs: Use UW types when using V immediates
# extra: The commit just references a fix for an additional change in its v2.
c1ff99fd70cd2ceb2cac4723e4fd5efc93834746 main: Clear shader program data whenever ProgramBinary is called
# fixes: The commit addresses earlier commits 40a01c9a0ef and 8d745abc009 which
# did not land in branch
9b0223046668593deb9c0be0b557994bb5218788 egl: pass the dri2_dpy to the $plat_teardown functions
# fixes: The commit addresses earlier commit d50937f137 which did not land in
# branch
78a8b73e7d45f55ced98a148b26247d91f4e0171 vulkan/wsi: free cmd pools
mesa (17.3.3-1) unstable; urgency=medium
* New upstream release.
* Bump debhelper compat to 10.
* Bump standards version to 4.1.3.
-- Andreas Boll <andreas.boll.dev@gmail.com> Fri, 19 Jan 2018 20:34:29 +0100
mesa (17.3.2-1) unstable; urgency=medium
* New upstream release.
......
......@@ -3,11 +3,10 @@ Section: graphics
Priority: optional
Maintainer: Debian X Strike Force <debian-x@lists.debian.org>
Uploaders: Andreas Boll <andreas.boll.dev@gmail.com>
Standards-Version: 4.1.2
Standards-Version: 4.1.3
Build-Depends:
debhelper (>= 9),
dh-autoreconf,
quilt (>= 0.40),
debhelper (>= 10),
quilt (>= 0.63-8.2~),
pkg-config,
libdrm-dev (>= 2.4.85) [!hurd-any],
libx11-dev,
......
......@@ -227,8 +227,7 @@ override_dh_makeshlibs:
dh_makeshlibs -a -- -c4
%:
dh $@ --with quilt,autoreconf,libva \
--parallel \
dh $@ --with quilt,libva \
--builddirectory=build/
# For maintainer use only, generate a tarball:
......
......@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
<h2>SHA256 checksums</h2>
<pre>
TBD
f997e80f14c385f9a2ba827c2b74aebf1b7426712ca4a81c631ef9f78e437bf4 mesa-17.3.2.tar.gz
e2844a13f2d6f8f24bee65804a51c42d8dc6ae9c36cff7ee61d0940e796d64c6 mesa-17.3.2.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.3.3 Release Notes / January 18, 2018</h1>
<p>
Mesa 17.3.3 is a bug fix release which fixes bugs found since the 17.3.2 release.
</p>
<p>
Mesa 17.3.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104214">Bug 104214</a> - Dota crashes when switching from game to desktop</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104492">Bug 104492</a> - Compute Shader: Wrong alignment when assigning struct value to structured SSBO</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104551">Bug 104551</a> - Check if Mako templates for Python are installed</li>
</ul>
<h2>Changes</h2>
<p>Alex Smith (3):</p>
<ul>
<li>anv: Add missing unlock in anv_scratch_pool_alloc</li>
<li>anv: Take write mask into account in has_color_buffer_write_enabled</li>
<li>anv: Make sure state on primary is correct after CmdExecuteCommands</li>
</ul>
<p>Andres Gomez (1):</p>
<ul>
<li>anv: Import mako templates only during execution of anv_extensions</li>
</ul>
<p>Bas Nieuwenhuizen (11):</p>
<ul>
<li>radv: Invert condition for all samples identical during resolve.</li>
<li>radv: Flush caches before subpass resolve.</li>
<li>radv: Fix fragment resolve destination offset.</li>
<li>radv: Use correct framebuffer size for partial FS resolves.</li>
<li>radv: Always use fragment resolve if dest uses DCC.</li>
<li>Revert "radv/gfx9: fix block compression texture views."</li>
<li>radv: Use correct HTILE expanded words.</li>
<li>radv: Allow writing 0 scissors.</li>
<li>ac/nir: Handle loading data from compact arrays.</li>
<li>radv: Invalidate L1 for VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT.</li>
<li>ac/nir: Sanitize location_frac for local variables.</li>
</ul>
<p>Dave Airlie (8):</p>
<ul>
<li>radv: fix events on compute queues.</li>
<li>radv: fix pipeline statistics end query on compute queue</li>
<li>radv/gfx9: fix 3d image to image transfers on compute queues.</li>
<li>radv/gfx9: fix 3d image clears on compute queues</li>
<li>radv/gfx9: fix buffer to image for 3d images on compute queues</li>
<li>radv/gfx9: fix block compression texture views.</li>
<li>radv/gfx9: use a bigger hammer to flush cb/db caches.</li>
<li>radv/gfx9: use correct swizzle parameter to work out border swizzle.</li>
</ul>
<p>Emil Velikov (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.3.2</li>
</ul>
<p>Florian Will (1):</p>
<ul>
<li>glsl: Respect std430 layout in lower_buffer_access</li>
</ul>
<p>Juan A. Suarez Romero (6):</p>
<ul>
<li>cherry-ignore: intel/fs: Use the original destination region for int MUL lowering</li>
<li>cherry-ignore: i965/fs: Use UW types when using V immediates</li>
<li>cherry-ignore: main: Clear shader program data whenever ProgramBinary is called</li>
<li>cherry-ignore: egl: pass the dri2_dpy to the $plat_teardown functions</li>
<li>cherry-ignore: vulkan/wsi: free cmd pools</li>
<li>Update version to 17.3.3</li>
</ul>
<p>Józef Kucia (1):</p>
<ul>
<li>radeonsi: fix alpha-to-coverage if color writes are disabled</li>
</ul>
<p>Kenneth Graunke (2):</p>
<ul>
<li>i965: Require space for MI_BATCHBUFFER_END.</li>
<li>i965: Torch public intel_batchbuffer_emit_dword/float helpers.</li>
</ul>
<p>Lucas Stach (1):</p>
<ul>
<li>etnaviv: disable in-place resolve for non-supertiled surfaces</li>
</ul>
<p>Samuel Iglesias Gonsálvez (1):</p>
<ul>
<li>anv: VkDescriptorSetLayoutBinding can have descriptorCount == 0</li>
</ul>
<p>Thomas Hellstrom (1):</p>
<ul>
<li>loader/dri3: Avoid freeing renderbuffers in use</li>
</ul>
<p>Tim Rowley (1):</p>
<ul>
<li>swr/rast: fix invalid sign masks in avx512 simdlib code</li>
</ul>
</div>
</body>
</html>
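The release notes above point out that the version reported by glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / glGetIntegerv(GL_MINOR_VERSION) depends on the driver, and that OpenGL 4.5 must be requested at context creation. A minimal sketch of the runtime query, assuming a current core-profile context has already been created (the context-creation step itself is not shown):

#include <stdio.h>
#include <GL/gl.h>

/* Minimal sketch: print the version the driver actually exposes.
 * Assumes a current OpenGL context already exists (created with a
 * core profile requested, since compatibility contexts are not
 * supported). */
static void print_gl_version(void)
{
        GLint major = 0, minor = 0;

        glGetIntegerv(GL_MAJOR_VERSION, &major);
        glGetIntegerv(GL_MINOR_VERSION, &minor);

        printf("GL_VERSION string: %s\n", (const char *)glGetString(GL_VERSION));
        printf("Reported version:  %d.%d\n", major, minor);
}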
......@@ -3073,6 +3073,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
LLVMValueRef indir_index;
LLVMValueRef ret;
unsigned const_index;
unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
instr->variables[0]->var->data.mode == nir_var_shader_in;
get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
......@@ -3098,13 +3099,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->abi->inputs + idx + chan, count,
4, false, true);
stride, false, true);
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else
values[chan] = ctx->abi->inputs[idx + chan + const_index * 4];
values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
}
break;
case nir_var_local:
......@@ -3115,13 +3116,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->locals + idx + chan, count,
4, true, true);
stride, true, true);
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else {
values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * 4], "");
values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
}
}
break;
......@@ -3143,14 +3144,14 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->outputs + idx + chan, count,
4, true, true);
stride, true, true);
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else {
values[chan] = LLVMBuildLoad(ctx->ac.builder,
ctx->outputs[idx + chan + const_index * 4],
ctx->outputs[idx + chan + const_index * stride],
"");
}
}
......@@ -5446,6 +5447,7 @@ setup_locals(struct ac_nir_context *ctx,
nir_foreach_variable(variable, &func->impl->locals) {
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
variable->data.driver_location = ctx->num_locals * 4;
variable->data.location_frac = 0;
ctx->num_locals += attrib_count;
}
ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
......
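The ac_nir_to_llvm.c hunks above replace the hard-coded slot stride of 4 with a per-variable stride so that compact arrays (e.g. gl_ClipDistance[], which pack one scalar per array element) are indexed correctly, and setup_locals() now zeroes location_frac for locals. A minimal sketch of the addressing rule, with a hypothetical helper name, just to spell out the arithmetic used above:

#include <stdbool.h>

/* Hypothetical helper, not Mesa API: index of channel `chan` of array
 * element `const_index` for a variable whose driver_location starts at
 * `idx`.  Regular variables occupy one vec4 slot per element (stride 4);
 * compact arrays pack one component per element (stride 1), which is
 * exactly what the new `stride` local selects above. */
static unsigned var_channel_index(unsigned idx, unsigned chan,
                                  unsigned const_index, bool compact)
{
        unsigned stride = compact ? 1 : 4;

        return idx + chan + const_index * stride;
}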
......@@ -1919,11 +1919,11 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
break;
case VK_ACCESS_UNIFORM_READ_BIT:
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
break;
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
......@@ -3583,7 +3583,8 @@ void radv_CmdEndRenderPass(
/*
* For HTILE we have the following interesting clear words:
* 0x0000030f: Uncompressed.
 * 0x0000030f: Uncompressed for depth+stencil HTILE.
 * 0x0000000f: Uncompressed for depth-only HTILE.
* 0xfffffff0: Clear depth to 1.0
* 0x00000000: Clear depth to 0.0
*/
......@@ -3632,7 +3633,8 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
radv_initialize_htile(cmd_buffer, image, range, 0);
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
radv_initialize_htile(cmd_buffer, image, range, 0xffffffff);
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0x30f : 0xf;
radv_initialize_htile(cmd_buffer, image, range, clear_value);
} else if (radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
!radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
VkImageSubresourceRange local_range = *range;
......@@ -3834,7 +3836,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
si_cs_emit_write_event_eop(cs,
cmd_buffer->state.predicating,
cmd_buffer->device->physical_device->rad_info.chip_class,
false,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, va, 2, value);
......
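The HTILE comment and the layout-transition hunk above pick the "uncompressed" HTILE word based on whether the format carries stencil, and write_event() now passes radv_cmd_buffer_uses_mec() so events on compute queues take the MEC packet path. A standalone sketch of the clear-word selection, using only the values listed in the comment (vk_format_is_stencil() is the existing radv helper the hunk itself calls, declared in radv's vk_format.h):

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Sketch of the value used above when an image transitions into an
 * HTILE-compressed layout:
 *   0x0000030f - uncompressed, depth + stencil HTILE
 *   0x0000000f - uncompressed, depth-only HTILE
 * (0xfffffff0 / 0x00000000 are the "clear depth to 1.0 / 0.0" words.) */
static uint32_t htile_uncompressed_word(VkFormat format)
{
        return vk_format_is_stencil(format) ? 0x0000030f : 0x0000000f;
}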
......@@ -344,7 +344,7 @@ static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
}
}
static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
......@@ -449,7 +449,7 @@ si_make_texture_descriptor(struct radv_device *device,
state[7] = 0;
if (device->physical_device->rad_info.chip_class >= GFX9) {
unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
/* Depth is the last accessible layer on Gfx9.
* The hw doesn't need to know the total number of layers.
......
......@@ -533,7 +533,7 @@ void radv_meta_build_resolve_shader_core(nir_builder *b,
nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
nir_builder_instr_insert(b, &tex_all_same->instr);
nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
nir_if *if_stmt = nir_if_create(b->shader);
if_stmt->condition = nir_src_for_ssa(all_same);
nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
......
......@@ -259,19 +259,20 @@ radv_device_finish_meta_itob_state(struct radv_device *device)
}
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev)
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
false,
false,
GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_cs");
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
......@@ -296,16 +297,16 @@ build_nir_btoi_compute_shader(struct radv_device *dev)
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(offset, 0);
nir_intrinsic_set_range(offset, 12);
nir_intrinsic_set_range(offset, 16);
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
offset->num_components = 2;
nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
offset->num_components = is_3d ? 3 : 2;
nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
nir_builder_instr_insert(&b, &offset->instr);
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(stride, 0);
nir_intrinsic_set_range(stride, 12);
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
nir_intrinsic_set_range(stride, 16);
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
stride->num_components = 1;
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
nir_builder_instr_insert(&b, &stride->instr);
......@@ -353,9 +354,10 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_nir_btoi_compute_shader(device);
struct radv_shader_module cs_3d = { .nir = NULL };
cs.nir = build_nir_btoi_compute_shader(device, false);
if (device->physical_device->rad_info.chip_class >= GFX9)
cs_3d.nir = build_nir_btoi_compute_shader(device, true);
/*
* two descriptors: one for the image being sampled,
* one for the buffer being written.
......@@ -395,7 +397,7 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
......@@ -429,9 +431,33 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
if (device->physical_device->rad_info.chip_class >= GFX9) {
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs_3d),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage_3d,
.flags = 0,
.layout = device->meta_state.btoi.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info_3d, NULL,
&device->meta_state.btoi.pipeline_3d);
ralloc_free(cs_3d.nir);
}
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs_3d.nir);
ralloc_free(cs.nir);
return result;
}
......@@ -448,22 +474,25 @@ radv_device_finish_meta_btoi_state(struct radv_device *device)
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->btoi.pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->btoi.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev)
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs");
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
......@@ -488,18 +517,18 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(src_offset, 0);
nir_intrinsic_set_range(src_offset, 16);
nir_intrinsic_set_range(src_offset, 24);
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
src_offset->num_components = 2;
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
src_offset->num_components = is_3d ? 3 : 2;
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
nir_builder_instr_insert(&b, &src_offset->instr);
nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(dst_offset, 0);
nir_intrinsic_set_range(dst_offset, 16);
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
dst_offset->num_components = 2;
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
nir_intrinsic_set_range(dst_offset, 24);
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
dst_offset->num_components = is_3d ? 3 : 2;
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
nir_builder_instr_insert(&b, &dst_offset->instr);
nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
......@@ -507,15 +536,15 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
tex->sampler_dim = dim;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 3));
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
tex->src[1].src_type = nir_tex_src_lod;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
tex->dest_type = nir_type_float;
tex->is_array = false;
tex->coord_components = 2;
tex->coord_components = is_3d ? 3 : 2;
tex->texture = nir_deref_var_create(tex, input_img);
tex->sampler = NULL;
......@@ -539,9 +568,10 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_nir_itoi_compute_shader(device);
struct radv_shader_module cs_3d = { .nir = NULL };
cs.nir = build_nir_itoi_compute_shader(device, false);
if (device->physical_device->rad_info.chip_class >= GFX9)
cs_3d.nir = build_nir_itoi_compute_shader(device, true);
/*
* two descriptors: one for the image being sampled,
* one for the buffer being written.
......@@ -581,7 +611,7 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
......@@ -615,10 +645,35 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
if (device->physical_device->rad_info.chip_class >= GFX9) {
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs_3d),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage_3d,
.flags = 0,
.layout = device->meta_state.itoi.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info_3d, NULL,
&device->meta_state.itoi.pipeline_3d);
ralloc_free(cs_3d.nir);
}
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
ralloc_free(cs_3d.nir);
return result;
}
......@@ -634,18 +689,22 @@ radv_device_finish_meta_itoi_state(struct radv_device *device)
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->itoi.pipeline, &state->alloc);
if (device->physical_device->rad_info.chip_class >= GFX9)
radv_DestroyPipeline(radv_device_to_handle(device),
state->itoi.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev)
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_cs");
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
......@@ -666,12 +725,29 @@ build_nir_cleari_compute_shader(struct radv_device *dev)
nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(clear_val, 0);
nir_intrinsic_set_range(clear_val, 16);
nir_intrinsic_set_range(clear_val, 20);
clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
clear_val->num_components = 4;
nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
nir_builder_instr_insert(&b, &clear_val->instr);
nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(layer, 0);
nir_intrinsic_set_range(layer, 20);
layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
layer->num_components = 1;
nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
nir_builder_instr_insert(&b, &layer->instr);
nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
nir_ssa_def *comps[4];
comps[0] = nir_channel(&b, global_id, 0);
comps[1] = nir_channel(&b, global_id, 1);
comps[2] = global_z;
comps[3] = nir_imm_int(&b, 0);
global_id = nir_vec(&b, comps, 4);
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
store->src[0] = nir_src_for_ssa(global_id);
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
......@@ -687,8 +763,10 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_nir_cleari_compute_shader(device);
struct radv_shader_module cs_3d = { .nir = NULL };
cs.nir = build_nir_cleari_compute_shader(device, false);
if (device->physical_device->rad_info.chip_class >= GFX9)
cs_3d.nir = build_nir_cleari_compute_shader(device, true);
/*
* two descriptors one for the image being sampled
......@@ -722,7 +800,7 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
......@@ -756,10 +834,38 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
if (device->physical_device->rad_info.chip_class >= GFX9) {
/* compute shader */
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs_3d),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage_3d,
.flags = 0,
.layout = device->meta_state.cleari.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info_3d, NULL,
&device->meta_state.cleari.pipeline_3d);
if (result != VK_SUCCESS)
goto fail;
ralloc_free(cs_3d.nir);
}
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
ralloc_free(cs_3d.nir);
return result;
}
......@@ -775,6 +881,8 @@ radv_device_finish_meta_cleari_state(struct radv_device *device)
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->cleari.pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->cleari.pipeline_3d, &state->alloc);
}
void
......@@ -990,18 +1098,22 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
create_iview(cmd_buffer, dst, &dst_view);
btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
if (device->physical_device->rad_info.chip_class >= GFX9 &&
dst->image->type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[3] = {
unsigned push_constants[4] = {
rects[r].dst_x,
rects[r].dst_y,
src->pitch
dst->layer,
src->pitch,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.btoi.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
......@@ -1068,19 +1180,24 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
if (device->physical_device->rad_info.chip_class >= GFX9 &&
src->image->type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[4] = {
unsigned push_constants[6] = {
rects[r].src_x,
rects[r].src_y,
src->layer,
rects[r].dst_x,
rects[r].dst_y,
dst->layer,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.itoi.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
......@@ -1128,19 +1245,24 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
create_iview(cmd_buffer, dst, &dst_iview);
cleari_bind_descriptors(cmd_buffer, &dst_iview);
if (device->physical_device->rad_info.chip_class >= GFX9 &&
dst->image->type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
unsigned push_constants[4] = {
unsigned push_constants[5] = {
clear_color->uint32[0],
clear_color->uint32[1],
clear_color->uint32[2],
clear_color->uint32[3],
dst->layer,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.cleari.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
push_constants);
radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
......
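The radv_meta_bufimage.c changes above widen each meta pipeline's push-constant range (btoi 12→16 bytes, itoi 16→24, cleari 16→20) so a source/destination layer can reach the new GFX9 3D shader variants. A sketch of the resulting layouts, reconstructed from the push_constants arrays above; the struct names are hypothetical and only document the byte offsets:

#include <stdint.h>

/* Hypothetical structs mirroring the push_constants arrays above
 * (offsets in bytes in the comments). */
struct btoi_push {               /* now 16 bytes, was 12 */
        uint32_t dst_x;          /*  0 */
        uint32_t dst_y;          /*  4 */
        uint32_t dst_layer;      /*  8 - new: destination z/layer */
        uint32_t pitch;          /* 12 - buffer pitch, previously at 8 */
};

struct itoi_push {               /* now 24 bytes, was 16 */
        uint32_t src_x;          /*  0 */
        uint32_t src_y;          /*  4 */
        uint32_t src_layer;      /*  8 - new */
        uint32_t dst_x;          /* 12 */
        uint32_t dst_y;          /* 16 */
        uint32_t dst_layer;      /* 20 - new */
};

struct cleari_push {             /* now 20 bytes, was 16 */
        uint32_t clear_color[4]; /*  0..15 */
        uint32_t dst_layer;      /* 16 - new: added to global_id.z */
};

Only the is_3d shader variants read the extra component, which is why the load_push_constant ranges in the builders above grow by the same amounts.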
......@@ -300,10 +300,9 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
enum radv_resolve_method *method)
{
if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
if (dest_image->surface.num_dcc_levels > 0)
if (dest_image->surface.num_dcc_levels > 0) {
*method = RESOLVE_FRAGMENT;
else
} else if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
*method = RESOLVE_COMPUTE;
}
}
......
......@@ -487,6 +487,14 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
if (!subpass->has_resolve)
return;
/* Resolves happen before the end-of-subpass barriers get executed,
* so we have to make the attachment shader-readable */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_INV_VMEM_L1;
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
......
......@@ -407,8 +407,8 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
unsigned push_constants[2] = {
src_offset->x,
src_offset->y,
src_offset->x - dest_offset->x,
src_offset->y - dest_offset->y,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.resolve_fragment.p_layout,
......@@ -540,8 +540,8 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
.pAttachments = (VkImageView[]) {
radv_image_view_to_handle(&dest_iview),
},
.width = extent.width,
.height = extent.height,
.width = extent.width + dstOffset.x,
.height = extent.height + dstOffset.y,
.layers = 1
}, &cmd_buffer->pool->alloc, &fb);
......@@ -604,6 +604,16 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
RADV_META_SAVE_CONSTANTS |
RADV_META_SAVE_DESCRIPTORS);
/* Resolves happen before the end-of-subpass barriers get executed,
* so we have to make the attachment shader-readable */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_INV_VMEM_L1;
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
......
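The resolve_fragment.c hunks above change the pushed offset from the absolute source offset to the source-minus-destination delta and enlarge the framebuffer to extent + dstOffset, so resolves into a non-zero destination offset land correctly. A small sketch of the coordinate math, with hypothetical names, mirroring what emit_resolve() now pushes:

#include <stdint.h>

struct off2d { int32_t x, y; };

/* Sketch only: the resolve fragment shader runs over the destination
 * rectangle, so the source texel for a fragment is its position plus
 * the (src - dst) delta that is now passed as push constants above. */
static struct off2d resolve_src_coord(struct off2d frag_pos,
                                      struct off2d src_offset,
                                      struct off2d dst_offset)
{
        struct off2d delta = {
                src_offset.x - dst_offset.x,    /* pushed as constant 0 */
                src_offset.y - dst_offset.y,    /* pushed as constant 1 */
        };

        return (struct off2d){ frag_pos.x + delta.x, frag_pos.y + delta.y };
}

The framebuffer, in turn, has to cover fragments up to dstOffset + extent, which is why its width and height are enlarged in the hunk above.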
......@@ -433,16 +433,19 @@ struct radv_meta_state {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
VkPipeline pipeline_3d;
} btoi;
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
VkPipeline pipeline_3d;
} itoi;
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
VkPipeline pipeline_3d;
} cleari;
struct {
......
......@@ -1152,7 +1152,7 @@ void radv_CmdEndQuery(
si_cs_emit_write_event_eop(cs,
false,
cmd_buffer->device->physical_device->rad_info.chip_class,
false,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
break;
......
......@@ -676,7 +676,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first,
int i;
float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
const float max_range = 32767.0f;
assert(count);
if (!count)
return;
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
for (i = 0; i < count; i++) {
......@@ -988,6 +989,11 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
if (chip_class >= GFX9 && flush_cb_db) {
unsigned cb_db_event, tc_flags;
#if 0
/* This breaks a bunch of:
dEQP-VK.renderpass.dedicated_allocation.formats.d32_sfloat_s8_uint.input*.
use the big hammer always.
*/
/* Set the CB/DB flush event. */
switch (flush_cb_db) {
case RADV_CMD_FLAG_FLUSH_AND_INV_CB:
......@@ -1000,7 +1006,9 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
/* both CB & DB */
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
}
#else
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
#endif
/* TC | TC_WB = invalidate L2 data
* TC_MD | TC_WB = invalidate L2 metadata
* TC | TC_WB | TC_MD = invalidate L2 data & metadata
......
......@@ -72,15 +72,21 @@ lower_buffer_access::emit_access(void *mem_ctx,
new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
field->name);
field_offset =
glsl_align(field_offset,
field->type->std140_base_alignment(row_major));
unsigned field_align;
if (packing == GLSL_INTERFACE_PACKING_STD430)
field_align = field->type->std430_base_alignment(row_major);
else
field_align = field->type->std140_base_alignment(row_major);
field_offset = glsl_align(field_offset, field_align);
emit_access(mem_ctx, is_write, field_deref, base_offset,
deref_offset + field_offset,
row_major, 1, packing,
writemask_for_size(field_deref->type->vector_elements));
if (packing == GLSL_INTERFACE_PACKING_STD430)
field_offset += field->type->std430_size(row_major);
else
field_offset += field->type->std140_size(row_major);
}
return;
......
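The lower_buffer_access hunk above (for Bug 104492) makes struct-member lowering use std430 base alignment and size when the interface packing is std430, instead of always applying the std140 rules. A worked example of why the two differ, following the standard GLSL layout rules; shown as a comment block since the difference is purely arithmetic:

/* Worked example (GLSL std140 vs std430 layout rules):
 *
 *     struct S { float a; float b[2]; float c; };
 *
 *                          std140                    std430
 *   a   offset             0                         0
 *   b   array stride       16 (rounded up to vec4)   4 (natural alignment)
 *   b   offset             16                        4
 *   c   offset             16 + 2*16 = 48            4 + 2*4 = 12
 *
 * Before this fix, field offsets inside an std430 SSBO were still
 * accumulated with std140 alignment and size, so accesses to `c`
 * would be lowered to offset 48 instead of 12. */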