Marek Olšák · Marek Olšák · Marek Olšák · Marek Olšák · Marek Olšák · Marek Olšák
--- a/VERSION
+++ b/VERSION
-19.2.0-rc1
+19.2.0-rc2
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -267,3 +267,5 @@ CHIPSET(0x7319, NAVI10)
 CHIPSET(0x731A, NAVI10)
 CHIPSET(0x731B, NAVI10)
 CHIPSET(0x731F, NAVI10)
+
+CHIPSET(0x7340, NAVI14)
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -4221,10 +4221,9 @@ ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValu
 	if (ctx->chip_class >= GFX10) {
 		result = inclusive ? src : identity;
 	} else {
-		if (inclusive)
+		if (!inclusive)
+			src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
 		result = src;
-		else
-			result = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
 	}
 	if (maxprefix <= 1)
 		return result;

--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -181,6 +181,7 @@
 /* fix CP DMA before uncommenting: */
 /*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* not on GFX9 */
 #define PKT3_RELEASE_MEM                       0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
+#define PKT3_CONTEXT_REG_RMW                   0x51 /* older firmware versions on older chips don't have this */
 #define PKT3_ONE_REG_WRITE                     0x57 /* not on CIK */
 #define PKT3_ACQUIRE_MEM                       0x58 /* new for CIK */
 #define PKT3_REWIND                            0x59 /* VI+ [any ring] or CIK [compute ring only] */

--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3321,7 +3321,7 @@ void radv_CmdBindIndexBuffer(
 	cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo);
 	cmd_buffer->state.index_va += index_buffer->offset + offset;

-	int index_size = radv_get_vgt_index_size(indexType);
+	int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
 	cmd_buffer->state.max_index_count = (index_buffer->size - offset) / index_size;
 	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
 	radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo);

--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -53,6 +53,7 @@ enum {
 	RADV_DEBUG_NOBINNING         = 0x800000,
 	RADV_DEBUG_NO_LOAD_STORE_OPT = 0x1000000,
 	RADV_DEBUG_NO_NGG            = 0x2000000,
+	RADV_DEBUG_NO_SHADER_BALLOT  = 0x4000000,
 };

 enum {

--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -173,12 +173,11 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.heapIndex = vram_index,
 		};
 	}
-	if (gart_index >= 0) {
+	if (gart_index >= 0 && device->rad_info.has_dedicated_vram) {
 		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
 			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-			(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
 			.heapIndex = gart_index,
 		};
 	}
@@ -191,6 +190,19 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.heapIndex = visible_vram_index,
 		};
 	}
+	if (gart_index >= 0 && !device->rad_info.has_dedicated_vram) {
+		/* Put GTT after visible VRAM for GPUs without dedicated VRAM
+		 * as they have identical property flags, and according to the
+		 * spec, for types with identical flags, the one with greater
+		 * performance must be given a lower index. */
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+			.heapIndex = gart_index,
+		};
+	}
 	if (gart_index >= 0) {
 		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
@@ -383,7 +395,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 					  device->rad_info.family == CHIP_RENOIR ||
 					  device->rad_info.chip_class >= GFX10;

-	device->use_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+	device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
+				    device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;

 	/* Determine the number of threads per wave for all stages. */
 	device->cs_wave_size = 64;
@@ -495,6 +508,7 @@ static const struct debug_control radv_debug_options[] = {
 	{"nobinning", RADV_DEBUG_NOBINNING},
 	{"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
 	{"nongg", RADV_DEBUG_NO_NGG},
+	{"noshaderballot", RADV_DEBUG_NO_SHADER_BALLOT},
 	{NULL, 0}
 };

@@ -553,6 +567,14 @@ radv_handle_per_app_options(struct radv_instance *instance,
 		 */
 		if (HAVE_LLVM < 0x900)
 			instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT;
+	} else if (!strcmp(name, "Wolfenstein: Youngblood")) {
+		if (!(instance->debug_flags & RADV_DEBUG_NO_SHADER_BALLOT)) {
+			/* Force enable VK_AMD_shader_ballot because it looks
+			 * safe and it gives a nice boost (+20% on Vega 56 at
+			 * this time).
+			 */
+			instance->perftest_flags |= RADV_PERFTEST_SHADER_BALLOT;
+		}
 	}
 }


--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2320,6 +2320,7 @@ radv_fill_shader_keys(struct radv_device *device,
 	}

 	if (device->physical_device->rad_info.chip_class >= GFX10 &&
+	    device->physical_device->rad_info.family != CHIP_NAVI14 &&
 	    !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
 		if (nir[MESA_SHADER_TESS_CTRL]) {
 			keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
@@ -2339,6 +2340,26 @@ radv_fill_shader_keys(struct radv_device *device,
 			keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
 		}

+		/*
+		 * Disable NGG with geometry shaders. There are a bunch of
+		 * issues still:
+		 *   * GS primitives in pipeline statistic queries do not get
+		 *     updates. See dEQP-VK.query_pool.statistics_query.geometry_shader_primitives
+		 *   * dEQP-VK.clipping.user_defined.clip_cull_distance_dynamic_index.*geom* failures
+		 *   * Interactions with tessellation failing:
+		 *     dEQP-VK.tessellation.geometry_interaction.passthrough.tessellate_isolines_passthrough_geometry_no_change
+		 *   * General issues with the last primitive missing/corrupt:
+		 *     https://bugs.freedesktop.org/show_bug.cgi?id=111248
+		 *
+		 * Furthermore, XGL/AMDVLK also disables this as of 9b632ef.
+		 */
+		if (nir[MESA_SHADER_GEOMETRY]) {
+			if (nir[MESA_SHADER_TESS_CTRL])
+				keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+			else
+				keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+		}
+
 		/* TODO: Implement streamout support for NGG. */
 		gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;

@@ -3812,6 +3833,14 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
 	else
 		radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
 				       tess->ls_hs_config);
+
+	if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+	    !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+		radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+		                       S_028A44_ES_VERTS_PER_SUBGRP(250) |
+		                       S_028A44_GS_PRIMS_PER_SUBGRP(126) |
+		                       S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
+	}
 }

 static void

--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1124,10 +1124,11 @@ VkResult radv_GetQueryPoolResults(

 		switch (pool->type) {
 		case VK_QUERY_TYPE_TIMESTAMP: {
-			available = *(uint64_t *)src != TIMESTAMP_NOT_READY;
+			volatile uint64_t const *src64 = (volatile uint64_t const *)src;
+			available = *src64 != TIMESTAMP_NOT_READY;

 			if (flags & VK_QUERY_RESULT_WAIT_BIT) {
-				while (*(volatile uint64_t *)src == TIMESTAMP_NOT_READY)
+				while (*src64 == TIMESTAMP_NOT_READY)
 					;
 				available = true;
 			}
@@ -1137,11 +1138,11 @@ VkResult radv_GetQueryPoolResults(

 			if (flags & VK_QUERY_RESULT_64_BIT) {
 				if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
-					*(uint64_t*)dest = *(uint64_t*)src;
+					*(uint64_t*)dest = *src64;
 				dest += 8;
 			} else {
 				if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
-					*(uint32_t*)dest = *(uint32_t*)src;
+					*(uint32_t*)dest = *(volatile uint32_t*)src;
 				dest += 4;
 			}
 			break;
@@ -1189,13 +1190,13 @@ VkResult radv_GetQueryPoolResults(
 			if (flags & VK_QUERY_RESULT_WAIT_BIT)
 				while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
 					;
-			available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
+			available = *(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);

 			if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
 				result = VK_NOT_READY;

-			const uint64_t *start = (uint64_t*)src;
-			const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
+			const volatile uint64_t *start = (uint64_t*)src;
+			const volatile uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
 			if (flags & VK_QUERY_RESULT_64_BIT) {
 				uint64_t *dst = (uint64_t*)dest;
 				dest += util_bitcount(pool->pipeline_stats_mask) * 8;

--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -297,7 +297,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 			.lower_ubo_ssbo_access_to_offsets = true,
 			.caps = {
 				.amd_gcn_shader = true,
-				.amd_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT,
+				.amd_shader_ballot = device->physical_device->use_shader_ballot,
 				.amd_trinary_minmax = true,
 				.derivative_group = true,
 				.descriptor_array_dynamic_indexing = true,
@@ -809,10 +809,12 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
 			 * If PrimID is disabled. InstanceID / StepRate1 is loaded instead.
 			 * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
 			 */
-			if (info->vs.export_prim_id) {
+			if (info->info.vs.needs_instance_id && pdevice->rad_info.chip_class >= GFX10) {
+				vgpr_comp_cnt = 3;
+			} else if (info->vs.export_prim_id) {
 				vgpr_comp_cnt = 2;
 			} else if (info->info.vs.needs_instance_id) {
-				vgpr_comp_cnt = pdevice->rad_info.chip_class >= GFX10 ? 3 : 1;
+				vgpr_comp_cnt = 1;
 			} else {
 				vgpr_comp_cnt = 0;
 			}

--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -192,6 +192,7 @@ si_emit_graphics(struct radv_physical_device *physical_device,
 		radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
 	}

+	if (physical_device->rad_info.chip_class <= GFX9)
 		radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
 	if (!physical_device->has_clear_state)
 		radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);

--- a/src/compiler/glsl/opt_algebraic.cpp
+++ b/src/compiler/glsl/opt_algebraic.cpp
@@ -507,6 +507,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
      if (is_vec_zero(op_const[1]))
 	 return ir->operands[0];

+      /* Replace (x + (-x)) with constant 0 */
+      for (int i = 0; i < 2; i++) {
+         if (op_expr[i]) {
+            if (op_expr[i]->operation == ir_unop_neg) {
+               ir_rvalue *other = ir->operands[(i + 1) % 2];
+               if (other && op_expr[i]->operands[0]->equals(other)) {
+                  return ir_constant::zero(ir, ir->type);
+               }
+            }
+         }
+      }
+
      /* Reassociate addition of constants so that we can do constant
       * folding.
       */

--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -806,7 +806,8 @@ optimizations.extend([
   (('ffloor', 'a(is_integral)'), a),
   (('fceil', 'a(is_integral)'), a),
   (('ftrunc', 'a(is_integral)'), a),
-   (('ffract', 'a(is_integral)'), 0.0),
+   # fract(x) = x - floor(x), so fract(NaN) = NaN
+   (('~ffract', 'a(is_integral)'), 0.0),
   (('fabs', 'a(is_not_negative)'), a),
   (('iabs', 'a(is_not_negative)'), a),
   (('fsat', 'a(is_not_positive)'), 0.0),
@@ -836,14 +837,17 @@ optimizations.extend([
   (('fne', 'a(is_not_zero)', 0.0), True),
   (('feq', 'a(is_not_zero)', 0.0), False),

-   (('fge', 'a(is_not_negative)', 'b(is_not_positive)'), True),
+   # The results expecting true must be marked imprecise.  The results
+   # expecting false are fine because NaN compared >= or < anything is false.
+
+   (('~fge', 'a(is_not_negative)', 'b(is_not_positive)'), True),
   (('fge',  'b(is_not_positive)', 'a(is_gt_zero)'),      False),
   (('fge',  'a(is_lt_zero)',      'b(is_not_negative)'), False),
-   (('fge', 'b(is_not_negative)', 'a(is_not_positive)'), True),
+   (('~fge', 'b(is_not_negative)', 'a(is_not_positive)'), True),

   (('flt',  'a(is_not_negative)', 'b(is_not_positive)'), False),
-   (('flt', 'b(is_not_positive)', 'a(is_gt_zero)'),      True),
-   (('flt', 'a(is_lt_zero)',      'b(is_not_negative)'), True),
+   (('~flt', 'b(is_not_positive)', 'a(is_gt_zero)'),      True),
+   (('~flt', 'a(is_lt_zero)',      'b(is_not_negative)'), True),
   (('flt',  'b(is_not_negative)', 'a(is_not_positive)'), False),

   (('ine', 'a(is_not_zero)', 0), True),
@@ -1316,7 +1320,7 @@ def bitfield_reverse(u):

    return step5

-optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'))]
+optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'), '!options->lower_bitfield_reverse')]

 # For any float comparison operation, "cmp", if you have "a == a && a cmp b"
 # then the "a == a" is redundant because it's equivalent to "a is not NaN"

--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -560,31 +560,7 @@ wrapper_unroll(nir_loop *loop)
           nir_after_block(nir_if_last_else_block(terminator->nif));
      }
   } else {
-      nir_block *blk_after_loop =
-         nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));
-
-      /* There may still be some single src phis following the loop that
-       * have not yet been cleaned up by another pass. Tidy those up
-       * before unrolling the loop.
-       */
-      nir_foreach_instr_safe(instr, blk_after_loop) {
-         if (instr->type != nir_instr_type_phi)
-            break;
-
-         nir_phi_instr *phi = nir_instr_as_phi(instr);
-         assert(exec_list_length(&phi->srcs) == 1);
-
-         nir_phi_src *phi_src =
-            exec_node_data(nir_phi_src, exec_list_get_head(&phi->srcs), node);
-
-         nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);
-         nir_instr_remove(instr);
-      }
-
-      /* Remove break at end of the loop */
-      nir_block *last_loop_blk = nir_loop_last_block(loop);
-      nir_instr *break_instr = nir_block_last_instr(last_loop_blk);
-      nir_instr_remove(break_instr);
+      loop_prepare_for_unroll(loop);
   }

   /* Pluck out the loop body. */

--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -31,12 +31,6 @@
 * the result.
 */

-static bool
-is_not_zero(enum ssa_ranges r)
-{
-   return r == gt_zero || r == lt_zero || r == ne_zero;
-}
-
 static void *
 pack_data(const struct ssa_result_range r)
 {
@@ -253,9 +247,15 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
    *        | lt_zero + lt_zero
    *        ;
    *
+    * ne_zero: eq_zero + ne_zero
+    *        | ne_zero + eq_zero   # Addition is commutative
+    *        ;
+    *
    * eq_zero: eq_zero + eq_zero
+    *        ;
    *
-    * All other cases are 'unknown'.
+    * All other cases are 'unknown'.  The seemingly odd entry is (ne_zero,
+    * ne_zero), but that could be (-5, +5) which is not ne_zero.
    */
   static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
@@ -264,14 +264,17 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
      /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
      /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
      /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
-      /* ne_zero */ { _______, _______, _______, _______, _______, ne_zero, ne_zero },
+      /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
      /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
   };

   ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
-   ASSERT_TABLE_IS_DIAGONAL(fadd_table);

-   /* ge_zero: ge_zero * ge_zero
+   /* Due to flush-to-zero semantics of floating-point numbers with very
+    * small magnitudes, we can never really be sure a result will be
+    * non-zero.
+    *
+    * ge_zero: ge_zero * ge_zero
    *        | ge_zero * gt_zero
    *        | ge_zero * eq_zero
    *        | le_zero * lt_zero
@@ -280,9 +283,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
    *        | gt_zero * ge_zero  # Multiplication is commutative
    *        | eq_zero * ge_zero  # Multiplication is commutative
    *        | a * a              # Left source == right source
-    *        ;
-    *
-    * gt_zero: gt_zero * gt_zero
+    *        | gt_zero * gt_zero
    *        | lt_zero * lt_zero
    *        ;
    *
@@ -291,19 +292,10 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
    *        | lt_zero * ge_zero  # Multiplication is commutative
    *        | le_zero * ge_zero  # Multiplication is commutative
    *        | le_zero * gt_zero
-    *        ;
-    *
-    * lt_zero: lt_zero * gt_zero
+    *        | lt_zero * gt_zero
    *        | gt_zero * lt_zero  # Multiplication is commutative
    *        ;
    *
-    * ne_zero: ne_zero * gt_zero
-    *        | ne_zero * lt_zero
-    *        | gt_zero * ne_zero  # Multiplication is commutative
-    *        | lt_zero * ne_zero  # Multiplication is commutative
-    *        | ne_zero * ne_zero
-    *        ;
-    *
    * eq_zero: eq_zero * <any>
    *          <any> * eq_zero    # Multiplication is commutative
    *
@@ -312,11 +304,11 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
   static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
-      /* lt_zero */ { _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero },
+      /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
      /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
-      /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
+      /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
-      /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, eq_zero },
+      /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
      /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
   };

@@ -453,9 +445,21 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
      break;
   }

-   case nir_op_fexp2:
-      r = (struct ssa_result_range){gt_zero, analyze_expression(alu, 0, ht).is_integral};
+   case nir_op_fexp2: {
+      /* If the parameter might be less than zero, the mathematical result
+       * will be in (0, 1).  For sufficiently large magnitude negative
+       * parameters, the result will flush to zero.
+       */
+      static const enum ssa_ranges table[last_range + 1] = {
+      /* unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
+         ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
+      };
+
+      r = analyze_expression(alu, 0, ht);
+
+      r.range = table[r.range];
      break;
+   }

   case nir_op_fmax: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
@@ -589,11 +593,13 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,

      /* x * x => ge_zero */
      if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
-         /* x * x => ge_zero or gt_zero depending on the range of x. */
-         r.range = is_not_zero(left.range) ? gt_zero : ge_zero;
+         /* Even if x > 0, the result of x*x can be zero when x is, for
+          * example, a subnormal number.
+          */
+         r.range = ge_zero;
      } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
-         /* -x * x => le_zero or lt_zero depending on the range of x. */
-         r.range = is_not_zero(left.range) ? lt_zero : le_zero;
+         /* -x * x => le_zero. */
+         r.range = le_zero;
      } else
         r.range = fmul_table[left.range][right.range];

@@ -604,9 +610,16 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
      r = (struct ssa_result_range){analyze_expression(alu, 0, ht).range, false};
      break;

-   case nir_op_mov:
-      r = analyze_expression(alu, 0, ht);
+   case nir_op_mov: {
+      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
+
+      /* See commentary in nir_op_bcsel for the reasons this is necessary. */
+      if (nir_src_is_const(alu->src[0].src) && left.range != eq_zero)
+         return (struct ssa_result_range){unknown, false};
+
+      r = left;
      break;
+   }

   case nir_op_fneg:
      r = analyze_expression(alu, 0, ht);
@@ -720,11 +733,13 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
      enum ssa_ranges fmul_range;

      if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
-         /* x * x => ge_zero or gt_zero depending on the range of x. */
-         fmul_range = is_not_zero(first.range) ? gt_zero : ge_zero;
+         /* See handling of nir_op_fmul for explanation of why ge_zero is the
+          * range.
+          */
+         fmul_range = ge_zero;
      } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
-         /* -x * x => le_zero or lt_zero depending on the range of x. */
-         fmul_range = is_not_zero(first.range) ? lt_zero : le_zero;
+         /* -x * x => le_zero */
+         fmul_range = le_zero;
      } else
         fmul_range = fmul_table[first.range][second.range];


--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -676,6 +676,10 @@ eglTerminate(EGLDisplay dpy)
      /* do not reset disp->Driver */
      disp->ClientAPIsString[0] = 0;
      disp->Initialized = EGL_FALSE;
+
+      /* Reset blob cache funcs on terminate. */
+      disp->BlobCacheSet = NULL;
+      disp->BlobCacheGet = NULL;
   }

   RETURN_EGL_SUCCESS(disp, EGL_TRUE);

--- a/src/gallium/auxiliary/driver_ddebug/dd_screen.c
+++ b/src/gallium/auxiliary/driver_ddebug/dd_screen.c
@@ -311,6 +311,18 @@ dd_screen_resource_get_handle(struct pipe_screen *_screen,
   return screen->resource_get_handle(screen, pipe, resource, handle, usage);
 }

+static bool
+dd_screen_resource_get_param(struct pipe_screen *_screen,
+                             struct pipe_resource *resource,
+                             unsigned int plane,
+                             enum pipe_resource_param param,
+                             uint64_t *value)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->resource_get_param(screen, resource, plane, param, value);
+}
+
 static void
 dd_screen_resource_get_info(struct pipe_screen *_screen,
                            struct pipe_resource *resource,
@@ -565,6 +577,7 @@ ddebug_screen_create(struct pipe_screen *screen)
   SCR_INIT(resource_from_user_memory);
   SCR_INIT(check_resource_capability);
   dscreen->base.resource_get_handle = dd_screen_resource_get_handle;
+   SCR_INIT(resource_get_param);
   SCR_INIT(resource_get_info);
   SCR_INIT(resource_changed);
   dscreen->base.resource_destroy = dd_screen_resource_destroy;

--- a/src/gallium/auxiliary/driver_noop/noop_pipe.c
+++ b/src/gallium/auxiliary/driver_noop/noop_pipe.c
@@ -156,6 +156,27 @@ static bool noop_resource_get_handle(struct pipe_screen *pscreen,
   return result;
 }

+static bool noop_resource_get_param(struct pipe_screen *pscreen,
+                                    struct pipe_resource *resource,
+                                    unsigned int plane,
+                                    enum pipe_resource_param param,
+                                    uint64_t *value)
+{
+   struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)pscreen;
+   struct pipe_screen *screen = noop_screen->oscreen;
+   struct pipe_resource *tex;
+   bool result;
+
+   /* resource_get_param mustn't fail. Just create something and return it. */
+   tex = screen->resource_create(screen, resource);
+   if (!tex)
+      return false;
+
+   result = screen->resource_get_param(screen, tex, 0, param, value);
+   pipe_resource_reference(&tex, NULL);
+   return result;
+}
+
 static void noop_resource_destroy(struct pipe_screen *screen,
                                  struct pipe_resource *resource)
 {
@@ -502,6 +523,8 @@ struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen)
   screen->resource_create = noop_resource_create;
   screen->resource_from_handle = noop_resource_from_handle;
   screen->resource_get_handle = noop_resource_get_handle;
+   if (oscreen->resource_get_param)
+      screen->resource_get_param = noop_resource_get_param;
   screen->resource_destroy = noop_resource_destroy;
   screen->flush_frontbuffer = noop_flush_frontbuffer;
   screen->get_timestamp = noop_get_timestamp;

--- a/src/gallium/auxiliary/driver_rbug/rbug_screen.c
+++ b/src/gallium/auxiliary/driver_rbug/rbug_screen.c
@@ -215,6 +215,22 @@ rbug_screen_resource_get_handle(struct pipe_screen *_screen,
                                      resource, handle, usage);
 }

+static bool
+rbug_screen_resource_get_param(struct pipe_screen *_screen,
+                               struct pipe_resource *_resource,
+                               unsigned int plane,
+                               enum pipe_resource_param param,
+                               uint64_t *value)
+{
+   struct rbug_screen *rb_screen = rbug_screen(_screen);
+   struct rbug_resource *rb_resource = rbug_resource(_resource);
+   struct pipe_screen *screen = rb_screen->screen;
+   struct pipe_resource *resource = rb_resource->resource;
+
+   return screen->resource_get_param(screen, resource, plane, param, value);
+}
+
+
 static void
 rbug_screen_resource_get_info(struct pipe_screen *_screen,
                              struct pipe_resource *_resource,
@@ -333,6 +349,7 @@ rbug_screen_create(struct pipe_screen *screen)
   rb_screen->base.resource_from_handle = rbug_screen_resource_from_handle;
   SCR_INIT(check_resource_capability);
   rb_screen->base.resource_get_handle = rbug_screen_resource_get_handle;
+   SCR_INIT(resource_get_param);
   SCR_INIT(resource_get_info);
   SCR_INIT(resource_changed);
   rb_screen->base.resource_destroy = rbug_screen_resource_destroy;

--- a/src/gallium/auxiliary/driver_trace/tr_screen.c
+++ b/src/gallium/auxiliary/driver_trace/tr_screen.c
@@ -407,6 +407,21 @@ trace_screen_resource_get_handle(struct pipe_screen *_screen,
                                      resource, handle, usage);
 }

+static bool
+trace_screen_resource_get_param(struct pipe_screen *_screen,
+                                struct pipe_resource *resource,
+                                unsigned int plane,
+                                enum pipe_resource_param param,
+                                uint64_t *value)
+{
+   struct trace_screen *tr_screen = trace_screen(_screen);
+   struct pipe_screen *screen = tr_screen->screen;
+
+   /* TODO trace call */
+
+   return screen->resource_get_param(screen, resource, plane, param, value);
+}
+
 static void
 trace_screen_resource_get_info(struct pipe_screen *_screen,
                               struct pipe_resource *resource,
@@ -687,6 +702,7 @@ trace_screen_create(struct pipe_screen *screen)
   tr_scr->base.resource_from_handle = trace_screen_resource_from_handle;
   SCR_INIT(check_resource_capability);
   tr_scr->base.resource_get_handle = trace_screen_resource_get_handle;
+   SCR_INIT(resource_get_param);
   SCR_INIT(resource_get_info);
   SCR_INIT(resource_from_memobj);
   SCR_INIT(resource_changed);