...
 
Commits (38)
......@@ -11,4 +11,7 @@ b031c643491a92a5574c7a4bd659df33f2d89bb6
# These were manually rebased by Jason, thanks!
8ab95b849e66f3221d80a67eef2ec6e3730901a8
5c30fffeec1732c21d600c036f95f8cdb1bb5487
\ No newline at end of file
5c30fffeec1732c21d600c036f95f8cdb1bb5487
# This doesn't actually apply to 19.0
29179f58c6ba8099859ea25900214dbbd3814a92
\ No newline at end of file
......@@ -35,7 +35,11 @@ def main():
args = parser.parse_args()
if os.path.isabs(args.libdir):
to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
destdir = os.environ.get('DESTDIR')
if destdir:
to = os.path.join(destdir, args.libdir[1:])
else:
to = args.libdir
else:
to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)
......
mesa (19.0.1-2) UNRELEASED; urgency=medium
* control: Bump libdrm-dev build-dep, amdgpu needs 2.4.97.
-- Timo Aaltonen <tjaalton@debian.org> Wed, 10 Apr 2019 12:58:36 +0300
mesa (19.0.1-1) experimental; urgency=medium
* New upstream release.
-- Timo Aaltonen <tjaalton@debian.org> Thu, 28 Mar 2019 15:38:58 +0200
mesa (19.0.0-1) experimental; urgency=medium
* New upstream release.
......
......@@ -9,7 +9,7 @@ Build-Depends:
meson (>= 0.45),
quilt (>= 0.63-8.2~),
pkg-config,
libdrm-dev (>= 2.4.95) [!hurd-any],
libdrm-dev (>= 2.4.97) [!hurd-any],
libx11-dev,
libxxf86vm-dev,
libexpat1-dev,
......
......@@ -32,7 +32,8 @@ Compatibility contexts may report a lower version depending on each driver.
<h2>SHA256 checksums</h2>
<pre>
TBD.
4c5b9c5227d37c1f6bdc786a6fa7ee7fbce40b2e8a87340c7d3234534ece3304 mesa-19.0.0.tar.gz
5a549dfb40ec31e5c36c47aadac04554cb2e2a8d144a046a378fc16da57e38f8 mesa-19.0.0.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 19.0.1 Release Notes / March 27, 2019</h1>
<p>
Mesa 19.0.1 is a bug fix release which fixes bugs found since the 19.0.0 release.
</p>
<p>
Mesa 19.0.1 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100316">Bug 100316</a> - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107563">Bug 107563</a> - [RADV] Broken rendering in Unity demos</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109698">Bug 109698</a> - dri.pc contents invalid when built with meson</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109980">Bug 109980</a> - [i915 CI][HSW] spec&#64;arb_fragment_shader_interlock&#64;arb_fragment_shader_interlock-image-load-store - fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110211">Bug 110211</a> - If DESTDIR is set to an empty string, the dri drivers are not installed</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110221">Bug 110221</a> - build error with meson</li>
</ul>
<h2>Changes</h2>
<p>Andres Gomez (4):</p>
<ul>
<li>glsl: correctly validate component layout qualifier for dvec{3,4}</li>
<li>glsl/linker: don't fail non static used inputs without matching outputs</li>
<li>glsl/linker: simplify xfb_offset vs xfb_stride overflow check</li>
<li>Revert "glsl: relax input-&gt;output validation for SSO programs"</li>
</ul>
<p>Bas Nieuwenhuizen (2):</p>
<ul>
<li>radv: Use correct image view comparison for fast clears.</li>
<li>ac/nir: Return frag_coord as integer.</li>
</ul>
<p>Danylo Piliaiev (2):</p>
<ul>
<li>anv: Treat zero size XFB buffer as disabled</li>
<li>glsl: Cross validate variable's invariance by explicit invariance only</li>
</ul>
<p>Dave Airlie (1):</p>
<ul>
<li>softpipe: fix texture view crashes</li>
</ul>
<p>Dylan Baker (5):</p>
<ul>
<li>docs: Add SHA256 sums for 19.0.0</li>
<li>cherry-ignore: Add commit that doesn't apply</li>
<li>bin/install_megadrivers.py: Correctly handle DESTDIR=''</li>
<li>bin/install_megadrivers.py: Fix regression for set DESTDIR</li>
<li>bump version for 19.0.1</li>
</ul>
<p>Eric Anholt (1):</p>
<ul>
<li>v3d: Fix leak of the renderonly struct on screen destruction.</li>
</ul>
<p>Jason Ekstrand (6):</p>
<ul>
<li>glsl/lower_vector_derefs: Don't use a temporary for TCS outputs</li>
<li>glsl/list: Add a list variant of insert_after</li>
<li>anv/pass: Flag the need for a RT flush for resolve attachments</li>
<li>nir/builder: Add a vector extract helper</li>
<li>nir: Add a new pass to lower array dereferences on vectors</li>
<li>intel/nir: Lower array-deref-of-vector UBO and SSBO loads</li>
</ul>
<p>Józef Kucia (2):</p>
<ul>
<li>radv: Fix driverUUID</li>
<li>mesa: Fix GL_NUM_DEVICE_UUIDS_EXT</li>
</ul>
<p>Kenneth Graunke (1):</p>
<ul>
<li>intel/fs: Fix opt_peephole_csel to not throw away saturates.</li>
</ul>
<p>Kevin Strasser (1):</p>
<ul>
<li>egl/dri: Avoid out of bounds array access</li>
</ul>
<p>Mark Janes (1):</p>
<ul>
<li>mesa: properly report the length of truncated log messages</li>
</ul>
<p>Plamena Manolova (1):</p>
<ul>
<li>i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9</li>
</ul>
<p>Samuel Pitoiset (3):</p>
<ul>
<li>radv: set the maximum number of IBs per submit to 192</li>
<li>radv: always initialize HTILE when the src layout is UNDEFINED</li>
<li>radv: fix binding transform feedback buffers</li>
</ul>
<p>Sergii Romantsov (1):</p>
<ul>
<li>d3d: meson: do not prefix user provided d3d-drivers-path</li>
</ul>
<p>Tapani Pälli (2):</p>
<ul>
<li>isl: fix automake build when sse41 is not supported</li>
<li>anv/radv: release memory allocated by glsl types during spirv_to_nir</li>
</ul>
</div>
</body>
</html>
......@@ -608,7 +608,7 @@ with_gallium_xa = _xa != 'false'
d3d_drivers_path = get_option('d3d-drivers-path')
if d3d_drivers_path == ''
d3d_drivers_path = join_paths(get_option('libdir'), 'd3d')
d3d_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'd3d')
endif
with_gallium_st_nine = get_option('gallium-nine')
......
......@@ -3093,7 +3093,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
ctx->abi->frag_pos[2],
ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
};
result = ac_build_gather_values(&ctx->ac, values, 4);
result = ac_to_integer(&ctx->ac,
ac_build_gather_values(&ctx->ac, values, 4));
break;
}
case nir_intrinsic_load_front_face:
......
......@@ -4406,8 +4406,7 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
if (!radv_image_has_htile(image))
return;
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
/* TODO: merge with the clear if applicable */
radv_initialize_htile(cmd_buffer, image, range, 0);
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
......@@ -4906,7 +4905,7 @@ void radv_CmdBindTransformFeedbackBuffersEXT(
enabled_mask |= 1 << idx;
}
cmd_buffer->state.streamout.enabled_mask = enabled_mask;
cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
......
......@@ -48,6 +48,7 @@
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
#include "compiler/glsl_types.h"
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
......@@ -337,7 +338,7 @@ radv_physical_device_init(struct radv_physical_device *device,
device->rad_info.chip_class > GFX9)
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
radv_get_driver_uuid(&device->device_uuid);
radv_get_driver_uuid(&device->driver_uuid);
radv_get_device_uuid(&device->rad_info, &device->device_uuid);
if (device->rad_info.family == CHIP_STONEY ||
......@@ -610,6 +611,7 @@ void radv_DestroyInstance(
VG(VALGRIND_DESTROY_MEMPOOL(instance));
_mesa_glsl_release_types();
_mesa_locale_fini();
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
......@@ -2794,7 +2796,7 @@ VkResult radv_QueueSubmit(
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
struct radeon_winsys_ctx *ctx = queue->hw_ctx;
int ret;
uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
uint32_t scratch_size = 0;
uint32_t compute_scratch_size = 0;
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
......
......@@ -651,7 +651,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
iview->base_mip == 0 &&
iview->base_layer == 0 &&
radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
!radv_image_extent_compare(iview->image, &iview->extent))
radv_image_extent_compare(iview->image, &iview->extent))
return true;
return false;
}
......
......@@ -29,6 +29,13 @@
#ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
#define RADV_AMDGPU_WINSYS_PUBLIC_H
/* The number of IBs per submit isn't infinite, it depends on the ring type
* (i.e. some initial setup needed for a submit) and the number of IBs (4 DW).
* This limit is arbitrary but should be safe for now. Ideally, we should get
* this limit from the KMD.
*/
#define RADV_MAX_IBS_PER_SUBMIT 192
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
uint64_t perftest_flags);
......
......@@ -229,6 +229,7 @@ NIR_FILES = \
nir/nir_lower_alpha_test.c \
nir/nir_lower_alu.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_array_deref_of_vec.c \
nir/nir_lower_atomics_to_ssbo.c \
nir/nir_lower_bitmap.c \
nir/nir_lower_bit_size.c \
......
......@@ -3698,6 +3698,10 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
"cannot be applied to a matrix, a structure, "
"a block, or an array containing any of "
"these.");
} else if (components > 4 && type->is_64bit()) {
_mesa_glsl_error(loc, state, "component layout qualifier "
"cannot be applied to dvec%u.",
components / 2);
} else if (qual_component != 0 &&
(qual_component + components - 1) > 3) {
_mesa_glsl_error(loc, state, "component overflow (%u > 3)",
......@@ -3940,7 +3944,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
"`invariant' after being used",
var->name);
} else {
var->data.invariant = 1;
var->data.explicit_invariant = true;
var->data.invariant = true;
}
}
......@@ -4148,8 +4153,10 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
}
}
if (state->all_invariant && var->data.mode == ir_var_shader_out)
if (state->all_invariant && var->data.mode == ir_var_shader_out) {
var->data.explicit_invariant = true;
var->data.invariant = true;
}
var->data.interpolation =
interpret_interpolation_qualifier(qual, var->type,
......@@ -4857,6 +4864,7 @@ ast_declarator_list::hir(exec_list *instructions,
"`invariant' after being used",
earlier->name);
} else {
earlier->data.explicit_invariant = true;
earlier->data.invariant = true;
}
}
......
......@@ -1734,6 +1734,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
this->data.centroid = false;
this->data.sample = false;
this->data.patch = false;
this->data.explicit_invariant = false;
this->data.invariant = false;
this->data.how_declared = ir_var_declared_normally;
this->data.mode = mode;
......
......@@ -657,6 +657,19 @@ public:
unsigned centroid:1;
unsigned sample:1;
unsigned patch:1;
/**
* Was an 'invariant' qualifier explicitly set in the shader?
*
* This is used to cross validate qualifiers.
*/
unsigned explicit_invariant:1;
/**
* Is the variable invariant?
*
* It can happen either by having the 'invariant' qualifier
* explicitly set in the shader or by being used in calculations
* of other invariant variables.
*/
unsigned invariant:1;
unsigned precise:1;
......
......@@ -199,6 +199,7 @@ void ir_print_visitor::visit(ir_variable *ir)
const char *const samp = (ir->data.sample) ? "sample " : "";
const char *const patc = (ir->data.patch) ? "patch " : "";
const char *const inv = (ir->data.invariant) ? "invariant " : "";
const char *const explicit_inv = (ir->data.explicit_invariant) ? "explicit_invariant " : "";
const char *const prec = (ir->data.precise) ? "precise " : "";
const char *const bindless = (ir->data.bindless) ? "bindless " : "";
const char *const bound = (ir->data.bound) ? "bound " : "";
......@@ -215,11 +216,11 @@ void ir_print_visitor::visit(ir_variable *ir)
const char *const interp[] = { "", "smooth", "flat", "noperspective" };
STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_MODE_COUNT);
fprintf(f, "(%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s) ",
fprintf(f, "(%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s) ",
binding, loc, component, cent, bindless, bound,
image_format, memory_read_only, memory_write_only,
memory_coherent, memory_volatile, memory_restrict,
samp, patc, inv, prec, mode[ir->data.mode],
samp, patc, inv, explicit_inv, prec, mode[ir->data.mode],
stream,
interp[ir->data.interpolation]);
......
......@@ -419,8 +419,10 @@ ir_reader::read_declaration(s_expression *expr)
var->data.sample = 1;
} else if (strcmp(qualifier->value(), "patch") == 0) {
var->data.patch = 1;
} else if (strcmp(qualifier->value(), "explicit_invariant") == 0) {
var->data.explicit_invariant = true;
} else if (strcmp(qualifier->value(), "invariant") == 0) {
var->data.invariant = 1;
var->data.invariant = true;
} else if (strcmp(qualifier->value(), "uniform") == 0) {
var->data.mode = ir_var_uniform;
} else if (strcmp(qualifier->value(), "shader_storage") == 0) {
......
......@@ -309,16 +309,16 @@ cross_validate_types_and_qualifiers(struct gl_context *ctx,
* "The invariance of varyings that are declared in both the vertex
* and fragment shaders must match."
*/
if (input->data.invariant != output->data.invariant &&
if (input->data.explicit_invariant != output->data.explicit_invariant &&
prog->data->Version < (prog->IsES ? 300 : 430)) {
linker_error(prog,
"%s shader output `%s' %s invariant qualifier, "
"but %s shader input %s invariant qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
(output->data.invariant) ? "has" : "lacks",
(output->data.explicit_invariant) ? "has" : "lacks",
_mesa_shader_stage_to_string(consumer_stage),
(input->data.invariant) ? "has" : "lacks");
(input->data.explicit_invariant) ? "has" : "lacks");
return;
}
......@@ -773,8 +773,20 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
output = explicit_locations[idx][input->data.location_frac].var;
if (output == NULL ||
input->data.location != output->data.location) {
if (output == NULL) {
/* A linker failure should only happen when there is no
* output declaration and there is Static Use of the
* declared input.
*/
if (input->data.used) {
linker_error(prog,
"%s shader input `%s' with explicit location "
"has no matching output\n",
_mesa_shader_stage_to_string(consumer->Stage),
input->name);
break;
}
} else if (input->data.location != output->data.location) {
linker_error(prog,
"%s shader input `%s' with explicit location "
"has no matching output\n",
......@@ -804,7 +816,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
*/
assert(!input->data.assigned);
if (input->data.used && !input->get_interface_type() &&
!input->data.explicit_location && !prog->SeparateShader)
!input->data.explicit_location)
linker_error(prog,
"%s shader input `%s' "
"has no matching output in the previous stage\n",
......@@ -1166,8 +1178,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
return false;
}
if ((this->offset / 4) / info->Buffers[buffer].Stride !=
(xfb_offset - 1) / info->Buffers[buffer].Stride) {
if (xfb_offset > info->Buffers[buffer].Stride) {
linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
"buffer (%d)", xfb_offset * 4,
info->Buffers[buffer].Stride * 4, buffer);
......
......@@ -1090,7 +1090,7 @@ cross_validate_globals(struct gl_context *ctx, struct gl_shader_program *prog,
}
}
if (existing->data.invariant != var->data.invariant) {
if (existing->data.explicit_invariant != var->data.explicit_invariant) {
linker_error(prog, "declarations for %s `%s' have "
"mismatching invariant qualifiers\n",
mode_string(var), var->name);
......
......@@ -81,6 +81,12 @@ struct exec_node {
* Insert a node in the list after the current node
*/
void insert_after(exec_node *after);
/**
* Insert another list in the list after the current node
*/
void insert_after(struct exec_list *after);
/**
* Insert a node in the list before the current node
*/
......@@ -507,6 +513,21 @@ exec_list_append(struct exec_list *list, struct exec_list *source)
exec_list_make_empty(source);
}
/**
 * Splice every node of \p after into the list containing \p n, directly
 * behind \p n, preserving their order.  \p after is left empty afterwards.
 * An empty \p after is a no-op.
 */
static inline void
exec_node_insert_list_after(struct exec_node *n, struct exec_list *after)
{
   /* Nothing to splice in; leave both lists untouched. */
   if (exec_list_is_empty(after))
      return;

   /* Capture the endpoints of the chain being moved and the node that
    * currently follows the insertion point before relinking anything.
    */
   struct exec_node *const first = after->head_sentinel.next;
   struct exec_node *const last = after->tail_sentinel.prev;
   struct exec_node *const successor = n->next;

   /* Stitch the moved chain in between n and its old successor. */
   last->next = successor;
   first->prev = n;
   successor->prev = last;
   n->next = first;

   /* The nodes now belong to n's list; reset the donor's sentinels. */
   exec_list_make_empty(after);
}
static inline void
exec_list_prepend(struct exec_list *list, struct exec_list *source)
{
......@@ -635,6 +656,11 @@ inline void exec_list::append_list(exec_list *source)
exec_list_append(this, source);
}
/* List overload of insert_after(): forwards to the C helper
 * exec_node_insert_list_after() with this node as the insertion point.
 */
inline void exec_node::insert_after(exec_list *after)
{
exec_node_insert_list_after(this, after);
}
inline void exec_list::prepend_list(exec_list *source)
{
exec_list_prepend(this, source);
......
......@@ -32,8 +32,9 @@ namespace {
class vector_deref_visitor : public ir_rvalue_enter_visitor {
public:
vector_deref_visitor()
: progress(false)
vector_deref_visitor(void *mem_ctx, gl_shader_stage shader_stage)
: progress(false), shader_stage(shader_stage),
factory(&factory_instructions, mem_ctx)
{
}
......@@ -45,6 +46,9 @@ public:
virtual ir_visitor_status visit_enter(ir_assignment *ir);
bool progress;
gl_shader_stage shader_stage;
exec_list factory_instructions;
ir_factory factory;
};
} /* anonymous namespace */
......@@ -65,13 +69,63 @@ vector_deref_visitor::visit_enter(ir_assignment *ir)
ir_constant *old_index_constant =
deref->array_index->constant_expression_value(mem_ctx);
if (!old_index_constant) {
ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
new_lhs->type,
new_lhs->clone(mem_ctx, NULL),
ir->rhs,
deref->array_index);
ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
ir->set_lhs(new_lhs);
if (shader_stage == MESA_SHADER_TESS_CTRL &&
deref->variable_referenced()->data.mode == ir_var_shader_out) {
/* Tessellation control shader outputs act as if they have memory
* backing them and if we have writes from multiple threads
* targeting the same vec4 (this can happen for patch outputs), the
* load-vec-store pattern of ir_triop_vector_insert doesn't work.
* Instead, we have to lower to a series of conditional write-masked
* assignments.
*/
ir_variable *const src_temp =
factory.make_temp(ir->rhs->type, "scalar_tmp");
/* The newly created variable declaration goes before the assignment
* because we're going to set it as the new LHS.
*/
ir->insert_before(factory.instructions);
ir->set_lhs(new(mem_ctx) ir_dereference_variable(src_temp));
ir_variable *const arr_index =
factory.make_temp(deref->array_index->type, "index_tmp");
factory.emit(assign(arr_index, deref->array_index));
for (unsigned i = 0; i < new_lhs->type->vector_elements; i++) {
ir_constant *const cmp_index =
ir_constant::zero(factory.mem_ctx, deref->array_index->type);
cmp_index->value.u[0] = i;
ir_rvalue *const lhs_clone = new_lhs->clone(factory.mem_ctx, NULL);
ir_dereference_variable *const src_temp_deref =
new(mem_ctx) ir_dereference_variable(src_temp);
if (new_lhs->ir_type != ir_type_swizzle) {
assert(lhs_clone->as_dereference());
ir_assignment *cond_assign =
new(mem_ctx) ir_assignment(lhs_clone->as_dereference(),
src_temp_deref,
equal(arr_index, cmp_index),
WRITEMASK_X << i);
factory.emit(cond_assign);
} else {
ir_assignment *cond_assign =
new(mem_ctx) ir_assignment(swizzle(lhs_clone, i, 1),
src_temp_deref,
equal(arr_index, cmp_index));
factory.emit(cond_assign);
}
}
ir->insert_after(factory.instructions);
} else {
ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
new_lhs->type,
new_lhs->clone(mem_ctx, NULL),
ir->rhs,
deref->array_index);
ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
ir->set_lhs(new_lhs);
}
} else if (new_lhs->ir_type != ir_type_swizzle) {
ir->set_lhs(new_lhs);
ir->write_mask = 1 << old_index_constant->get_uint_component(0);
......@@ -105,7 +159,7 @@ vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
bool
lower_vector_derefs(gl_linked_shader *shader)
{
vector_deref_visitor v;
vector_deref_visitor v(shader->ir, shader->Stage);
visit_list_elements(&v, shader->ir);
......
......@@ -112,6 +112,7 @@ files_libnir = files(
'nir_lower_alu.c',
'nir_lower_alu_to_scalar.c',
'nir_lower_alpha_test.c',
'nir_lower_array_deref_of_vec.c',
'nir_lower_atomics_to_ssbo.c',
'nir_lower_bitmap.c',
'nir_lower_bool_to_float.c',
......
......@@ -2910,6 +2910,16 @@ void nir_fixup_deref_modes(nir_shader *shader);
bool nir_lower_global_vars_to_local(nir_shader *shader);
/* Bit flags selecting which kinds of array-deref-of-vector accesses
 * nir_lower_array_deref_of_vec() rewrites.  In that pass "direct" accesses
 * are those whose array index is a constant and "indirect" accesses are
 * those whose index is not; loads and stores are enabled independently.
 * The values are distinct bits so they can be OR'd together.
 */
typedef enum {
nir_lower_direct_array_deref_of_vec_load = (1 << 0),
nir_lower_indirect_array_deref_of_vec_load = (1 << 1),
nir_lower_direct_array_deref_of_vec_store = (1 << 2),
nir_lower_indirect_array_deref_of_vec_store = (1 << 3),
} nir_lower_array_deref_of_vec_options;
bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
nir_lower_array_deref_of_vec_options options);
bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes);
bool nir_lower_locals_to_regs(nir_shader *shader);
......
......@@ -560,6 +560,35 @@ nir_channels(nir_builder *b, nir_ssa_def *def, nir_component_mask_t mask)
return nir_swizzle(b, def, swizzle, num_channels, false);
}
/* Recursive worker for nir_vector_extract().
 *
 * Builds a balanced tree of nir_bcsel instructions that selects, from the
 * components [start, end) of \p vec, the one addressed by the run-time
 * index \p c.  Each level compares \p c against the midpoint of the range
 * with nir_ilt and recurses into the lower and upper halves; a range of a
 * single component resolves to a plain nir_channel().
 *
 * NOTE(review): the midpoint is built with nir_imm_int(), so \p c is
 * presumably expected to be a 32-bit value -- confirm against callers.
 */
static inline nir_ssa_def *
_nir_vector_extract_helper(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *c,
                           unsigned start, unsigned end)
{
   if (start == end - 1)
      return nir_channel(b, vec, start);

   const unsigned mid = start + (end - start) / 2;

   /* Each of these calls emits instructions through the builder.  Passing
    * them directly as arguments of one nir_bcsel() call would leave their
    * relative order up to the compiler (argument evaluation order is
    * unspecified in C), so sequence them explicitly to get the same
    * instruction order on every toolchain.
    */
   nir_ssa_def *const in_low_half = nir_ilt(b, c, nir_imm_int(b, mid));
   nir_ssa_def *const low = _nir_vector_extract_helper(b, vec, c, start, mid);
   nir_ssa_def *const high = _nir_vector_extract_helper(b, vec, c, mid, end);

   return nir_bcsel(b, in_low_half, low, high);
}
/* Extract the single component of \p vec addressed by \p c.
 *
 * When \p c is a compile-time constant the result is either the matching
 * channel or, if the constant is not smaller than the vector's component
 * count, a one-component undef of the vector's bit size.  A non-constant
 * \p c is resolved with a bcsel tree over all components.
 */
static inline nir_ssa_def *
nir_vector_extract(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *c)
{
   nir_src index_src = nir_src_for_ssa(c);

   /* Dynamic index: select among every component at run time. */
   if (!nir_src_is_const(index_src))
      return _nir_vector_extract_helper(b, vec, c, 0, vec->num_components);

   unsigned component = nir_src_as_uint(index_src);

   /* Constant but out of bounds: there is nothing valid to read. */
   if (component >= vec->num_components)
      return nir_ssa_undef(b, 1, vec->bit_size);

   return nir_channel(b, vec, component);
}
static inline nir_ssa_def *
nir_i2i(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size)
{
......
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "nir.h"
#include "nir_builder.h"
/* Emit a store to the whole vector behind \p vec_deref that only writes
 * \p component.
 *
 * The stored vector carries the scalar \p value in the targeted slot and an
 * undef of the same bit size everywhere else; the write mask passed to
 * nir_store_deref() has just the bit for \p component set.
 */
static void
build_write_masked_store(nir_builder *b, nir_deref_instr *vec_deref,
                         nir_ssa_def *value, unsigned component)
{
   assert(value->num_components == 1);

   unsigned num_components = glsl_get_components(vec_deref->type);
   assert(num_components > 1 && num_components <= NIR_MAX_VEC_COMPONENTS);

   /* Filler for every lane that the write mask leaves untouched. */
   nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size);

   nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned chan = 0; chan < num_components; chan++) {
      if (chan == component)
         channels[chan] = value;
      else
         channels[chan] = undef;
   }

   nir_store_deref(b, vec_deref, nir_vec(b, channels, num_components),
                   (1u << component));
}
/* Emit a write of \p value to the component of \p vec_deref selected by the
 * run-time \p index, considering only components in [start, end).
 *
 * The range is halved recursively: each level emits an if/else on
 * "index < mid" and recurses into both halves, and a range of exactly one
 * component emits the write-masked store for that component.
 */
static void
build_write_masked_stores(nir_builder *b, nir_deref_instr *vec_deref,
                          nir_ssa_def *value, nir_ssa_def *index,
                          unsigned start, unsigned end)
{
   /* Leaf: exactly one candidate component left. */
   if (end - start == 1) {
      build_write_masked_store(b, vec_deref, value, start);
      return;
   }

   const unsigned mid = start + (end - start) / 2;
   nir_ssa_def *in_low_half = nir_ilt(b, index, nir_imm_int(b, mid));

   nir_push_if(b, in_low_half);
   {
      build_write_masked_stores(b, vec_deref, value, index, start, mid);
   }
   nir_push_else(b, NULL);
   {
      build_write_masked_stores(b, vec_deref, value, index, mid, end);
   }
   nir_pop_if(b, NULL);
}
/* Per-function worker for nir_lower_array_deref_of_vec().
 *
 * Visits every intrinsic in impl and looks at load_deref, the three
 * interp_deref_at_* intrinsics and store_deref whose first source is an
 * array deref (in one of the requested modes) whose parent deref has a
 * vector type.  Depending on the bits set in options:
 *
 *  - stores are replaced by write-masked store(s) to the parent vector
 *    deref and the original store is removed;
 *  - loads are retargeted to the parent vector deref, widened to the full
 *    vector, and the requested scalar is then picked out of the result.
 *
 * Returns true if any instruction was changed.
 */
static bool
nir_lower_array_deref_of_vec_impl(nir_function_impl *impl,
nir_variable_mode modes,
nir_lower_array_deref_of_vec_options options)
{
bool progress = false;
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
/* The _safe iterator is used because instructions are removed below. */
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
/* copy_deref is expected to have been lowered before this pass runs. */
assert(intrin->intrinsic != nir_intrinsic_copy_deref);
if (intrin->intrinsic != nir_intrinsic_load_deref &&
intrin->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
intrin->intrinsic != nir_intrinsic_interp_deref_at_sample &&
intrin->intrinsic != nir_intrinsic_interp_deref_at_offset &&
intrin->intrinsic != nir_intrinsic_store_deref)
continue;
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
/* Skip derefs whose mode was not requested by the caller. */
if (!(deref->mode & modes))
continue;
/* We only care about array derefs that act on vectors */
if (deref->deref_type != nir_deref_type_array)
continue;
nir_deref_instr *vec_deref = nir_deref_instr_parent(deref);
if (!glsl_type_is_vector(vec_deref->type))
continue;
assert(intrin->num_components == 1);
unsigned num_components = glsl_get_components(vec_deref->type);
assert(num_components > 1 && num_components <= NIR_MAX_VEC_COMPONENTS);
/* Everything built below is emitted right after the original intrinsic. */
b.cursor = nir_after_instr(&intrin->instr);
if (intrin->intrinsic == nir_intrinsic_store_deref) {
assert(intrin->src[1].is_ssa);
nir_ssa_def *value = intrin->src[1].ssa;
if (nir_src_is_const(deref->arr.index)) {
/* Constant index: handled only if direct stores were requested. */
if (!(options & nir_lower_direct_array_deref_of_vec_store))
continue;
unsigned index = nir_src_as_uint(deref->arr.index);
/* If index is OOB, we throw the old store away and don't
* replace it with anything.
*/
if (index < num_components)
build_write_masked_store(&b, vec_deref, value, index);
} else {
/* Non-constant index: handled only if indirect stores were requested. */
if (!(options & nir_lower_indirect_array_deref_of_vec_store))
continue;
nir_ssa_def *index = nir_ssa_for_src(&b, deref->arr.index, 1);
build_write_masked_stores(&b, vec_deref, value, index,
0, num_components);
}
/* The replacement (if any) has been emitted; drop the scalar store. */
nir_instr_remove(&intrin->instr);
progress = true;
} else {
/* Load-like intrinsics: only check that the matching option bit is set. */
if (nir_src_is_const(deref->arr.index)) {
if (!(options & nir_lower_direct_array_deref_of_vec_load))
continue;
} else {
if (!(options & nir_lower_indirect_array_deref_of_vec_load))
continue;
}
/* Turn the load into a vector load */
nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
nir_src_for_ssa(&vec_deref->dest.ssa));
intrin->dest.ssa.num_components = num_components;
intrin->num_components = num_components;
/* Pick the originally requested component back out of the vector. */
nir_ssa_def *index = nir_ssa_for_src(&b, deref->arr.index, 1);
nir_ssa_def *scalar =
nir_vector_extract(&b, &intrin->dest.ssa, index);
if (scalar->parent_instr->type == nir_instr_type_ssa_undef) {
/* The extract produced an undef, so the load result is not needed:
* point all users at the undef and delete the load.
*/
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(scalar));
nir_instr_remove(&intrin->instr);
} else {
/* Only uses after the extract are redirected, so the extract itself
* keeps reading the widened load.
*/
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
nir_src_for_ssa(scalar),
scalar->parent_instr);
}
progress = true;
}
}
}
if (progress) {
/* NOTE(review): the indirect-store path emits if/else control flow via
* build_write_masked_stores(); confirm that keeping block-index and
* dominance metadata is still valid in that case.
*/
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
}
return progress;
}
/* Lowers away array dereferences on vectors
 *
 * These are allowed on certain variable types such as SSBOs and TCS outputs.
 * However, not everyone can actually handle them everywhere.  There are also
 * cases where we want to lower them for performance reasons.
 *
 * Every function in the shader that has an implementation is processed;
 * the return value is true if any of them was modified.
 *
 * This pass assumes that copy_deref instructions have already been lowered.
 */
bool
nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
                             nir_lower_array_deref_of_vec_options options)
{
   bool any_progress = false;

   nir_foreach_function(function, shader) {
      /* Functions without a body have nothing to lower. */
      if (!function->impl)
         continue;

      if (nir_lower_array_deref_of_vec_impl(function->impl, modes, options))
         any_progress = true;
   }

   return any_progress;
}
......@@ -3045,12 +3045,7 @@ nir_ssa_def *
vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
nir_ssa_def *index)
{
nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
for (unsigned i = 1; i < src->num_components; i++)
dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i),
vtn_vector_extract(b, src, i), dest);
return dest;
return nir_vector_extract(&b->nb, src, nir_i2i(&b->nb, index, 32));
}
nir_ssa_def *
......
......@@ -199,8 +199,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
bind_to_texture_rgb = 0;
bind_to_texture_rgba = 0;
for (int i = 0; dri2_dpy->core->indexConfigAttrib(dri_config, i, &attrib,
&value); ++i) {
for (int i = 0; i < __DRI_ATTRIB_MAX; ++i) {
if (!dri2_dpy->core->indexConfigAttrib(dri_config, i, &attrib, &value))
break;
switch (attrib) {
case __DRI_ATTRIB_RENDER_TYPE:
if (value & __DRI_ATTRIB_RGBA_BIT)
......
......@@ -373,17 +373,18 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc, int layer)
if (util_format_is_pure_uint(tc->surface->format)) {
pipe_put_tile_ui_format(pt, tc->transfer_map[layer],
x, y, TILE_SIZE, TILE_SIZE,
pt->resource->format,
tc->surface->format,
(unsigned *) tc->tile->data.colorui128);
} else if (util_format_is_pure_sint(tc->surface->format)) {
pipe_put_tile_i_format(pt, tc->transfer_map[layer],
x, y, TILE_SIZE, TILE_SIZE,
pt->resource->format,
tc->surface->format,
(int *) tc->tile->data.colori128);
} else {
pipe_put_tile_rgba(pt, tc->transfer_map[layer],
x, y, TILE_SIZE, TILE_SIZE,
(float *) tc->tile->data.color);
pipe_put_tile_rgba_format(pt, tc->transfer_map[layer],
x, y, TILE_SIZE, TILE_SIZE,
tc->surface->format,
(float *) tc->tile->data.color);
}
}
numCleared++;
......
......@@ -70,6 +70,7 @@ v3d_screen_destroy(struct pipe_screen *pscreen)
util_hash_table_destroy(screen->bo_handles);
v3d_bufmgr_destroy(pscreen);
slab_destroy_parent(&screen->transfer_pool);
free(screen->ro);
if (using_v3d_simulator)
v3d_simulator_destroy(screen);
......
......@@ -68,5 +68,5 @@ pkg.generate(
description : 'Native D3D driver modules',
version : '.'.join(nine_version),
requires_private : 'libdrm >= ' + dep_libdrm.version(),
variables : ['moduledir=${prefix}/@0@'.format(d3d_drivers_path)],
variables : ['moduledir=@0@'.format(d3d_drivers_path)],
)
......@@ -33,12 +33,15 @@ ISL_GEN_LIBS = \
noinst_LTLIBRARIES += $(ISL_GEN_LIBS) \
isl/libisl.la \
libisl_tiled_memcpy.la \
libisl_tiled_memcpy_sse41.la
libisl_tiled_memcpy.la
isl_libisl_la_LIBADD = $(ISL_GEN_LIBS) \
libisl_tiled_memcpy.la \
libisl_tiled_memcpy_sse41.la
libisl_tiled_memcpy.la
if SSE41_SUPPORTED
isl_libisl_la_LIBADD += libisl_tiled_memcpy_sse41.la
noinst_LTLIBRARIES += libisl_tiled_memcpy_sse41.la
endif
isl_libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES)
......
......@@ -3117,6 +3117,7 @@ fs_visitor::opt_peephole_csel()
if (csel_inst != NULL) {
progress = true;
csel_inst->saturate = inst->saturate;
inst->remove(block);
}
......
......@@ -2100,6 +2100,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_INTERLOCK:
assert(devinfo->gen >= 9);
/* The interlock is basically a memory fence issued via sendc */
brw_memory_fence(p, dst, BRW_OPCODE_SENDC);
break;
......
......@@ -781,6 +781,17 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
OPT(brw_nir_lower_mem_access_bit_sizes);
/* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and
* SSBOs, our back-end is capable of loading an entire vec4 at a time and
* we would like to take advantage of that whenever possible regardless of
* whether or not the app gives us full loads. This should allow the
* optimizer to combine UBO and SSBO load operations and save us some send
* messages.
*/
OPT(nir_lower_array_deref_of_vec,
nir_var_mem_ubo | nir_var_mem_ssbo,
nir_lower_direct_array_deref_of_vec_load);
/* Get rid of split copies */
nir = brw_nir_optimize(nir, compiler, is_scalar, false);
......
......@@ -41,6 +41,7 @@
#include "git_sha1.h"
#include "vk_util.h"
#include "common/gen_defines.h"
#include "compiler/glsl_types.h"
#include "genxml/gen7_pack.h"
......@@ -703,6 +704,7 @@ void anv_DestroyInstance(
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
_mesa_glsl_release_types();
_mesa_locale_fini();
vk_free(&instance->alloc, instance);
......
......@@ -178,12 +178,28 @@ anv_render_pass_compile(struct anv_render_pass *pass)
* subpasses and checking to see if any of them don't have an external
* dependency. Or, we could just be lazy and add a couple extra flushes.
* We choose to be lazy.
*
* From the documentation for vkCmdNextSubpass:
*
* "Moving to the next subpass automatically performs any multisample
* resolve operations in the subpass being ended. End-of-subpass
* multisample resolves are treated as color attachment writes for the
* purposes of synchronization. This applies to resolve operations for
* both color and depth/stencil attachments. That is, they are
* considered to execute in the
* VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage and
* their writes are synchronized with
* VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT."
*
* Therefore, the above flags concerning color attachments also apply to
* color and depth/stencil resolve attachments.
*/
if (all_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
pass->subpass_flushes[0] |=
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
}
if (all_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
if (all_usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
pass->subpass_flushes[pass->subpass_count] |=
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
}
......
......@@ -2653,7 +2653,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
sob.SOBufferIndex = idx;
if (cmd_buffer->state.xfb_enabled && xfb->buffer) {
if (cmd_buffer->state.xfb_enabled && xfb->buffer && xfb->size != 0) {
sob.SOBufferEnable = true;
sob.MOCS = cmd_buffer->device->default_mocs,
sob.StreamOffsetWriteEnable = false;
......
......@@ -253,7 +253,6 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.EXT_shader_samples_identical = true;
ctx->Extensions.OES_primitive_bounding_box = true;
ctx->Extensions.OES_texture_buffer = true;
ctx->Extensions.ARB_fragment_shader_interlock = true;
if (can_do_pipelined_register_writes(brw->screen)) {
ctx->Extensions.ARB_draw_indirect = true;
......@@ -318,6 +317,30 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.KHR_blend_equation_advanced_coherent = true;
ctx->Extensions.KHR_texture_compression_astc_ldr = true;
ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
/*
* From the Skylake PRM Vol. 7 (Memory Fence Message, page 221):
* "A memory fence message issued by a thread causes further messages
* issued by the thread to be blocked until all previous data port
* messages have completed, or the results can be globally observed from
* the point of view of other threads in the system."
*
* From the Haswell PRM Vol. 7 (Memory Fence, page 256):
* "A memory fence message issued by a thread causes further messages
* issued by the thread to be blocked until all previous messages issued
* by the thread to that data port (data cache or render cache) have
* been globally observed from the point of view of other threads in the
* system."
*
* Summarized: For ARB_fragment_shader_interlock to work, we need to
* ensure memory access ordering for all messages to the dataport from
* all threads. Memory fence messages prior to SKL only provide memory
* access ordering for messages from the same thread, so we can only
* support the feature from Gen9 onwards.
*
*/
ctx->Extensions.ARB_fragment_shader_interlock = true;
}
if (gen_device_info_is_9lp(devinfo))
......
......@@ -231,6 +231,9 @@ _mesa_gl_vdebug(struct gl_context *ctx,
_mesa_debug_get_id(id);
len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args);
if (len >= MAX_DEBUG_MESSAGE_LENGTH)
/* message was truncated */
len = MAX_DEBUG_MESSAGE_LENGTH - 1;
_mesa_log_msg(ctx, source, type, *id, severity, len, s);
}
......
......@@ -906,6 +906,9 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
break;
/* GL_EXT_external_objects */
case GL_NUM_DEVICE_UUIDS_EXT:
v->value_int = 1;
break;
case GL_DRIVER_UUID_EXT:
_mesa_get_driver_uuid(ctx, v->value_int_4);
break;
......