Commits on Source (23)
......@@ -70,7 +70,14 @@ def main():
name, ext = os.path.splitext(name)
finally:
os.chdir(ret)
# Remove meson-created master .so and symlinks
os.unlink(master)
name, ext = os.path.splitext(master)
while ext != '.so':
if os.path.lexists(name):
os.unlink(name)
name, ext = os.path.splitext(name)
if __name__ == '__main__':
......
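For reference, the loop added above walks the symlink chain by repeatedly splitting off the trailing version extension until the bare '.so' name is reached, unlinking each intermediate name. A minimal, standalone sketch of that cleanup logic in Python (the example path is hypothetical; this is not the full install_megadrivers.py flow):

import os

def remove_meson_links(master):
    # Remove the meson-created master .so plus every shorter symlink name,
    # e.g. 'libfoo_dri.so.0.0.0' -> 'libfoo_dri.so.0.0' -> 'libfoo_dri.so.0'
    # -> 'libfoo_dri.so', stopping once splitext() yields the '.so' extension.
    if os.path.lexists(master):
        os.unlink(master)
    name, ext = os.path.splitext(master)
    while ext != '.so':
        if os.path.lexists(name):
            os.unlink(name)
        name, ext = os.path.splitext(name)

# Hypothetical usage after meson has produced the megadriver in the build dir:
# remove_meson_links('/path/to/build/libfoo_dri.so.0.0.0')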
......@@ -31,7 +31,8 @@ Compatibility contexts may report a lower version depending on each driver.
<h2>SHA256 checksums</h2>
<pre>
TBD
SHA256: eb972fc11d4e1261d34ec0b91a701f158d4870c0428fb108353ae7eab64b1118 mesa-19.0.2.tar.gz
SHA256: 1a2edc3ce56906a676c91e6851298db45903df1f5cb9827395a922c1452db802 mesa-19.0.2.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 19.0.3 Release Notes / April 24, 2019</h1>
<p>
Mesa 19.0.3 is a bug fix release which fixes bugs found since the 19.0.2 release.
</p>
<p>
Mesa 19.0.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>
<h2>New features</h2>
<p>N/A</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108879">Bug 108879</a> - [CIK] [regression] All opencl apps hangs indefinitely in si_create_context</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110201">Bug 110201</a> - [ivb] mesa 19.0.0 breaks rendering in kitty</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110356">Bug 110356</a> - install_megadrivers.py creates new dangling symlink [bisected]</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110441">Bug 110441</a> - [llvmpipe] complex-loop-analysis-bug regression</li>
</ul>
<h2>Changes</h2>
<p>Andres Gomez (1):</p>
<ul>
<li>glsl/linker: location aliasing requires types to have the same width</li>
</ul>
<p>Bas Nieuwenhuizen (1):</p>
<ul>
<li>ac: Move has_local_buffers disable to radeonsi.</li>
</ul>
<p>Chia-I Wu (1):</p>
<ul>
<li>virgl: fix fence fd version check</li>
</ul>
<p>Danylo Piliaiev (1):</p>
<ul>
<li>intel/compiler: Do not reswizzle dst if instruction writes to flag register</li>
</ul>
<p>Dylan Baker (2):</p>
<ul>
<li>docs: Add sha256 sums for 19.0.2</li>
<li>Bump version for 19.0.3</li>
</ul>
<p>Eric Anholt (1):</p>
<ul>
<li>nir: Fix deref offset calculation for structs.</li>
</ul>
<p>Eric Engestrom (1):</p>
<ul>
<li>meson: remove meson-created megadrivers symlinks</li>
</ul>
<p>Jason Ekstrand (2):</p>
<ul>
<li>anv/pipeline: Fix MEDIA_VFE_STATE::PerThreadScratchSpace on gen7</li>
<li>anv: Add a #define for the max binding table size</li>
</ul>
<p>Juan A. Suarez Romero (1):</p>
<ul>
<li>meson: Add dependency on genxml to anvil genfiles</li>
</ul>
<p>Kenneth Graunke (2):</p>
<ul>
<li>glsl: Set location on structure-split sampler uniform variables</li>
<li>Revert "glsl: Set location on structure-split sampler uniform variables"</li>
</ul>
<p>Lionel Landwerlin (2):</p>
<ul>
<li>anv: fix uninitialized pthread cond clock domain</li>
<li>intel/devinfo: fix missing num_thread_per_eu on ICL</li>
</ul>
<p>Lubomir Rintel (2):</p>
<ul>
<li>gallivm: guess CPU features also on ARM</li>
<li>gallivm: disable NEON instructions if they are not supported</li>
</ul>
<p>Marek Olšák (1):</p>
<ul>
<li>radeonsi: use CP DMA for the null const buffer clear on CIK</li>
</ul>
<p>Rhys Perry (1):</p>
<ul>
<li>nir,ac/nir: fix cube_face_coord</li>
</ul>
<p>Roland Scheidegger (1):</p>
<ul>
<li>gallivm: fix bogus assert in get_indirect_index</li>
</ul>
<p>Samuel Pitoiset (2):</p>
<ul>
<li>ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+</li>
<li>radv: do not load vertex attributes that are not provided by the pipeline</li>
</ul>
</div>
</body>
</html>
......@@ -367,9 +367,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
info->has_ctx_priority = info->drm_minor >= 22;
/* TODO: Enable this once the kernel handles it efficiently. */
info->has_local_buffers = info->drm_minor >= 20 &&
!info->has_dedicated_vram;
info->has_local_buffers = info->drm_minor >= 20;
info->kernel_flushes_hdp_before_ib = true;
info->htile_cmask_support_1d_tiling = true;
info->si_TA_CS_BC_BASE_ADDR_allowed = true;
......
......@@ -1019,10 +1019,17 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
LLVMValueRef in[3];
for (unsigned chan = 0; chan < 3; chan++)
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
result = ac_build_gather_values(&ctx->ac, results, 2);
break;
}
......@@ -2532,7 +2539,10 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, ""); /* vindex */
params[param_count++] = ctx->ac.i32_0; /* voffset */
if (HAVE_LLVM >= 0x800) {
if (HAVE_LLVM >= 0x900) {
/* XXX: The new raw/struct atomic intrinsics are buggy
* with LLVM 8, see r358579.
*/
params[param_count++] = ctx->ac.i32_0; /* soffset */
params[param_count++] = ctx->ac.i32_0; /* slc */
......
......@@ -2027,10 +2027,32 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
if (ctx->options->key.vs.vertex_attribute_provided & (1u << attrib_index)) {
input = ac_build_buffer_load_format(&ctx->ac, t_list,
buffer_index,
ctx->ac.i32_0,
num_channels, false, true);
} else {
/* Per the Vulkan spec, it's invalid to consume vertex
* attributes that are not provided by the pipeline but
* some (invalid) apps appear to do that. Fill the
input array with (e.g. (0, 0, 0, 1)) to work around
* the problem and to avoid possible GPU hangs.
*/
LLVMValueRef chan[4];
/* The input_usage mask might be 0 if input variables
* are not removed by the compiler.
*/
num_channels = CLAMP(num_channels, 1, 4);
for (unsigned i = 0; i < num_channels; i++) {
chan[i] = i == 3 ? ctx->ac.f32_1 : ctx->ac.f32_0;
chan[i] = ac_to_float(&ctx->ac, chan[i]);
}
input = ac_build_gather_values(&ctx->ac, chan, num_channels);
}
input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels);
......
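The comment in the hunk above captures the reasoning: inputs the shader reads but the pipeline never describes are replaced with a constant (0, 0, 0, 1) instead of being fetched from a missing binding. A small illustrative sketch of that default-fill behaviour; the helper name and the fetch callback are hypothetical stand-ins for the LLVM-building code (ac_build_buffer_load_format / ac_build_gather_values):

def vs_input_value(provided_mask, attrib_index, num_channels, fetch):
    # If the pipeline describes this attribute, load it normally.
    if provided_mask & (1 << attrib_index):
        return fetch(attrib_index, num_channels)
    # Otherwise fill with (0, 0, 0, 1); the usage mask may ask for 0 channels
    # when dead inputs were not removed, so clamp like the patch does.
    num_channels = max(1, min(num_channels, 4))
    return (0.0, 0.0, 0.0, 1.0)[:num_channels]

# Hypothetical usage: attributes 0 and 1 provided, attribute 2 missing.
print(vs_input_value(0b011, 2, 4, lambda i, n: (1.0,) * n))  # (0.0, 0.0, 0.0, 1.0)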
......@@ -1922,6 +1922,8 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
}
key.vertex_alpha_adjust |= adjust << (2 * location);
}
key.vertex_attribute_provided |= 1 << location;
}
if (pCreateInfo->pTessellationState)
......@@ -1950,6 +1952,7 @@ radv_fill_shader_keys(struct radv_shader_variant_key *keys,
{
keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
keys[MESA_SHADER_VERTEX].vs.alpha_adjust = key->vertex_alpha_adjust;
keys[MESA_SHADER_VERTEX].vs.vertex_attribute_provided = key->vertex_attribute_provided;
for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i)
keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
......
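The key plumbing above records, per pipeline, which vertex attribute locations were actually described: a one-bit-per-location mask built at pipeline creation and consulted per attribute at shader compile time. A tiny sketch (locations chosen arbitrarily for illustration):

def build_provided_mask(locations):
    # Mirrors key.vertex_attribute_provided |= 1 << location
    mask = 0
    for location in locations:
        mask |= 1 << location
    return mask

mask = build_provided_mask([0, 1, 3])
assert mask & (1 << 1)        # attribute 1 is provided
assert not (mask & (1 << 2))  # attribute 2 is not, so the shader gets the default fill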
......@@ -365,6 +365,7 @@ struct radv_pipeline_cache {
struct radv_pipeline_key {
uint32_t instance_rate_inputs;
uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
uint32_t vertex_attribute_provided;
uint64_t vertex_alpha_adjust;
unsigned tess_input_vertices;
uint32_t col_format;
......
......@@ -66,6 +66,9 @@ struct radv_vs_variant_key {
uint32_t instance_rate_inputs;
uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
/* Mask of vertex attributes that are provided by the pipeline. */
uint32_t vertex_attribute_provided;
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
* so we may need to fix it up. */
uint64_t alpha_adjust;
......
......@@ -167,6 +167,14 @@ lower_deref(nir_builder *b, struct lower_samplers_as_deref_state *state,
} else {
var = nir_variable_create(state->shader, nir_var_uniform, type, name);
var->data.binding = binding;
/* Don't set var->data.location. The old structure location could be
* used to index into gl_uniform_storage, assuming the full structure
* was walked in order. With the new split variables, this invariant
* no longer holds and there's no meaningful way to start from a base
* location and access a particular array element. Just leave it 0.
*/
_mesa_hash_table_insert_pre_hashed(state->remap_table, hash, name, var);
}
......
......@@ -424,28 +424,14 @@ compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
struct explicit_location_info {
ir_variable *var;
unsigned numerical_type;
bool base_type_is_integer;
unsigned base_type_bit_size;
unsigned interpolation;
bool centroid;
bool sample;
bool patch;
};
static inline unsigned
get_numerical_type(const glsl_type *type)
{
/* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68,
* (Location aliasing):
*
* "Further, when location aliasing, the aliases sharing the location
* must have the same underlying numerical type (floating-point or
* integer)
*/
if (type->is_float() || type->is_double())
return GLSL_TYPE_FLOAT;
return GLSL_TYPE_INT;
}
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
ir_variable *var,
......@@ -461,14 +447,23 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
gl_shader_stage stage)
{
unsigned last_comp;
if (type->without_array()->is_record()) {
/* The component qualifier can't be used on structs so just treat
* all component slots as used.
unsigned base_type_bit_size;
const glsl_type *type_without_array = type->without_array();
const bool base_type_is_integer =
glsl_base_type_is_integer(type_without_array->base_type);
const bool is_struct = type_without_array->is_record();
if (is_struct) {
/* structs don't have a defined underlying base type so just treat all
* component slots as used and set the bit size to 0. If there is
* location aliasing, we'll fail anyway later.
*/
last_comp = 4;
base_type_bit_size = 0;
} else {
unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
last_comp = component + type->without_array()->vector_elements * dmul;
unsigned dmul = type_without_array->is_64bit() ? 2 : 1;
last_comp = component + type_without_array->vector_elements * dmul;
base_type_bit_size =
glsl_base_type_get_bit_size(type_without_array->base_type);
}
while (location < location_limit) {
......@@ -478,8 +473,22 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
&explicit_locations[location][comp];
if (info->var) {
/* Component aliasing is not allowed */
if (comp >= component && comp < last_comp) {
if (info->var->type->without_array()->is_record() || is_struct) {
/* Structs cannot share location since they are incompatible
* with any other underlying numerical type.
*/
linker_error(prog,
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"underlying numerical type. Struct variable '%s', "
"location %u\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ? "in" : "out",
is_struct ? var->name : info->var->name,
location);
return false;
} else if (comp >= component && comp < last_comp) {
/* Component aliasing is not allowed */
linker_error(prog,
"%s shader has multiple %sputs explicitly "
"assigned to location %d and component %d\n",
......@@ -488,27 +497,52 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
location, comp);
return false;
} else {
/* For all other used components we need to have matching
* types, interpolation and auxiliary storage
/* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
* Qualifiers, Page 67, (Location aliasing):
*
* " Further, when location aliasing, the aliases sharing the
* location must have the same underlying numerical type
* and bit width (floating-point or integer, 32-bit versus
* 64-bit, etc.) and the same auxiliary storage and
* interpolation qualification."
*/
/* If the underlying numerical type isn't integer, implicitly
* it will be float or else we would have failed by now.
*/
if (info->numerical_type !=
get_numerical_type(type->without_array())) {
if (info->base_type_is_integer != base_type_is_integer) {
linker_error(prog,
"Varyings sharing the same location must "
"have the same underlying numerical type. "
"Location %u component %u\n",
location, comp);
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"underlying numerical type. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location, comp);
return false;
}
if (info->base_type_bit_size != base_type_bit_size) {
linker_error(prog,
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"underlying numerical bit size. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location, comp);
return false;
}
if (info->interpolation != interpolation) {
linker_error(prog,
"%s shader has multiple %sputs at explicit "
"location %u with different interpolation "
"settings\n",
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"interpolation qualification. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location);
"in" : "out", location, comp);
return false;
}
......@@ -516,17 +550,20 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
info->sample != sample ||
info->patch != patch) {
linker_error(prog,
"%s shader has multiple %sputs at explicit "
"location %u with different aux storage\n",
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"auxiliary storage qualification. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location);
"in" : "out", location, comp);
return false;
}
}
} else if (comp >= component && comp < last_comp) {
info->var = var;
info->numerical_type = get_numerical_type(type->without_array());
info->base_type_is_integer = base_type_is_integer;
info->base_type_bit_size = base_type_bit_size;
info->interpolation = interpolation;
info->centroid = centroid;
info->sample = sample;
......
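Condensing the rule the linker now enforces: two non-struct variables may alias a location only when they agree on underlying numerical type (integer vs. float) and on bit size; structs never alias anything. A simplified sketch follows (interpolation and auxiliary-storage checks omitted; the dict keys stand in for the ir_variable/glsl_type queries used above):

def may_share_location(a, b):
    # Structs have no single underlying numerical type, so they can never
    # share a location with anything.
    if a['is_struct'] or b['is_struct']:
        return False
    # Same integer-vs-float class and same bit width are both required.
    return (a['is_integer'] == b['is_integer'] and
            a['bit_size'] == b['bit_size'])

flt32 = {'is_struct': False, 'is_integer': False, 'bit_size': 32}
int32 = {'is_struct': False, 'is_integer': True,  'bit_size': 32}
flt64 = {'is_struct': False, 'is_integer': False, 'bit_size': 64}
print(may_share_location(flt32, int32))  # False: different numerical type
print(may_share_location(flt32, flt64))  # False: different bit size
print(may_share_location(flt32, flt32))  # True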
......@@ -31,6 +31,7 @@
#include "shader_enums.h"
#include "blob.h"
#include "c11/threads.h"
#include "util/macros.h"
#ifdef __cplusplus
#include "main/config.h"
......@@ -114,6 +115,42 @@ static inline bool glsl_base_type_is_integer(enum glsl_base_type type)
type == GLSL_TYPE_IMAGE;
}
static inline unsigned int
glsl_base_type_get_bit_size(const enum glsl_base_type base_type)
{
switch (base_type) {
case GLSL_TYPE_BOOL:
return 1;
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
case GLSL_TYPE_SUBROUTINE:
return 32;
case GLSL_TYPE_FLOAT16:
case GLSL_TYPE_UINT16:
case GLSL_TYPE_INT16:
return 16;
case GLSL_TYPE_UINT8:
case GLSL_TYPE_INT8:
return 8;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_INT64:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_SAMPLER:
return 64;
default:
unreachable("unknown base type");
}
return 0;
}
enum glsl_sampler_dim {
GLSL_SAMPLER_DIM_1D = 0,
GLSL_SAMPLER_DIM_2D,
......
......@@ -215,7 +215,7 @@ nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
unsigned field_offset =
struct_type_get_field_offset(parent->type, size_align,
(*p)->strct.index);
nir_iadd(b, offset, nir_imm_int(b, field_offset));
offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
} else {
unreachable("Unsupported deref type");
}
......
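The one-line change above is the whole fix: NIR builder helpers such as nir_iadd return a new SSA value rather than modifying an operand, so discarding the return value silently drops the struct field offset. A toy Python illustration of the bug class (iadd here is a stand-in, not the NIR API):

def iadd(a, b):
    # Like nir_iadd: produces a new value, never mutates its inputs.
    return a + b

def offset_buggy(field_offsets):
    offset = 0
    for fo in field_offsets:
        iadd(offset, fo)            # result discarded -- this was the bug
    return offset

def offset_fixed(field_offsets):
    offset = 0
    for fo in field_offsets:
        offset = iadd(offset, fo)   # keep the returned value, as in the patch
    return offset

print(offset_buggy([8, 4]))  # 0 (wrong)
print(offset_fixed([8, 4]))  # 12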
......@@ -404,12 +404,21 @@ dst.x = dst.y = 0.0;
float absX = fabs(src0.x);
float absY = fabs(src0.y);
float absZ = fabs(src0.z);
if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.y; dst.y = -src0.z; }
if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = -src0.y; dst.y = src0.z; }
if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.z; dst.y = src0.x; }
if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = -src0.z; dst.y = src0.x; }
if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.y; dst.y = src0.x; }
if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.y; dst.y = -src0.x; }
float ma = 0.0;
if (absX >= absY && absX >= absZ) { ma = 2 * src0.x; }
if (absY >= absX && absY >= absZ) { ma = 2 * src0.y; }
if (absZ >= absX && absZ >= absY) { ma = 2 * src0.z; }
if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; }
if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; }
if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0.z; }
if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; }
if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; }
if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; }
dst.x = dst.x / ma + 0.5;
dst.y = dst.y / ma + 0.5;
""")
unop_horiz("cube_face_index", 1, tfloat32, 3, tfloat32, """
......
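The constant-folding expression above fully specifies the corrected opcode: pick the major axis, take the face-specific (s, t) numerators, then divide by ma = 2 * (major component) and bias by 0.5. The same math as a runnable Python reference, transcribed directly from the definition above (assumes a non-zero direction vector):

def cube_face_coord(x, y, z):
    ax, ay, az = abs(x), abs(y), abs(z)
    ma = 0.0
    s = t = 0.0
    if ax >= ay and ax >= az: ma = 2 * x
    if ay >= ax and ay >= az: ma = 2 * y
    if az >= ax and az >= ay: ma = 2 * z
    if x >= 0 and ax >= ay and ax >= az: s, t = -z, -y
    if x <  0 and ax >= ay and ax >= az: s, t =  z, -y
    if y >= 0 and ay >= ax and ay >= az: s, t =  x,  z
    if y <  0 and ay >= ax and ay >= az: s, t =  x, -z
    if z >= 0 and az >= ax and az >= ay: s, t =  x, -y
    if z <  0 and az >= ax and az >= ay: s, t = -x, -y
    return s / ma + 0.5, t / ma + 0.5

# A direction straight down +Z hits the centre of the +Z face:
print(cube_face_coord(0.0, 0.0, 1.0))  # (0.5, 0.5)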
......@@ -97,37 +97,7 @@ unsigned glsl_atomic_size(const struct glsl_type *type);
static inline unsigned
glsl_get_bit_size(const struct glsl_type *type)
{
switch (glsl_get_base_type(type)) {
case GLSL_TYPE_BOOL:
return 1;
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
case GLSL_TYPE_SUBROUTINE:
return 32;
case GLSL_TYPE_FLOAT16:
case GLSL_TYPE_UINT16:
case GLSL_TYPE_INT16:
return 16;
case GLSL_TYPE_UINT8:
case GLSL_TYPE_INT8:
return 8;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_INT64:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_SAMPLER:
return 64;
default:
unreachable("unknown base type");
}
return 0;
return glsl_base_type_get_bit_size(glsl_get_base_type(type));
}
bool glsl_type_is_16bit(const struct glsl_type *type);
......
......@@ -556,11 +556,11 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
llvm::SmallVector<std::string, 16> MAttrs;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#if HAVE_LLVM >= 0x0400
/* llvm-3.7+ implements sys::getHostCPUFeatures for x86,
* which allows us to enable/disable code generation based
* on the results of cpuid.
#if HAVE_LLVM >= 0x0400 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_ARM))
/* llvm-3.3+ implements sys::getHostCPUFeatures for Arm
* and llvm-3.7+ for x86, which allows us to enable/disable
* code generation based on the results of cpuid on these
* architectures.
*/
llvm::StringMap<bool> features;
llvm::sys::getHostCPUFeatures(features);
......@@ -570,7 +570,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
++f) {
MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
}
#else
#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/*
* We need to unset attributes because sometimes LLVM mistakenly assumes
* certain features are present given the processor name.
......@@ -625,6 +625,12 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
MAttrs.push_back("-avx512vl");
#endif
#endif
#if defined(PIPE_ARCH_ARM)
if (!util_cpu_caps.has_neon) {
MAttrs.push_back("-neon");
MAttrs.push_back("-crypto");
MAttrs.push_back("-vfp2");
}
#endif
#if defined(PIPE_ARCH_PPC)
......
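The gallivm change above does two things: it now also feeds the host's detected CPU features to LLVM on Arm (as "+feature" / "-feature" strings), and it force-disables neon, crypto and vfp2 when util_cpu_caps reports no NEON support. A data-only sketch of how such an attribute list is assembled (feature names are illustrative; the real code uses llvm::sys::getHostCPUFeatures):

def build_mattrs(host_features, is_arm, has_neon):
    # Turn each detected feature into an LLVM machine attribute string.
    mattrs = [('+' if present else '-') + name
              for name, present in host_features.items()]
    if is_arm and not has_neon:
        # Mirror the new PIPE_ARCH_ARM block: never let LLVM emit NEON
        # (or NEON-dependent) instructions on such cores.
        mattrs += ['-neon', '-crypto', '-vfp2']
    return mattrs

print(build_mattrs({'vfp3': True, 'thumb2': True}, is_arm=True, has_neon=False))
# ['+vfp3', '+thumb2', '-neon', '-crypto', '-vfp2']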
......@@ -1108,7 +1108,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
* larger than the declared size but smaller than the buffer size.
*/
if (reg_file != TGSI_FILE_CONSTANT) {
assert(index_limit > 0);
assert(index_limit >= 0);
max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
uint_bld->type, index_limit);
......
......@@ -272,7 +272,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
}
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
&clear_value, 4, SI_COHERENCY_CB_META);
&clear_value, 4, SI_COHERENCY_CB_META, false);
}
/* Set the same micro tile mode as the destination of the last MSAA resolve.
......@@ -505,7 +505,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
uint32_t clear_value = 0xCCCCCCCC;
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
tex->cmask_offset, tex->surface.cmask_size,
&clear_value, 4, SI_COHERENCY_CB_META);
&clear_value, 4, SI_COHERENCY_CB_META, false);
fmask_decompress_needed = true;
}
......@@ -533,7 +533,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
uint32_t clear_value = 0;
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
tex->cmask_offset, tex->surface.cmask_size,
&clear_value, 4, SI_COHERENCY_CB_META);
&clear_value, 4, SI_COHERENCY_CB_META, false);
eliminate_needed = true;
}
......
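All three call sites above now pass false for the new force_cpdma argument; the parameter itself (added in si_compute_blit.c below) only decides whether the compute-shader clear path may be taken or CP DMA must be used. A condensed sketch of that dispatch condition, lifted from the hunk below (names outside the condition are hypothetical):

def use_compute_clear(clear_value_size, offset, size, chip_class_at_most_vi,
                      force_cpdma=False):
    # Wide clear values always need the compute path; 4-byte, dword-aligned
    # clears use it for large sizes or pre-Vega chips unless CP DMA is forced.
    if clear_value_size > 4:
        return True
    return (not force_cpdma and
            clear_value_size == 4 and
            offset % 4 == 0 and
            (size > 32 * 1024 or chip_class_at_most_vi))

print(use_compute_clear(4, 0, 64 * 1024, False))                    # True
print(use_compute_clear(4, 0, 64 * 1024, False, force_cpdma=True))  # False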
......@@ -177,7 +177,8 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, uint32_t *clear_value,
uint32_t clear_value_size, enum si_coherency coher)
uint32_t clear_value_size, enum si_coherency coher,
bool force_cpdma)
{
if (!size)
return;
......@@ -241,7 +242,8 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
* about buffer placements.
*/
if (clear_value_size > 4 ||
(clear_value_size == 4 &&
(!force_cpdma &&
clear_value_size == 4 &&
offset % 4 == 0 &&
(size > 32*1024 || sctx->chip_class <= VI))) {
si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0,
......@@ -282,7 +284,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
coher = SI_COHERENCY_SHADER;
si_clear_buffer((struct si_context*)ctx, dst, offset, size, (uint32_t*)clear_value,
clear_value_size, coher);
clear_value_size, coher, false);
}
void si_copy_buffer(struct si_context *sctx,
......