Skip to content
Commits on Source (55)
......@@ -70,6 +70,7 @@ LOCAL_CFLAGS += \
-DHAVE_DLADDR \
-DHAVE_DL_ITERATE_PHDR \
-DHAVE_LINUX_FUTEX_H \
-DHAVE_ENDIAN_H \
-DHAVE_ZLIB \
-DMAJOR_IN_SYSMACROS \
-fvisibility=hidden \
......
......@@ -64,7 +64,8 @@ EXTRA_DIST = \
meson_options.txt \
bin/meson.build \
include/meson.build \
bin/install_megadrivers.py
bin/install_megadrivers.py \
bin/meson_get_version.py
noinst_HEADERS = \
include/c99_alloca.h \
......
......@@ -4,3 +4,20 @@ ac4437b20b87c7285b89466f05b51518ae616873 automake: small cleanup after the meson
# stable: The KHX extension is disabled all together in the stable branches.
2ffe395cba0f7b3c1f1c41062f4376eae3a188b5 radv: Don't expose VK_KHX_multiview on android.
# stable: There is a specific version for this patch for stable branches, but
# it is causing regressions.
85d0bec9616bc1ffa8e4ab5e7c5d12ff4e414872 anv: Be more careful about fast-clear colors
# fixes: The commit fixes earlier commit 1c57a6da5e3 which did not land in
# branch.
3401b028df1074a06a7fbc3fb1cda949646ef75d ac/shader: fix vertex input with components.
# fixes: The commit requires earlier commits 639c4f2b54a6 and 2cfba40eea4c
# which did not land in branch.
8f052a3e257a61240cb311032497d016278117a8 radv: handle exporting view index to fragment shader. (v1.1)
# fixes: The commit fixes earlier commits 83d4a5d5aea5a8a05be2,
# b2f2236dc565dd1460f0 and c62cf1f165919bc74296 which did not land in
# branch.
880c1718b6d14b33fe5ba918af70fea5be890c6b omx: always define ENABLE_ST_OMX_{BELLAGIO,TIZONIA}
......@@ -58,7 +58,7 @@ def main():
while ext != '.so':
if os.path.exists(name):
os.unlink(name)
os.symlink(driver, name)
os.symlink(each, name)
name, ext = os.path.splitext(name)
finally:
os.chdir(ret)
......
......@@ -862,6 +862,7 @@ fi
AC_HEADER_MAJOR
AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
AC_CHECK_HEADERS([endian.h])
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"])
......
mesa (18.0.1-1) UNRELEASED; urgency=medium
* New upstream release.
-- Timo Aaltonen <tjaalton@debian.org> Wed, 18 Apr 2018 19:28:11 +0300
mesa (18.0.0-1) experimental; urgency=medium
* New upstream release.
......
......@@ -88,22 +88,40 @@ This is a work-around for that.
<li>MESA_GL_VERSION_OVERRIDE - changes the value returned by
glGetString(GL_VERSION) and possibly the GL API type.
<ul>
<li> The format should be MAJOR.MINOR[FC]
<li> FC is an optional suffix that indicates a forward compatible context.
This is only valid for versions &gt;= 3.0.
<li> GL versions &lt; 3.0 are set to a compatibility (non-Core) profile
<li> GL versions = 3.0, see below
<li> GL versions &gt; 3.0 are set to a Core profile
<li> Examples: 2.1, 3.0, 3.0FC, 3.1, 3.1FC
<li>The format should be MAJOR.MINOR[FC|COMPAT]
<li>FC is an optional suffix that indicates a forward compatible
context. This is only valid for versions &gt;= 3.0.
<li>COMPAT is an optional suffix that indicates a compatibility
context or GL_ARB_compatibility support. This is only valid for
versions &gt;= 3.1.
<li>GL versions &lt;= 3.0 are set to a compatibility (non-Core)
profile
<li>GL versions = 3.1, depending on the driver, it may or may not
have the ARB_compatibility extension enabled.
<li>GL versions &gt;= 3.2 are set to a Core profile
<li>Examples: 2.1, 3.0, 3.0FC, 3.1, 3.1FC, 3.1COMPAT, X.Y, X.YFC,
X.YCOMPAT.
<ul>
<li> 2.1 - select a compatibility (non-Core) profile with GL version 2.1
<li> 3.0 - select a compatibility (non-Core) profile with GL version 3.0
<li> 3.0FC - select a Core+Forward Compatible profile with GL version 3.0
<li> 3.1 - select a Core profile with GL version 3.1
<li> 3.1FC - select a Core+Forward Compatible profile with GL version 3.1
<li>2.1 - select a compatibility (non-Core) profile with GL
version 2.1.
<li>3.0 - select a compatibility (non-Core) profile with GL
version 3.0.
<li>3.0FC - select a Core+Forward Compatible profile with GL
version 3.0.
<li>3.1 - select GL version 3.1 with GL_ARB_compatibility enabled
per the driver default.
<li>3.1FC - select GL version 3.1 with forward compatibility and
GL_ARB_compatibility disabled.
<li>3.1COMPAT - select GL version 3.1 with GL_ARB_compatibility
enabled.
<li>X.Y - override GL version to X.Y without changing the profile.
<li>X.YFC - select a Core+Forward Compatible profile with GL
version X.Y.
<li>X.YCOMPAT - select a Compatibility profile with GL version
X.Y.
</ul>
<li> Mesa may not really implement all the features of the given version.
(for developers only)
<li>Mesa may not really implement all the features of the given
version. (for developers only)
</ul>
<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
glGetString(GL_VERSION) for OpenGL ES.
......
......@@ -14,15 +14,15 @@
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.4.0 Release Notes / March 27 2018</h1>
<h1>Mesa 18.0.0 Release Notes / March 27 2018</h1>
<p>
Mesa 17.4.0 is a new development release.
Mesa 18.0.0 is a new development release.
People who are concerned with stability and reliability should stick
with a previous release or wait for Mesa 17.4.1.
with a previous release or wait for Mesa 18.0.1.
</p>
<p>
Mesa 17.4.0 implements the OpenGL 4.5 API, but the version reported by
Mesa 18.0.0 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
......@@ -33,7 +33,8 @@ because compatibility contexts are not supported.
<h2>SHA256 checksums</h2>
<pre>
TBD.
93c2d3504b2871ac2146603fb1270f341d36a39695e2950a469c5eac74f98457 mesa-18.0.0.tar.gz
694e5c3d37717d23258c1f88bc134223c5d1aac70518d2f9134d6df3ee791eea mesa-18.0.0.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 18.0.1 Release Notes / April 18, 2018</h1>
<p>
Mesa 18.0.1 is a bug fix release which fixes bugs found since the 18.0.0 release.
</p>
<p>
Mesa 18.0.1 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101408">Bug 101408</a> - [Gen8+] Xonotic fails to render one of the weapons</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102342">Bug 102342</a> - mesa-17.1.7/src/gallium/auxiliary/pipebuffer/pb_cache.c:169]: (style) Suspicious condition</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102542">Bug 102542</a> - mesa-17.2.0/src/gallium/state_trackers/nine/nine_ff.c:1938: bad assignment ?</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105317">Bug 105317</a> - The GPU Vega 56 was hang while try to pass #GraphicsFuzz shader15 test</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105440">Bug 105440</a> - GEN7: rendering issue on citra</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105442">Bug 105442</a> - Hang when running nine ff lighting shader with radeonsi</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105567">Bug 105567</a> - meson/ninja: 1. mesa/vdpau incorrect symlinks in DESTDIR and 2. Ddri-drivers-path Dvdpau-libs-path overrides DESTDIR</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105670">Bug 105670</a> - [regression][hang] Trine1EE hangs GPU after loading screen on Mesa3D-17.3 and later</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105704">Bug 105704</a> - compiler assertion hit</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105717">Bug 105717</a> - [bisected] Mesa build tests fails: BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105942">Bug 105942</a> - Graphical artefacts after update to mesa 18.0.0-2</li>
</ul>
<h2>Changes</h2>
<p>Andres Gomez (2):</p>
<ul>
<li>dri_util: when overriding, always reset the core version</li>
<li>mesa: adds some comments regarding MESA_GLES_VERSION_OVERRIDE usage</li>
</ul>
<p>Axel Davy (5):</p>
<ul>
<li>st/nine: Fix bad tracking of vs textures for NINESBT_ALL</li>
<li>st/nine: Fixes warning about implicit conversion</li>
<li>st/nine: Fix non inversible matrix check</li>
<li>st/nine: Declare lighting consts for ff shaders</li>
<li>st/nine: Do not use scratch for face register</li>
</ul>
<p>Bas Nieuwenhuizen (3):</p>
<ul>
<li>ac/nir: Add workaround for GFX9 buffer views.</li>
<li>radv: Don't set instance count using predication.</li>
<li>radv: Always reset draw user SGPRs after secondary command buffer.</li>
</ul>
<p>Caio Marcelo de Oliveira Filho (1):</p>
<ul>
<li>anv/pipeline: fail if TCS/TES compile fail</li>
</ul>
<p>Daniel Stone (1):</p>
<ul>
<li>st/dri: Initialise modifier to INVALID for DRI2</li>
</ul>
<p>Derek Foreman (1):</p>
<ul>
<li>egl/wayland: Make swrast display_sync the correct queue</li>
</ul>
<p>Dylan Baker (4):</p>
<ul>
<li>meson: don't use compiler.has_header</li>
<li>autotools: include meson_get_version</li>
<li>meson: Set .so version for xa like autotools does</li>
<li>meson: fix megadriver symlinking</li>
</ul>
<p>Emil Velikov (1):</p>
<ul>
<li>docs: add sha256 checksums for 18.0.0</li>
</ul>
<p>Eric Engestrom (3):</p>
<ul>
<li>meson/configure: detect endian.h instead of trying to guess when it's available</li>
<li>docs: fix 18.0 release note version</li>
<li>gbm: remove never-implemented function</li>
</ul>
<p>Henri Verbeet (1):</p>
<ul>
<li>mesa: Inherit texture view multi-sample information from the original texture images.</li>
</ul>
<p>Iago Toral Quiroga (1):</p>
<ul>
<li>compiler/spirv: set is_shadow for depth comparitor sampling opcodes</li>
</ul>
<p>Ian Romanick (1):</p>
<ul>
<li>i965/vec4: Fix null destination register in 3-source instructions</li>
</ul>
<p>Jason Ekstrand (4):</p>
<ul>
<li>nir/vars_to_ssa: Remove copies from the correct set</li>
<li>nir/lower_indirect_derefs: Support interp_var_at intrinsics</li>
<li>intel/vec4: Set channel_sizes for MOV_INDIRECT sources</li>
<li>nir/lower_vec_to_movs: Only coalesce if the vec had a SSA destination</li>
</ul>
<p>Juan A. Suarez Romero (5):</p>
<ul>
<li>cherry-ignore anv: Be more careful about fast-clear colors</li>
<li>cherry-ignore: ac/shader: fix vertex input with components.</li>
<li>cherry-ignore: radv: handle exporting view index to fragment shader. (v1.1)</li>
<li>cherry-ignore: omx: always define ENABLE_ST_OMX_{BELLAGIO,TIZONIA}</li>
<li>Update version to 18.0.1</li>
</ul>
<p>Leo Liu (1):</p>
<ul>
<li>radeon/vce: move feedback command inside of destroy function</li>
</ul>
<p>Lionel Landwerlin (1):</p>
<ul>
<li>i965/perf: fix config registration when uploading to kernel</li>
</ul>
<p>Marc Dietrich (1):</p>
<ul>
<li>meson: fix HAVE_LLVM version define in meson build</li>
</ul>
<p>Marek Olšák (1):</p>
<ul>
<li>mesa: simplify MESA_GL_VERSION_OVERRIDE behavior of API override</li>
</ul>
<p>Mark Thompson (1):</p>
<ul>
<li>st/va: Enable vaExportSurfaceHandle()</li>
</ul>
<p>Rob Clark (3):</p>
<ul>
<li>nir: fix per_vertex_output intrinsic</li>
<li>freedreno/a5xx: fix page faults on last level</li>
<li>freedreno/a5xx: don't align height for PIPE_BUFFER</li>
</ul>
<p>Samuel Pitoiset (2):</p>
<ul>
<li>radv: fix picking the method for resolve subpass</li>
<li>radv: fix radv_layout_dcc_compressed() when image doesn't have DCC</li>
</ul>
<p>Sergii Romantsov (1):</p>
<ul>
<li>i965: Extend the negative 32-bit deltas to 64-bits</li>
</ul>
<p>Timothy Arceri (7):</p>
<ul>
<li>ac: add if/loop build helpers</li>
<li>radeonsi: make use of if/loop build helpers in ac</li>
<li>ac: make use of if/loop build helpers</li>
<li>glsl: fix infinite loop caused by bug in loop unrolling pass</li>
<li>nir: fix crash in loop unroll corner case</li>
<li>gallium/pipebuffer: fix parenthesis location</li>
<li>glsl: always call do_lower_jumps() after loop unrolling</li>
</ul>
<p>Xiong, James (1):</p>
<ul>
<li>i965: return the fourcc saved in __DRIimage when possible</li>
</ul>
</div>
</body>
</html>
......@@ -838,8 +838,8 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major')
pre_args += '-DMAJOR_IN_MKDEV'
endif
foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h']
if cc.has_header(h)
foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h']
if cc.compiles('#include <@0@>'.format(h), name : '@0@ works'.format(h))
pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
endif
endforeach
......@@ -1027,7 +1027,7 @@ if with_llvm
_llvm_patch = _llvm_patch.split('g')[0]
endif
pre_args += [
'-DHAVE_LLVM=0x0@0@@1@@2@'.format(_llvm_version[0], _llvm_version[1], _llvm_patch),
'-DHAVE_LLVM=0x0@0@0@1@'.format(_llvm_version[0], _llvm_version[1]),
'-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
]
elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
......
......@@ -352,6 +352,9 @@ def generate(env):
if check_header(env, 'xlocale.h'):
cppdefines += ['HAVE_XLOCALE_H']
if check_header(env, 'endian.h'):
cppdefines += ['HAVE_ENDIAN_H']
if check_functions(env, ['strtod_l', 'strtof_l']):
cppdefines += ['HAVE_STRTOD_L']
......
......@@ -41,6 +41,16 @@
#include "shader_enums.h"
#define AC_LLVM_INITIAL_CF_DEPTH 4
/* Data for if/else/endif and bgnloop/endloop control flow structures.
*/
struct ac_llvm_flow {
/* Loop exit or next part of if/else/endif. */
LLVMBasicBlockRef next_block;
LLVMBasicBlockRef loop_entry_block;
};
/* Initialize module-independent parts of the context.
*
* The caller is responsible for initializing ctx::module and ctx::builder.
......@@ -103,6 +113,14 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx)
{
free(ctx->flow);
ctx->flow = NULL;
ctx->flow_depth_max = 0;
}
int
ac_get_llvm_num_components(LLVMValueRef value)
{
......@@ -1010,6 +1028,26 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
ac_get_load_intr_attribs(can_speculate));
}
LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
bool can_speculate)
{
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), "");
stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
elem_count, stride, "");
LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
LLVMConstInt(ctx->i32, 2, 0), "");
return ac_build_buffer_load_format(ctx, new_rsrc, vindex, voffset, can_speculate);
}
/**
* Set range metadata on an instruction. This can only be used on load and
* call instructions. If you know an instruction can only produce the values
......@@ -1865,3 +1903,174 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
ctx->i32_0, ""),
LLVMConstInt(ctx->i32, -1, 0), lsb, "");
}
static struct ac_llvm_flow *
get_current_flow(struct ac_llvm_context *ctx)
{
if (ctx->flow_depth > 0)
return &ctx->flow[ctx->flow_depth - 1];
return NULL;
}
static struct ac_llvm_flow *
get_innermost_loop(struct ac_llvm_context *ctx)
{
for (unsigned i = ctx->flow_depth; i > 0; --i) {
if (ctx->flow[i - 1].loop_entry_block)
return &ctx->flow[i - 1];
}
return NULL;
}
static struct ac_llvm_flow *
push_flow(struct ac_llvm_context *ctx)
{
struct ac_llvm_flow *flow;
if (ctx->flow_depth >= ctx->flow_depth_max) {
unsigned new_max = MAX2(ctx->flow_depth << 1,
AC_LLVM_INITIAL_CF_DEPTH);
ctx->flow = realloc(ctx->flow, new_max * sizeof(*ctx->flow));
ctx->flow_depth_max = new_max;
}
flow = &ctx->flow[ctx->flow_depth];
ctx->flow_depth++;
flow->next_block = NULL;
flow->loop_entry_block = NULL;
return flow;
}
static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
int label_id)
{
char buf[32];
snprintf(buf, sizeof(buf), "%s%d", base, label_id);
LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
}
/* Append a basic block at the level of the parent flow.
*/
static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
const char *name)
{
assert(ctx->flow_depth >= 1);
if (ctx->flow_depth >= 2) {
struct ac_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
return LLVMInsertBasicBlockInContext(ctx->context,
flow->next_block, name);
}
LLVMValueRef main_fn =
LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
}
/* Emit a branch to the given default target for the current block if
* applicable -- that is, if the current block does not already contain a
* branch from a break or continue.
*/
static void emit_default_branch(LLVMBuilderRef builder,
LLVMBasicBlockRef target)
{
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
LLVMBuildBr(builder, target);
}
void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
{
struct ac_llvm_flow *flow = push_flow(ctx);
flow->loop_entry_block = append_basic_block(ctx, "LOOP");
flow->next_block = append_basic_block(ctx, "ENDLOOP");
set_basicblock_name(flow->loop_entry_block, "loop", label_id);
LLVMBuildBr(ctx->builder, flow->loop_entry_block);
LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
}
void ac_build_break(struct ac_llvm_context *ctx)
{
struct ac_llvm_flow *flow = get_innermost_loop(ctx);
LLVMBuildBr(ctx->builder, flow->next_block);
}
void ac_build_continue(struct ac_llvm_context *ctx)
{
struct ac_llvm_flow *flow = get_innermost_loop(ctx);
LLVMBuildBr(ctx->builder, flow->loop_entry_block);
}
void ac_build_else(struct ac_llvm_context *ctx, int label_id)
{
struct ac_llvm_flow *current_branch = get_current_flow(ctx);
LLVMBasicBlockRef endif_block;
assert(!current_branch->loop_entry_block);
endif_block = append_basic_block(ctx, "ENDIF");
emit_default_branch(ctx->builder, endif_block);
LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
set_basicblock_name(current_branch->next_block, "else", label_id);
current_branch->next_block = endif_block;
}
void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
{
struct ac_llvm_flow *current_branch = get_current_flow(ctx);
assert(!current_branch->loop_entry_block);
emit_default_branch(ctx->builder, current_branch->next_block);
LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
set_basicblock_name(current_branch->next_block, "endif", label_id);
ctx->flow_depth--;
}
void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
{
struct ac_llvm_flow *current_loop = get_current_flow(ctx);
assert(current_loop->loop_entry_block);
emit_default_branch(ctx->builder, current_loop->loop_entry_block);
LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
set_basicblock_name(current_loop->next_block, "endloop", label_id);
ctx->flow_depth--;
}
static void if_cond_emit(struct ac_llvm_context *ctx, LLVMValueRef cond,
int label_id)
{
struct ac_llvm_flow *flow = push_flow(ctx);
LLVMBasicBlockRef if_block;
if_block = append_basic_block(ctx, "IF");
flow->next_block = append_basic_block(ctx, "ELSE");
set_basicblock_name(if_block, "if", label_id);
LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
LLVMPositionBuilderAtEnd(ctx->builder, if_block);
}
void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
int label_id)
{
LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
value, ctx->f32_0, "");
if_cond_emit(ctx, cond, label_id);
}
void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
int label_id)
{
LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
ac_to_integer(ctx, value),
ctx->i32_0, "");
if_cond_emit(ctx, cond, label_id);
}
......@@ -38,6 +38,8 @@ enum {
AC_LOCAL_ADDR_SPACE = 3,
};
struct ac_llvm_flow;
struct ac_llvm_context {
LLVMContextRef context;
LLVMModuleRef module;
......@@ -70,6 +72,10 @@ struct ac_llvm_context {
LLVMValueRef i1true;
LLVMValueRef i1false;
struct ac_llvm_flow *flow;
unsigned flow_depth;
unsigned flow_depth_max;
unsigned range_md_kind;
unsigned invariant_load_md_kind;
unsigned uniform_md_kind;
......@@ -87,6 +93,9 @@ void
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
enum chip_class chip_class, enum radeon_family family);
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
int
ac_get_llvm_num_components(LLVMValueRef value);
......@@ -216,6 +225,14 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef voffset,
bool can_speculate);
/* load_format that handles the stride & element count better if idxen is
* disabled by LLVM. */
LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
bool can_speculate);
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
......@@ -326,6 +343,18 @@ void ac_lds_store(struct ac_llvm_context *ctx,
LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
LLVMTypeRef dst_type,
LLVMValueRef src0);
void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
void ac_build_break(struct ac_llvm_context *ctx);
void ac_build_continue(struct ac_llvm_context *ctx);
void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
int lable_id);
void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
int lable_id);
#ifdef __cplusplus
}
#endif
......
......@@ -2308,12 +2308,20 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
struct ac_image_args *args)
{
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
if (ctx->abi->gfx9_stride_size_workaround) {
return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
args->resource,
args->addr,
ctx->ac.i32_0,
true);
} else {
return ac_build_buffer_load_format(&ctx->ac,
args->resource,
args->addr,
ctx->ac.i32_0,
true);
}
}
args->opcode = ac_image_sample;
args->compare = instr->is_shadow;
......@@ -3685,8 +3693,23 @@ static void visit_image_store(struct ac_nir_context *ctx,
glc = ctx->ac.i1true;
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
if (ctx->abi->gfx9_stride_size_workaround) {
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
elem_count, stride, "");
rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
LLVMConstInt(ctx->ac.i32, 2, 0), "");
}
params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
params[1] = rsrc;
params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
ctx->ac.i32_0, ""); /* vindex */
params[3] = ctx->ac.i32_0; /* voffset */
......@@ -5190,17 +5213,15 @@ static void visit_ssa_undef(struct ac_nir_context *ctx,
_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
}
static void visit_jump(struct ac_nir_context *ctx,
static void visit_jump(struct ac_llvm_context *ctx,
const nir_jump_instr *instr)
{
switch (instr->type) {
case nir_jump_break:
LLVMBuildBr(ctx->ac.builder, ctx->break_block);
LLVMClearInsertionPosition(ctx->ac.builder);
ac_build_break(ctx);
break;
case nir_jump_continue:
LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
LLVMClearInsertionPosition(ctx->ac.builder);
ac_build_continue(ctx);
break;
default:
fprintf(stderr, "Unknown NIR jump instr: ");
......@@ -5238,7 +5259,7 @@ static void visit_block(struct ac_nir_context *ctx, nir_block *block)
visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
break;
case nir_instr_type_jump:
visit_jump(ctx, nir_instr_as_jump(instr));
visit_jump(&ctx->ac, nir_instr_as_jump(instr));
break;
default:
fprintf(stderr, "Unknown NIR instr type: ");
......@@ -5255,56 +5276,34 @@ static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
{
LLVMValueRef value = get_src(ctx, if_stmt->condition);
LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
LLVMBasicBlockRef merge_block =
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
LLVMBasicBlockRef if_block =
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
LLVMBasicBlockRef else_block = merge_block;
if (!exec_list_is_empty(&if_stmt->else_list))
else_block = LLVMAppendBasicBlockInContext(
ctx->ac.context, fn, "");
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
ctx->ac.i32_0, "");
LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
nir_block *then_block =
(nir_block *) exec_list_get_head(&if_stmt->then_list);
ac_build_uif(&ctx->ac, value, then_block->index);
LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
visit_cf_list(ctx, &if_stmt->then_list);
if (LLVMGetInsertBlock(ctx->ac.builder))
LLVMBuildBr(ctx->ac.builder, merge_block);
if (!exec_list_is_empty(&if_stmt->else_list)) {
LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
nir_block *else_block =
(nir_block *) exec_list_get_head(&if_stmt->else_list);
ac_build_else(&ctx->ac, else_block->index);
visit_cf_list(ctx, &if_stmt->else_list);
if (LLVMGetInsertBlock(ctx->ac.builder))
LLVMBuildBr(ctx->ac.builder, merge_block);
}
LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
ac_build_endif(&ctx->ac, then_block->index);
}
static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
{
LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
LLVMBasicBlockRef continue_parent = ctx->continue_block;
LLVMBasicBlockRef break_parent = ctx->break_block;
nir_block *first_loop_block =
(nir_block *) exec_list_get_head(&loop->body);
ctx->continue_block =
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
ctx->break_block =
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
ac_build_bgnloop(&ctx->ac, first_loop_block->index);
LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
visit_cf_list(ctx, &loop->body);
if (LLVMGetInsertBlock(ctx->ac.builder))
LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
ctx->continue_block = continue_parent;
ctx->break_block = break_parent;
ac_build_endloop(&ctx->ac, first_loop_block->index);
}
static void visit_cf_list(struct ac_nir_context *ctx,
......@@ -6629,6 +6628,8 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
LLVMDisposeBuilder(ctx->builder);
LLVMDisposePassManager(passmgr);
ac_llvm_context_dispose(&ctx->ac);
}
static void
......@@ -6851,6 +6852,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.abi.load_ssbo = radv_load_ssbo;
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.clamp_shadow_reference = false;
ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
if (shader_count >= 2)
ac_init_exec_full_mask(&ctx.ac);
......
......@@ -145,6 +145,10 @@ struct ac_shader_abi {
/* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
* uses it due to promoting D16 to D32, but radv needs it off. */
bool clamp_shadow_reference;
/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
* and LLVM optimizes an indexed load with constant index to IDXEN=0. */
bool gfx9_stride_size_workaround;
};
#endif /* AC_SHADER_ABI_H */
......@@ -3006,20 +3006,9 @@ void radv_CmdExecuteCommands(
secondary->state.last_ia_multi_vgt_param;
}
if (secondary->state.last_first_instance != -1) {
primary->state.last_first_instance =
secondary->state.last_first_instance;
}
if (secondary->state.last_num_instances != -1) {
primary->state.last_num_instances =
secondary->state.last_num_instances;
}
if (secondary->state.last_vertex_offset != -1) {
primary->state.last_vertex_offset =
secondary->state.last_vertex_offset;
}
primary->state.last_first_instance = secondary->state.last_first_instance;
primary->state.last_num_instances = secondary->state.last_num_instances;
primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
if (secondary->state.last_index_type != -1) {
primary->state.last_index_type =
......@@ -3364,7 +3353,7 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
}
if (state->last_num_instances != info->instance_count) {
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, state->predicating));
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
radeon_emit(cs, info->instance_count);
state->last_num_instances = info->instance_count;
}
......
......@@ -1172,7 +1172,7 @@ bool radv_layout_dcc_compressed(const struct radv_image *image,
(queue_mask & (1u << RADV_QUEUE_COMPUTE)))
return false;
return image->surface.num_dcc_levels > 0 && layout != VK_IMAGE_LAYOUT_GENERAL;
return image->surface.dcc_size && layout != VK_IMAGE_LAYOUT_GENERAL;
}
......
......@@ -621,7 +621,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
radv_pick_resolve_method_images(dst_img, src_img, dest_att.layout, cmd_buffer, &resolve_method);
radv_pick_resolve_method_images(src_img, dst_img, dest_att.layout, cmd_buffer, &resolve_method);
if (resolve_method == RESOLVE_FRAGMENT) {
break;
}
......
......@@ -2239,6 +2239,24 @@ do_common_optimization(exec_list *ir, bool linked,
loop_progress = false;
loop_progress |= do_constant_propagation(ir);
loop_progress |= do_if_simplification(ir);
/* Some drivers only call do_common_optimization() once rather
* than in a loop. So we must call do_lower_jumps() after
* unrolling a loop because for drivers that use LLVM validation
* will fail if a jump is not the last instruction in the block.
* For example the following will fail LLVM validation:
*
* (loop (
* ...
* break
* (assign (x) (var_ref v124) (expression int + (var_ref v124)
* (constant int (1)) ) )
* ))
*/
loop_progress |= do_lower_jumps(ir, true, true,
options->EmitNoMainReturn,
options->EmitNoCont,
options->EmitNoLoops);
}
progress |= loop_progress;
}
......