Skip to content
Commits on Source (81)
......@@ -13,3 +13,5 @@ c9f54486959716762e6818dabb0a73a8cd46df67 radeonsi: fix regression in indirect in
4aec44c0d9c4c0649c362199fac97efe0a3b38a4 i965/tools: 32bit compilation with meson
# pick: This commit was reverted by commit 95bb7d82ca8.
90819abb56f6b1a0cd4946b13b6caf24fb46e500 radv: fix descriptor pool allocation size
# pick: There is a specific patch for stable branch for this commit.
0d495bec25bd7584de4e988c2b4528c1996bc1d0 radeonsi: NaN should pass kill_if
mesa (18.2.2-1) UNRELEASED; urgency=medium
* New upstream release.
- vulkan: Disable randr lease for libxcb < 1.13 (Closes: #908827,
#909720).
* Drop version from build-deps libxcb-dri3-dev and libxcb-present-dev.
* Bump libdrm-dev dependency to 2.4.92.
* Enable building etnaviv and imx on arm64 (Closes: #907136).
-- Andreas Boll <aboll@debian.org> Wed, 10 Oct 2018 20:06:00 +0200
mesa (18.2.1-1) experimental; urgency=medium
* New upstream release.
......@@ -42,6 +53,15 @@ mesa (18.2.0~rc4-1) experimental; urgency=medium
-- Timo Aaltonen <tjaalton@debian.org> Wed, 22 Aug 2018 19:59:55 +0300
mesa (18.1.9-1) unstable; urgency=medium
* New upstream release.
* Cherry-pick 2f1ad72ac17 (clover: Add explicit virtual destructor to
argument class) from upstream master branch. Fixes memory leaks with
some OpenCL apps (Closes: #909741). Thanks, David Kuehling!
-- Andreas Boll <aboll@debian.org> Wed, 10 Oct 2018 16:54:18 +0200
mesa (18.1.8-1) unstable; urgency=medium
* New upstream release.
......
......@@ -8,7 +8,7 @@ Build-Depends:
debhelper (>= 11),
quilt (>= 0.63-8.2~),
pkg-config,
libdrm-dev (>= 2.4.91) [!hurd-any],
libdrm-dev (>= 2.4.92) [!hurd-any],
libx11-dev,
libxxf86vm-dev,
libexpat1-dev,
......@@ -28,8 +28,8 @@ Build-Depends:
libxcb-dri2-0-dev (>= 1.8),
libxcb-glx0-dev (>= 1.8.1),
libxcb-xfixes0-dev,
libxcb-dri3-dev (>= 1.13),
libxcb-present-dev (>= 1.13),
libxcb-dri3-dev,
libxcb-present-dev,
libxcb-randr0-dev,
libxcb-sync-dev,
libxrandr-dev,
......@@ -173,7 +173,7 @@ Section: libdevel
Architecture: any
Depends:
libglvnd-dev,
libdrm-dev (>= 2.4.91) [!hurd-any],
libdrm-dev (>= 2.4.92) [!hurd-any],
libx11-dev,
libxext-dev,
libxxf86vm-dev,
......@@ -319,7 +319,7 @@ Architecture: any
Depends:
mesa-common-dev (= ${binary:Version}),
libglvnd-dev,
libdrm-dev (>= 2.4.91) [!hurd-any],
libdrm-dev (>= 2.4.92) [!hurd-any],
libx11-dev,
libx11-xcb-dev,
libxcb-dri3-dev,
......@@ -354,7 +354,7 @@ Section: libdevel
Architecture: any
Depends:
libx11-dev,
libdrm-dev (>= 2.4.91) [!hurd-any],
libdrm-dev (>= 2.4.92) [!hurd-any],
${misc:Depends},
Multi-Arch: same
Description: Developer documentation for Mesa
......
......@@ -56,14 +56,9 @@ else
GALLIUM_DRIVERS += freedreno
endif
# tegra, vc4 and v3d kernel support are only available on armhf and arm64
# etnaviv, imx, tegra, vc4 and v3d kernel support are only available on armhf and arm64
ifneq (,$(filter $(DEB_HOST_ARCH), armhf arm64))
GALLIUM_DRIVERS += tegra vc4 v3d
endif
# etnaviv kernel support is currently only available on armhf
ifneq (,$(filter $(DEB_HOST_ARCH), armhf))
GALLIUM_DRIVERS += etnaviv imx
GALLIUM_DRIVERS += etnaviv imx tegra vc4 v3d
endif
ifneq (,$(filter $(DEB_HOST_ARCH), amd64 i386 x32))
......
......@@ -16,7 +16,7 @@
<center>
<h1>Mesa Frequently Asked Questions</h1>
Last updated: 9 October 2012
Last updated: 19 September 2018
</center>
<br>
......@@ -373,18 +373,16 @@ the archives) is a good way to get information.
<h2>4.3 Why isn't GL_EXT_texture_compression_s3tc implemented in Mesa?</h2>
<p>
The <a href="http://oss.sgi.com/projects/ogl-sample/registry/EXT/texture_compression_s3tc.txt">specification for the extension</a>
indicates that there are intellectual property (IP) and/or patent issues
to be dealt with.
Oh but it is! Prior to 2nd October 2017, the Mesa project did not include s3tc
support due to intellectual property (IP) and/or patent issues around the s3tc
algorithm.
</p>
<p>We've been unsuccessful in getting a response from S3 (or whoever owns
the IP nowadays) to indicate whether or not an open source project can
implement the extension (specifically the compression/decompression
algorithms).
<p>
As of Mesa 17.3.0, Mesa officially supports s3tc, as the patent has expired.
</p>
<p>
In the mean time, a 3rd party <a href="https://dri.freedesktop.org/wiki/S3TC">
plug-in library</a> is available.
In versions prior to this, a 3rd party <a href="https://dri.freedesktop.org/wiki/S3TC">
plug-in library</a> was required.
</p>
</div>
......
......@@ -31,7 +31,8 @@ Compatibility contexts may report a lower version depending on each driver.
<h2>SHA256 checksums</h2>
<pre>
TBD
SHA256: 45419ccbe1bf9a2e15ffe71ced34615002e1b42c24b917fbe2b2f58ab1970562 mesa-18.2.1.tar.gz
SHA256: 9636dc6f3d188abdcca02da97cedd73640d9035224efd5db724187d062c81056 mesa-18.2.1.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 18.2.2 Release Notes / October 5, 2018</h1>
<p>
Mesa 18.2.2 is a bug fix release which fixes bugs found since the 18.2.1 release.
</p>
<p>
Mesa 18.2.2 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104602">Bug 104602</a> - [apitrace] Graphical artifacts in Civilization VI on RX Vega</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104926">Bug 104926</a> - swrast: Mesa 17.3.3 produces: HW cursor for format 875713089 not supported</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107276">Bug 107276</a> - radv: OpBitfieldUExtract returns incorrect result when count is zero</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107786">Bug 107786</a> - [DXVK] MSAA reflections are broken in GTA V</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108024">Bug 108024</a> - [Debian Stretch]Fail to build because &quot;xcb_randr_lease_t&quot;</li>
</ul>
<h2>Changes</h2>
<p>Alex Deucher (1):</p>
<ul>
<li>pci_ids: add new polaris pci id</li>
</ul>
<p>Andres Rodriguez (1):</p>
<ul>
<li>radv: only emit ZPASS_DONE for timestamp queries on gfx queues</li>
</ul>
<p>Axel Davy (3):</p>
<ul>
<li>st/nine: Clamp RCP when 0*inf!=0</li>
<li>st/nine: Avoid redundant SetCursorPos calls</li>
<li>st/nine: Increase maximum number of temp registers</li>
</ul>
<p>Dylan Baker (1):</p>
<ul>
<li>meson: Don't compile pipe loader with dri support when not using dri</li>
</ul>
<p>Eric Anholt (1):</p>
<ul>
<li>vc4: Fix sin(0.0) and cos(0.0) accuracy to fix SDL rendering rotation.</li>
</ul>
<p>Eric Engestrom (1):</p>
<ul>
<li>vulkan/wsi/display: check if wsi_swapchain_init() succeeded</li>
</ul>
<p>Jason Ekstrand (1):</p>
<ul>
<li>anv,radv: Implement vkAcquireNextImage2</li>
</ul>
<p>Juan A. Suarez Romero (2):</p>
<ul>
<li>docs: add sha256 checksums for 18.2.1</li>
<li>Update version to 18.2.2</li>
</ul>
<p>Leo Liu (1):</p>
<ul>
<li>radeon/uvd: use bitstream coded number for symbols of Huffman tables</li>
</ul>
<p>Marek Olšák (2):</p>
<ul>
<li>glsl_to_tgsi: invert gl_SamplePosition.y for the default framebuffer</li>
<li>radeonsi: NaN should pass kill_if</li>
</ul>
<p>Maxime (1):</p>
<ul>
<li>vulkan: Disable randr lease for libxcb &lt; 1.13</li>
</ul>
<p>Michal Srb (1):</p>
<ul>
<li>st/dri: don't set queryDmaBufFormats/queryDmaBufModifiers if the driver does not implement it</li>
</ul>
<p>Rhys Perry (2):</p>
<ul>
<li>nvc0: Update counter reading shaders to new NVC0_CB_AUX_MP_INFO</li>
<li>nvc0: fix bindless multisampled images on Maxwell+</li>
</ul>
<p>Samuel Iglesias Gonsálvez (1):</p>
<ul>
<li>anv: Add support for protected memory properties on anv_GetPhysicalDeviceProperties2()</li>
</ul>
<p>Samuel Pitoiset (1):</p>
<ul>
<li>radv: use the resolve compute path if dest uses multiple layers</li>
</ul>
<p>Stuart Young (1):</p>
<ul>
<li>docs: Update FAQ with respect to s3tc support</li>
</ul>
<p>Timothy Arceri (1):</p>
<ul>
<li>radeonsi: add a workaround for bitfield_extract when count is 0</li>
</ul>
</div>
</body>
</html>
......@@ -204,6 +204,7 @@ CHIPSET(0x67CC, POLARIS10)
CHIPSET(0x67CF, POLARIS10)
CHIPSET(0x67D0, POLARIS10)
CHIPSET(0x67DF, POLARIS10)
CHIPSET(0x6FDF, POLARIS10)
CHIPSET(0x98E4, STONEY)
......
......@@ -358,7 +358,8 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
*method = RESOLVE_COMPUTE;
else if (vk_format_is_int(src_image->vk_format))
*method = RESOLVE_COMPUTE;
else if (src_image->info.array_size > 1)
else if (src_image->info.array_size > 1 ||
dest_image->info.array_size > 1)
*method = RESOLVE_COMPUTE;
if (radv_layout_dcc_compressed(dest_image, dest_image_layout, queue_mask)) {
......
......@@ -206,22 +206,37 @@ VkResult radv_GetSwapchainImagesKHR(
}
VkResult radv_AcquireNextImageKHR(
VkDevice _device,
VkDevice device,
VkSwapchainKHR swapchain,
uint64_t timeout,
VkSemaphore semaphore,
VkFence _fence,
VkFence fence,
uint32_t* pImageIndex)
{
VkAcquireNextImageInfoKHR acquire_info = {
.sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
.swapchain = swapchain,
.timeout = timeout,
.semaphore = semaphore,
.fence = fence,
.deviceMask = 0,
};
return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
}
VkResult radv_AcquireNextImage2KHR(
VkDevice _device,
const VkAcquireNextImageInfoKHR* pAcquireInfo,
uint32_t* pImageIndex)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdevice = device->physical_device;
RADV_FROM_HANDLE(radv_fence, fence, _fence);
RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence);
VkResult result = wsi_common_acquire_next_image(&pdevice->wsi_device,
VkResult result = wsi_common_acquire_next_image2(&pdevice->wsi_device,
_device,
swapchain,
timeout,
semaphore,
pAcquireInfo,
pImageIndex);
if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) {
......
......@@ -699,7 +699,7 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
* counters) must immediately precede every timestamp event to
* prevent a GPU hang on GFX9.
*/
if (chip_class == GFX9) {
if (chip_class == GFX9 && !is_mec) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, gfx9_eop_bug_va);
......
......@@ -31,6 +31,9 @@ libpipe_loader_defines = []
if dep_libdrm.found()
files_pipe_loader += files('pipe_loader_drm.c')
endif
if with_dri
libpipe_loader_defines += '-DHAVE_PIPE_LOADER_DRI'
endif
if with_gallium_drisw_kms
libpipe_loader_defines += '-DHAVE_PIPE_LOADER_KMS'
endif
......@@ -42,10 +45,7 @@ libpipe_loader_static = static_library(
inc_util, inc_loader, inc_gallium, inc_include, inc_src, inc_gallium_aux,
inc_gallium_winsys,
],
c_args : [
c_vis_args, '-DHAVE_PIPE_LOADER_DRI', '-DGALLIUM_STATIC_TARGETS=1',
libpipe_loader_defines,
],
c_args : [c_vis_args, libpipe_loader_defines, '-DGALLIUM_STATIC_TARGETS=1'],
link_with : [libloader, libxmlconfig],
dependencies : [dep_libdrm],
build_by_default : false,
......@@ -59,7 +59,7 @@ libpipe_loader_dynamic = static_library(
inc_gallium_winsys,
],
c_args : [
c_vis_args, libpipe_loader_defines, '-DHAVE_PIPE_LOADER_DRI',
c_vis_args, libpipe_loader_defines,
'-DPIPE_SEARCH_DIR="@0@"'.format(
join_paths(get_option('prefix'), get_option('libdir'), 'gallium-pipe')
)
......
......@@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
if (mask & 0x1)
bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless));
if (mask & 0x2) {
int d = util_bitcount(mask & 0x1);
bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless));
}
}
......
......@@ -1732,6 +1732,45 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
prog->driver->io.suInfoBase);
}
/*
 * Produce the multisample coordinate adjustment for one axis of a surface.
 *
 * target   - texture target copied onto the emitted TXQ instruction
 * index    - which adjustment to produce: 0 or 1 (any other value asserts)
 * slot     - surface slot, forwarded to loadSuInfo32() on the non-bindless path
 * ind      - indirect value; forwarded to loadSuInfo32(), or used as source 0
 *            of the TXQ on the bindless path
 * bindless - whether the surface is accessed bindlessly
 *
 * Returns a Value that callers use as a shift amount.
 *
 * For non-bindless surfaces, or chipsets older than GM107, the value is read
 * through loadSuInfo32() at NVC0_SU_INFO_MS(index). Otherwise it is computed
 * in the shader from the result of a TXQ query.
 */
Value *
NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless)
{
// Non-bindless access and pre-GM107 chipsets keep using the driver-provided
// surface info.
if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET)
return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless);
// Always true here given the early return above; kept as a sanity check.
assert(bindless);
// Destination of the TXQ below; presumably the sample count, judging by the
// name and the 1/2/4/8 note further down.
Value *samples = bld.getSSA();
// this shouldn't be lowered because it's being inserted before the current instruction
TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
tex->tex.target = target;
tex->tex.query = TXQ_TYPE;
// Only one component of the query result is requested.
tex->tex.mask = 0x4;
// NOTE(review): 0xff / 0x1f look like "unused r/s slot" markers for the
// bindless case - confirm against the emitter.
tex->tex.r = 0xff;
tex->tex.s = 0x1f;
tex->tex.rIndirectSrc = 0;
tex->setDef(0, samples);
tex->setSrc(0, ind);
tex->setSrc(1, bld.loadImm(NULL, 0));
bld.insert(tex);
// doesn't work with sample counts other than 1/2/4/8 but they aren't supported
switch (index) {
case 0: {
// (samples + 2) >> 2 maps 1/2/4/8 to 0/1/1/2.
Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(2));
return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2));
}
case 1: {
// (samples > 2), masked down to 0 or 1: maps 1/2/4/8 to 0/0/1/1.
Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), TYPE_U32, samples, bld.mkImm(2))->getDef(0);
return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1));
}
default: {
// Only indices 0 and 1 are handled.
assert(false);
return NULL;
}
}
}
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
{
switch (su->tex.target.getEnum()) {
......@@ -1817,8 +1856,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
Value *ind = tex->getIndirectR();
Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless);
Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless);
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
......
......@@ -148,7 +148,7 @@ protected:
void handlePIXLD(Instruction *);
void checkPredicate(Instruction *);
Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
Value *loadMsAdjInfo32(TexInstruction::Target targ, uint32_t index, int slot, Value *ind, bool bindless);
virtual bool visit(Instruction *);
......@@ -161,6 +161,7 @@ private:
Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
Value *loadBufInfo64(Value *ptr, uint32_t off);
Value *loadBufLength32(Value *ptr, uint32_t off);
Value *loadUboInfo64(Value *ptr, uint32_t off);
......
......@@ -429,9 +429,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
* mov b32 $r6 $pm6
* mov b32 $r7 $pm7
* set $p0 0x1 eq u32 $r8 0x0
* mov b32 $r10 c7[0x620]
* mov b32 $r10 c7[0x6a0]
* ext u32 $r8 $r12 0x414
* mov b32 $r11 c7[0x624]
* mov b32 $r11 c7[0x6a4]
* sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
* ext u32 $r9 $r12 0x208
* (not $p0) exit
......@@ -449,7 +449,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
* add b32 $r12 $c $r12 $r9
* st b128 wt g[$r10d] $r0q
* sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
* mov b32 $r0 c7[0x628]
* mov b32 $r0 c7[0x6a8]
* add b32 $r13 $r13 0x0 $c
* $p1 st b128 wt g[$r12d+0x40] $r4q
* st b32 wt g[$r12d+0x50] $r0
......@@ -467,9 +467,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
0x2c00000028019c04ULL,
0x2c0000002c01dc04ULL,
0x190e0000fc81dc03ULL,
0x28005c1880029de4ULL,
0x28005c1a80029de4ULL,
0x7000c01050c21c03ULL,
0x28005c189002dde4ULL,
0x28005c1a9002dde4ULL,
0x204282020042e047ULL,
0x7000c00820c25c03ULL,
0x80000000000021e7ULL,
......@@ -487,7 +487,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
0x4801000024c31c03ULL,
0x9400000000a01fc5ULL,
0x200002e04202c047ULL,
0x28005c18a0001de4ULL,
0x28005c1aa0001de4ULL,
0x0800000000d35c42ULL,
0x9400000100c107c5ULL,
0x9400000140c01f85ULL,
......@@ -510,9 +510,9 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
0x86400000051c001aULL,
0x86400000059c001eULL,
0xdb201c007f9c201eULL,
0x64c03ce0c41c002aULL,
0x64c03ce0d41c002aULL,
0xc00000020a1c3021ULL,
0x64c03ce0c49c002eULL,
0x64c03ce0d49c002eULL,
0x0810a0808010b810ULL,
0xc0000001041c3025ULL,
0x180000000020003cULL,
......@@ -530,7 +530,7 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
0xe0840000049c3032ULL,
0xfe800000001c2800ULL,
0x080000b81080b010ULL,
0x64c03ce0c51c0002ULL,
0x64c03ce0d51c0002ULL,
0xe08040007f9c3436ULL,
0xfe80000020043010ULL,
0xfc800000281c3000ULL,
......@@ -554,10 +554,10 @@ static const uint64_t gm107_read_hw_sm_counters_code[] =
0x001f8401fc2007a1ULL, /* sched (st 0x1 wr 0x5) (st 0x1 wt 0x1) (st 0x1) */
0xf0c8000000b70007ULL, /* mov $r7 $pm7 */
0x5b6403800087ff07ULL, /* isetp eq u32 and $p0 0x1 0x0 $r8 0x1 */
0x4c98079c1887000aULL, /* mov $r10 c7[0x620] 0xf */
0x4c98079c1a87000aULL, /* mov $r10 c7[0x6a0] 0xf */
0x001fa400fc2017e1ULL, /* sched (st 0x1 wt 0x2) (st 0x1) (st 0x9) */
0x3800000091470c08ULL, /* bfe u32 $r8 $r12 0x914 */
0x4c98079c1897000bULL, /* mov $r11 c7[0x624] 0xf */
0x4c98079c1a97000bULL, /* mov $r11 c7[0x6a4] 0xf */
0x3800000020870c09ULL, /* bfe u32 $r9 $r12 0x208 */
0x001c1800fc2007edULL, /* sched (st 0xd) (st 0x1) (st 0x6 wr 0x0) */
0xe30000000008000fULL, /* not $p0 exit */
......@@ -578,7 +578,7 @@ static const uint64_t gm107_read_hw_sm_counters_code[] =
0x003f983c1c4007e1ULL, /* sched (st 0x1) (st 0x2 rd 0x0 wt 0x3c) (st 0x6 wt 0x1) */
0x5c1008000ff70d0dULL, /* iadd x $r13 $r13 0x0 */
0xbfd0000000070a00ULL, /* st e wt b128 g[$r10] $r0 0x1 */
0x4c98079c18a70000ULL, /* mov $r0 c7[0x628] 0xf */
0x4c98079c1aa70000ULL, /* mov $r0 c7[0x6a8] 0xf */
0x001fbc00fc2007e6ULL, /* sched (st 0x1) (st 0x1) (st 0xf) */
0xbfd0000004010c04ULL, /* $p1 st e wt b128 g[$r12+0x40] $r4 0x1 */
0xbf90000005070c00ULL, /* st e wt b32 g[$r12+0x50] $r0 0x1 */
......@@ -1760,14 +1760,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
* mov b32 $r6 $pm6
* mov b32 $r7 $pm7
* set $p0 0x1 eq u32 $r8 0x0
* mov b32 $r10 c15[0x620]
* mov b32 $r11 c15[0x624]
* mov b32 $r10 c15[0x6a0]
* mov b32 $r11 c15[0x6a4]
* ext u32 $r8 $r9 0x414
* (not $p0) exit
* mul $r8 u32 $r8 u32 48
* add b32 $r10 $c $r10 $r8
* add b32 $r11 $r11 0x0 $c
* mov b32 $r8 c15[0x628]
* mov b32 $r8 c15[0x6a8]
* st b128 wt g[$r10d+0x00] $r0q
* st b128 wt g[$r10d+0x10] $r4q
* st b32 wt g[$r10d+0x20] $r8
......@@ -1783,14 +1783,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
0x2c00000028019c04ULL,
0x2c0000002c01dc04ULL,
0x190e0000fc81dc03ULL,
0x28007c1880029de4ULL,
0x28007c189002dde4ULL,
0x28007c1a80029de4ULL,
0x28007c1a9002dde4ULL,
0x7000c01050921c03ULL,
0x80000000000021e7ULL,
0x10000000c0821c02ULL,
0x4801000020a29c03ULL,
0x0800000000b2dc42ULL,
0x28007c18a0021de4ULL,
0x28007c1aa0021de4ULL,
0x9400000000a01fc5ULL,
0x9400000040a11fc5ULL,
0x9400000080a21f85ULL,
......
......@@ -1003,25 +1003,35 @@ static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_p
size++;
for (i = 0; i < 2; ++i) {
int num = 0, j;
if (pic->huffman_table.load_huffman_table[i] == 0)
continue;
buf[size++] = 0x00 | i;
memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
size += 16;
memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
size += 12;
for (j = 0; j < 16; ++j)
num += pic->huffman_table.table[i].num_dc_codes[j];
assert(num <= 12);
memcpy((buf + size), &pic->huffman_table.table[i].dc_values, num);
size += num;
}
for (i = 0; i < 2; ++i) {
int num = 0, j;
if (pic->huffman_table.load_huffman_table[i] == 0)
continue;
buf[size++] = 0x10 | i;
memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
size += 16;
memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
size += 162;
for (j = 0; j < 16; ++j)
num += pic->huffman_table.table[i].num_ac_codes[j];
assert(num <= 162);
memcpy((buf + size), &pic->huffman_table.table[i].ac_values, num);
size += num;
}
bs = (uint16_t*)&buf[len_pos];
......
......@@ -36,7 +36,8 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value,
/* UGE because NaN shouldn't get killed */
conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value,
ctx->ac.f32_0, "");
}
......@@ -505,18 +506,37 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef bfe_sm5;
LLVMValueRef cond;
bfe_sm5 = ac_build_bfe(&ctx->ac, emit_data->args[0],
if (HAVE_LLVM < 0x0700) {
LLVMValueRef bfe_sm5 =
ac_build_bfe(&ctx->ac, emit_data->args[0],
emit_data->args[1], emit_data->args[2],
emit_data->info->opcode == TGSI_OPCODE_IBFE);
/* Correct for GLSL semantics. */
cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
LLVMConstInt(ctx->i32, 32, 0), "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
} else {
/* FIXME: LLVM 7 returns incorrect result when count is 0.
* https://bugs.freedesktop.org/show_bug.cgi?id=107276
*/
LLVMValueRef zero = ctx->i32_0;
LLVMValueRef bfe_sm5 =
ac_build_bfe(&ctx->ac, emit_data->args[0],
emit_data->args[1], emit_data->args[2],
emit_data->info->opcode == TGSI_OPCODE_IBFE);
/* Correct for GLSL semantics. */
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
LLVMConstInt(ctx->i32, 32, 0), "");
LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
zero, "");
bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
}
}
/* this is ffs in C */
......
......@@ -686,25 +686,45 @@ ntq_fceil(struct vc4_compile *c, struct qreg src)
return qir_MOV(c, result);
}
/**
 * Reduces a sin/cos argument to a small range around zero.
 *
 * The input x is multiplied by 1 / (2 * pi), so the returned value is
 * expressed in full periods rather than radians. The whole-period part is
 * then dropped and the remainder is folded into roughly [-0.5, 0.5].
 *
 * \param c  compile context the emitted instructions are added to
 * \param x  the sin/cos argument
 * \return   the register holding the range-reduced value
 */
static struct qreg
ntq_shrink_sincos_input_range(struct vc4_compile *c, struct qreg x)
{
/* The callers evaluate a Taylor series, so we want few coefficients
 * and rely on sin/cos repeating every 2pi. Keep x as close to 0 as
 * possible, since the series gets less accurate as |x| increases.
 * Avoid range-reduction tricks that shift the input and lean on
 * sin/cos identities: getting accurate values for x == 0 is very
 * important for SDL rendering.
 */
struct qreg scaled_x =
qir_FMUL(c, x,
qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
/* Note: FTOI truncates toward 0. */
/* Subtracting the truncated value therefore leaves a remainder with
 * the same sign as scaled_x and a magnitude below 1.
 */
struct qreg x_frac = qir_FSUB(c, scaled_x,
qir_ITOF(c, qir_FTOI(c, scaled_x)));
/* Map [0.5, 1] to [-0.5, 0] */
/* The flags are set from (x_frac - 0.5); the subtract of 1.0 is
 * conditional on QPU_COND_NC (presumably "negative flag clear",
 * i.e. x_frac >= 0.5).
 */
qir_SF(c, qir_FSUB(c, x_frac, qir_uniform_f(c, 0.5)));
qir_FSUB_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NC;
/* Map [-1, -0.5] to [0, 0.5] */
/* The flags are set from (x_frac + 0.5); the add of 1.0 is
 * conditional on QPU_COND_NS (presumably "negative flag set",
 * i.e. x_frac < -0.5).
 */
qir_SF(c, qir_FADD(c, x_frac, qir_uniform_f(c, 0.5)));
qir_FADD_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS;
return x_frac;
}
static struct qreg
ntq_fsin(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
-2.0 * M_PI,
pow(2.0 * M_PI, 3) / (3 * 2 * 1),
-pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
2.0 * M_PI,
-pow(2.0 * M_PI, 3) / (3 * 2 * 1),
pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
struct qreg scaled_x =
qir_FMUL(c,
src,
qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
struct qreg x = qir_FADD(c,
ntq_ffract(c, scaled_x),
qir_uniform_f(c, -0.5));
struct qreg x = ntq_shrink_sincos_input_range(c, src);
struct qreg x2 = qir_FMUL(c, x, x);
struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
......@@ -722,21 +742,15 @@ static struct qreg
ntq_fcos(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
-1.0f,
pow(2.0 * M_PI, 2) / (2 * 1),
-pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
1.0f,
-pow(2.0 * M_PI, 2) / (2 * 1),
pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
struct qreg scaled_x =
qir_FMUL(c, src,
qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
struct qreg x_frac = qir_FADD(c,
ntq_ffract(c, scaled_x),
qir_uniform_f(c, -0.5));
struct qreg x_frac = ntq_shrink_sincos_input_range(c, src);
struct qreg sum = qir_uniform_f(c, coeff[0]);
struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
struct qreg x = x2; /* Current x^2, x^4, or x^6 */
......