diff --git a/CHANGELOG.md b/CHANGELOG.md index 94afe7bc5f..0f51248edf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,7 +50,7 @@ Bottom level categories: Naga now infers the correct binding layout when a resource appears only in an assignment to `_`. By @andyleiserson in [#7540](https://github.com/gfx-rs/wgpu/pull/7540). -- Add polyfills for `dot4U8Packed` and `dot4I8Packed` for all backends. By @robamler in [#7494](https://github.com/gfx-rs/wgpu/pull/7494). +- Implement `dot4U8Packed` and `dot4I8Packed` for all backends, using specialized intrinsics on SPIR-V and HLSL if available, and polyfills everywhere else. By @robamler in [#7494](https://github.com/gfx-rs/wgpu/pull/7494) and [#7574](https://github.com/gfx-rs/wgpu/pull/7574). - Add polyfilled `pack4x{I,U}8Clamped` built-ins to all backends and WGSL frontend. By @ErichDonGubler in [#7546](https://github.com/gfx-rs/wgpu/pull/7546). #### DX12 diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index f11dcd7744..59725df3db 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -12,7 +12,7 @@ use super::{ WrappedZeroValue, }, storage: StoreValue, - BackendResult, Error, FragmentEntryPoint, Options, + BackendResult, Error, FragmentEntryPoint, Options, ShaderModel, }; use crate::{ back::{self, Baked}, @@ -3751,33 +3751,48 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { fun @ (Function::Dot4I8Packed | Function::Dot4U8Packed) => { let arg1 = arg1.unwrap(); - write!(self.out, "dot(")?; + if self.options.shader_model >= ShaderModel::V6_4 { + // Intrinsics `dot4add_{i, u}8packed` are available in SM 6.4 and later. 
+ let function_name = match fun { + Function::Dot4I8Packed => "dot4add_i8packed", + Function::Dot4U8Packed => "dot4add_u8packed", + _ => unreachable!(), + }; + write!(self.out, "{function_name}(")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, ", 0)")?; + } else { + // Fall back to a polyfill as `dot4add_u8packed` is not available. + write!(self.out, "dot(")?; - if matches!(fun, Function::Dot4U8Packed) { - write!(self.out, "u")?; - } - write!(self.out, "int4(")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, ", ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 8, ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 16, ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 24) << 24 >> 24, ")?; + if matches!(fun, Function::Dot4U8Packed) { + write!(self.out, "u")?; + } + write!(self.out, "int4(")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 8, ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 16, ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 24) << 24 >> 24, ")?; - if matches!(fun, Function::Dot4U8Packed) { - write!(self.out, "u")?; + if matches!(fun, Function::Dot4U8Packed) { + write!(self.out, "u")?; + } + write!(self.out, "int4(")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 8, ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 16, ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 24) << 24 >> 24)")?; } - write!(self.out, "int4(")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, ", ")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, " >> 8, ")?; - self.write_expr(module, arg1, func_ctx)?; - 
write!(self.out, " >> 16, ")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, " >> 24) << 24 >> 24)")?; } Function::QuantizeToF16 => { write!(self.out, "f16tof32(f32tof16(")?; diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 2e9cd32801..96e8471c86 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -1143,59 +1143,89 @@ impl BlockContext<'_> { ), }, fun @ (Mf::Dot4I8Packed | Mf::Dot4U8Packed) => { - // TODO: consider using packed integer dot product if PackedVectorFormat4x8Bit is available - let (extract_op, arg0_id, arg1_id) = match fun { - Mf::Dot4U8Packed => (spirv::Op::BitFieldUExtract, arg0_id, arg1_id), - Mf::Dot4I8Packed => { - // Convert both packed arguments to signed integers so that we can apply the - // `BitFieldSExtract` operation on them in `write_dot_product` below. - let new_arg0_id = self.gen_id(); - block.body.push(Instruction::unary( - spirv::Op::Bitcast, - result_type_id, - new_arg0_id, - arg0_id, - )); + if self.writer.lang_version() >= (1, 6) + && self + .writer + .require_all(&[ + spirv::Capability::DotProduct, + spirv::Capability::DotProductInput4x8BitPacked, + ]) + .is_ok() + { + // Write optimized code using `PackedVectorFormat4x8Bit`. + self.writer.use_extension("SPV_KHR_integer_dot_product"); + + let op = match fun { + Mf::Dot4I8Packed => spirv::Op::SDot, + Mf::Dot4U8Packed => spirv::Op::UDot, + _ => unreachable!(), + }; - let new_arg1_id = self.gen_id(); - block.body.push(Instruction::unary( - spirv::Op::Bitcast, - result_type_id, - new_arg1_id, - arg1_id, - )); + block.body.push(Instruction::ternary( + op, + result_type_id, + id, + arg0_id, + arg1_id, + spirv::PackedVectorFormat::PackedVectorFormat4x8Bit as Word, + )); + } else { + // Fall back to a polyfill since `PackedVectorFormat4x8Bit` is not available. 
+ let (extract_op, arg0_id, arg1_id) = match fun { + Mf::Dot4U8Packed => (spirv::Op::BitFieldUExtract, arg0_id, arg1_id), + Mf::Dot4I8Packed => { + // Convert both packed arguments to signed integers so that we can apply the + // `BitFieldSExtract` operation on them in `write_dot_product` below. + let new_arg0_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::Bitcast, + result_type_id, + new_arg0_id, + arg0_id, + )); - (spirv::Op::BitFieldSExtract, new_arg0_id, new_arg1_id) - } - _ => unreachable!(), - }; + let new_arg1_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::Bitcast, + result_type_id, + new_arg1_id, + arg1_id, + )); - let eight = self.writer.get_constant_scalar(crate::Literal::U32(8)); + (spirv::Op::BitFieldSExtract, new_arg0_id, new_arg1_id) + } + _ => unreachable!(), + }; - const VEC_LENGTH: u8 = 4; - let bit_shifts: [_; VEC_LENGTH as usize] = core::array::from_fn(|index| { - self.writer - .get_constant_scalar(crate::Literal::U32(index as u32 * 8)) - }); + let eight = self.writer.get_constant_scalar(crate::Literal::U32(8)); + + const VEC_LENGTH: u8 = 4; + let bit_shifts: [_; VEC_LENGTH as usize] = + core::array::from_fn(|index| { + self.writer + .get_constant_scalar(crate::Literal::U32(index as u32 * 8)) + }); + + self.write_dot_product( + id, + result_type_id, + arg0_id, + arg1_id, + VEC_LENGTH as Word, + block, + |result_id, composite_id, index| { + Instruction::ternary( + extract_op, + result_type_id, + result_id, + composite_id, + bit_shifts[index as usize], + eight, + ) + }, + ); + } - self.write_dot_product( - id, - result_type_id, - arg0_id, - arg1_id, - VEC_LENGTH as Word, - block, - |result_id, composite_id, index| { - Instruction::ternary( - extract_op, - result_type_id, - result_id, - composite_id, - bit_shifts[index as usize], - eight, - ) - }, - ); self.cached[expr_handle] = id; return Ok(()); } diff --git a/naga/src/back/spv/layout.rs b/naga/src/back/spv/layout.rs index 177996741d..6652b33504 
100644 --- a/naga/src/back/spv/layout.rs +++ b/naga/src/back/spv/layout.rs @@ -12,7 +12,8 @@ use alloc::format; const GENERATOR: Word = 28; impl PhysicalLayout { - pub(super) const fn new(version: Word) -> Self { + pub(super) const fn new(major_version: u8, minor_version: u8) -> Self { + let version = ((major_version as u32) << 16) | ((minor_version as u32) << 8); PhysicalLayout { magic_number: MAGIC_NUMBER, version, @@ -29,6 +30,13 @@ impl PhysicalLayout { sink.extend(iter::once(self.bound)); sink.extend(iter::once(self.instruction_schema)); } + + /// Returns `(major, minor)`. + pub(super) const fn lang_version(&self) -> (u8, u8) { + let major = (self.version >> 16) as u8; + let minor = (self.version >> 8) as u8; + (major, minor) + } } impl super::recyclable::Recyclable for PhysicalLayout { @@ -150,10 +158,13 @@ impl Instruction { #[test] fn test_physical_layout_in_words() { let bound = 5; - let version = 0x10203; + + // The least and most significant bytes of `version` must both be zero + // according to the SPIR-V spec. 
+ let version = 0x0001_0200; let mut output = vec![]; - let mut layout = PhysicalLayout::new(version); + let mut layout = PhysicalLayout::new(1, 2); layout.bound = bound; layout.in_words(&mut output); diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 6cfbbba981..3819ed10e7 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -60,7 +60,6 @@ impl Writer { if major != 1 { return Err(Error::UnsupportedVersion(major, minor)); } - let raw_version = ((major as u32) << 16) | ((minor as u32) << 8); let mut capabilities_used = crate::FastIndexSet::default(); capabilities_used.insert(spirv::Capability::Shader); @@ -70,7 +69,7 @@ impl Writer { let void_type = id_gen.next(); Ok(Writer { - physical_layout: PhysicalLayout::new(raw_version), + physical_layout: PhysicalLayout::new(major, minor), logical_layout: LogicalLayout::default(), id_gen, capabilities_available: options.capabilities.clone(), @@ -99,6 +98,11 @@ impl Writer { }) } + /// Returns `(major, minor)` of the SPIR-V language version. + pub const fn lang_version(&self) -> (u8, u8) { + self.physical_layout.lang_version() + } + /// Reset `Writer` to its initial state, retaining any allocations. /// /// Why not just implement `Recyclable` for `Writer`? By design, @@ -202,6 +206,43 @@ impl Writer { } } + /// Indicate that the code requires all of the listed capabilities. + /// + /// If all entries of `capabilities` appear in the available capabilities + /// specified in the [`Options`] from which this `Writer` was created + /// (including the case where [`Options::capabilities`] is `None`), add + /// them all to this `Writer`'s [`capabilities_used`] table, and return + /// `Ok(())`. If at least one of the listed capabilities is not available, + /// do not add anything to the `capabilities_used` table, and return the + /// first unavailable requested capability, wrapped in `Err()`. 
+ /// + /// This method does not return an [`enum@Error`] in case of failure + /// because it may be used in cases where the caller can recover (e.g., + /// with a polyfill) if the requested capabilities are not available. In + /// this case, it would be unnecessary work to find *all* the unavailable + /// requested capabilities, and to allocate a `Vec` for them, just so we + /// could return an [`Error::MissingCapabilities`]. + /// + /// [`capabilities_used`]: Writer::capabilities_used + pub(super) fn require_all( + &mut self, + capabilities: &[spirv::Capability], + ) -> Result<(), spirv::Capability> { + if let Some(ref available) = self.capabilities_available { + for requested in capabilities { + if !available.contains(requested) { + return Err(*requested); + } + } + } + + for requested in capabilities { + self.capabilities_used.insert(*requested); + } + + Ok(()) + } + /// Indicate that the code uses the given extension. pub(super) fn use_extension(&mut self, extension: &'static str) { self.extensions_used.insert(extension); diff --git a/naga/tests/in/wgsl/functions-optimized.toml b/naga/tests/in/wgsl/functions-optimized.toml new file mode 100644 index 0000000000..c8a7abfa09 --- /dev/null +++ b/naga/tests/in/wgsl/functions-optimized.toml @@ -0,0 +1,11 @@ +# Explicitly turn on optimizations for `dot4I8Packed` and `dot4U8Packed` +# on SPIRV and HLSL. 
+ +targets = "SPIRV | HLSL" + +[spv] +capabilities = ["DotProduct", "DotProductInput4x8BitPacked"] +version = [1, 6] + +[hlsl] +shader_model = "V6_4" diff --git a/naga/tests/in/wgsl/functions-optimized.wgsl b/naga/tests/in/wgsl/functions-optimized.wgsl new file mode 100644 index 0000000000..229357523c --- /dev/null +++ b/naga/tests/in/wgsl/functions-optimized.wgsl @@ -0,0 +1,19 @@ +fn test_packed_integer_dot_product() -> u32 { + let a_5 = 1u; + let b_5 = 2u; + let c_5: i32 = dot4I8Packed(a_5, b_5); + + let a_6 = 3u; + let b_6 = 4u; + let c_6: u32 = dot4U8Packed(a_6, b_6); + + // test baking of arguments + let c_7: i32 = dot4I8Packed(5u + c_6, 6u + c_6); + let c_8: u32 = dot4U8Packed(7u + c_6, 8u + c_6); + return c_8; +} + +@compute @workgroup_size(1) +fn main() { + let c = test_packed_integer_dot_product(); +} diff --git a/naga/tests/in/wgsl/functions-unoptimized.toml b/naga/tests/in/wgsl/functions-unoptimized.toml new file mode 100644 index 0000000000..7361004be3 --- /dev/null +++ b/naga/tests/in/wgsl/functions-unoptimized.toml @@ -0,0 +1,13 @@ +# Explicitly turn off optimizations for `dot4I8Packed` and `dot4U8Packed` +# on SPIRV and HLSL. + +targets = "SPIRV | HLSL" + +[spv] +# Provide some unrelated capability because an empty list of capabilities would +# get mapped to `None`, which would then be interpreted as "all capabilities +# are available". 
+capabilities = ["Matrix"] + +[hlsl] +shader_model = "V6_3" diff --git a/naga/tests/in/wgsl/functions-unoptimized.wgsl b/naga/tests/in/wgsl/functions-unoptimized.wgsl new file mode 100644 index 0000000000..229357523c --- /dev/null +++ b/naga/tests/in/wgsl/functions-unoptimized.wgsl @@ -0,0 +1,19 @@ +fn test_packed_integer_dot_product() -> u32 { + let a_5 = 1u; + let b_5 = 2u; + let c_5: i32 = dot4I8Packed(a_5, b_5); + + let a_6 = 3u; + let b_6 = 4u; + let c_6: u32 = dot4U8Packed(a_6, b_6); + + // test baking of arguments + let c_7: i32 = dot4I8Packed(5u + c_6, 6u + c_6); + let c_8: u32 = dot4U8Packed(7u + c_6, 8u + c_6); + return c_8; +} + +@compute @workgroup_size(1) +fn main() { + let c = test_packed_integer_dot_product(); +} diff --git a/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl b/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl new file mode 100644 index 0000000000..2b2d003ea0 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl @@ -0,0 +1,19 @@ +uint test_packed_integer_dot_product() +{ + int c_5_ = dot4add_i8packed(1u, 2u, 0); + uint c_6_ = dot4add_u8packed(3u, 4u, 0); + uint _e7 = (5u + c_6_); + uint _e9 = (6u + c_6_); + int c_7_ = dot4add_i8packed(_e7, _e9, 0); + uint _e12 = (7u + c_6_); + uint _e14 = (8u + c_6_); + uint c_8_ = dot4add_u8packed(_e12, _e14, 0); + return c_8_; +} + +[numthreads(1, 1, 1)] +void main() +{ + const uint _e0 = test_packed_integer_dot_product(); + return; +} diff --git a/naga/tests/out/hlsl/wgsl-functions-optimized.ron b/naga/tests/out/hlsl/wgsl-functions-optimized.ron new file mode 100644 index 0000000000..81f3e9b295 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-optimized.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_4", + ), + ], +) diff --git a/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl b/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl new file mode 100644 index 0000000000..72156e433c --- /dev/null +++ 
b/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl @@ -0,0 +1,19 @@ +uint test_packed_integer_dot_product() +{ + int c_5_ = dot(int4(1u, 1u >> 8, 1u >> 16, 1u >> 24) << 24 >> 24, int4(2u, 2u >> 8, 2u >> 16, 2u >> 24) << 24 >> 24); + uint c_6_ = dot(uint4(3u, 3u >> 8, 3u >> 16, 3u >> 24) << 24 >> 24, uint4(4u, 4u >> 8, 4u >> 16, 4u >> 24) << 24 >> 24); + uint _e7 = (5u + c_6_); + uint _e9 = (6u + c_6_); + int c_7_ = dot(int4(_e7, _e7 >> 8, _e7 >> 16, _e7 >> 24) << 24 >> 24, int4(_e9, _e9 >> 8, _e9 >> 16, _e9 >> 24) << 24 >> 24); + uint _e12 = (7u + c_6_); + uint _e14 = (8u + c_6_); + uint c_8_ = dot(uint4(_e12, _e12 >> 8, _e12 >> 16, _e12 >> 24) << 24 >> 24, uint4(_e14, _e14 >> 8, _e14 >> 16, _e14 >> 24) << 24 >> 24); + return c_8_; +} + +[numthreads(1, 1, 1)] +void main() +{ + const uint _e0 = test_packed_integer_dot_product(); + return; +} diff --git a/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron new file mode 100644 index 0000000000..f1f510d2dc --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_3", + ), + ], +) diff --git a/naga/tests/out/spv/wgsl-functions-optimized.spvasm b/naga/tests/out/spv/wgsl-functions-optimized.spvasm new file mode 100644 index 0000000000..7169a1e1e4 --- /dev/null +++ b/naga/tests/out/spv/wgsl-functions-optimized.spvasm @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.6 +; Generator: rspirv +; Bound: 30 +OpCapability Shader +OpCapability DotProductKHR +OpCapability DotProductInput4x8BitPackedKHR +OpExtension "SPV_KHR_integer_dot_product" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %26 "main" +OpExecutionMode %26 LocalSize 1 1 1 +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%6 = OpTypeFunction %3 +%7 = OpConstant %3 1 +%8 = OpConstant %3 2 +%9 = OpConstant %3 3 +%10 = OpConstant %3 4 +%11 = OpConstant %3 5 +%12 = 
OpConstant %3 6 +%13 = OpConstant %3 7 +%14 = OpConstant %3 8 +%16 = OpTypeInt 32 1 +%27 = OpTypeFunction %2 +%5 = OpFunction %3 None %6 +%4 = OpLabel +OpBranch %15 +%15 = OpLabel +%17 = OpSDotKHR %16 %7 %8 PackedVectorFormat4x8BitKHR +%18 = OpUDotKHR %3 %9 %10 PackedVectorFormat4x8BitKHR +%19 = OpIAdd %3 %11 %18 +%20 = OpIAdd %3 %12 %18 +%21 = OpSDotKHR %16 %19 %20 PackedVectorFormat4x8BitKHR +%22 = OpIAdd %3 %13 %18 +%23 = OpIAdd %3 %14 %18 +%24 = OpUDotKHR %3 %22 %23 PackedVectorFormat4x8BitKHR +OpReturnValue %24 +OpFunctionEnd +%26 = OpFunction %2 None %27 +%25 = OpLabel +OpBranch %28 +%28 = OpLabel +%29 = OpFunctionCall %3 %5 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-functions-unoptimized.spvasm b/naga/tests/out/spv/wgsl-functions-unoptimized.spvasm new file mode 100644 index 0000000000..adf07f25db --- /dev/null +++ b/naga/tests/out/spv/wgsl-functions-unoptimized.spvasm @@ -0,0 +1,112 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 99 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %95 "main" +OpExecutionMode %95 LocalSize 1 1 1 +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%6 = OpTypeFunction %3 +%7 = OpConstant %3 1 +%8 = OpConstant %3 2 +%9 = OpConstant %3 3 +%10 = OpConstant %3 4 +%11 = OpConstant %3 5 +%12 = OpConstant %3 6 +%13 = OpConstant %3 7 +%14 = OpConstant %3 8 +%16 = OpTypeInt 32 1 +%20 = OpConstant %3 0 +%21 = OpConstant %3 16 +%22 = OpConstant %3 24 +%23 = OpConstantNull %16 +%40 = OpConstantNull %3 +%96 = OpTypeFunction %2 +%5 = OpFunction %3 None %6 +%4 = OpLabel +OpBranch %15 +%15 = OpLabel +%18 = OpBitcast %16 %7 +%19 = OpBitcast %16 %8 +%24 = OpBitFieldSExtract %16 %18 %20 %14 +%25 = OpBitFieldSExtract %16 %19 %20 %14 +%26 = OpIMul %16 %24 %25 +%27 = OpIAdd %16 %23 %26 +%28 = OpBitFieldSExtract %16 %18 %14 %14 +%29 = OpBitFieldSExtract %16 %19 %14 %14 +%30 = OpIMul %16 %28 %29 +%31 = OpIAdd %16 %27 %30 +%32 = 
OpBitFieldSExtract %16 %18 %21 %14 +%33 = OpBitFieldSExtract %16 %19 %21 %14 +%34 = OpIMul %16 %32 %33 +%35 = OpIAdd %16 %31 %34 +%36 = OpBitFieldSExtract %16 %18 %22 %14 +%37 = OpBitFieldSExtract %16 %19 %22 %14 +%38 = OpIMul %16 %36 %37 +%17 = OpIAdd %16 %35 %38 +%41 = OpBitFieldUExtract %3 %9 %20 %14 +%42 = OpBitFieldUExtract %3 %10 %20 %14 +%43 = OpIMul %3 %41 %42 +%44 = OpIAdd %3 %40 %43 +%45 = OpBitFieldUExtract %3 %9 %14 %14 +%46 = OpBitFieldUExtract %3 %10 %14 %14 +%47 = OpIMul %3 %45 %46 +%48 = OpIAdd %3 %44 %47 +%49 = OpBitFieldUExtract %3 %9 %21 %14 +%50 = OpBitFieldUExtract %3 %10 %21 %14 +%51 = OpIMul %3 %49 %50 +%52 = OpIAdd %3 %48 %51 +%53 = OpBitFieldUExtract %3 %9 %22 %14 +%54 = OpBitFieldUExtract %3 %10 %22 %14 +%55 = OpIMul %3 %53 %54 +%39 = OpIAdd %3 %52 %55 +%56 = OpIAdd %3 %11 %39 +%57 = OpIAdd %3 %12 %39 +%59 = OpBitcast %16 %56 +%60 = OpBitcast %16 %57 +%61 = OpBitFieldSExtract %16 %59 %20 %14 +%62 = OpBitFieldSExtract %16 %60 %20 %14 +%63 = OpIMul %16 %61 %62 +%64 = OpIAdd %16 %23 %63 +%65 = OpBitFieldSExtract %16 %59 %14 %14 +%66 = OpBitFieldSExtract %16 %60 %14 %14 +%67 = OpIMul %16 %65 %66 +%68 = OpIAdd %16 %64 %67 +%69 = OpBitFieldSExtract %16 %59 %21 %14 +%70 = OpBitFieldSExtract %16 %60 %21 %14 +%71 = OpIMul %16 %69 %70 +%72 = OpIAdd %16 %68 %71 +%73 = OpBitFieldSExtract %16 %59 %22 %14 +%74 = OpBitFieldSExtract %16 %60 %22 %14 +%75 = OpIMul %16 %73 %74 +%58 = OpIAdd %16 %72 %75 +%76 = OpIAdd %3 %13 %39 +%77 = OpIAdd %3 %14 %39 +%79 = OpBitFieldUExtract %3 %76 %20 %14 +%80 = OpBitFieldUExtract %3 %77 %20 %14 +%81 = OpIMul %3 %79 %80 +%82 = OpIAdd %3 %40 %81 +%83 = OpBitFieldUExtract %3 %76 %14 %14 +%84 = OpBitFieldUExtract %3 %77 %14 %14 +%85 = OpIMul %3 %83 %84 +%86 = OpIAdd %3 %82 %85 +%87 = OpBitFieldUExtract %3 %76 %21 %14 +%88 = OpBitFieldUExtract %3 %77 %21 %14 +%89 = OpIMul %3 %87 %88 +%90 = OpIAdd %3 %86 %89 +%91 = OpBitFieldUExtract %3 %76 %22 %14 +%92 = OpBitFieldUExtract %3 %77 %22 %14 +%93 = OpIMul %3 %91 %92 +%78 = OpIAdd 
%3 %90 %93 +OpReturnValue %78 +OpFunctionEnd +%95 = OpFunction %2 None %96 +%94 = OpLabel +OpBranch %97 +%97 = OpLabel +%98 = OpFunctionCall %3 %5 +OpReturn +OpFunctionEnd \ No newline at end of file