diff --git a/CHANGELOG.md b/CHANGELOG.md index 94afe7bc5f..0f51248edf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,7 +50,7 @@ Bottom level categories: Naga now infers the correct binding layout when a resource appears only in an assignment to `_`. By @andyleiserson in [#7540](https://github.com/gfx-rs/wgpu/pull/7540). -- Add polyfills for `dot4U8Packed` and `dot4I8Packed` for all backends. By @robamler in [#7494](https://github.com/gfx-rs/wgpu/pull/7494). +- Implement `dot4U8Packed` and `dot4I8Packed` for all backends, using specialized intrinsics on SPIR-V and HLSL if available, and polyfills everywhere else. By @robamler in [#7494](https://github.com/gfx-rs/wgpu/pull/7494) and [#7574](https://github.com/gfx-rs/wgpu/pull/7574). - Add polyfilled `pack4x{I,U}8Clamped` built-ins to all backends and WGSL frontend. By @ErichDonGubler in [#7546](https://github.com/gfx-rs/wgpu/pull/7546). #### DX12 diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index f11dcd7744..59725df3db 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -12,7 +12,7 @@ use super::{ WrappedZeroValue, }, storage: StoreValue, - BackendResult, Error, FragmentEntryPoint, Options, + BackendResult, Error, FragmentEntryPoint, Options, ShaderModel, }; use crate::{ back::{self, Baked}, @@ -3751,33 +3751,48 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { fun @ (Function::Dot4I8Packed | Function::Dot4U8Packed) => { let arg1 = arg1.unwrap(); - write!(self.out, "dot(")?; + if self.options.shader_model >= ShaderModel::V6_4 { + // Intrinsics `dot4add_{i, u}8packed` are available in SM 6.4 and later. 
+ let function_name = match fun { + Function::Dot4I8Packed => "dot4add_i8packed", + Function::Dot4U8Packed => "dot4add_u8packed", + _ => unreachable!(), + }; + write!(self.out, "{function_name}(")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, ", 0)")?; + } else { + // Fall back to a polyfill as `dot4add_u8packed` is not available. + write!(self.out, "dot(")?; - if matches!(fun, Function::Dot4U8Packed) { - write!(self.out, "u")?; - } - write!(self.out, "int4(")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, ", ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 8, ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 16, ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 24) << 24 >> 24, ")?; + if matches!(fun, Function::Dot4U8Packed) { + write!(self.out, "u")?; + } + write!(self.out, "int4(")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 8, ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 16, ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 24) << 24 >> 24, ")?; - if matches!(fun, Function::Dot4U8Packed) { - write!(self.out, "u")?; + if matches!(fun, Function::Dot4U8Packed) { + write!(self.out, "u")?; + } + write!(self.out, "int4(")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 8, ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 16, ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 24) << 24 >> 24)")?; } - write!(self.out, "int4(")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, ", ")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, " >> 8, ")?; - self.write_expr(module, arg1, func_ctx)?; - 
write!(self.out, " >> 16, ")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, " >> 24) << 24 >> 24)")?; } Function::QuantizeToF16 => { write!(self.out, "f16tof32(f32tof16(")?; diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 2e9cd32801..96e8471c86 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -1143,59 +1143,89 @@ impl BlockContext<'_> { ), }, fun @ (Mf::Dot4I8Packed | Mf::Dot4U8Packed) => { - // TODO: consider using packed integer dot product if PackedVectorFormat4x8Bit is available - let (extract_op, arg0_id, arg1_id) = match fun { - Mf::Dot4U8Packed => (spirv::Op::BitFieldUExtract, arg0_id, arg1_id), - Mf::Dot4I8Packed => { - // Convert both packed arguments to signed integers so that we can apply the - // `BitFieldSExtract` operation on them in `write_dot_product` below. - let new_arg0_id = self.gen_id(); - block.body.push(Instruction::unary( - spirv::Op::Bitcast, - result_type_id, - new_arg0_id, - arg0_id, - )); + if self.writer.lang_version() >= (1, 6) + && self + .writer + .require_all(&[ + spirv::Capability::DotProduct, + spirv::Capability::DotProductInput4x8BitPacked, + ]) + .is_ok() + { + // Write optimized code using `PackedVectorFormat4x8Bit`. + self.writer.use_extension("SPV_KHR_integer_dot_product"); + + let op = match fun { + Mf::Dot4I8Packed => spirv::Op::SDot, + Mf::Dot4U8Packed => spirv::Op::UDot, + _ => unreachable!(), + }; - let new_arg1_id = self.gen_id(); - block.body.push(Instruction::unary( - spirv::Op::Bitcast, - result_type_id, - new_arg1_id, - arg1_id, - )); + block.body.push(Instruction::ternary( + op, + result_type_id, + id, + arg0_id, + arg1_id, + spirv::PackedVectorFormat::PackedVectorFormat4x8Bit as Word, + )); + } else { + // Fall back to a polyfill since `PackedVectorFormat4x8Bit` is not available. 
+ let (extract_op, arg0_id, arg1_id) = match fun { + Mf::Dot4U8Packed => (spirv::Op::BitFieldUExtract, arg0_id, arg1_id), + Mf::Dot4I8Packed => { + // Convert both packed arguments to signed integers so that we can apply the + // `BitFieldSExtract` operation on them in `write_dot_product` below. + let new_arg0_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::Bitcast, + result_type_id, + new_arg0_id, + arg0_id, + )); - (spirv::Op::BitFieldSExtract, new_arg0_id, new_arg1_id) - } - _ => unreachable!(), - }; + let new_arg1_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::Bitcast, + result_type_id, + new_arg1_id, + arg1_id, + )); - let eight = self.writer.get_constant_scalar(crate::Literal::U32(8)); + (spirv::Op::BitFieldSExtract, new_arg0_id, new_arg1_id) + } + _ => unreachable!(), + }; - const VEC_LENGTH: u8 = 4; - let bit_shifts: [_; VEC_LENGTH as usize] = core::array::from_fn(|index| { - self.writer - .get_constant_scalar(crate::Literal::U32(index as u32 * 8)) - }); + let eight = self.writer.get_constant_scalar(crate::Literal::U32(8)); + + const VEC_LENGTH: u8 = 4; + let bit_shifts: [_; VEC_LENGTH as usize] = + core::array::from_fn(|index| { + self.writer + .get_constant_scalar(crate::Literal::U32(index as u32 * 8)) + }); + + self.write_dot_product( + id, + result_type_id, + arg0_id, + arg1_id, + VEC_LENGTH as Word, + block, + |result_id, composite_id, index| { + Instruction::ternary( + extract_op, + result_type_id, + result_id, + composite_id, + bit_shifts[index as usize], + eight, + ) + }, + ); + } - self.write_dot_product( - id, - result_type_id, - arg0_id, - arg1_id, - VEC_LENGTH as Word, - block, - |result_id, composite_id, index| { - Instruction::ternary( - extract_op, - result_type_id, - result_id, - composite_id, - bit_shifts[index as usize], - eight, - ) - }, - ); self.cached[expr_handle] = id; return Ok(()); } diff --git a/naga/src/back/spv/layout.rs b/naga/src/back/spv/layout.rs index 177996741d..6652b33504 
100644 --- a/naga/src/back/spv/layout.rs +++ b/naga/src/back/spv/layout.rs @@ -12,7 +12,8 @@ use alloc::format; const GENERATOR: Word = 28; impl PhysicalLayout { - pub(super) const fn new(version: Word) -> Self { + pub(super) const fn new(major_version: u8, minor_version: u8) -> Self { + let version = ((major_version as u32) << 16) | ((minor_version as u32) << 8); PhysicalLayout { magic_number: MAGIC_NUMBER, version, @@ -29,6 +30,13 @@ impl PhysicalLayout { sink.extend(iter::once(self.bound)); sink.extend(iter::once(self.instruction_schema)); } + + /// Returns `(major, minor)`. + pub(super) const fn lang_version(&self) -> (u8, u8) { + let major = (self.version >> 16) as u8; + let minor = (self.version >> 8) as u8; + (major, minor) + } } impl super::recyclable::Recyclable for PhysicalLayout { @@ -150,10 +158,13 @@ impl Instruction { #[test] fn test_physical_layout_in_words() { let bound = 5; - let version = 0x10203; + + // The least and most significant bytes of `version` must both be zero + // according to the SPIR-V spec. 
+ let version = 0x0001_0200; let mut output = vec![]; - let mut layout = PhysicalLayout::new(version); + let mut layout = PhysicalLayout::new(1, 2); layout.bound = bound; layout.in_words(&mut output); diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 6cfbbba981..3819ed10e7 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -60,7 +60,6 @@ impl Writer { if major != 1 { return Err(Error::UnsupportedVersion(major, minor)); } - let raw_version = ((major as u32) << 16) | ((minor as u32) << 8); let mut capabilities_used = crate::FastIndexSet::default(); capabilities_used.insert(spirv::Capability::Shader); @@ -70,7 +69,7 @@ impl Writer { let void_type = id_gen.next(); Ok(Writer { - physical_layout: PhysicalLayout::new(raw_version), + physical_layout: PhysicalLayout::new(major, minor), logical_layout: LogicalLayout::default(), id_gen, capabilities_available: options.capabilities.clone(), @@ -99,6 +98,11 @@ impl Writer { }) } + /// Returns `(major, minor)` of the SPIR-V language version. + pub const fn lang_version(&self) -> (u8, u8) { + self.physical_layout.lang_version() + } + /// Reset `Writer` to its initial state, retaining any allocations. /// /// Why not just implement `Recyclable` for `Writer`? By design, @@ -202,6 +206,43 @@ impl Writer { } } + /// Indicate that the code requires all of the listed capabilities. + /// + /// If all entries of `capabilities` appear in the available capabilities + /// specified in the [`Options`] from which this `Writer` was created + /// (including the case where [`Options::capabilities`] is `None`), add + /// them all to this `Writer`'s [`capabilities_used`] table, and return + /// `Ok(())`. If at least one of the listed capabilities is not available, + /// do not add anything to the `capabilities_used` table, and return the + /// first unavailable requested capability, wrapped in `Err()`. 
+ /// + /// This method does not return an [`enum@Error`] in case of failure + /// because it may be used in cases where the caller can recover (e.g., + /// with a polyfill) if the requested capabilities are not available. In + /// this case, it would be unnecessary work to find *all* the unavailable + /// requested capabilities, and to allocate a `Vec` for them, just so we + /// could return an [`Error::MissingCapabilities`]. + /// + /// [`capabilities_used`]: Writer::capabilities_used + pub(super) fn require_all( + &mut self, + capabilities: &[spirv::Capability], + ) -> Result<(), spirv::Capability> { + if let Some(ref available) = self.capabilities_available { + for requested in capabilities { + if !available.contains(requested) { + return Err(*requested); + } + } + } + + for requested in capabilities { + self.capabilities_used.insert(*requested); + } + + Ok(()) + } + /// Indicate that the code uses the given extension. pub(super) fn use_extension(&mut self, extension: &'static str) { self.extensions_used.insert(extension); diff --git a/naga/tests/in/wgsl/functions-optimized.toml b/naga/tests/in/wgsl/functions-optimized.toml new file mode 100644 index 0000000000..c8a7abfa09 --- /dev/null +++ b/naga/tests/in/wgsl/functions-optimized.toml @@ -0,0 +1,11 @@ +# Explicitly turn on optimizations for `dot4I8Packed` and `dot4U8Packed` +# on SPIRV and HLSL. 
+ +targets = "SPIRV | HLSL" + +[spv] +capabilities = ["DotProduct", "DotProductInput4x8BitPacked"] +version = [1, 6] + +[hlsl] +shader_model = "V6_4" diff --git a/naga/tests/in/wgsl/functions-optimized.wgsl b/naga/tests/in/wgsl/functions-optimized.wgsl new file mode 100644 index 0000000000..229357523c --- /dev/null +++ b/naga/tests/in/wgsl/functions-optimized.wgsl @@ -0,0 +1,19 @@ +fn test_packed_integer_dot_product() -> u32 { + let a_5 = 1u; + let b_5 = 2u; + let c_5: i32 = dot4I8Packed(a_5, b_5); + + let a_6 = 3u; + let b_6 = 4u; + let c_6: u32 = dot4U8Packed(a_6, b_6); + + // test baking of arguments + let c_7: i32 = dot4I8Packed(5u + c_6, 6u + c_6); + let c_8: u32 = dot4U8Packed(7u + c_6, 8u + c_6); + return c_8; +} + +@compute @workgroup_size(1) +fn main() { + let c = test_packed_integer_dot_product(); +} diff --git a/naga/tests/in/wgsl/functions-unoptimized.toml b/naga/tests/in/wgsl/functions-unoptimized.toml new file mode 100644 index 0000000000..7361004be3 --- /dev/null +++ b/naga/tests/in/wgsl/functions-unoptimized.toml @@ -0,0 +1,13 @@ +# Explicitly turn off optimizations for `dot4I8Packed` and `dot4U8Packed` +# on SPIRV and HLSL. + +targets = "SPIRV | HLSL" + +[spv] +# Provide some unrelated capability because an empty list of capabilities would +# get mapped to `None`, which would then be interpreted as "all capabilities +# are available". 
+capabilities = ["Matrix"] + +[hlsl] +shader_model = "V6_3" diff --git a/naga/tests/in/wgsl/functions-unoptimized.wgsl b/naga/tests/in/wgsl/functions-unoptimized.wgsl new file mode 100644 index 0000000000..229357523c --- /dev/null +++ b/naga/tests/in/wgsl/functions-unoptimized.wgsl @@ -0,0 +1,19 @@ +fn test_packed_integer_dot_product() -> u32 { + let a_5 = 1u; + let b_5 = 2u; + let c_5: i32 = dot4I8Packed(a_5, b_5); + + let a_6 = 3u; + let b_6 = 4u; + let c_6: u32 = dot4U8Packed(a_6, b_6); + + // test baking of arguments + let c_7: i32 = dot4I8Packed(5u + c_6, 6u + c_6); + let c_8: u32 = dot4U8Packed(7u + c_6, 8u + c_6); + return c_8; +} + +@compute @workgroup_size(1) +fn main() { + let c = test_packed_integer_dot_product(); +} diff --git a/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl b/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl new file mode 100644 index 0000000000..2b2d003ea0 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl @@ -0,0 +1,19 @@ +uint test_packed_integer_dot_product() +{ + int c_5_ = dot4add_i8packed(1u, 2u, 0); + uint c_6_ = dot4add_u8packed(3u, 4u, 0); + uint _e7 = (5u + c_6_); + uint _e9 = (6u + c_6_); + int c_7_ = dot4add_i8packed(_e7, _e9, 0); + uint _e12 = (7u + c_6_); + uint _e14 = (8u + c_6_); + uint c_8_ = dot4add_u8packed(_e12, _e14, 0); + return c_8_; +} + +[numthreads(1, 1, 1)] +void main() +{ + const uint _e0 = test_packed_integer_dot_product(); + return; +} diff --git a/naga/tests/out/hlsl/wgsl-functions-optimized.ron b/naga/tests/out/hlsl/wgsl-functions-optimized.ron new file mode 100644 index 0000000000..81f3e9b295 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-optimized.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_4", + ), + ], +) diff --git a/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl b/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl new file mode 100644 index 0000000000..72156e433c --- /dev/null +++ 
b/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl @@ -0,0 +1,19 @@ +uint test_packed_integer_dot_product() +{ + int c_5_ = dot(int4(1u, 1u >> 8, 1u >> 16, 1u >> 24) << 24 >> 24, int4(2u, 2u >> 8, 2u >> 16, 2u >> 24) << 24 >> 24); + uint c_6_ = dot(uint4(3u, 3u >> 8, 3u >> 16, 3u >> 24) << 24 >> 24, uint4(4u, 4u >> 8, 4u >> 16, 4u >> 24) << 24 >> 24); + uint _e7 = (5u + c_6_); + uint _e9 = (6u + c_6_); + int c_7_ = dot(int4(_e7, _e7 >> 8, _e7 >> 16, _e7 >> 24) << 24 >> 24, int4(_e9, _e9 >> 8, _e9 >> 16, _e9 >> 24) << 24 >> 24); + uint _e12 = (7u + c_6_); + uint _e14 = (8u + c_6_); + uint c_8_ = dot(uint4(_e12, _e12 >> 8, _e12 >> 16, _e12 >> 24) << 24 >> 24, uint4(_e14, _e14 >> 8, _e14 >> 16, _e14 >> 24) << 24 >> 24); + return c_8_; +} + +[numthreads(1, 1, 1)] +void main() +{ + const uint _e0 = test_packed_integer_dot_product(); + return; +} diff --git a/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron new file mode 100644 index 0000000000..f1f510d2dc --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_3", + ), + ], +) diff --git a/naga/tests/out/spv/wgsl-functions-optimized.spvasm b/naga/tests/out/spv/wgsl-functions-optimized.spvasm new file mode 100644 index 0000000000..7169a1e1e4 --- /dev/null +++ b/naga/tests/out/spv/wgsl-functions-optimized.spvasm @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.6 +; Generator: rspirv +; Bound: 30 +OpCapability Shader +OpCapability DotProductKHR +OpCapability DotProductInput4x8BitPackedKHR +OpExtension "SPV_KHR_integer_dot_product" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %26 "main" +OpExecutionMode %26 LocalSize 1 1 1 +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%6 = OpTypeFunction %3 +%7 = OpConstant %3 1 +%8 = OpConstant %3 2 +%9 = OpConstant %3 3 +%10 = OpConstant %3 4 +%11 = OpConstant %3 5 +%12 = 
OpConstant %3 6 +%13 = OpConstant %3 7 +%14 = OpConstant %3 8 +%16 = OpTypeInt 32 1 +%27 = OpTypeFunction %2 +%5 = OpFunction %3 None %6 +%4 = OpLabel +OpBranch %15 +%15 = OpLabel +%17 = OpSDotKHR %16 %7 %8 PackedVectorFormat4x8BitKHR +%18 = OpUDotKHR %3 %9 %10 PackedVectorFormat4x8BitKHR +%19 = OpIAdd %3 %11 %18 +%20 = OpIAdd %3 %12 %18 +%21 = OpSDotKHR %16 %19 %20 PackedVectorFormat4x8BitKHR +%22 = OpIAdd %3 %13 %18 +%23 = OpIAdd %3 %14 %18 +%24 = OpUDotKHR %3 %22 %23 PackedVectorFormat4x8BitKHR +OpReturnValue %24 +OpFunctionEnd +%26 = OpFunction %2 None %27 +%25 = OpLabel +OpBranch %28 +%28 = OpLabel +%29 = OpFunctionCall %3 %5 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-functions-unoptimized.spvasm b/naga/tests/out/spv/wgsl-functions-unoptimized.spvasm new file mode 100644 index 0000000000..adf07f25db --- /dev/null +++ b/naga/tests/out/spv/wgsl-functions-unoptimized.spvasm @@ -0,0 +1,112 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 99 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %95 "main" +OpExecutionMode %95 LocalSize 1 1 1 +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%6 = OpTypeFunction %3 +%7 = OpConstant %3 1 +%8 = OpConstant %3 2 +%9 = OpConstant %3 3 +%10 = OpConstant %3 4 +%11 = OpConstant %3 5 +%12 = OpConstant %3 6 +%13 = OpConstant %3 7 +%14 = OpConstant %3 8 +%16 = OpTypeInt 32 1 +%20 = OpConstant %3 0 +%21 = OpConstant %3 16 +%22 = OpConstant %3 24 +%23 = OpConstantNull %16 +%40 = OpConstantNull %3 +%96 = OpTypeFunction %2 +%5 = OpFunction %3 None %6 +%4 = OpLabel +OpBranch %15 +%15 = OpLabel +%18 = OpBitcast %16 %7 +%19 = OpBitcast %16 %8 +%24 = OpBitFieldSExtract %16 %18 %20 %14 +%25 = OpBitFieldSExtract %16 %19 %20 %14 +%26 = OpIMul %16 %24 %25 +%27 = OpIAdd %16 %23 %26 +%28 = OpBitFieldSExtract %16 %18 %14 %14 +%29 = OpBitFieldSExtract %16 %19 %14 %14 +%30 = OpIMul %16 %28 %29 +%31 = OpIAdd %16 %27 %30 +%32 = 
OpBitFieldSExtract %16 %18 %21 %14 +%33 = OpBitFieldSExtract %16 %19 %21 %14 +%34 = OpIMul %16 %32 %33 +%35 = OpIAdd %16 %31 %34 +%36 = OpBitFieldSExtract %16 %18 %22 %14 +%37 = OpBitFieldSExtract %16 %19 %22 %14 +%38 = OpIMul %16 %36 %37 +%17 = OpIAdd %16 %35 %38 +%41 = OpBitFieldUExtract %3 %9 %20 %14 +%42 = OpBitFieldUExtract %3 %10 %20 %14 +%43 = OpIMul %3 %41 %42 +%44 = OpIAdd %3 %40 %43 +%45 = OpBitFieldUExtract %3 %9 %14 %14 +%46 = OpBitFieldUExtract %3 %10 %14 %14 +%47 = OpIMul %3 %45 %46 +%48 = OpIAdd %3 %44 %47 +%49 = OpBitFieldUExtract %3 %9 %21 %14 +%50 = OpBitFieldUExtract %3 %10 %21 %14 +%51 = OpIMul %3 %49 %50 +%52 = OpIAdd %3 %48 %51 +%53 = OpBitFieldUExtract %3 %9 %22 %14 +%54 = OpBitFieldUExtract %3 %10 %22 %14 +%55 = OpIMul %3 %53 %54 +%39 = OpIAdd %3 %52 %55 +%56 = OpIAdd %3 %11 %39 +%57 = OpIAdd %3 %12 %39 +%59 = OpBitcast %16 %56 +%60 = OpBitcast %16 %57 +%61 = OpBitFieldSExtract %16 %59 %20 %14 +%62 = OpBitFieldSExtract %16 %60 %20 %14 +%63 = OpIMul %16 %61 %62 +%64 = OpIAdd %16 %23 %63 +%65 = OpBitFieldSExtract %16 %59 %14 %14 +%66 = OpBitFieldSExtract %16 %60 %14 %14 +%67 = OpIMul %16 %65 %66 +%68 = OpIAdd %16 %64 %67 +%69 = OpBitFieldSExtract %16 %59 %21 %14 +%70 = OpBitFieldSExtract %16 %60 %21 %14 +%71 = OpIMul %16 %69 %70 +%72 = OpIAdd %16 %68 %71 +%73 = OpBitFieldSExtract %16 %59 %22 %14 +%74 = OpBitFieldSExtract %16 %60 %22 %14 +%75 = OpIMul %16 %73 %74 +%58 = OpIAdd %16 %72 %75 +%76 = OpIAdd %3 %13 %39 +%77 = OpIAdd %3 %14 %39 +%79 = OpBitFieldUExtract %3 %76 %20 %14 +%80 = OpBitFieldUExtract %3 %77 %20 %14 +%81 = OpIMul %3 %79 %80 +%82 = OpIAdd %3 %40 %81 +%83 = OpBitFieldUExtract %3 %76 %14 %14 +%84 = OpBitFieldUExtract %3 %77 %14 %14 +%85 = OpIMul %3 %83 %84 +%86 = OpIAdd %3 %82 %85 +%87 = OpBitFieldUExtract %3 %76 %21 %14 +%88 = OpBitFieldUExtract %3 %77 %21 %14 +%89 = OpIMul %3 %87 %88 +%90 = OpIAdd %3 %86 %89 +%91 = OpBitFieldUExtract %3 %76 %22 %14 +%92 = OpBitFieldUExtract %3 %77 %22 %14 +%93 = OpIMul %3 %91 %92 +%78 = OpIAdd 
%3 %90 %93 +OpReturnValue %78 +OpFunctionEnd +%95 = OpFunction %2 None %96 +%94 = OpLabel +OpBranch %97 +%97 = OpLabel +%98 = OpFunctionCall %3 %5 +OpReturn +OpFunctionEnd \ No newline at end of file