Avoid truncation to zero when resizing threadgroups (flutter/engine#40502)

[Impeller] Avoid truncation to zero when resizing threadgroups
Commit author and date:
Dan Field, 2023-03-21 15:11:04 -07:00 (committed via GitHub)
parent c3f475f8c5
commit 885013ccdd
2 changed files with 83 additions and 2 deletions

View File

@ -252,8 +252,8 @@ bool ComputePassMTL::EncodeCommands(const std::shared_ptr<Allocator>& allocator,
while (width * height >
static_cast<int64_t>(
pass_bindings.GetPipeline().maxTotalThreadsPerThreadgroup)) {
width /= 2;
height /= 2;
width = std::max(1LL, width / 2);
height = std::max(1LL, height / 2);
}
auto size = MTLSizeMake(width, height, 1);
[encoder dispatchThreadgroups:size threadsPerThreadgroup:size];

View File

@ -204,5 +204,86 @@ TEST_P(ComputeTest, MultiStageInputAndOutput) {
latch.Wait();
}
// Regression test for the threadgroup down-scaling loop in the compute pass:
// when width * height exceeds the pipeline's maxTotalThreadsPerThreadgroup,
// the backend repeatedly halves the dimensions. This test requests an
// absurdly large grid to verify the halving clamps at 1 (std::max(1LL, ...))
// instead of integer-truncating to 0, which would dispatch an empty grid and
// leave the output buffer unwritten.
TEST_P(ComputeTest, CanCorrectlyDownScaleLargeGridSize) {
using CS = SampleComputeShader;
auto context = GetContext();
ASSERT_TRUE(context);
// Skip-guard: only meaningful on backends that support compute at all.
ASSERT_TRUE(context->GetDeviceCapabilities().SupportsCompute());
using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
auto pipeline_desc =
SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
ASSERT_TRUE(pipeline_desc.has_value());
auto compute_pipeline =
context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
ASSERT_TRUE(compute_pipeline);
auto cmd_buffer = context->CreateCommandBuffer();
auto pass = cmd_buffer->CreateComputePass();
ASSERT_TRUE(pass && pass->IsValid());
// Number of elements in each input/output storage buffer.
static constexpr size_t kCount = 5;
// Intentionally making the grid size obscenely large. No GPU will tolerate
// this, so the pass is forced through the down-scaling loop many times —
// enough halvings that a naive `width /= 2` would bottom out at zero.
pass->SetGridSize(ISize(std::numeric_limits<int64_t>::max(), 1));
pass->SetThreadGroupSize(ISize(std::numeric_limits<int64_t>::max(), 1));
ComputeCommand cmd;
cmd.label = "Compute";
cmd.pipeline = compute_pipeline;
// Populate deterministic inputs so the expected per-element products below
// can be recomputed on the CPU inside the completion callback.
CS::Info info{.count = kCount};
CS::Input0<kCount> input_0;
CS::Input1<kCount> input_1;
for (size_t i = 0; i < kCount; i++) {
input_0.elements[i] = Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
input_1.elements[i] = Vector4(6.0, 7.0, 8.0, 9.0);
}
input_0.fixed_array[1] = IPoint32(2, 2);
input_1.fixed_array[0] = UintPoint32(3, 3);
input_0.some_int = 5;
input_1.some_struct = CS::SomeStruct{.vf = Point(3, 4), .i = 42};
// Host-visible so the callback can read results back without a blit.
auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
context, "Output Buffer");
CS::BindInfo(cmd, pass->GetTransientsBuffer().EmplaceUniform(info));
CS::BindInput0(cmd,
pass->GetTransientsBuffer().EmplaceStorageBuffer(input_0));
CS::BindInput1(cmd,
pass->GetTransientsBuffer().EmplaceStorageBuffer(input_1));
CS::BindOutput(cmd, output_buffer->AsBufferView());
ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
ASSERT_TRUE(pass->EncodeCommands());
// Submission completes asynchronously; block the test until the GPU-side
// callback has validated the output.
fml::AutoResetWaitableEvent latch;
ASSERT_TRUE(
cmd_buffer->SubmitCommands([&latch, output_buffer, &input_0,
&input_1](CommandBuffer::Status status) {
EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
auto view = output_buffer->AsBufferView();
EXPECT_EQ(view.range.length, sizeof(CS::Output<kCount>));
CS::Output<kCount>* output =
reinterpret_cast<CS::Output<kCount>*>(view.contents);
EXPECT_TRUE(output);
// If the grid had been truncated to zero threadgroups, nothing would
// have been dispatched and these element checks would all fail —
// that is the actual assertion of this regression test. The expected
// expression mirrors SampleComputeShader's per-element arithmetic
// (presumably elementwise multiply plus struct offsets — see the
// shader source for the authoritative formula).
for (size_t i = 0; i < kCount; i++) {
Vector4 vector = output->elements[i];
Vector4 computed = input_0.elements[i] * input_1.elements[i];
EXPECT_EQ(vector, Vector4(computed.x + 2 + input_1.some_struct.i,
computed.y + 3 + input_1.some_struct.vf.x,
computed.z + 5 + input_1.some_struct.vf.y,
computed.w));
}
latch.Signal();
}));
latch.Wait();
}
} // namespace testing
} // namespace impeller