[Impeller] Don't decompress into device buffer for Vulkan/GLES. (flutter/engine#43493)

My observation on the Pixel 6 is that performing device allocations from multiple threads can dramatically slow down the raster task workload. As a stopgap solution, we can adjust image upload so that it only touches the device allocator on the IO thread, which reduces parallel access to the allocator.

This doesn't have any impact on the S10, but locally on the Pixel 6 it is a night-and-day difference. I am testing with jonahwilliams/forked_gallery by navigating to the Reply demo, which has a large number of images, several of which are quite large.

Work towards https://github.com/flutter/flutter/issues/129392

### Before

Only ~4 frames are rendered during the page transition.

![image](https://github.com/flutter/engine/assets/8975114/b6d1c225-060b-4a20-9737-ad668423799a)

### After

~20 frames are rendered during the page transition.

![image](https://github.com/flutter/engine/assets/8975114/5ff1f857-8327-4d04-b40a-3da4a5fc91a4)
This commit is contained in:
Jonah Williams 2023-07-10 13:48:18 -07:00 committed by GitHub
parent 3537a13e39
commit 83ba0cd2d0
4 changed files with 59 additions and 11 deletions

View File

@ -32,6 +32,42 @@
namespace flutter {
/// A DeviceBuffer whose backing storage is plain host memory obtained from
/// malloc() instead of a device allocator.
///
/// The storage (desc.size bytes) is allocated once in the constructor and
/// released in the destructor. Labels are accepted but ignored.
class MallocDeviceBuffer : public impeller::DeviceBuffer {
 public:
  /// Allocates desc.size bytes of host memory for the buffer contents.
  ///
  /// If the allocation fails the contents pointer stays null; in that case
  /// OnGetContents() returns nullptr and OnCopyHostBuffer() reports failure
  /// for any non-empty copy.
  explicit MallocDeviceBuffer(impeller::DeviceBufferDescriptor desc)
      : impeller::DeviceBuffer(desc),
        data_(static_cast<uint8_t*>(malloc(desc.size))) {}

  // free(nullptr) is a no-op, so a failed allocation needs no special casing.
  ~MallocDeviceBuffer() override { free(data_); }

  /// Labels are not recorded for malloc-backed buffers; always succeeds.
  bool SetLabel(const std::string& label) override { return true; }

  /// Labels are not recorded for malloc-backed buffers; always succeeds.
  bool SetLabel(const std::string& label, impeller::Range range) override {
    return true;
  }

  /// Returns the start of the malloc'd storage (null if allocation failed).
  uint8_t* OnGetContents() const override { return data_; }

  /// Copies source_range.length bytes, starting at
  /// source + source_range.offset, into the buffer at byte `offset`.
  ///
  /// @return true on success (including an empty copy); false when the
  ///         backing allocation failed or `source` is null.
  bool OnCopyHostBuffer(const uint8_t* source,
                        impeller::Range source_range,
                        size_t offset) override {
    // Nothing to copy; also avoids calling memcpy with possibly-null pointers,
    // which is undefined even for a zero byte count.
    if (source_range.length == 0) {
      return true;
    }
    if (data_ == nullptr || source == nullptr) {
      return false;
    }
    memcpy(data_ + offset, source + source_range.offset, source_range.length);
    return true;
  }

 private:
  // Owned host allocation of the descriptor's size; null if malloc failed.
  uint8_t* data_ = nullptr;

  FML_DISALLOW_COPY_AND_ASSIGN(MallocDeviceBuffer);
};
// Selects whether decoded image pixels are placed in a MallocDeviceBuffer
// (host memory) rather than a buffer created by the device allocator.
// Enabled only when FML_OS_ANDROID is defined.
#ifdef FML_OS_ANDROID
static constexpr bool kShouldUseMallocDeviceBuffer = true;
#else
static constexpr bool kShouldUseMallocDeviceBuffer = false;
#endif // FML_OS_ANDROID
namespace {
/**
* Loads the gamut as a set of three points (triangle).
@ -336,17 +372,20 @@ ImageDecoderImpeller::UploadTextureToPrivate(
})
.SetIfTrue([&result, context, bitmap, gpu_disabled_switch] {
// create_mips is false because we already know the GPU is disabled.
result = UploadTextureToShared(context, bitmap, gpu_disabled_switch,
/*create_mips=*/false);
result =
UploadTextureToStorage(context, bitmap, gpu_disabled_switch,
impeller::StorageMode::kHostVisible,
/*create_mips=*/false);
}));
return result;
}
std::pair<sk_sp<DlImage>, std::string>
ImageDecoderImpeller::UploadTextureToShared(
ImageDecoderImpeller::UploadTextureToStorage(
const std::shared_ptr<impeller::Context>& context,
std::shared_ptr<SkBitmap> bitmap,
const std::shared_ptr<fml::SyncSwitch>& gpu_disabled_switch,
impeller::StorageMode storage_mode,
bool create_mips) {
TRACE_EVENT0("impeller", __FUNCTION__);
if (!context) {
@ -366,7 +405,7 @@ ImageDecoderImpeller::UploadTextureToShared(
}
impeller::TextureDescriptor texture_descriptor;
texture_descriptor.storage_mode = impeller::StorageMode::kHostVisible;
texture_descriptor.storage_mode = storage_mode;
texture_descriptor.format = pixel_format.value();
texture_descriptor.size = {image_info.width(), image_info.height()};
texture_descriptor.mip_count =
@ -483,14 +522,16 @@ void ImageDecoderImpeller::Decode(fml::RefPtr<ImageDescriptor> descriptor,
gpu_disabled_switch]() {
sk_sp<DlImage> image;
std::string decode_error;
if (context->GetCapabilities()->SupportsBufferToTextureBlits()) {
if (!kShouldUseMallocDeviceBuffer &&
context->GetCapabilities()->SupportsBufferToTextureBlits()) {
std::tie(image, decode_error) = UploadTextureToPrivate(
context, bitmap_result.device_buffer, bitmap_result.image_info,
bitmap_result.sk_bitmap, gpu_disabled_switch);
result(image, decode_error);
} else {
std::tie(image, decode_error) = UploadTextureToShared(
std::tie(image, decode_error) = UploadTextureToStorage(
context, bitmap_result.sk_bitmap, gpu_disabled_switch,
impeller::StorageMode::kDevicePrivate,
/*create_mips=*/true);
result(image, decode_error);
}
@ -525,7 +566,10 @@ bool ImpellerAllocator::allocPixelRef(SkBitmap* bitmap) {
descriptor.size = ((bitmap->height() - 1) * bitmap->rowBytes()) +
(bitmap->width() * bitmap->bytesPerPixel());
auto device_buffer = allocator_->CreateBuffer(descriptor);
std::shared_ptr<impeller::DeviceBuffer> device_buffer =
kShouldUseMallocDeviceBuffer
? std::make_shared<MallocDeviceBuffer>(descriptor)
: allocator_->CreateBuffer(descriptor);
struct ImpellerPixelRef final : public SkPixelRef {
ImpellerPixelRef(int w, int h, void* s, size_t r)

View File

@ -9,6 +9,7 @@
#include "flutter/fml/macros.h"
#include "flutter/lib/ui/painting/image_decoder.h"
#include "impeller/core/formats.h"
#include "impeller/geometry/size.h"
#include "third_party/skia/include/core/SkBitmap.h"
@ -90,10 +91,11 @@ class ImageDecoderImpeller final : public ImageDecoder {
/// @param gpu_disabled_switch Whether the GPU is available for mipmap
/// creation.
/// @return A DlImage.
static std::pair<sk_sp<DlImage>, std::string> UploadTextureToShared(
static std::pair<sk_sp<DlImage>, std::string> UploadTextureToStorage(
const std::shared_ptr<impeller::Context>& context,
std::shared_ptr<SkBitmap> bitmap,
const std::shared_ptr<fml::SyncSwitch>& gpu_disabled_switch,
impeller::StorageMode storage_mode,
bool create_mips = true);
private:

View File

@ -455,8 +455,9 @@ TEST_F(ImageDecoderFixtureTest, ImpellerUploadToSharedNoGpu) {
ASSERT_EQ(no_gpu_access_context->command_buffer_count_, 0ul);
ASSERT_EQ(result.second, "");
result = ImageDecoderImpeller::UploadTextureToShared(
no_gpu_access_context, bitmap, gpu_disabled_switch, true);
result = ImageDecoderImpeller::UploadTextureToStorage(
no_gpu_access_context, bitmap, gpu_disabled_switch,
impeller::StorageMode::kHostVisible, true);
ASSERT_EQ(no_gpu_access_context->command_buffer_count_, 0ul);
ASSERT_EQ(result.second, "");
}

View File

@ -147,9 +147,10 @@ MultiFrameCodec::State::GetNextFrameImage(
if (is_impeller_enabled_) {
// This is safe regardless of whether the GPU is available or not because
// without mipmap creation there is no command buffer encoding done.
return ImageDecoderImpeller::UploadTextureToShared(
return ImageDecoderImpeller::UploadTextureToStorage(
impeller_context, std::make_shared<SkBitmap>(bitmap),
std::make_shared<fml::SyncSwitch>(),
impeller::StorageMode::kHostVisible,
/*create_mips=*/false);
}
#endif // IMPELLER_SUPPORTS_RENDERING