[Impeller] reduce gaussian sampling by 2x (flutter/engine#40871)

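[Impeller] reduce gaussian sampling by 2x
Each blur loop iteration now folds two adjacent Gaussian taps into a single linearly filtered texture fetch, placed at the coordinate obtained by weighting the two tap positions by their Gaussian weights, so the shader issues roughly half as many texture samples.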
This commit is contained in:
Jonah Williams 2023-04-03 17:28:04 -07:00 committed by GitHub
parent f82dc83970
commit 234decefd7
3 changed files with 98 additions and 87 deletions

View File

@ -195,7 +195,7 @@ std::optional<Entity> DirectionalGaussianBlurFilterContents::RenderFilter(
FS::BlurInfo frag_info;
auto r = Radius{transformed_blur_radius_length};
frag_info.blur_sigma = Sigma{r}.sigma;
frag_info.blur_radius = r.radius;
frag_info.blur_radius = std::round(r.radius);
// The blur direction is in input UV space.
frag_info.blur_uv_offset =
@ -240,6 +240,8 @@ std::optional<Entity> DirectionalGaussianBlurFilterContents::RenderFilter(
source_descriptor.height_address_mode = SamplerAddressMode::kRepeat;
break;
}
input_descriptor.mag_filter = MinMagFilter::kLinear;
input_descriptor.min_filter = MinMagFilter::kLinear;
bool has_alpha_mask = blur_style_ != BlurStyle::kNormal;
bool has_decal_specialization =

View File

@ -60,14 +60,23 @@ void main() {
f16vec4 total_color = f16vec4(0.0hf);
float16_t gaussian_integral = 0.0hf;
for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) {
float16_t gaussian = IPGaussian(i, blur_info.blur_sigma);
for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius;
i += 2.0hf) {
float16_t w1 = IPGaussian(i, blur_info.blur_sigma);
float16_t w2 = IPGaussian(i + 1.0hf, blur_info.blur_sigma);
float16_t gaussian = w1 + w2;
f16vec2 offset_1 = blur_info.blur_uv_offset * i;
f16vec2 offset_2 = offset_1 + blur_info.blur_uv_offset;
vec2 pos_c1 = v_texture_coords + offset_1;
vec2 pos_c2 = v_texture_coords + offset_2;
vec2 coords = (w1 * pos_c1 + w2 * pos_c2) / gaussian;
gaussian_integral += gaussian;
total_color +=
gaussian * Sample(texture_sampler, // sampler
v_texture_coords + blur_info.blur_uv_offset *
i // texture coordinates
);
total_color += gaussian * Sample(texture_sampler, // sampler
coords // texture coordinates
);
}
frag_color = total_color / gaussian_integral;

View File

@ -3242,7 +3242,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 53,
"fp16_arithmetic": 33,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -3281,13 +3281,13 @@
],
"total_bound_pipelines": [
"arith_total",
"arith_cvt"
"arith_sfu"
],
"total_cycles": [
0.578125,
0.25,
0.578125,
0.5,
0.625,
0.515625,
0.609375,
0.625,
0.0,
0.5,
0.5
@ -3296,7 +3296,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 12,
"work_registers_used": 20
"work_registers_used": 22
}
}
}
@ -3314,7 +3314,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 45,
"fp16_arithmetic": 26,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -3352,14 +3352,14 @@
0.25
],
"total_bound_pipelines": [
"varying",
"texture"
"arith_total",
"arith_fma"
],
"total_cycles": [
0.34375,
0.515625,
0.515625,
0.375,
0.25,
0.34375,
0.125,
0.0,
0.5,
0.5
@ -3368,7 +3368,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 14,
"work_registers_used": 14
"work_registers_used": 21
}
}
}
@ -3386,7 +3386,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 42,
"fp16_arithmetic": 23,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -3425,13 +3425,13 @@
],
"total_bound_pipelines": [
"arith_total",
"arith_sfu"
"arith_fma"
],
"total_cycles": [
0.3125,
0.203125,
0.296875,
0.3125,
0.46875,
0.46875,
0.328125,
0.4375,
0.0,
0.25,
0.25
@ -3440,7 +3440,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 10,
"work_registers_used": 16
"work_registers_used": 18
}
}
}
@ -3458,7 +3458,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 35,
"fp16_arithmetic": 19,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -3496,14 +3496,14 @@
0.0
],
"total_bound_pipelines": [
"varying",
"texture"
"arith_total",
"arith_fma"
],
"total_cycles": [
0.203125,
0.203125,
0.203125,
0.125,
0.46875,
0.46875,
0.234375,
0.25,
0.0,
0.25,
0.25
@ -3512,7 +3512,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 10,
"work_registers_used": 13
"work_registers_used": 19
}
}
}
@ -6603,7 +6603,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 66,
"fp16_arithmetic": 52,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -6644,13 +6644,13 @@
],
"total_bound_pipelines": [
"arith_total",
"arith_cvt"
"arith_sfu"
],
"total_cycles": [
0.53125,
0.328125,
0.53125,
0.5,
0.625,
0.578125,
0.546875,
0.625,
0.0,
0.5,
0.5
@ -6659,7 +6659,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 12,
"work_registers_used": 21
"work_registers_used": 25
}
}
},
@ -6670,7 +6670,7 @@
"type": "Fragment",
"variants": {
"Main": {
"has_stack_spilling": false,
"has_stack_spilling": true,
"performance": {
"longest_path_bound_pipelines": [
null
@ -6689,16 +6689,16 @@
"arithmetic"
],
"shortest_path_cycles": [
3.299999952316284,
2.0,
3.630000114440918,
1.0,
0.0
],
"total_bound_pipelines": [
"arithmetic"
],
"total_cycles": [
7.666666507720947,
2.0,
10.333333015441895,
6.0,
2.0
]
},
@ -6722,7 +6722,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 61,
"fp16_arithmetic": 47,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -6760,14 +6760,14 @@
0.25
],
"total_bound_pipelines": [
"varying",
"texture"
"arith_total",
"arith_fma"
],
"total_cycles": [
0.328125,
0.328125,
0.328125,
0.125,
0.578125,
0.578125,
0.34375,
0.25,
0.0,
0.5,
0.5
@ -6776,7 +6776,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 12,
"work_registers_used": 20
"work_registers_used": 22
}
}
},
@ -6787,7 +6787,7 @@
"type": "Fragment",
"variants": {
"Main": {
"has_stack_spilling": false,
"has_stack_spilling": true,
"performance": {
"longest_path_bound_pipelines": [
null
@ -6803,25 +6803,25 @@
"texture"
],
"shortest_path_bound_pipelines": [
"arithmetic"
"load_store"
],
"shortest_path_cycles": [
2.309999942779541,
2.0,
2.9700000286102295,
7.0,
1.0
],
"total_bound_pipelines": [
"arithmetic"
"load_store"
],
"total_cycles": [
5.0,
2.0,
8.0,
11.0,
2.0
]
},
"thread_occupancy": 100,
"uniform_registers_used": 1,
"work_registers_used": 3
"work_registers_used": 4
}
}
}
@ -6839,7 +6839,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 70,
"fp16_arithmetic": 50,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -6879,13 +6879,13 @@
],
"total_bound_pipelines": [
"arith_total",
"arith_sfu"
"arith_fma"
],
"total_cycles": [
0.3125,
0.234375,
0.28125,
0.3125,
0.484375,
0.484375,
0.296875,
0.4375,
0.0,
0.25,
0.25
@ -6894,7 +6894,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 8,
"work_registers_used": 20
"work_registers_used": 23
}
}
},
@ -6905,7 +6905,7 @@
"type": "Fragment",
"variants": {
"Main": {
"has_stack_spilling": false,
"has_stack_spilling": true,
"performance": {
"longest_path_bound_pipelines": [
null
@ -6924,16 +6924,16 @@
"arithmetic"
],
"shortest_path_cycles": [
1.649999976158142,
1.0,
1.9800000190734863,
0.0,
0.0
],
"total_bound_pipelines": [
"arithmetic"
],
"total_cycles": [
7.666666507720947,
5.0,
1.0,
1.0
]
},
@ -6957,7 +6957,7 @@
"uses_late_zs_update": false,
"variants": {
"Main": {
"fp16_arithmetic": 66,
"fp16_arithmetic": 47,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
@ -6996,14 +6996,14 @@
0.0
],
"total_bound_pipelines": [
"varying",
"texture"
"arith_total",
"arith_fma"
],
"total_cycles": [
0.234375,
0.234375,
0.1875,
0.125,
0.484375,
0.484375,
0.203125,
0.25,
0.0,
0.25,
0.25
@ -7012,7 +7012,7 @@
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 8,
"work_registers_used": 19
"work_registers_used": 22
}
}
},
@ -7050,14 +7050,14 @@
"arithmetic"
],
"total_cycles": [
3.6666667461395264,
5.666666507720947,
1.0,
1.0
]
},
"thread_occupancy": 100,
"thread_occupancy": 50,
"uniform_registers_used": 1,
"work_registers_used": 2
"work_registers_used": 6
}
}
}