mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
[Impeller] reduce gaussian sampling by 2x (flutter/engine#40871)
[Impeller] reduce gaussian sampling by 2x
This commit is contained in:
parent
f82dc83970
commit
234decefd7
@ -195,7 +195,7 @@ std::optional<Entity> DirectionalGaussianBlurFilterContents::RenderFilter(
|
||||
FS::BlurInfo frag_info;
|
||||
auto r = Radius{transformed_blur_radius_length};
|
||||
frag_info.blur_sigma = Sigma{r}.sigma;
|
||||
frag_info.blur_radius = r.radius;
|
||||
frag_info.blur_radius = std::round(r.radius);
|
||||
|
||||
// The blur direction is in input UV space.
|
||||
frag_info.blur_uv_offset =
|
||||
@ -240,6 +240,8 @@ std::optional<Entity> DirectionalGaussianBlurFilterContents::RenderFilter(
|
||||
source_descriptor.height_address_mode = SamplerAddressMode::kRepeat;
|
||||
break;
|
||||
}
|
||||
input_descriptor.mag_filter = MinMagFilter::kLinear;
|
||||
input_descriptor.min_filter = MinMagFilter::kLinear;
|
||||
|
||||
bool has_alpha_mask = blur_style_ != BlurStyle::kNormal;
|
||||
bool has_decal_specialization =
|
||||
|
||||
@ -60,14 +60,23 @@ void main() {
|
||||
f16vec4 total_color = f16vec4(0.0hf);
|
||||
float16_t gaussian_integral = 0.0hf;
|
||||
|
||||
for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) {
|
||||
float16_t gaussian = IPGaussian(i, blur_info.blur_sigma);
|
||||
for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius;
|
||||
i += 2.0hf) {
|
||||
float16_t w1 = IPGaussian(i, blur_info.blur_sigma);
|
||||
float16_t w2 = IPGaussian(i + 1.0hf, blur_info.blur_sigma);
|
||||
float16_t gaussian = w1 + w2;
|
||||
|
||||
f16vec2 offset_1 = blur_info.blur_uv_offset * i;
|
||||
f16vec2 offset_2 = offset_1 + blur_info.blur_uv_offset;
|
||||
vec2 pos_c1 = v_texture_coords + offset_1;
|
||||
vec2 pos_c2 = v_texture_coords + offset_2;
|
||||
|
||||
vec2 coords = (w1 * pos_c1 + w2 * pos_c2) / gaussian;
|
||||
|
||||
gaussian_integral += gaussian;
|
||||
total_color +=
|
||||
gaussian * Sample(texture_sampler, // sampler
|
||||
v_texture_coords + blur_info.blur_uv_offset *
|
||||
i // texture coordinates
|
||||
);
|
||||
total_color += gaussian * Sample(texture_sampler, // sampler
|
||||
coords // texture coordinates
|
||||
);
|
||||
}
|
||||
|
||||
frag_color = total_color / gaussian_integral;
|
||||
|
||||
@ -3242,7 +3242,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 53,
|
||||
"fp16_arithmetic": 33,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -3281,13 +3281,13 @@
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"arith_total",
|
||||
"arith_cvt"
|
||||
"arith_sfu"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.578125,
|
||||
0.25,
|
||||
0.578125,
|
||||
0.5,
|
||||
0.625,
|
||||
0.515625,
|
||||
0.609375,
|
||||
0.625,
|
||||
0.0,
|
||||
0.5,
|
||||
0.5
|
||||
@ -3296,7 +3296,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 12,
|
||||
"work_registers_used": 20
|
||||
"work_registers_used": 22
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3314,7 +3314,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 45,
|
||||
"fp16_arithmetic": 26,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -3352,14 +3352,14 @@
|
||||
0.25
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"varying",
|
||||
"texture"
|
||||
"arith_total",
|
||||
"arith_fma"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.34375,
|
||||
0.515625,
|
||||
0.515625,
|
||||
0.375,
|
||||
0.25,
|
||||
0.34375,
|
||||
0.125,
|
||||
0.0,
|
||||
0.5,
|
||||
0.5
|
||||
@ -3368,7 +3368,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 14,
|
||||
"work_registers_used": 14
|
||||
"work_registers_used": 21
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3386,7 +3386,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 42,
|
||||
"fp16_arithmetic": 23,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -3425,13 +3425,13 @@
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"arith_total",
|
||||
"arith_sfu"
|
||||
"arith_fma"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.3125,
|
||||
0.203125,
|
||||
0.296875,
|
||||
0.3125,
|
||||
0.46875,
|
||||
0.46875,
|
||||
0.328125,
|
||||
0.4375,
|
||||
0.0,
|
||||
0.25,
|
||||
0.25
|
||||
@ -3440,7 +3440,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 10,
|
||||
"work_registers_used": 16
|
||||
"work_registers_used": 18
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3458,7 +3458,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 35,
|
||||
"fp16_arithmetic": 19,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -3496,14 +3496,14 @@
|
||||
0.0
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"varying",
|
||||
"texture"
|
||||
"arith_total",
|
||||
"arith_fma"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.203125,
|
||||
0.203125,
|
||||
0.203125,
|
||||
0.125,
|
||||
0.46875,
|
||||
0.46875,
|
||||
0.234375,
|
||||
0.25,
|
||||
0.0,
|
||||
0.25,
|
||||
0.25
|
||||
@ -3512,7 +3512,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 10,
|
||||
"work_registers_used": 13
|
||||
"work_registers_used": 19
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6603,7 +6603,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 66,
|
||||
"fp16_arithmetic": 52,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -6644,13 +6644,13 @@
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"arith_total",
|
||||
"arith_cvt"
|
||||
"arith_sfu"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.53125,
|
||||
0.328125,
|
||||
0.53125,
|
||||
0.5,
|
||||
0.625,
|
||||
0.578125,
|
||||
0.546875,
|
||||
0.625,
|
||||
0.0,
|
||||
0.5,
|
||||
0.5
|
||||
@ -6659,7 +6659,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 12,
|
||||
"work_registers_used": 21
|
||||
"work_registers_used": 25
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -6670,7 +6670,7 @@
|
||||
"type": "Fragment",
|
||||
"variants": {
|
||||
"Main": {
|
||||
"has_stack_spilling": false,
|
||||
"has_stack_spilling": true,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
null
|
||||
@ -6689,16 +6689,16 @@
|
||||
"arithmetic"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
3.299999952316284,
|
||||
2.0,
|
||||
3.630000114440918,
|
||||
1.0,
|
||||
0.0
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"arithmetic"
|
||||
],
|
||||
"total_cycles": [
|
||||
7.666666507720947,
|
||||
2.0,
|
||||
10.333333015441895,
|
||||
6.0,
|
||||
2.0
|
||||
]
|
||||
},
|
||||
@ -6722,7 +6722,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 61,
|
||||
"fp16_arithmetic": 47,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -6760,14 +6760,14 @@
|
||||
0.25
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"varying",
|
||||
"texture"
|
||||
"arith_total",
|
||||
"arith_fma"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.328125,
|
||||
0.328125,
|
||||
0.328125,
|
||||
0.125,
|
||||
0.578125,
|
||||
0.578125,
|
||||
0.34375,
|
||||
0.25,
|
||||
0.0,
|
||||
0.5,
|
||||
0.5
|
||||
@ -6776,7 +6776,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 12,
|
||||
"work_registers_used": 20
|
||||
"work_registers_used": 22
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -6787,7 +6787,7 @@
|
||||
"type": "Fragment",
|
||||
"variants": {
|
||||
"Main": {
|
||||
"has_stack_spilling": false,
|
||||
"has_stack_spilling": true,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
null
|
||||
@ -6803,25 +6803,25 @@
|
||||
"texture"
|
||||
],
|
||||
"shortest_path_bound_pipelines": [
|
||||
"arithmetic"
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
2.309999942779541,
|
||||
2.0,
|
||||
2.9700000286102295,
|
||||
7.0,
|
||||
1.0
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"arithmetic"
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
5.0,
|
||||
2.0,
|
||||
8.0,
|
||||
11.0,
|
||||
2.0
|
||||
]
|
||||
},
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 1,
|
||||
"work_registers_used": 3
|
||||
"work_registers_used": 4
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6839,7 +6839,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 70,
|
||||
"fp16_arithmetic": 50,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -6879,13 +6879,13 @@
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"arith_total",
|
||||
"arith_sfu"
|
||||
"arith_fma"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.3125,
|
||||
0.234375,
|
||||
0.28125,
|
||||
0.3125,
|
||||
0.484375,
|
||||
0.484375,
|
||||
0.296875,
|
||||
0.4375,
|
||||
0.0,
|
||||
0.25,
|
||||
0.25
|
||||
@ -6894,7 +6894,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 8,
|
||||
"work_registers_used": 20
|
||||
"work_registers_used": 23
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -6905,7 +6905,7 @@
|
||||
"type": "Fragment",
|
||||
"variants": {
|
||||
"Main": {
|
||||
"has_stack_spilling": false,
|
||||
"has_stack_spilling": true,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
null
|
||||
@ -6924,16 +6924,16 @@
|
||||
"arithmetic"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
1.649999976158142,
|
||||
1.0,
|
||||
1.9800000190734863,
|
||||
0.0,
|
||||
0.0
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"arithmetic"
|
||||
],
|
||||
"total_cycles": [
|
||||
7.666666507720947,
|
||||
5.0,
|
||||
1.0,
|
||||
1.0
|
||||
]
|
||||
},
|
||||
@ -6957,7 +6957,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 66,
|
||||
"fp16_arithmetic": 47,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -6996,14 +6996,14 @@
|
||||
0.0
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"varying",
|
||||
"texture"
|
||||
"arith_total",
|
||||
"arith_fma"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.234375,
|
||||
0.234375,
|
||||
0.1875,
|
||||
0.125,
|
||||
0.484375,
|
||||
0.484375,
|
||||
0.203125,
|
||||
0.25,
|
||||
0.0,
|
||||
0.25,
|
||||
0.25
|
||||
@ -7012,7 +7012,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 8,
|
||||
"work_registers_used": 19
|
||||
"work_registers_used": 22
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -7050,14 +7050,14 @@
|
||||
"arithmetic"
|
||||
],
|
||||
"total_cycles": [
|
||||
3.6666667461395264,
|
||||
5.666666507720947,
|
||||
1.0,
|
||||
1.0
|
||||
]
|
||||
},
|
||||
"thread_occupancy": 100,
|
||||
"thread_occupancy": 50,
|
||||
"uniform_registers_used": 1,
|
||||
"work_registers_used": 2
|
||||
"work_registers_used": 6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user