mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
[Impeller] convert filters to half precision (flutter/engine#40832)
[Impeller] convert filters to half precision
This commit is contained in:
parent
8f385d6532
commit
b48571cbea
@ -6,6 +6,7 @@
|
||||
#define COLOR_GLSL_
|
||||
|
||||
#include <impeller/branching.glsl>
|
||||
#include <impeller/types.glsl>
|
||||
|
||||
/// Convert a premultiplied color (a color which has its color components
|
||||
/// multiplied with its alpha value) to an unpremultiplied color.
|
||||
@ -18,6 +19,17 @@ vec4 IPUnpremultiply(vec4 color) {
|
||||
return vec4(color.rgb / color.a, color.a);
|
||||
}
|
||||
|
||||
/// Convert a premultiplied color (a color which has its color components
|
||||
/// multiplied with its alpha value) to an unpremultiplied color.
|
||||
///
|
||||
/// Returns (0, 0, 0, 0) if the alpha component is 0.
|
||||
f16vec4 IPHalfUnpremultiply(f16vec4 color) {
|
||||
if (color.a == 0.0hf) {
|
||||
return f16vec4(0.0hf);
|
||||
}
|
||||
return f16vec4(color.rgb / color.a, color.a);
|
||||
}
|
||||
|
||||
/// Convert an unpremultiplied color (a color which has its color components
|
||||
/// separated from its alpha value) to a premultiplied color.
|
||||
///
|
||||
@ -26,4 +38,12 @@ vec4 IPPremultiply(vec4 color) {
|
||||
return vec4(color.rgb * color.a, color.a);
|
||||
}
|
||||
|
||||
/// Convert an unpremultiplied color (a color which has its color components
|
||||
/// separated from its alpha value) to a premultiplied color.
|
||||
///
|
||||
/// Returns (0, 0, 0, 0) if the alpha component is 0.
|
||||
f16vec4 IPHalfPremultiply(f16vec4 color) {
|
||||
return f16vec4(color.rgb * color.a, color.a);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -107,12 +107,12 @@ std::optional<Entity> DirectionalMorphologyFilterContents::RenderFilter(
|
||||
|
||||
FS::FragInfo frag_info;
|
||||
frag_info.radius = std::round(transformed_radius.GetLength());
|
||||
frag_info.direction = input_snapshot->transform.Invert()
|
||||
.TransformDirection(transformed_radius)
|
||||
.Normalize();
|
||||
frag_info.texture_size =
|
||||
Point(transformed_texture_width, transformed_texture_height);
|
||||
frag_info.morph_type = static_cast<Scalar>(morph_type_);
|
||||
frag_info.uv_offset =
|
||||
input_snapshot->transform.Invert()
|
||||
.TransformDirection(transformed_radius)
|
||||
.Normalize() /
|
||||
Point(transformed_texture_width, transformed_texture_height);
|
||||
|
||||
Command cmd;
|
||||
cmd.label = "Morphology Filter";
|
||||
@ -121,10 +121,16 @@ std::optional<Entity> DirectionalMorphologyFilterContents::RenderFilter(
|
||||
cmd.pipeline = renderer.GetMorphologyFilterPipeline(options);
|
||||
cmd.BindVertices(vtx_buffer);
|
||||
|
||||
auto sampler_descriptor = input_snapshot->sampler_descriptor;
|
||||
if (renderer.GetDeviceCapabilities().SupportsDecalTileMode()) {
|
||||
sampler_descriptor.width_address_mode = SamplerAddressMode::kDecal;
|
||||
sampler_descriptor.height_address_mode = SamplerAddressMode::kDecal;
|
||||
}
|
||||
|
||||
FS::BindTextureSampler(
|
||||
cmd, input_snapshot->texture,
|
||||
renderer.GetContext()->GetSamplerLibrary()->GetSampler(
|
||||
input_snapshot->sampler_descriptor));
|
||||
sampler_descriptor));
|
||||
VS::BindFrameInfo(cmd, host_buffer.EmplaceUniform(frame_info));
|
||||
FS::BindFragInfo(cmd, host_buffer.EmplaceUniform(frag_info));
|
||||
|
||||
|
||||
@ -31,24 +31,26 @@
|
||||
|
||||
uniform FragInfo {
|
||||
mat4 color_m;
|
||||
vec4 color_v;
|
||||
float input_alpha;
|
||||
f16vec4 color_v;
|
||||
float16_t input_alpha;
|
||||
}
|
||||
frag_info;
|
||||
|
||||
uniform sampler2D input_texture;
|
||||
uniform f16sampler2D input_texture;
|
||||
|
||||
in vec2 v_position;
|
||||
out vec4 frag_color;
|
||||
in highp vec2 v_position;
|
||||
out f16vec4 frag_color;
|
||||
|
||||
void main() {
|
||||
vec4 input_color = texture(input_texture, v_position) * frag_info.input_alpha;
|
||||
f16vec4 input_color =
|
||||
texture(input_texture, v_position) * frag_info.input_alpha;
|
||||
|
||||
// unpremultiply first, as filter inputs are premultiplied.
|
||||
vec4 color = IPUnpremultiply(input_color);
|
||||
f16vec4 color = IPHalfUnpremultiply(input_color);
|
||||
|
||||
color = clamp(frag_info.color_m * color + frag_info.color_v, 0.0, 1.0);
|
||||
color = clamp(f16mat4(frag_info.color_m) * color + frag_info.color_v,
|
||||
float16_t(0), float16_t(1.0));
|
||||
|
||||
// premultiply the outputs
|
||||
frag_color = vec4(color.rgb * color.a, color.a);
|
||||
frag_color = f16vec4(color.rgb * color.a, color.a);
|
||||
}
|
||||
|
||||
@ -11,8 +11,8 @@ uniform FrameInfo {
|
||||
}
|
||||
frame_info;
|
||||
|
||||
in vec2 position;
|
||||
out vec2 v_position;
|
||||
in highp vec2 position;
|
||||
out highp vec2 v_position;
|
||||
|
||||
void main() {
|
||||
v_position =
|
||||
|
||||
@ -9,27 +9,29 @@
|
||||
//
|
||||
// This filter is used so that the colors are suitable for display in monitors.
|
||||
|
||||
uniform sampler2D input_texture;
|
||||
uniform f16sampler2D input_texture;
|
||||
|
||||
uniform FragInfo {
|
||||
float input_alpha;
|
||||
float16_t input_alpha;
|
||||
}
|
||||
frag_info;
|
||||
|
||||
in vec2 v_position;
|
||||
out vec4 frag_color;
|
||||
in highp vec2 v_position;
|
||||
|
||||
out f16vec4 frag_color;
|
||||
|
||||
void main() {
|
||||
vec4 input_color = texture(input_texture, v_position) * frag_info.input_alpha;
|
||||
f16vec4 input_color =
|
||||
texture(input_texture, v_position) * frag_info.input_alpha;
|
||||
|
||||
vec4 color = IPUnpremultiply(input_color);
|
||||
f16vec4 color = IPHalfUnpremultiply(input_color);
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (color[i] <= 0.0031308) {
|
||||
color[i] = (color[i]) * 12.92;
|
||||
if (color[i] <= 0.0031308hf) {
|
||||
color[i] = (color[i]) * 12.92hf;
|
||||
} else {
|
||||
color[i] = 1.055 * pow(color[i], (1.0 / 2.4)) - 0.055;
|
||||
color[i] = 1.055hf * pow(color[i], (1.0hf / 2.4hf)) - 0.055hf;
|
||||
}
|
||||
}
|
||||
|
||||
frag_color = IPPremultiply(color);
|
||||
frag_color = IPHalfPremultiply(color);
|
||||
}
|
||||
|
||||
@ -11,8 +11,8 @@ uniform FrameInfo {
|
||||
}
|
||||
frame_info;
|
||||
|
||||
in vec2 position;
|
||||
out vec2 v_position;
|
||||
in highp vec2 position;
|
||||
out highp vec2 v_position;
|
||||
|
||||
void main() {
|
||||
v_position =
|
||||
|
||||
@ -8,29 +8,35 @@
|
||||
|
||||
// These values must correspond to the order of the items in the
|
||||
// 'FilterContents::MorphType' enum class.
|
||||
const float kMorphTypeDilate = 0;
|
||||
const float kMorphTypeErode = 1;
|
||||
const float16_t kMorphTypeDilate = 0.0hf;
|
||||
const float16_t kMorphTypeErode = 1.0hf;
|
||||
|
||||
uniform sampler2D texture_sampler;
|
||||
uniform f16sampler2D texture_sampler;
|
||||
|
||||
uniform FragInfo {
|
||||
vec2 texture_size;
|
||||
vec2 direction;
|
||||
float radius;
|
||||
float morph_type;
|
||||
f16vec2 uv_offset;
|
||||
float16_t radius;
|
||||
float16_t morph_type;
|
||||
}
|
||||
frag_info;
|
||||
|
||||
in vec2 v_texture_coords;
|
||||
out vec4 frag_color;
|
||||
in highp vec2 v_texture_coords;
|
||||
|
||||
out f16vec4 frag_color;
|
||||
|
||||
void main() {
|
||||
vec4 result = frag_info.morph_type == kMorphTypeDilate ? vec4(0) : vec4(1);
|
||||
vec2 uv_offset = frag_info.direction / frag_info.texture_size;
|
||||
for (float i = -frag_info.radius; i <= frag_info.radius; i++) {
|
||||
vec2 texture_coords = v_texture_coords + uv_offset * i;
|
||||
vec4 color;
|
||||
color = IPSampleDecal(texture_sampler, texture_coords);
|
||||
f16vec4 result =
|
||||
frag_info.morph_type == kMorphTypeDilate ? f16vec4(0.0) : f16vec4(1.0);
|
||||
for (float16_t i = -frag_info.radius; i <= frag_info.radius; i++) {
|
||||
vec2 texture_coords = v_texture_coords + frag_info.uv_offset * i;
|
||||
|
||||
// gles 2.0 is the only backend without native decal support.
|
||||
#ifdef IMPELLER_TARGET_OPENGLES
|
||||
f16vec4 color = IPHalfSampleDecal(texture_sampler, texture_coords);
|
||||
#else
|
||||
f16vec4 color = texture(texture_sampler, texture_coords);
|
||||
#endif
|
||||
|
||||
if (frag_info.morph_type == kMorphTypeDilate) {
|
||||
result = max(color, result);
|
||||
} else {
|
||||
|
||||
@ -11,10 +11,10 @@ uniform FrameInfo {
|
||||
}
|
||||
frame_info;
|
||||
|
||||
in vec2 position;
|
||||
in vec2 texture_coords;
|
||||
in highp vec2 position;
|
||||
in highp vec2 texture_coords;
|
||||
|
||||
out vec2 v_texture_coords;
|
||||
out highp vec2 v_texture_coords;
|
||||
|
||||
void main() {
|
||||
gl_Position = frame_info.mvp * vec4(position, 0.0, 1.0);
|
||||
|
||||
@ -9,11 +9,11 @@ uniform FrameInfo {
|
||||
}
|
||||
frame_info;
|
||||
|
||||
in vec2 position;
|
||||
in highp vec2 position;
|
||||
// Note: The GLES backend uses name matching for attribute locations. This name
|
||||
// must match the name of the attribute input in:
|
||||
// impeller/compiler/shader_lib/flutter/runtime_effect.glsl
|
||||
out vec2 _fragCoord;
|
||||
out highp vec2 _fragCoord;
|
||||
|
||||
void main() {
|
||||
gl_Position = frame_info.mvp * vec4(position, 0.0, 1.0);
|
||||
|
||||
@ -6,31 +6,32 @@
|
||||
#include <impeller/texture.glsl>
|
||||
#include <impeller/types.glsl>
|
||||
|
||||
uniform sampler2D y_texture;
|
||||
uniform sampler2D uv_texture;
|
||||
uniform f16sampler2D y_texture;
|
||||
uniform f16sampler2D uv_texture;
|
||||
|
||||
// These values must correspond to the order of the items in the
|
||||
// 'YUVColorSpace' enum class.
|
||||
const float kBT601LimitedRange = 0;
|
||||
const float kBT601FullRange = 1;
|
||||
const float16_t kBT601LimitedRange = 0.0hf;
|
||||
const float16_t kBT601FullRange = 1.0hf;
|
||||
|
||||
uniform FragInfo {
|
||||
mat4 matrix;
|
||||
float yuv_color_space;
|
||||
float16_t yuv_color_space;
|
||||
}
|
||||
frag_info;
|
||||
|
||||
in vec2 v_position;
|
||||
out vec4 frag_color;
|
||||
in highp vec2 v_position;
|
||||
|
||||
out f16vec4 frag_color;
|
||||
|
||||
void main() {
|
||||
vec3 yuv;
|
||||
vec3 yuv_offset = vec3(0.0, 0.5, 0.5);
|
||||
f16vec3 yuv;
|
||||
f16vec3 yuv_offset = f16vec3(0.0hf, 0.5hf, 0.5hf);
|
||||
if (frag_info.yuv_color_space == kBT601LimitedRange) {
|
||||
yuv_offset.x = 16.0 / 255.0;
|
||||
yuv_offset.x = 16.0hf / 255.0hf;
|
||||
}
|
||||
|
||||
yuv.x = texture(y_texture, v_position).r;
|
||||
yuv.yz = texture(uv_texture, v_position).rg;
|
||||
frag_color = frag_info.matrix * vec4(yuv - yuv_offset, 1);
|
||||
frag_color = f16mat4(frag_info.matrix) * f16vec4(yuv - yuv_offset, 1.0hf);
|
||||
}
|
||||
|
||||
@ -11,8 +11,8 @@ uniform FrameInfo {
|
||||
}
|
||||
frame_info;
|
||||
|
||||
in vec2 position;
|
||||
out vec2 v_position;
|
||||
in highp vec2 position;
|
||||
out highp vec2 v_position;
|
||||
|
||||
void main() {
|
||||
v_position =
|
||||
|
||||
@ -1665,16 +1665,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -1691,9 +1691,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -1702,9 +1702,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -1712,7 +1712,7 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 20,
|
||||
"uniform_registers_used": 30,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
@ -1723,9 +1723,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -1742,9 +1742,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -1753,9 +1753,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -1763,8 +1763,8 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"work_registers_used": 7
|
||||
"uniform_registers_used": 22,
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6167,16 +6167,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 80,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -6193,9 +6193,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -6204,9 +6204,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -6214,20 +6214,20 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"uniform_registers_used": 20,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -6244,9 +6244,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.0,
|
||||
0.0625,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -6255,9 +6255,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -6266,7 +6266,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 10,
|
||||
"work_registers_used": 7
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -8018,16 +8018,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 80,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -8044,9 +8044,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -8055,9 +8055,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -8065,20 +8065,20 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"uniform_registers_used": 20,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -8095,9 +8095,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.0,
|
||||
0.0625,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -8106,9 +8106,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -8117,7 +8117,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 10,
|
||||
"work_registers_used": 7
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -8180,7 +8180,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 83,
|
||||
"fp16_arithmetic": 57,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -8222,10 +8222,10 @@
|
||||
"arith_cvt"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.328125,
|
||||
0.34375,
|
||||
0.078125,
|
||||
0.328125,
|
||||
0.1875,
|
||||
0.34375,
|
||||
0.0625,
|
||||
0.0,
|
||||
0.25,
|
||||
0.25
|
||||
@ -8261,10 +8261,11 @@
|
||||
"texture"
|
||||
],
|
||||
"shortest_path_bound_pipelines": [
|
||||
"arithmetic"
|
||||
"arithmetic",
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
1.9800000190734863,
|
||||
1.0,
|
||||
1.0,
|
||||
0.0
|
||||
],
|
||||
@ -8272,7 +8273,7 @@
|
||||
"arithmetic"
|
||||
],
|
||||
"total_cycles": [
|
||||
5.333333492279053,
|
||||
4.0,
|
||||
1.0,
|
||||
1.0
|
||||
]
|
||||
@ -8292,16 +8293,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 80,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -8318,9 +8319,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -8329,9 +8330,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -8339,20 +8340,20 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"uniform_registers_used": 20,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -8369,9 +8370,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.0,
|
||||
0.0625,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -8380,9 +8381,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -8391,7 +8392,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 10,
|
||||
"work_registers_used": 7
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -8436,7 +8437,7 @@
|
||||
},
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 4,
|
||||
"work_registers_used": 2
|
||||
"work_registers_used": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -9155,16 +9156,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 80,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -9181,9 +9182,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -9192,9 +9193,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -9202,7 +9203,7 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 14,
|
||||
"uniform_registers_used": 18,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
@ -9254,7 +9255,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 8,
|
||||
"work_registers_used": 6
|
||||
"work_registers_used": 7
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -10717,16 +10718,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 80,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -10743,9 +10744,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -10754,9 +10755,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.078125,
|
||||
0.078125,
|
||||
0.046875,
|
||||
0.140625,
|
||||
0.140625,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -10764,20 +10765,20 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"uniform_registers_used": 20,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -10794,9 +10795,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.0,
|
||||
0.0625,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -10805,9 +10806,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -10816,7 +10817,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 10,
|
||||
"work_registers_used": 7
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -11568,16 +11569,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -11594,9 +11595,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -11605,9 +11606,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -11615,7 +11616,7 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 20,
|
||||
"uniform_registers_used": 30,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
@ -11626,9 +11627,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -11645,9 +11646,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -11656,9 +11657,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -11666,8 +11667,8 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"work_registers_used": 7
|
||||
"uniform_registers_used": 22,
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -11685,7 +11686,7 @@
|
||||
"uses_late_zs_update": false,
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": 54,
|
||||
"fp16_arithmetic": 44,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
@ -11727,10 +11728,10 @@
|
||||
"arith_cvt"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.34375,
|
||||
0.265625,
|
||||
0.046875,
|
||||
0.34375,
|
||||
0.1875,
|
||||
0.265625,
|
||||
0.0,
|
||||
0.0,
|
||||
0.25,
|
||||
0.25
|
||||
@ -11739,7 +11740,7 @@
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 8,
|
||||
"work_registers_used": 13
|
||||
"work_registers_used": 11
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -11752,16 +11753,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -11778,9 +11779,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -11789,9 +11790,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -11799,7 +11800,7 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 20,
|
||||
"uniform_registers_used": 30,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
@ -11810,9 +11811,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -11829,9 +11830,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -11840,9 +11841,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.0625,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -11850,8 +11851,8 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"work_registers_used": 7
|
||||
"uniform_registers_used": 22,
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -12415,16 +12416,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -12441,9 +12442,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -12452,9 +12453,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -12462,7 +12463,7 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 18,
|
||||
"uniform_registers_used": 28,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
@ -12513,8 +12514,8 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 14,
|
||||
"work_registers_used": 6
|
||||
"uniform_registers_used": 20,
|
||||
"work_registers_used": 7
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -13502,16 +13503,16 @@
|
||||
"type": "Vertex",
|
||||
"variants": {
|
||||
"Position": {
|
||||
"fp16_arithmetic": 100,
|
||||
"fp16_arithmetic": 0,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -13528,9 +13529,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -13539,9 +13540,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.0625,
|
||||
0.125,
|
||||
0.125,
|
||||
0.0,
|
||||
0.0,
|
||||
2.0,
|
||||
0.0
|
||||
@ -13549,7 +13550,7 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 20,
|
||||
"uniform_registers_used": 30,
|
||||
"work_registers_used": 32
|
||||
},
|
||||
"Varying": {
|
||||
@ -13560,9 +13561,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -13579,9 +13580,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -13590,9 +13591,9 @@
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.015625,
|
||||
0.03125,
|
||||
0.078125,
|
||||
0.0,
|
||||
3.0,
|
||||
0.0
|
||||
@ -13600,8 +13601,8 @@
|
||||
},
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 16,
|
||||
"work_registers_used": 7
|
||||
"uniform_registers_used": 22,
|
||||
"work_registers_used": 9
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user