flutter_flutter/tests/util/UnicodeUtils.cpp
Seigo Nonaka 0ca4fb6d44 Add more native perf tests to minikin.
This CL introduces performance tests for the following three modules:
- Hyphenator
- WordBreaker
- GraphemeBreak

To use UnicodeUtils from the perf tests, it has to be decoupled from gtest,
since the perf tests do not have gtest dependencies.

Bug:29142734
Change-Id: I700c662fa7d0a52f19d8e93150ad1a85dc28769f
2016-06-28 16:55:42 +09:00

109 lines
3.7 KiB
C++

/*
* Copyright (C) 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cctype>
#include <cstdlib>
#include <string>
#include <vector>

#include <cutils/log.h>
#include <unicode/utf.h>
#include <unicode/utf8.h>
namespace minikin {
// Parses a textual description of a UTF-16 string into buf.
//
// src is a sequence of tokens, optionally separated by spaces, e.g. "U+1F431 | 'h' 'i'":
//   'c'      a single ASCII character between single quotes
//   U+XXXX   a code point written with 4 to 6 hex digits ("u+" is accepted as well); values
//            that need two UTF-16 code units are stored as a lead/trail surrogate pair
//   |        offset marker: the number of code units written so far is stored in *offset
//
// buf, buf_size: destination buffer and its capacity in UTF-16 code units.
// result_size:   receives the number of code units written; must not be null.
// offset:        must be non-null if and only if src contains a '|'; at most one '|' is allowed.
//
// The size is returned through an out parameter and the function returns void; malformed input,
// a too-small buffer or a violated precondition is reported through LOG_ALWAYS_FATAL.
void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
                  size_t* offset) {
    // Reject a missing mandatory out parameter before doing any work.
    LOG_ALWAYS_FATAL_IF(result_size == nullptr);

    size_t input_ix = 0;
    size_t output_ix = 0;
    bool seen_offset = false;
    while (src[input_ix] != 0) {
        switch (src[input_ix]) {
            case '\'': {
                // single ASCII char: step over the opening quote, then validate the quoted
                // character itself (not the quote) before storing it
                input_ix++;
                const unsigned char quoted = static_cast<unsigned char>(src[input_ix]);
                LOG_ALWAYS_FATAL_IF(quoted == 0);
                LOG_ALWAYS_FATAL_IF(quoted >= 0x80);
                LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
                buf[output_ix++] = quoted;
                input_ix++;
                LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
                input_ix++;
                break;
            }
            case 'u':
            case 'U': {
                // Unicode codepoint in hex syntax
                input_ix++;
                LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
                input_ix++;
                // Count the hex digits ourselves: strtoul would also skip leading whitespace
                // and accept a sign or a "0x" prefix, none of which belong to this syntax.
                const char* const digits = src + input_ix;
                size_t num_hex_digits = 0;
                while (std::isxdigit(static_cast<unsigned char>(digits[num_hex_digits]))) {
                    num_hex_digits++;
                }
                // also triggers on invalid number syntax, digits = 0
                LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
                LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
                // The run is 4-6 plain hex digits, so strtoul consumes exactly that run.
                const unsigned long codepoint = std::strtoul(digits, nullptr, 16);
                LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
                input_ix += num_hex_digits;
                if (U16_LENGTH(codepoint) == 1) {
                    LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
                    buf[output_ix++] = static_cast<uint16_t>(codepoint);
                } else {
                    // UTF-16 encoding
                    LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
                    buf[output_ix++] = U16_LEAD(codepoint);
                    buf[output_ix++] = U16_TRAIL(codepoint);
                }
                break;
            }
            case ' ':
                // token separator
                input_ix++;
                break;
            case '|':
                // offset marker: allowed once, and only when the caller asked for the offset
                LOG_ALWAYS_FATAL_IF(seen_offset);
                LOG_ALWAYS_FATAL_IF(offset == nullptr);
                *offset = output_ix;
                seen_offset = true;
                input_ix++;
                break;
            default:
                LOG_ALWAYS_FATAL("Unexpected Character");
        }
    }
    *result_size = output_ix;
    // A caller that passes an offset pointer expects src to contain a '|'.
    LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
}
// Converts the UTF-8 encoded bytes of text into a sequence of UTF-16 code units.
//
// Code points that need two UTF-16 code units are emitted as a lead/trail surrogate pair.
// U8_NEXT reports an ill-formed UTF-8 sequence by setting the code point to a negative value;
// each such sequence is replaced by U+FFFD (REPLACEMENT CHARACTER) instead of being encoded
// as a meaningless surrogate pair.
std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
    std::vector<uint16_t> result;
    // Every code point consumes at least one byte per UTF-16 code unit it produces, so the
    // byte count is an upper bound for the output length.
    result.reserve(text.size());

    const char* const bytes = text.c_str();
    const int32_t textLength = static_cast<int32_t>(text.size());
    int32_t i = 0;
    while (i < textLength) {
        // Signed on purpose: U8_NEXT signals an error with a negative value.
        int32_t c = 0;
        U8_NEXT(bytes, i, textLength, c);
        if (c < 0) {
            c = 0xFFFD;
        }
        if (U16_LENGTH(c) == 1) {
            result.push_back(static_cast<uint16_t>(c));
        } else {
            result.push_back(U16_LEAD(c));
            result.push_back(U16_TRAIL(c));
        }
    }
    return result;
}
} // namespace minikin