flutter_flutter/app/HyphTool.cpp
Raph Levien f0be43de02 Binary format for hyphenation patterns
In the current state, hyphenation in all languages other than Sanskrit seems
to work (case-folding edge cases). Thus, we just disable Sanskrit.
Packed tries are implemented, but not the finite state machine
(space/speed tradeoff).

This commit contains a throw-away test app, which runs on the host.
I think I want to replace it with unit tests, but I'm including it in
the CL because it's useful during development.

Bug: 21562869
Bug: 21826930
Bug: 23317038
Bug: 23317904

Change-Id: I7479a565a4a062fa319651c2c14c0fa18c5ceaea
2015-09-30 21:37:31 -07:00

63 lines
1.6 KiB
C++

#include <stdio.h>
#include <sys/stat.h>
#include <string.h>
#include "utils/Log.h"
#include <vector>
#include <minikin/Hyphenator.h>
using android::Hyphenator;
// Reads the whole file at `fn` into a freshly allocated byte buffer and passes
// that buffer to Hyphenator::loadBinary().
//
// Returns whatever Hyphenator::loadBinary() returns, or nullptr (after
// printing a message to stderr) when the file cannot be stat'ed, opened, or
// read in full.
//
// NOTE(review): on success the buffer is intentionally not freed here;
// presumably the returned Hyphenator keeps pointing into it -- confirm against
// Hyphenator::loadBinary before adding a delete[].
Hyphenator* loadHybFile(const char* fn) {
    // stat() is used only to learn the file size up front.
    struct stat statbuf;
    if (stat(fn, &statbuf) < 0) {
        fprintf(stderr, "error opening %s\n", fn);
        return nullptr;
    }
    const size_t size = statbuf.st_size;

    FILE* f = fopen(fn, "rb");
    if (f == nullptr) {
        fprintf(stderr, "error opening %s\n", fn);
        return nullptr;
    }

    uint8_t* buf = new uint8_t[size];
    const size_t read_size = fread(buf, 1, size, f);
    // The stream is no longer needed whether or not the read succeeded; close
    // it here so neither the error path nor the success path leaks the handle.
    fclose(f);

    if (read_size < size) {
        fprintf(stderr, "error reading %s\n", fn);
        delete[] buf;
        return nullptr;
    }
    return Hyphenator::loadBinary(buf);
}
// Throw-away host test driver: hyphenates the single word given on the command
// line using the patterns in /tmp/en.hyb and prints the word with a '-'
// before every position the hyphenator flagged.
//
// Returns 0 on success, 1 on a usage error or when the pattern file could not
// be loaded.
int main(int argc, char** argv) {
    // Validate the command line first so a usage error does not depend on
    // (or pay for) loading the pattern file.
    if (argc < 2) {
        fprintf(stderr, "usage: hyphtool word\n");
        return 1;
    }

    Hyphenator* hyph = loadHybFile("/tmp/en.hyb");  // should also be configurable
    if (hyph == nullptr) {
        // loadHybFile has already reported the reason on stderr.
        return 1;
    }

    const char* asciiword = argv[1];
    const size_t len = strlen(asciiword);

    std::vector<uint16_t> word;
    word.reserve(len);
    for (size_t i = 0; i < len; i++) {
        // Go through unsigned char so bytes >= 0x80 map to U+0080..U+00FF
        // instead of being sign-extended to 0xFFxx where char is signed.
        // ASCII (or possibly ISO Latin 1), but kinda painful to do utf conversion :(
        uint16_t c = static_cast<unsigned char>(asciiword[i]);
        if (c == '-') {
            // A literal '-' in the input is fed to the hyphenator as U+00AD.
            c = 0x00AD;
        }
        word.push_back(c);
    }

    std::vector<uint8_t> result;
    hyph->hyphenate(&result, word.data(), word.size());
    // NOTE(review): assumes hyphenate() sizes `result` to at least `len`
    // entries -- confirm against Hyphenator::hyphenate.
    for (size_t i = 0; i < len; i++) {
        if (result[i] != 0) {
            printf("-");
        }
        printf("%c", word[i]);
    }
    printf("\n");
    return 0;
}