1280 lines
34 KiB
C
1280 lines
34 KiB
C
/*
|
|
im.c
|
|
|
|
Input method handling
|
|
Copyright (c)2007 by Mark K. Kim and others
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
(See COPYING.txt)
|
|
|
|
$Id$
|
|
*/
|
|
|
|
/*
|
|
* See the LANGUAGE-SPECIFIC IM FUNCTIONS section for instructions on adding
|
|
* support for new languages.
|
|
*
|
|
* This file is called IM (Input Method), but it's actually an Input Translator.
|
|
* This implementation was sort of necessary in order to work without having to
|
|
* modify SDL.
|
|
*
|
|
* Basically, to read in text in foreign language, read Keysym off of SDL and
|
|
* pass to im_read. im_read will translate the text and pass the unicode string
|
|
* back to you. But before all this is done, be sure to create the IM_DATA
|
|
* structure and initialize it with the proper language translator you want to use.
|
|
*/
|
|
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
#include "im.h"
|
|
|
|
|
|
|
|
/* ***************************************************************************
|
|
* I18N GETTEXT
|
|
*/
|
|
|
|
/* Fallback definition: when nothing else has defined gettext_noop, it
 * expands to its argument unchanged. */
#ifndef gettext_noop
#define gettext_noop(s) (s)
#endif

/* Indices into im_tip_text[]; NUM_IM_TIPS is the number of entries. */
enum {
  IM_TIP_NONE     = 0,
  IM_TIP_ENGLISH  = 1,
  IM_TIP_HIRAGANA = 2,
  IM_TIP_KATAKANA = 3,
  IM_TIP_HANGUL   = 4,
  NUM_IM_TIPS     = 5
};

/* Tip strings, indexed by the IM_TIP_* constants.  IM_TIP_NONE has no text. */
static const char* const im_tip_text[NUM_IM_TIPS] = {
  NULL,                       /* IM_TIP_NONE */
  gettext_noop("English"),    /* IM_TIP_ENGLISH */
  gettext_noop("Hiragana"),   /* IM_TIP_HIRAGANA */
  gettext_noop("Katakana"),   /* IM_TIP_KATAKANA */
  gettext_noop("Hangul")      /* IM_TIP_HANGUL */
};
|
|
|
|
|
|
/* ***************************************************************************
|
|
* CONSTANTS
|
|
*/
|
|
|
|
/* Uncomment to compile in the IM_DEBUG trace output. */
/* #define IM_DEBUG 1 */

/* Maximum number of sections in a *.im file */
#define MAX_SECTIONS    8
/* Length of a state machine's output, including the terminating NUL */
#define MAX_UNICODE_SEQ 16
/* Initial number of transition slots in a STATE_MACHINE */
#define INITIAL_SMSIZE  8

/* Language used when none was chosen or the chosen one is unusable. */
#ifndef LANG_DEFAULT
#define LANG_DEFAULT (LANG_EN)
#endif
|
|
|
|
|
|
/**
 * Request codes stored in IM_DATA.request and handled by every
 * im_event_*() function.  IM_REQ_TRANSLATE is zero.
 */
enum {
  IM_REQ_TRANSLATE  = 0,  /* Translate a key stroke */
  IM_REQ_INIT       = 1,  /* Initialization request */
  IM_REQ_RESET_SOFT = 2,  /* Soft reset request */
  IM_REQ_RESET_FULL = 3,  /* Full reset request */
  IM_REQ_FREE       = 4,  /* Free resources */
  NUM_IM_REQUESTS   = 5
};
|
|
|
|
|
|
/**
 * Match status bits, OR-ed together into CHARMAP.match_stats by
 * charmap_search().
 */
enum {
  MATCH_STAT_NONE       = 0,       /* No status bit set */
  MATCH_STAT_NOMOSTATES = 1 << 0,  /* Matched state has no transitions */
  MATCH_STAT_NOMOBUF    = 1 << 1   /* Match consumed the whole input */
};
|
|
|
|
|
|
/* ***************************************************************************
|
|
* TYPES
|
|
*/
|
|
|
|
/**
|
|
* All im_event_*() functions have this type.
|
|
*/
|
|
typedef int (*IM_EVENT_FN)(IM_DATA*, SDL_keysym); /* IM_EVENT_FN type */
|
|
|
|
|
|
/**
 * One transition of a state machine: when "key" is pressed, the machine
 * moves to "state".
 *
 * @see STATE_MACHINE
 */
typedef struct SM_WITH_KEY {
  char key;                      /* Key stroke that triggers the transition */
  struct STATE_MACHINE* state;   /* State reached through that key */
} SM_WITH_KEY;
|
|
|
|
|
|
/**
|
|
* A State Machine is used to map key strokes to the unicode output.
|
|
* A single State Machine has a possible output (the unicode) and pointers
|
|
* to next states. The "next state" is determined by the key stroke
|
|
* pressed by the user - this key is looked up in SM_WITH_KEY and
|
|
* its next state determined by the STATE_MACHINE pointer in SM_WITH_KEY.
|
|
*
|
|
* The number of possible transitions to the next state is dynamically
|
|
* adjustable using the parameter next_maxsize. The actual storage in
|
|
* use can be determined via next_size.
|
|
*
|
|
* @see SM_WITH_KEY
|
|
*/
|
|
typedef struct STATE_MACHINE {
|
|
wchar_t output[MAX_UNICODE_SEQ];
|
|
char flag;
|
|
|
|
SM_WITH_KEY* next; /* Possible transitions */
|
|
size_t next_maxsize; /* Potential size of the next pointer */
|
|
size_t next_size; /* Used size of the next pointer */
|
|
} STATE_MACHINE;
|
|
|
|
|
|
/**
|
|
* A Character Map loads the *.im file, which may have several "sections".
|
|
* Each section has its own state machine, and the C code determines which
|
|
* section is used in determining which STATE_MACHINE to use for the
|
|
* key mapping.
|
|
*/
|
|
typedef struct {
|
|
STATE_MACHINE sections[MAX_SECTIONS];
|
|
int section;
|
|
|
|
/* These variables get populated when a search is performed */
|
|
int match_count; /* How many char seq was used for output */
|
|
int match_is_final; /* T/F - tells if match is final */
|
|
int match_stats; /* Statistics gathering */
|
|
STATE_MACHINE* match_state;
|
|
STATE_MACHINE* match_state_prev;
|
|
} CHARMAP;
|
|
|
|
|
|
/* ***************************************************************************
|
|
* STATIC GLOBALS
|
|
*/
|
|
|
|
/**
 * Nonzero once im_init() has filled in the static lookup tables.
 */
static int im_initialized = 0;
|
|
|
|
|
|
/**
|
|
* Language-specific IM event-handler function pointers. This lookup table
|
|
* is initialized in im_init(). Every support language should have a pointer
|
|
* mapped here.
|
|
*
|
|
* @see im_init()
|
|
* @see im_read()
|
|
*/
|
|
static IM_EVENT_FN im_event_fns[NUM_LANGS];
|
|
|
|
|
|
/* ***************************************************************************
|
|
* UTILITY FUNCTIONS
|
|
*/
|
|
|
|
/* Smaller of a and b (a wins ties).  Arguments are evaluated more than once. */
#define MIN(a,b)        ( ((a) <= (b)) ? (a) : (b) )
/* True when v lies in the half-open interval [a, b). */
#define IN_RANGE(a,v,b) ( ((a) <= (v)) && ((v) < (b)) )
/* Element count of a true array (not of a pointer). */
#define ARRAYLEN(a)     ( sizeof(a) / sizeof(*(a)) )
|
|
|
|
|
|
/**
 * Shift a wide string left in place, discarding its first "count"
 * characters.
 *
 * @param s      NUL-terminated wide string to modify.
 * @param count  Number of leading characters to drop.  A count larger than
 *               the string length empties the string instead of reading
 *               beyond the terminator.
 */
static void wcs_lshift(wchar_t* s, size_t count)
{
  size_t len = wcslen(s);

  if(count > len) count = len;

  /* Regions overlap, so memmove; the +1 carries the terminating NUL along */
  memmove(s, s + count, (len - count + 1) * sizeof(wchar_t));
}
|
|
|
|
|
|
/**
 * Pull out "count" characters from the back of a wide string, in place.
 *
 * @param s      NUL-terminated wide string to truncate.
 * @param count  Number of trailing characters to remove.  If it is at least
 *               the string length, the string becomes empty.
 */
static void wcs_pull(wchar_t* s, size_t count)
{
  size_t len = wcslen(s);

  /* Stay in size_t: narrowing count to int could turn a huge count negative
   * and move the terminator forward instead of back */
  size_t keep = (count < len) ? (len - count) : 0;

  s[keep] = L'\0';
}
|
|
|
|
|
|
/* ***************************************************************************
|
|
* STATE_MACHINE FUNCTIONS
|
|
*/
|
|
|
|
/**
|
|
* Compare two SM_WITH_KEY, return appropriate result.
|
|
*/
|
|
static int swk_compare(const void* swk1, const void* swk2)
|
|
{
|
|
SM_WITH_KEY* sk1 = (SM_WITH_KEY*)swk1;
|
|
SM_WITH_KEY* sk2 = (SM_WITH_KEY*)swk2;
|
|
|
|
return (sk1->key) - (sk2->key);
|
|
}
|
|
|
|
|
|
/**
|
|
* Initialize the State Machine.
|
|
*/
|
|
static int sm_init(STATE_MACHINE* sm)
|
|
{
|
|
memset(sm, 0, sizeof(STATE_MACHINE));
|
|
|
|
sm->next = calloc(INITIAL_SMSIZE, sizeof(SM_WITH_KEY));
|
|
if(!sm->next) {
|
|
perror("sm_init");
|
|
return 1;
|
|
}
|
|
|
|
sm->next_maxsize = INITIAL_SMSIZE;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Free the State Machine resources.
|
|
*/
|
|
static void sm_free(STATE_MACHINE* sm)
|
|
{
|
|
if(sm->next) {
|
|
int i = 0;
|
|
|
|
for(i = 0; i < (int)sm->next_maxsize; i++) {
|
|
STATE_MACHINE* next_state = sm->next[i].state;
|
|
if(next_state) sm_free(next_state);
|
|
sm->next[i].state = NULL;
|
|
}
|
|
|
|
free(sm->next);
|
|
sm->next = NULL;
|
|
}
|
|
|
|
memset(sm, 0, sizeof(STATE_MACHINE));
|
|
}
|
|
|
|
|
|
/**
|
|
* Double the storage space of the possible transition states.
|
|
*/
|
|
static int sm_dblspace(STATE_MACHINE* sm)
|
|
{
|
|
size_t newsize = sm->next_maxsize * 2;
|
|
SM_WITH_KEY* next = realloc(sm->next, sizeof(SM_WITH_KEY) * newsize);
|
|
|
|
if(next == NULL) {
|
|
perror("sm_dblspace");
|
|
return 1;
|
|
}
|
|
|
|
sm->next = next;
|
|
sm->next_maxsize = newsize;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Search the state machine's transition keys, return pointer to the next state.
|
|
* Return NULL if none is found. The search is done only at 1 level, and does
|
|
* not recurse deep.
|
|
*/
|
|
static STATE_MACHINE* sm_search_shallow(STATE_MACHINE* sm, char key)
|
|
{
|
|
SM_WITH_KEY smk = { key, NULL };
|
|
SM_WITH_KEY* smk_found;
|
|
|
|
smk_found = bsearch(
|
|
&smk, sm->next, sm->next_size, sizeof(SM_WITH_KEY), swk_compare);
|
|
|
|
if(!smk_found) return NULL;
|
|
return smk_found->state;
|
|
}
|
|
|
|
|
|
/**
|
|
* Search the state machine's transition keys, return the unicode output of the
|
|
* last state found. The search is done deep, recursing until no more match
|
|
* can be found.
|
|
*
|
|
* @param start Starting point of the state transition. Constant.
|
|
* @param key The key string to look for. Constant.
|
|
* @param matched The number of character strings matched. Return on output.
|
|
* @param end The last state found. Return on output.
|
|
* @param penult The penultimate state found.
|
|
*
|
|
* @return Found unicode character sequence output of the last state.
|
|
*/
|
|
static const wchar_t* sm_search(STATE_MACHINE* start, wchar_t* key, int* matched, STATE_MACHINE** penult, STATE_MACHINE** end)
|
|
{
|
|
STATE_MACHINE* sm = sm_search_shallow(start, (char)*key);
|
|
const wchar_t* unicode;
|
|
|
|
/* No match - stop recursion */
|
|
if(!sm) {
|
|
*matched = 0;
|
|
*end = start;
|
|
|
|
return start->output;
|
|
}
|
|
|
|
/* Match - recurse */
|
|
*penult = start;
|
|
unicode = sm_search(sm, key+1, matched, penult, end);
|
|
(*matched)++;
|
|
|
|
return unicode;
|
|
}
|
|
|
|
|
|
/**
|
|
* Sort the state machine's transition keys so it can be binary-searched.
|
|
* The sort is done only at 1 level, and does not recurse deep.
|
|
*/
|
|
static void sm_sort_shallow(STATE_MACHINE* sm)
|
|
{
|
|
qsort(sm->next, sm->next_size, sizeof(SM_WITH_KEY), swk_compare);
|
|
}
|
|
|
|
|
|
/**
|
|
* Add a single sequence-to-unicode path to the state machine.
|
|
*/
|
|
static int sm_add(STATE_MACHINE* sm, char* seq, const wchar_t* unicode, char flag)
|
|
{
|
|
STATE_MACHINE* sm_found = sm_search_shallow(sm, seq[0]);
|
|
|
|
/* Empty sequence */
|
|
if(seq[0] == '\0') {
|
|
if(wcslen(sm->output)) {
|
|
size_t i;
|
|
|
|
fprintf(stderr, "Unicode sequence ");
|
|
for(i = 0; i < wcslen(sm->output); i++) fprintf(stderr, "%04X ", (int)sm->output[i]);
|
|
fprintf(stderr, " already defined, overriding with ");
|
|
for(i = 0; i < wcslen(unicode); i++) fprintf(stderr, "%04X ", (int)unicode[i]);
|
|
fprintf(stderr, "\n");
|
|
}
|
|
wcscpy(sm->output, unicode);
|
|
sm->flag = flag;
|
|
return 0;
|
|
}
|
|
|
|
/* The key doesn't exist yet */
|
|
if(!sm_found) {
|
|
int index = (int)sm->next_size;
|
|
SM_WITH_KEY* next = &sm->next[index];
|
|
|
|
/* Add the key */
|
|
next->key = seq[0];
|
|
next->state = malloc(sizeof(STATE_MACHINE));
|
|
if(!next->state) {
|
|
perror("sm_add");
|
|
return 1;
|
|
}
|
|
sm_init(next->state);
|
|
|
|
/* Increase store for next time, if necessary */
|
|
if(++(sm->next_size) >= sm->next_maxsize) {
|
|
if(sm_dblspace(sm)) {
|
|
fprintf(stderr, "Memory expansion failure\n");
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
sm_found = next->state;
|
|
}
|
|
|
|
/* Recurse */
|
|
sm_add(sm_found, seq+1, unicode, flag);
|
|
|
|
/* Sort the states */
|
|
sm_sort_shallow(sm);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
/* ***************************************************************************
|
|
* CHARMAP FUNCTIONS
|
|
*/
|
|
|
|
/**
|
|
* Initialize the character map table.
|
|
*/
|
|
static int charmap_init(CHARMAP* cm)
|
|
{
|
|
int error_code = 0;
|
|
int i = 0;
|
|
|
|
memset(cm, 0, sizeof(CHARMAP));
|
|
|
|
for(i = 0; i < MAX_SECTIONS; i++) {
|
|
error_code += sm_init(&cm->sections[i]);
|
|
}
|
|
|
|
return error_code;
|
|
}
|
|
|
|
|
|
/**
|
|
* Add a character-sequence-to-unicode mapping to the character map.
|
|
*
|
|
* @param cm Character map to which to add the mapping.
|
|
* @param section The section of the character map to add the mapping.
|
|
* @param seq The character sequence to which to add the mapping.
|
|
* @param unicode The unicode of the character sequence.
|
|
* @param flag The flag associated with this state, if any.
|
|
*
|
|
* @return 0 if no error, 1 if error.
|
|
*/
|
|
static int charmap_add(CHARMAP* cm, int section, char* seq, const wchar_t* unicode, char* flag)
|
|
{
|
|
if(section >= MAX_SECTIONS) {
|
|
fprintf(stderr, "Section count exceeded\n");
|
|
return 1;
|
|
}
|
|
|
|
/* For now, we only utilize one-character flags */
|
|
if(strlen(flag) > 1) {
|
|
fprintf(stderr, "%04X: Multi-character flag, truncated.\n", (int)unicode);
|
|
}
|
|
|
|
return sm_add(&cm->sections[section], seq, unicode, flag[0]);
|
|
}
|
|
|
|
|
|
/**
|
|
* Load the character map table from a file.
|
|
*
|
|
* @param cm Character Map to load the table into.
|
|
* @param path The path of the file to load.
|
|
* @return Zero if the file is loaded fine, nonzero otherwise.
|
|
*/
|
|
static int charmap_load(CHARMAP* cm, const char* path)
|
|
{
|
|
FILE* is = NULL;
|
|
int section = 0;
|
|
int error_code = 0;
|
|
|
|
/* Open */
|
|
is = fopen(path, "rt");
|
|
if(!is) {
|
|
perror("path");
|
|
return 1;
|
|
}
|
|
|
|
/* Load */
|
|
while(!feof(is)) {
|
|
wchar_t unicode[MAX_UNICODE_SEQ];
|
|
int ulen = 0;
|
|
|
|
char buf[256];
|
|
char flag[256];
|
|
|
|
int scanned = 0;
|
|
|
|
/* Scan a single token first */
|
|
scanned = fscanf(is, "%255s", buf);
|
|
if(scanned < 0) break;
|
|
if(scanned == 0) {
|
|
fprintf(stderr, "%s: Character map syntax error\n", path);
|
|
return 1;
|
|
}
|
|
|
|
/* Handle the first argument */
|
|
if(strcmp(buf, "section") == 0) { /* Section division */
|
|
section++;
|
|
continue;
|
|
}
|
|
else if(buf[0] == '#') { /* Comment */
|
|
fscanf(is, "%*[^\n]");
|
|
continue;
|
|
}
|
|
else {
|
|
char* bp = buf;
|
|
int u;
|
|
|
|
do {
|
|
if(sscanf(bp, "%x", &u) == 1) { /* Unicode */
|
|
unicode[ulen++] = u;
|
|
}
|
|
else {
|
|
fprintf(stderr, "%s: Syntax error at '%s'\n", path, buf);
|
|
return 1;
|
|
}
|
|
|
|
bp = strchr(bp, ':');
|
|
if(bp) bp++;
|
|
} while(bp && ulen < MAX_UNICODE_SEQ-1);
|
|
unicode[ulen] = L'\0';
|
|
}
|
|
|
|
/* Scan some more */
|
|
scanned = fscanf(is, "%255s\t%255s", buf, flag);
|
|
if(scanned < 0) break;
|
|
|
|
/* Input count checking */
|
|
switch(scanned) {
|
|
case 0: case 1:
|
|
fprintf(stderr, "%s: Character map syntax error\n", path);
|
|
return 1;
|
|
|
|
default:
|
|
if(charmap_add(cm, section, buf, unicode, flag)) {
|
|
size_t i = 0;
|
|
|
|
fwprintf(stderr, L"Unable to add sequence '%ls', unicode ", buf);
|
|
for(i = 0; i < wcslen(unicode); i++) fwprintf(stderr, L"%04X ", (int)unicode[i]);
|
|
fwprintf(stderr, L"in section %d\n", section);
|
|
error_code = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Close */
|
|
fclose(is);
|
|
|
|
return error_code;
|
|
}
|
|
|
|
|
|
/**
|
|
* Free the resources used by a character map.
|
|
*/
|
|
static void charmap_free(CHARMAP* cm)
|
|
{
|
|
int i;
|
|
|
|
for(i = 0; i < MAX_SECTIONS; i++) {
|
|
sm_free(&cm->sections[i]);
|
|
}
|
|
|
|
memset(cm, 0, sizeof(CHARMAP));
|
|
}
|
|
|
|
|
|
/**
|
|
* Search for a matching character string in the character map.
|
|
*/
|
|
static const wchar_t* charmap_search(CHARMAP* cm, wchar_t* s)
|
|
{
|
|
STATE_MACHINE* start;
|
|
const wchar_t* unicode;
|
|
int section;
|
|
|
|
/* Determine the starting state based on the charmap's active section */
|
|
section = cm->section;
|
|
if(!IN_RANGE(0, section, (int)ARRAYLEN(cm->sections))) section = 0;
|
|
start = &cm->sections[section];
|
|
|
|
cm->match_state = NULL;
|
|
cm->match_state_prev = NULL;
|
|
unicode = sm_search(start, s, &cm->match_count, &cm->match_state_prev, &cm->match_state);
|
|
|
|
/**
|
|
* Determine whether the match is final. A match is considered to be final
|
|
* in two cases: (1)if the last state mached has no exit paths, or (2)if we
|
|
* did not consume all of the search string. (1) is obvious - if there are
|
|
* no more states to transition to, then the unicode we find is the final
|
|
* code. (2) means we reached the final state that can be the only
|
|
* interpretation of the input string, so it must be the final state.
|
|
* If neither of these is true, that means further input from the user
|
|
* may allow us to get to a different state, so we have not reached the
|
|
* final state we possibly can.
|
|
*/
|
|
cm->match_is_final = 0;
|
|
if(cm->match_count < (int)wcslen(s)) {
|
|
cm->match_is_final = 1;
|
|
}
|
|
|
|
/* Statistics */
|
|
cm->match_stats = MATCH_STAT_NONE;
|
|
if(cm->match_state->next_size == 0) {
|
|
cm->match_is_final = 1;
|
|
cm->match_stats |= MATCH_STAT_NOMOSTATES;
|
|
}
|
|
if(cm->match_count == (int)wcslen(s)) {
|
|
cm->match_stats |= MATCH_STAT_NOMOBUF;
|
|
}
|
|
|
|
return unicode;
|
|
}
|
|
|
|
|
|
/* ***************************************************************************
|
|
* LANGUAGE-SPECIFIC IM FUNCTIONS
|
|
*
|
|
* If you want to add a new language support, add the main code to this
|
|
* section. More specifically, do the following:
|
|
*
|
|
* 1) Add im_event_<lang>() function to this section. Use the existing
|
|
* im_event_* functions as models, and feel free to use the state-machine
|
|
* character map engine (CHARMAP struct) but do not feel obligated to
|
|
* do so. The CHARMAP engine exists for the programmer's benefit, to
|
|
* make it easier to support complex languages.
|
|
*
|
|
* 2) Update the im_init() functions so that it initializes im_event_fns[]
|
|
* with a pointer to your im_event_<lang>() function.
|
|
*
|
|
* 3) Create <lang>.im in the "im" directory, if you use the CHARMAP engine.
|
|
* Your code is what loads this file so you should already know to do this
|
|
* step if you have already written a working im_event_<lang>() function
|
|
* that uses CHARMAP, but I explicitly write out this instruction for
|
|
* those trying to figure out the relationship of <lang>.im to this IM
|
|
* framework.
|
|
*
|
|
* 4) Increase MAX_SECTIONS if your language needs more sections in <lang>.im
|
|
*
|
|
* 5) Increase INITIAL_SMSIZE if your <lang>.im is huginormous and takes too
|
|
* long to load. I can't think of any reason why this would happen unless
|
|
* you're writing a Chinese IM with a significant number of characters of the
|
|
* language represented, but the code as-is is somewhat lacking when it
|
|
* comes to writing a Chinese IM (need some way to show a dropdown box
|
|
* from the main app - same problem with Korean Hanja and Japanese Kanji
|
|
* inputs, but this isn't meant to be a complex IM framework so I think
|
|
* we're safe for Hanja and Kanji.) Do this with caution because
|
|
* changing INITIAL_SMSIZE will affect the memory consumption of all IM
|
|
* functions.
|
|
*/
|
|
|
|
/**
|
|
* Default C IM event handler.
|
|
*
|
|
* @see im_read
|
|
*/
|
|
static int im_event_c(IM_DATA* im, SDL_keysym ks)
|
|
{
|
|
/* Handle event requests */
|
|
im->s[0] = L'\0';
|
|
if(im->request != IM_REQ_TRANSLATE) return 0;
|
|
|
|
/* Handle key stroke */
|
|
switch(ks.sym) {
|
|
case SDLK_BACKSPACE: im->s[0] = L'\b'; break;
|
|
case SDLK_TAB: im->s[0] = L'\t'; break;
|
|
case SDLK_RETURN: im->s[0] = L'\r'; break;
|
|
default: im->s[0] = ks.unicode;
|
|
}
|
|
im->s[1] = L'\0';
|
|
im->buf[0] = L'\0';
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Japanese IM.
|
|
*
|
|
* @see im_read
|
|
*/
|
|
static int im_event_ja(IM_DATA* im, SDL_keysym ks)
|
|
{
|
|
static const char* lang_file = IMDIR "ja.im";
|
|
enum { SEC_ENGLISH, SEC_HIRAGANA, SEC_KATAKANA, SEC_TOTAL };
|
|
|
|
static CHARMAP cm;
|
|
|
|
|
|
/* Handle event requests */
|
|
switch(im->request) {
|
|
case 0: break;
|
|
|
|
case IM_REQ_FREE: /* Free allocated resources */
|
|
charmap_free(&cm);
|
|
/* go onto full reset */
|
|
|
|
case IM_REQ_RESET_FULL: /* Full reset */
|
|
cm.section = SEC_ENGLISH;
|
|
im->tip_text = im_tip_text[IM_TIP_ENGLISH];
|
|
/* go onto soft reset */
|
|
|
|
case IM_REQ_RESET_SOFT: /* Soft reset */
|
|
im->s[0] = L'\0';
|
|
im->buf[0] = L'\0';
|
|
im->redraw = 0;
|
|
cm.match_count = 0;
|
|
cm.match_is_final = 0;
|
|
cm.match_state = &cm.sections[cm.section];
|
|
cm.match_state_prev = &cm.sections[cm.section];
|
|
break;
|
|
|
|
case IM_REQ_INIT: /* Initialization */
|
|
charmap_init(&cm);
|
|
|
|
if(charmap_load(&cm, lang_file)) {
|
|
fprintf(stderr, "Unable to load %s, defaulting to im_event_c\n", lang_file);
|
|
im->lang = LANG_DEFAULT;
|
|
return im_event_c(im, ks);
|
|
}
|
|
|
|
im_fullreset(im);
|
|
|
|
#ifdef DEBUG
|
|
printf("IM: Loaded '%s'\n", lang_file);
|
|
#endif
|
|
break;
|
|
}
|
|
if(im->request != IM_REQ_TRANSLATE) return 0;
|
|
|
|
|
|
/* Discard redraw characters, so they can be redrawn */
|
|
if((int)wcslen(im->s) < im->redraw) im->redraw = wcslen(im->s);
|
|
wcs_lshift(im->s, (wcslen(im->s) - im->redraw) );
|
|
|
|
|
|
/* Handle keys */
|
|
switch(ks.sym) {
|
|
/* Keys to ignore */
|
|
case SDLK_NUMLOCK: case SDLK_CAPSLOCK: case SDLK_SCROLLOCK:
|
|
case SDLK_LSHIFT: case SDLK_RSHIFT:
|
|
case SDLK_LCTRL: case SDLK_RCTRL:
|
|
case SDLK_LALT:
|
|
case SDLK_LMETA: case SDLK_RMETA:
|
|
case SDLK_LSUPER: case SDLK_RSUPER:
|
|
case SDLK_MODE: case SDLK_COMPOSE:
|
|
break;
|
|
|
|
/* Right-Alt mapped to mode-switch */
|
|
case SDLK_RALT:
|
|
cm.section = (++cm.section % SEC_TOTAL); /* Change section */
|
|
im_softreset(im); /* Soft reset */
|
|
|
|
/* Set tip text */
|
|
switch(cm.section) {
|
|
case SEC_ENGLISH: im->tip_text = im_tip_text[IM_TIP_ENGLISH]; break;
|
|
case SEC_HIRAGANA: im->tip_text = im_tip_text[IM_TIP_HIRAGANA]; break;
|
|
case SEC_KATAKANA: im->tip_text = im_tip_text[IM_TIP_KATAKANA]; break;
|
|
}
|
|
break;
|
|
|
|
/* Enter finalizes previous redraw */
|
|
case SDLK_RETURN:
|
|
if(im->redraw <= 0) {
|
|
im->s[0] = L'\r';
|
|
im->s[1] = L'\0';
|
|
}
|
|
im->buf[0] = L'\0';
|
|
im->redraw = 0;
|
|
break;
|
|
|
|
/* Actual character processing */
|
|
default:
|
|
/* English mode */
|
|
if(cm.section == SEC_ENGLISH) {
|
|
im->s[0] = ks.unicode;
|
|
im->s[1] = L'\0';
|
|
im->buf[0] = L'\0';
|
|
}
|
|
/* Hiragana and Katakana modes */
|
|
else {
|
|
wchar_t u = ks.unicode;
|
|
|
|
im->s[0] = L'\0'; /* Zero-out output string */
|
|
wcsncat(im->buf, &u, 1); /* Copy new character */
|
|
|
|
/* Translate the characters */
|
|
im->redraw = 0;
|
|
while(1) {
|
|
const wchar_t* us = charmap_search(&cm, im->buf);
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" [%8ls] [%8ls] %2d %2d\n", im->s, im->buf, wcslen(im->s), wcslen(im->buf));
|
|
#endif
|
|
|
|
/* Match was found? */
|
|
if(us && wcslen(us)) {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 1\n");
|
|
#endif
|
|
|
|
wcscat(im->s, us);
|
|
|
|
/* Final match */
|
|
if(cm.match_is_final) {
|
|
wcs_lshift(im->buf, cm.match_count);
|
|
cm.match_count = 0;
|
|
cm.match_is_final = 0;
|
|
}
|
|
/* May need to be overwritten next time */
|
|
else {
|
|
im->redraw += wcslen(us);
|
|
break;
|
|
}
|
|
}
|
|
/* No match, but more data is in the buffer */
|
|
else if(wcslen(im->buf) > 0) {
|
|
/* If the input character has no state, it's its own state */
|
|
if(cm.match_count == 0) {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 2a\n");
|
|
#endif
|
|
wcsncat(im->s, im->buf, 1);
|
|
wcs_lshift(im->buf, 1);
|
|
cm.match_is_final = 0;
|
|
}
|
|
/* If the matched characters didn't consume all, it's own state */
|
|
else if((size_t)cm.match_count != wcslen(im->buf)) {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 2b (%2d)\n", cm.match_count);
|
|
#endif
|
|
wcsncat(im->s, im->buf, 1);
|
|
wcs_lshift(im->buf, 1);
|
|
cm.match_is_final = 0;
|
|
}
|
|
/* Otherwise it's just a part of a future input */
|
|
else {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 2c (%2d)\n", cm.match_count);
|
|
#endif
|
|
wcscat(im->s, im->buf);
|
|
cm.match_is_final = 0;
|
|
im->redraw += wcslen(im->buf);
|
|
break;
|
|
}
|
|
}
|
|
/* No match and no more data in the buffer */
|
|
else {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 3\n");
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
/* Is this the end? */
|
|
if(cm.match_is_final) break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return im->redraw;
|
|
}
|
|
|
|
|
|
/**
|
|
* Korean IM helper function to tell whether a character typed will produce
|
|
* a vowel.
|
|
*
|
|
* @see im_event_ko
|
|
*/
|
|
static int im_event_ko_isvowel(CHARMAP* cm, wchar_t c)
|
|
{
|
|
STATE_MACHINE *start, *next;
|
|
const wchar_t* unicode;
|
|
int section;
|
|
|
|
/* Determine the starting state based on the charmap's active section */
|
|
section = cm->section;
|
|
if(!IN_RANGE(0, section, (int)ARRAYLEN(cm->sections))) section = 0;
|
|
start = &cm->sections[section];
|
|
|
|
next = sm_search_shallow(start, (char)c);
|
|
unicode = next ? next->output : NULL;
|
|
|
|
return (unicode && wcslen(unicode) == 1 && 0x314F <= unicode[0] && unicode[0] <= 0x3163);
|
|
}
|
|
|
|
|
|
/**
|
|
* Korean IM.
|
|
*
|
|
* @see im_read
|
|
*/
|
|
static int im_event_ko(IM_DATA* im, SDL_keysym ks)
|
|
{
|
|
static const char* lang_file = IMDIR "ko.im";
|
|
enum { SEC_ENGLISH, SEC_HANGUL, SEC_TOTAL };
|
|
|
|
static CHARMAP cm;
|
|
|
|
|
|
/* Handle event requests */
|
|
switch(im->request) {
|
|
case 0: break;
|
|
|
|
case IM_REQ_FREE: /* Free allocated resources */
|
|
charmap_free(&cm);
|
|
/* go onto full reset */
|
|
|
|
case IM_REQ_RESET_FULL: /* Full reset */
|
|
cm.section = SEC_ENGLISH;
|
|
im->tip_text = im_tip_text[IM_TIP_ENGLISH];
|
|
/* go onto soft reset */
|
|
|
|
case IM_REQ_RESET_SOFT: /* Soft reset */
|
|
im->s[0] = L'\0';
|
|
im->buf[0] = L'\0';
|
|
im->redraw = 0;
|
|
cm.match_count = 0;
|
|
cm.match_is_final = 0;
|
|
cm.match_state = &cm.sections[cm.section];
|
|
cm.match_state_prev = &cm.sections[cm.section];
|
|
break;
|
|
|
|
case IM_REQ_INIT: /* Initialization */
|
|
charmap_init(&cm);
|
|
|
|
if(charmap_load(&cm, lang_file)) {
|
|
fprintf(stderr, "Unable to load %s, defaulting to im_event_c\n", lang_file);
|
|
im->lang = LANG_DEFAULT;
|
|
return im_event_c(im, ks);
|
|
}
|
|
|
|
im_fullreset(im);
|
|
|
|
#ifdef DEBUG
|
|
printf("IM: Loaded '%s'\n", lang_file);
|
|
#endif
|
|
break;
|
|
}
|
|
if(im->request != IM_REQ_TRANSLATE) return 0;
|
|
|
|
|
|
/* Discard redraw characters, so they can be redrawn */
|
|
if((int)wcslen(im->s) < im->redraw) im->redraw = wcslen(im->s);
|
|
wcs_lshift(im->s, (wcslen(im->s) - im->redraw) );
|
|
|
|
|
|
/* Handle keys */
|
|
switch(ks.sym) {
|
|
/* Keys to ignore */
|
|
case SDLK_NUMLOCK: case SDLK_CAPSLOCK: case SDLK_SCROLLOCK:
|
|
case SDLK_LSHIFT: case SDLK_RSHIFT:
|
|
case SDLK_LCTRL: case SDLK_RCTRL:
|
|
case SDLK_LMETA: case SDLK_RMETA:
|
|
case SDLK_LSUPER: case SDLK_RSUPER:
|
|
case SDLK_MODE: case SDLK_COMPOSE:
|
|
break;
|
|
|
|
/* Right-Alt mapped to mode-switch */
|
|
case SDLK_LALT: case SDLK_RALT:
|
|
cm.section = (++cm.section % SEC_TOTAL); /* Change section */
|
|
im_softreset(im); /* Soft reset */
|
|
|
|
/* Set tip text */
|
|
switch(cm.section) {
|
|
case SEC_ENGLISH: im->tip_text = im_tip_text[IM_TIP_ENGLISH]; break;
|
|
case SEC_HANGUL: im->tip_text = im_tip_text[IM_TIP_HANGUL]; break;
|
|
}
|
|
break;
|
|
|
|
/* Backspace removes only a single buffered character */
|
|
case SDLK_BACKSPACE:
|
|
/* Delete one buffered character */
|
|
if(wcslen(im->buf) > 0) {
|
|
wcs_pull(im->buf, 1);
|
|
if(im->redraw > 0) im->redraw--;
|
|
ks.unicode = L'\0';
|
|
}
|
|
/* continue processing: */
|
|
|
|
/* Actual character processing */
|
|
default:
|
|
/* English mode */
|
|
if(cm.section == SEC_ENGLISH) {
|
|
im->s[0] = ks.unicode;
|
|
im->s[1] = L'\0';
|
|
im->buf[0] = L'\0';
|
|
}
|
|
/* Hangul mode */
|
|
else {
|
|
wchar_t u = ks.unicode;
|
|
wchar_t* bp = im->buf;
|
|
|
|
im->s[0] = L'\0'; /* Zero-out output string */
|
|
wcsncat(bp, &u, 1); /* Copy new character */
|
|
|
|
/* Translate the characters */
|
|
im->redraw = 0;
|
|
while(1) {
|
|
const wchar_t* us = charmap_search(&cm, bp);
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" [%8ls] [%8ls] %2d %2d\n", im->s, im->buf, wcslen(im->s), wcslen(im->buf));
|
|
#endif
|
|
|
|
/* Match was found? */
|
|
if(us && wcslen(us)) {
|
|
/* Final match */
|
|
if(cm.match_is_final) {
|
|
/* Batchim may carry over to the next character */
|
|
if(cm.match_state->flag == 'b') {
|
|
wchar_t next_char = bp[cm.match_count];
|
|
|
|
/* If there is no more buffer, output it */
|
|
if(cm.match_stats & MATCH_STAT_NOMOBUF) {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 1a\n");
|
|
#endif
|
|
|
|
wcscat(im->s, us); /* Output */
|
|
im->redraw += wcslen(us); /* May need to re-eval next time */
|
|
bp += cm.match_count; /* Keep buffer data for re-eval*/
|
|
cm.match_count = 0;
|
|
cm.match_is_final = 0;
|
|
}
|
|
/* If there is buffer data but it's not vowel, finalize it */
|
|
else if(!im_event_ko_isvowel(&cm, next_char)) {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 1b\n");
|
|
#endif
|
|
|
|
wcscat(im->s, us); /* Output */
|
|
wcs_lshift(bp, cm.match_count);
|
|
cm.match_count = 0;
|
|
cm.match_is_final = 0;
|
|
}
|
|
/* If there is buffer and it's vowel, re-eval */
|
|
else {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 1c\n");
|
|
#endif
|
|
|
|
us = cm.match_state_prev->output;
|
|
wcscat(im->s, us); /* Output */
|
|
cm.match_count--; /* Matched all but one */
|
|
cm.match_is_final = 0;
|
|
wcs_lshift(bp, cm.match_count);
|
|
}
|
|
}
|
|
/* No batchim - this is final */
|
|
else {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 1d\n");
|
|
#endif
|
|
|
|
wcscat(im->s, us);
|
|
wcs_lshift(bp, cm.match_count);
|
|
cm.match_count = 0;
|
|
cm.match_is_final = 0;
|
|
}
|
|
}
|
|
/* May need to be overwritten next time */
|
|
else {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 1e\n");
|
|
#endif
|
|
|
|
wcscat(im->s, us);
|
|
im->redraw += wcslen(us);
|
|
break;
|
|
}
|
|
}
|
|
/* No match, but more data is in the buffer */
|
|
else if(wcslen(bp) > 0) {
|
|
/* If the input character has no state, it's its own state */
|
|
if(cm.match_count == 0) {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 2a\n");
|
|
#endif
|
|
wcsncat(im->s, bp, 1);
|
|
wcs_lshift(bp, 1);
|
|
cm.match_is_final = 0;
|
|
}
|
|
/* If the matched characters didn't consume all, it's own state */
|
|
else if((size_t)cm.match_count != wcslen(bp)) {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 2b (%2d)\n", cm.match_count);
|
|
#endif
|
|
wcsncat(im->s, bp, 1);
|
|
wcs_lshift(bp, 1);
|
|
cm.match_is_final = 0;
|
|
}
|
|
/* Otherwise it's just a part of a future input */
|
|
else {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 2c (%2d)\n", cm.match_count);
|
|
#endif
|
|
wcscat(im->s, bp);
|
|
cm.match_is_final = 0;
|
|
im->redraw += wcslen(bp);
|
|
break;
|
|
}
|
|
}
|
|
/* No match and no more data in the buffer */
|
|
else {
|
|
#ifdef IM_DEBUG
|
|
wprintf(L" 3\n");
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
/* Is this the end? */
|
|
if(cm.match_is_final) break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return im->redraw;
|
|
}
|
|
|
|
|
|
/* ***************************************************************************
|
|
* OTHER STATIC IM FUNCTIONS
|
|
*/
|
|
|
|
/**
|
|
* Generic event handler that calls the appropriate language handler.
|
|
* im->request should have the event ID.
|
|
*/
|
|
static void im_event(IM_DATA* im)
|
|
{
|
|
SDL_keysym ks;
|
|
|
|
ks.sym = 0;
|
|
ks.unicode = 0;
|
|
|
|
im_read(im, ks);
|
|
}
|
|
|
|
|
|
/**
|
|
* Make an event request and call the event handler.
|
|
*/
|
|
static void im_request(IM_DATA* im, int request)
|
|
{
|
|
im->request = request;
|
|
im_event(im);
|
|
im->request = IM_REQ_TRANSLATE;
|
|
}
|
|
|
|
|
|
/* ***************************************************************************
|
|
* PUBLIC IM FUNCTIONS
|
|
*/
|
|
|
|
/**
|
|
* Initialize the IM_DATA structure.
|
|
*
|
|
* @param im IM_DATA structure to initialize.
|
|
* @param lang LANG_* defined constant to initialize the structure with.
|
|
*/
|
|
void im_init(IM_DATA* im, int lang)
|
|
{
|
|
/* Free already allocated resources if initialized before */
|
|
if(im_initialized) {
|
|
im_free(im);
|
|
}
|
|
|
|
/* Initialize */
|
|
memset(im, 0, sizeof(IM_DATA));
|
|
im->lang = lang;
|
|
|
|
/* Setup static globals */
|
|
if(!im_initialized) {
|
|
/* ADD NEW LANGUAGE SUPPORT HERE */
|
|
im_event_fns[LANG_JA] = &im_event_ja;
|
|
im_event_fns[LANG_KO] = &im_event_ko;
|
|
|
|
im_initialized = 1;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
assert(0 <= im->lang && im->lang < NUM_LANGS);
|
|
if(im_event_fp) printf("Initializing IM for %s...\n", lang_prefixes[im->lang]);
|
|
#endif
|
|
|
|
/* Initialize the individual IM */
|
|
im_request(im, IM_REQ_INIT);
|
|
}
|
|
|
|
|
|
/**
 * Soft reset: empty the output string and the key buffer, then send
 * IM_REQ_RESET_SOFT to the language handler.
 */
void im_softreset(IM_DATA* im)
{
  im->buf[0] = L'\0';
  im->s[0] = L'\0';

  im_request(im, IM_REQ_RESET_SOFT);
}
|
|
|
|
|
|
/**
 * Full reset: empty the output string and the key buffer, then send
 * IM_REQ_RESET_FULL to the language handler.
 */
void im_fullreset(IM_DATA* im)
{
  im->buf[0] = L'\0';
  im->s[0] = L'\0';

  im_request(im, IM_REQ_RESET_FULL);
}
|
|
|
|
|
|
/**
|
|
* Free any allocated resources.
|
|
*/
|
|
void im_free(IM_DATA* im)
|
|
{
|
|
im_request(im, IM_REQ_FREE);
|
|
}
|
|
|
|
|
|
/**
|
|
* IM-process a character. This function simply looks up the language from
|
|
* IM and calls the appropriate im_event_<lang>() language-specific IM event
|
|
* handler. im_event_c() is called by default if no language-specific
|
|
* function is specified for the specified language.
|
|
*
|
|
* @param im IM-processed data to return to the caller function.
|
|
* @param ks SDL_keysym typed on the keyboard.
|
|
*
|
|
* @return The number of characters in im->s that should not be committed.
|
|
* In other words, the returned number of characters at the end of
|
|
* im->s should be overwritten the next time im_read is called.
|
|
*
|
|
* @see im_event_c()
|
|
* @see im_event_fns
|
|
*/
|
|
int im_read(IM_DATA* im, SDL_keysym ks)
|
|
{
|
|
IM_EVENT_FN im_event_fp = NULL;
|
|
int redraw = 0;
|
|
|
|
/* Sanity check */
|
|
if(im->lang < 0 || im->lang >= NUM_LANGS) {
|
|
fprintf(stderr, "im->lang out of range (%d), using default\n", im->lang);
|
|
im->lang = LANG_DEFAULT;
|
|
}
|
|
|
|
/* Function pointer to the language-specific im_event_* function */
|
|
im_event_fp = im_event_fns[im->lang];
|
|
|
|
/* Run the language-specific IM or run the default C IM */
|
|
if(im_event_fp) redraw = (*im_event_fp)(im, ks);
|
|
else redraw = im_event_c(im, ks);
|
|
|
|
#ifdef IM_DEBUG
|
|
wprintf(L"* [%8ls] [%8ls] %2d %2d (%2d)\n", im->s, im->buf, wcslen(im->s), wcslen(im->buf), im->redraw);
|
|
#endif
|
|
|
|
return redraw;
|
|
}
|
|
|
|
|
|
/* vim:ts=2:et
|
|
*/
|