/*
 * romaji.c - romaji (romanized Japanese) conversion
 *
 * Written By:  MURAOKA Taro <koron@tka.att.ne.jp>
 */
module migemo_d.romaji;


private static import core.memory;
private static import core.stdc.ctype;
private static import core.stdc.stdio;
private static import core.stdc.string;
private static import migemo_d.charset;
private static import migemo_d.wordbuf;

/**
 * Callback used to measure one character of the input.
 *
 * romanode_query() calls it with a pointer to the current key position and
 * uses the returned int as the number of bytes to skip; results below 1 are
 * replaced by 1.
 */
public alias romaji_proc_char2int = extern (C) nothrow @nogc int function(const (char)*, uint*);
public alias ROMAJI_PROC_CHAR2INT = .romaji_proc_char2int;

/* Input character that is rewritten to fixvalue_xn when no table entry matches. */
enum ROMAJI_FIXKEY_N = 'n';

/* Table key whose value is remembered as fixvalue_xn. */
enum ROMAJI_FIXKEY_XN = "xn";

/* Table key whose value is remembered as fixvalue_xtu. */
enum ROMAJI_FIXKEY_XTU = "xtu";

/* Characters that never trigger the doubled-letter (fixvalue_xtu) rule. */
enum ROMAJI_FIXKEY_NONXTU = "aiueon";

/*
 * romanode interfaces
 */

/**
 * One node of the key trie.
 *
 * Siblings (alternative bytes at the same key position) are linked through
 * `next`; the following key position is reached through `child`.
 * `value` is non-null only on a node that terminates a registered key.
 */
struct _romanode
{
	char key;
	char* value;
	romanode* next;
	romanode* child;
}

alias romanode = ._romanode;

/* Counters of node allocations / releases (incremented by romanode_new / romanode_delete). */
int n_romanode_new = 0;
int n_romanode_delete = 0;

/**
 * Allocates one zero-filled node.
 *
 * Returns: the new node, or null when the allocation failed
 */
pragma(inline, true)
nothrow @nogc
package .romanode* romanode_new()

	do
	{
		++.n_romanode_new;

		return cast(.romanode*)(core.memory.pureCalloc(1, .romanode.sizeof));
	}

/**
 * Releases a node together with all of its siblings and descendants.
 *
 * Siblings are released recursively, the child chain iteratively.
 *
 * Params:
 *      node = first node to release (may be null)
 */
nothrow @nogc
package void romanode_delete(.romanode* node)

	do
	{
		while (node != null) {
			.romanode* child = node.child;

			if (node.next != null) {
				.romanode_delete(node.next);
				node.next = null;
			}

			/*
			 * Interior nodes legitimately have no value (romanode_dig clears
			 * it), so a null value must not be treated as an error here.
			 */
			if (node.value != null) {
				core.memory.pureFree(node.value);
				node.value = null;
			}

			core.memory.pureFree(node);
			node = child;
			++.n_romanode_delete;
		}
	}

/**
 * Walks the trie along key, creating missing nodes, and returns the slot of
 * the node that terminates key.
 *
 * Every node matched on the way loses its value, and the terminating node
 * loses its whole child subtree, so the returned node is left with no value
 * and no children.
 *
 * Params:
 *      ref_node = address of the root pointer of the trie
 *      key = key to register
 *
 * Returns: address of the pointer to the terminating node, or null when an argument is null, key is empty or a node allocation failed
 */
nothrow @nogc
package .romanode** romanode_dig(.romanode** ref_node, const (char)* key)

	do
	{
		if ((ref_node == null) || (key == null) || (key[0] == '\0')) {
			return null;
		}

		while (true) {
			if (!*ref_node) {
				*ref_node = .romanode_new();

				if (*ref_node == null) {
					return null;
				}

				(*ref_node).key = *key;
			}

			if ((*ref_node).key == *key) {
				/* Release the old value before dropping it; otherwise it leaks. */
				if ((*ref_node).value != null) {
					core.memory.pureFree((*ref_node).value);
					(*ref_node).value = null;
				}

				if (!*++key) {
					break;
				}

				ref_node = &(*ref_node).child;
			} else {
				ref_node = &(*ref_node).next;
			}
		}

		if ((*ref_node).child) {
			.romanode_delete((*ref_node).child);
			(*ref_node).child = null;
		}

		return ref_node;
	}

/**
 * Searches the trie for the node matching the beginning of key.
 *
 * Params:
 *      node = root node
 *      key = search key
 *      skip = receives the number of key bytes to advance (may be null)
 *      char2int = optional callback used to measure one character when nothing matched
 *
 * Returns: the matched node, or null when no node was found.
 *          When key ends in the middle of a registered key, the node reached
 *          so far is returned and *skip is set to 0.
 */
nothrow @nogc
package .romanode* romanode_query(.romanode* node, const (char)* key, int* skip, .ROMAJI_PROC_CHAR2INT char2int)

	do
	{
		int nskip = 0;
		const (char)* key_start = key;

		// core.stdc.stdio.printf("romanode_query: key=%s skip=%p char2int=%p\n", key, skip, char2int);
		if ((node != null) && (key != null) && (*key)) {
			while (true) {
				if (*key != node.key) {
					node = node.next;
				} else {
					++nskip;

					if (node.value != null) {
						// core.stdc.stdio.printf("  HERE 1\n");

						break;
					}

					if (!*++key) {
						/* The key ran out before a value was reached. */
						nskip = 0;
						// core.stdc.stdio.printf("  HERE 2\n");

						break;
					}

					node = node.child;
				}

				/* When there is no node left to scan, advance the key and return null. */
				if (node == null) {
					/* Advance by one character rather than by one byte. */
					if ((char2int == null) || ((nskip = (*char2int)(key_start, null)) < 1)) {
						nskip = 1;
					}

					// core.stdc.stdio.printf("  HERE 3: nskip=%d\n", nskip);

					break;
				}
			}
		}

		if (skip != null) {
			*skip = nskip;
		}

		return node;
	}

version (none) {
	/**
	 * Debug helper (disabled): prints every "key=value" pair below node.
	 *
	 * Params:
	 *      node = node to print, must not be null
	 *      p = write position inside the shared key buffer, or null to start at its beginning
	 */
	nothrow @nogc
	package void romanode_print_stub(.romanode* node, char* p)

		in
		{
			assert(node != null);
		}

		do
		{
			static char[256] buf;

			if (p == null) {
				p = &(buf[0]);
			}

			p[0] = node.key;
			p[1] = '\0';

			if (node.value != null) {
				core.stdc.stdio.printf("%s=%s\n", &(buf[0]), node.value);
			}

			if (node.child != null) {
				.romanode_print_stub(node.child, p + 1);
			}

			if (node.next != null) {
				.romanode_print_stub(node.next, p);
			}
		}

	/**
	 * Debug helper (disabled): prints the whole trie starting at node.
	 *
	 * Params:
	 *      node = root node (null is ignored)
	 */
	nothrow @nogc
	package void romanode_print(.romanode* node)

		do
		{
			if (node == null) {
				return;
			}

			.romanode_print_stub(node, null);
		}
}

/**
 * romaji interface
 */
extern (C)
struct _romaji
{
	/* Verbosity level used by the debug-only trace output. */
	int verbose;

	/* Root of the key trie. */
	.romanode* node;

	/* Copy of the first value registered for the key "xn". */
	char* fixvalue_xn;

	/* Copy of the first value registered for the key "xtu". */
	char* fixvalue_xtu;

	/* Optional character-measuring callback passed to romanode_query(). */
	.ROMAJI_PROC_CHAR2INT char2int;
}

public alias romaji = ._romaji;

/**
 * Duplicates a C string and converts the copy to lower case with tolower().
 *
 * Params:
 *      string_ = string to copy, must not be null
 *
 * Returns: the lower-cased copy (release with free), or null when the duplication failed
 */
nothrow @nogc
package char* strdup_lower(const (char)* string_)

	in
	{
		assert(string_ != null);
	}

	do
	{
		char* out_ = core.stdc.string.strdup(string_);

		if (out_ != null) {
			for (char* tmp = out_; *tmp != '\0'; ++tmp) {
				*tmp = cast(char)(core.stdc.ctype.tolower(*tmp));
			}
		}

		return out_;
	}

/**
 * Creates an empty (zero-filled) romaji object.
 *
 * Returns: the new object, or null when the allocation failed
 */
extern (C)
pure nothrow @trusted @nogc
public .romaji* romaji_open()

	do
	{
		return cast(.romaji*)(core.memory.pureCalloc(1, .romaji.sizeof));
	}

/**
 * Releases a romaji object together with its trie and remembered values.
 *
 * Params:
 *      object = object to release (null is ignored)
 */
extern (C)
nothrow @nogc
public void romaji_close(.romaji* object)

	do
	{
		if (object != null) {
			if (object.node != null) {
				.romanode_delete(object.node);
				object.node = null;
			}

			if (object.fixvalue_xn != null) {
				core.memory.pureFree(object.fixvalue_xn);
				object.fixvalue_xn = null;
			}

			if (object.fixvalue_xtu != null) {
				core.memory.pureFree(object.fixvalue_xtu);
				object.fixvalue_xtu = null;
			}

			core.memory.pureFree(object);
		}
	}

/**
 * Registers one key/value pair in the conversion table.
 *
 * Params:
 *      object = romaji object
 *      key = key string
 *      value = replacement string, must not be empty
 *
 * Returns: 0 on success, 1 when an argument is null, 2 when value is empty,
 *          4 when the node could not be obtained (romanode_dig() returned
 *          null) or the value could not be duplicated
 */
extern (C)
nothrow @nogc
public int romaji_add_table(.romaji* object, const (char)* key, const (char)* value)

	do
	{
		if ((object == null) || (key == null) || (value == null)) {
			/* Unexpected error */
			return 1;
		}

		size_t value_length = core.stdc.string.strlen(value);

		if (value_length == 0) {
			/* Too short value string */
			return 2;
		}

		.romanode** ref_node = .romanode_dig(&object.node, key);

		if (ref_node == null) {
			/* Memory exhausted */
			return 4;
		}

		debug {
			if (object.verbose >= 10) {
				core.stdc.stdio.printf("romaji_add_table(\"%s\", \"%s\")\n", key, value);
			}
		}

		(*ref_node).value = core.stdc.string.strdup(value);

		if ((*ref_node).value == null) {
			/* Memory exhausted */
			return 4;
		}

		/* Remember the values registered for "xn" and "xtu" (first registration only). */
		if ((object.fixvalue_xn == null) && (!core.stdc.string.strcmp(key, .ROMAJI_FIXKEY_XN))) {
			/*core.stdc.stdio.fprintf(core.stdc.stdio.stderr, "XN: key=%s, value=%s\n", key, value);*/
			object.fixvalue_xn = core.stdc.string.strdup(value);
		}

		if ((object.fixvalue_xtu == null) && (!core.stdc.string.strcmp(key, .ROMAJI_FIXKEY_XTU))) {
			/*core.stdc.stdio.fprintf(core.stdc.stdio.stderr, "XTU: key=%s, value=%s\n", key, value);*/
			object.fixvalue_xtu = core.stdc.string.strdup(value);
		}

		return 0;
	}

/**
 * Reads whitespace-separated key/value pairs from fp and registers each pair
 * with romaji_add_table().
 *
 * A '#' where a key is expected starts a comment that runs to the end of the
 * line, unless it is immediately followed by another '#', in which case the
 * second '#' becomes the first character of a key.
 * The result of romaji_add_table() is not checked.
 *
 * Params:
 *      object = romaji object
 *      fp = stream to read from
 *
 * Returns: 0 when the stream was read to its end, -1 when fp is null or the work buffers could not be created
 */
nothrow @nogc
int romaji_load_stub(.romaji* object, core.stdc.stdio.FILE* fp)

	do
	{
		if (fp == null) {
			return -1;
		}

		migemo_d.wordbuf.wordbuf_p buf_key = migemo_d.wordbuf.wordbuf_open();
		migemo_d.wordbuf.wordbuf_p buf_value = migemo_d.wordbuf.wordbuf_open();

		scope (exit) {
			if (buf_key != null) {
				migemo_d.wordbuf.wordbuf_close(buf_key);
				buf_key = null;
			}

			if (buf_value != null) {
				migemo_d.wordbuf.wordbuf_close(buf_value);
				buf_value = null;
			}
		}

		if ((buf_key == null) || (buf_value == null)) {

			return -1;
		}

		int mode = 0;
		int ch;

		do {
			ch = core.stdc.stdio.fgetc(fp);

			switch (mode) {
				case 0:
					/* Waiting for a key */
					if (ch == '#') {
						/* Look one character ahead: "##" starts a key with '#', anything else is a comment. */
						ch = core.stdc.stdio.fgetc(fp);

						if (ch != '#') {
							core.stdc.stdio.ungetc(ch, fp);

							/* Switch to: skipping to the end of the line */
							mode = 1;

							break;
						}
					}

					if ((ch != core.stdc.stdio.EOF) && (!core.stdc.ctype.isspace(ch))) {
						migemo_d.wordbuf.wordbuf_reset(buf_key);
						migemo_d.wordbuf.wordbuf_add(buf_key, cast(char)(ch));

						/* Switch to: reading a key */
						mode = 2;
					}

					break;

				case 1:
					/* Skipping to the end of the line */
					if (ch == '\n') {
						/* Switch to: waiting for a key */
						mode = 0;
					}

					break;

				case 2:
					/* Reading a key */
					if (core.stdc.ctype.isspace(ch)) {
						/* Switch to: waiting for a value */
						mode = 3;
					} else if (ch != core.stdc.stdio.EOF) {
						/* EOF is excluded so that it is never stored as a key byte. */
						migemo_d.wordbuf.wordbuf_add(buf_key, cast(char)(ch));
					}

					break;

				case 3:
					/* Waiting for a value */
					if ((ch != core.stdc.stdio.EOF) && (!core.stdc.ctype.isspace(ch))) {
						migemo_d.wordbuf.wordbuf_reset(buf_value);
						migemo_d.wordbuf.wordbuf_add(buf_value, cast(char)(ch));

						/* Switch to: reading a value */
						mode = 4;
					}

					break;

				case 4:
					/* Reading a value */
					if ((ch != core.stdc.stdio.EOF) && (!core.stdc.ctype.isspace(ch))) {
						migemo_d.wordbuf.wordbuf_add(buf_value, cast(char)(ch));
					} else {
						/* Whitespace or EOF terminates the value: register the pair. */
						char* key = migemo_d.wordbuf.WORDBUF_GET(buf_key);
						char* value = migemo_d.wordbuf.WORDBUF_GET(buf_value);
						.romaji_add_table(object, key, value);
						mode = 0;
					}

					break;

				default:
					break;
			}
		} while (ch != core.stdc.stdio.EOF);

		return 0;
	}

/**
 * Loads a romaji dictionary.
 *
 * The character-set of the file is detected first and the matching
 * char2int procedure is stored in the object.
 *
 * Params:
 *      object = romaji object
 *      filename = dictionary file name
 *
 * Returns: 0 on success, non-zero on failure.
 */
extern (C)
nothrow @nogc
public int romaji_load(.romaji* object, const (char)* filename)

	do
	{
		if ((object == null) || (filename == null)) {
			return -1;
		}

		version (all) {
			int charset = migemo_d.charset.charset_detect_file(filename);
			migemo_d.charset.charset_getproc(charset, &object.char2int, null);
		}

		core.stdc.stdio.FILE* fp = core.stdc.stdio.fopen(filename, "rt");

		scope (exit) {
			if (fp != null) {
				core.stdc.stdio.fclose(fp);
				fp = null;
			}
		}

		if (fp == null) {
			return -1;
		}

		return .romaji_load_stub(object, fp);
	}

/**
 * Converts string_ using the registered table.
 *
 * Rules applied at each input position, in this order:
 * $(UL
 *   $(LI a doubled character that is not one of "aiueon" emits fixvalue_xtu (when set) and consumes one byte;)
 *   $(LI a table match emits the registered value;)
 *   $(LI an 'n' without a table match emits fixvalue_xn (when set);)
 *   $(LI anything else is copied through unchanged;)
 *   $(LI when the input ends in the middle of a registered key, the rest is copied through and its start is reported via ppstop.)
 * )
 *
 * Params:
 *      object = romaji object
 *      string_ = input string
 *      ppstop = receives the position inside the result where conversion stopped, or null when it did not stop early (may be null)
 *      ignorecase = non-zero to match against a lower-cased copy of the input
 *
 * Returns: newly allocated result (release with romaji_release()), or null on failure
 */
extern (C)
nothrow @nogc
public char* romaji_convert2(.romaji* object, const (char)* string_, char** ppstop, int ignorecase)

	do
	{
		/* Argument "ppstop" receives the position where conversion stopped. */
		migemo_d.wordbuf.wordbuf_p buf = null;
		char* lower = null;
		char* answer = null;
		const (char)* input = string_;
		int stop = -1;

		/* Only lower-case a real string; a null input is rejected below. */
		if ((ignorecase) && (string_ != null)) {
			lower = .strdup_lower(string_);
			input = lower;
		}

		scope (exit) {
			if (lower != null) {
				core.memory.pureFree(lower);
				lower = null;
			}

			if (buf != null) {
				migemo_d.wordbuf.wordbuf_close(buf);
				buf = null;
			}
		}

		if ((object != null) && (string_ != null) && (input != null)) {
			buf = migemo_d.wordbuf.wordbuf_open();

			if (buf != null) {
				int skip;

				for (int i = 0; string_[i];) {
					/* Doubled-letter check (fixvalue_xtu) */
					if ((object.fixvalue_xtu != null) && (input[i] == input[i + 1]) && (core.stdc.string.strchr(.ROMAJI_FIXKEY_NONXTU, input[i]) == null)) {
						++i;
						migemo_d.wordbuf.wordbuf_cat(buf, object.fixvalue_xtu);

						continue;
					}

					.romanode* node = .romanode_query(object.node, &input[i], &skip, object.char2int);

					debug {
						if (object.verbose >= 1) {
							core.stdc.stdio.printf("key=%s value=%s skip=%d\n", &input[i], (node != null) ? cast(char*)(node.value) : (&("null\0"[0])), skip);
						}
					}

					if (skip == 0) {
						/* The input ended inside a registered key: keep the rest verbatim. */
						if (string_[i]) {
							stop = migemo_d.wordbuf.WORDBUF_LEN(buf);
							migemo_d.wordbuf.wordbuf_cat(buf, &string_[i]);
						}

						break;
					} else if (node == null) {
						/* Convert "n" followed by an unmatched character to fixvalue_xn */
						if ((skip == 1) && (input[i] == .ROMAJI_FIXKEY_N) && (object.fixvalue_xn != null)) {
							++i;
							migemo_d.wordbuf.wordbuf_cat(buf, object.fixvalue_xn);
						} else {
							/* Copy the unmatched character; never run past the terminator. */
							while ((skip-- > 0) && (string_[i] != '\0')) {
								migemo_d.wordbuf.wordbuf_add(buf, string_[i++]);
							}
						}
					} else {
						i += skip;
						migemo_d.wordbuf.wordbuf_cat(buf, node.value);
					}
				}

				answer = core.stdc.string.strdup(migemo_d.wordbuf.WORDBUF_GET(buf));
			}
		}

		if (ppstop != null) {
			/* Without a result there is no valid stop position to report. */
			*ppstop = ((answer != null) && (stop >= 0)) ? (answer + stop) : (null);
		}

		return answer;
	}

/**
 * Converts string_ ignoring case; equivalent to romaji_convert2() with ignorecase set to 1.
 *
 * Params:
 *      object = romaji object
 *      string_ = input string
 *      ppstop = receives the position where conversion stopped (may be null)
 *
 * Returns: newly allocated result (release with romaji_release()), or null on failure
 */
extern (C)
nothrow @nogc
public char* romaji_convert(.romaji* object, const (char)* string_, char** ppstop)

	do
	{
		return .romaji_convert2(object, string_, ppstop, 1);
	}

/**
 * Releases a string returned by romaji_convert() / romaji_convert2().
 *
 * Params:
 *      object = romaji object (not used)
 *      string_ = string to release (null is ignored)
 */
extern (C)
pure nothrow @nogc
public void romaji_release(.romaji* object, char* string_)

	do
	{
		if (string_ != null) {
			core.memory.pureFree(string_);
		}
	}

/**
 * Sets the character-measuring callback of the object.
 *
 * Params:
 *      object = romaji object (null is ignored)
 *      proc = callback to store
 */
extern (C)
pure nothrow @nogc
public void romaji_setproc_char2int(.romaji* object, .ROMAJI_PROC_CHAR2INT proc)

	do
	{
		if (object != null) {
			object.char2int = proc;
		}
	}

/**
 * Sets the verbosity level of the object.
 *
 * Params:
 *      object = romaji object (null is ignored)
 *      level = new verbosity level
 */
extern (C)
pure nothrow @nogc
public void romaji_set_verbose(.romaji* object, int level)

	do
	{
		if (object != null) {
			object.verbose = level;
		}
	}