1 /*
2  * romaji.c - ローマ字変換
3  *
4  * Written By:  MURAOKA Taro <koron@tka.att.ne.jp>
5  */
6 module migemo_d.romaji;
7 
8 
9 private static import core.memory;
10 private static import core.stdc.ctype;
11 private static import core.stdc.stdio;
12 private static import core.stdc.string;
13 private static import migemo_d.charset;
14 private static import migemo_d.wordbuf;
15 
/// Callback type taking a C string and an optional output pointer and returning
/// an int. romanode_query uses the returned int as the number of bytes to skip
/// for an unmatched character (results below 1 are replaced by 1).
public alias romaji_proc_char2int = extern (C) nothrow @nogc int function(const (char)*, uint*);
/// Upper-case spelling of the same callback type.
public alias ROMAJI_PROC_CHAR2INT = .romaji_proc_char2int;

/// Input byte that romaji_convert2 may replace with the stored "xn" value.
enum ROMAJI_FIXKEY_N = 'n';
/// Table key whose value romaji_add_table remembers in fixvalue_xn.
enum ROMAJI_FIXKEY_XN = "xn";
/// Table key whose value romaji_add_table remembers in fixvalue_xtu.
enum ROMAJI_FIXKEY_XTU = "xtu";
/// Characters that never trigger the doubled-letter rule in romaji_convert2.
enum ROMAJI_FIXKEY_NONXTU = "aiueon";
23 
24 /*
25  * romanode interfaces
26  */
27 
/**
 * One node of the key lookup tree built by romanode_dig.
 *
 * Nodes that are alternatives for the same key position are chained through
 * `next`; `child` leads to the nodes for the following key byte.
 */
struct _romanode
{
	/// Key byte matched by this node.
	char key;
	/// Text stored for the key ending at this node; null when none is stored.
	char* value;
	/// Next alternative at the same key position.
	romanode* next;
	/// First node for the following key position.
	romanode* child;
}

/// Short name for the node type.
alias romanode = ._romanode;
37 
/// Incremented each time romanode_new is called.
int n_romanode_new = 0;
/// Incremented for each node released by romanode_delete.
int n_romanode_delete = 0;
40 
/**
 * Allocates one zero-filled romanode and counts the request.
 *
 * Returns: the new node, or the null result of pureCalloc when allocation fails
 */
pragma(inline, true)
nothrow @nogc
package .romanode* romanode_new()

	do
	{
		.n_romanode_new += 1;

		void* raw = core.memory.pureCalloc(1, .romanode.sizeof);

		return cast(.romanode*)(raw);
	}
51 
/**
 * Frees a node together with everything reachable from it.
 *
 * The `child` chain is walked iteratively, while each node's `next` chain is
 * released recursively. A null argument is a no-op.
 *
 * Params:
 *      node = first node to release (may be null)
 */
nothrow @nogc
package void romanode_delete(.romanode* node)

	do
	{
		while (node != null) {
			.romanode* child = node.child;

			if (node.next != null) {
				.romanode_delete(node.next);
				node.next = null;
			}

			// Nodes on the path to a longer key carry no value (romanode_dig
			// stores null there), so a null value is a normal state and must
			// not be asserted against; freeing a null pointer does nothing.
			core.memory.pureFree(node.value);
			core.memory.pureFree(node);
			node = child;
			++.n_romanode_delete;
		}
	}
72 
/**
 * Finds or creates the chain of nodes for `key` and returns the slot of the
 * last node.
 *
 * The value of every node on the path (the last one included) is released and
 * cleared, and any subtree below the last node is deleted, so the caller can
 * store a fresh value on the returned node.
 *
 * Params:
 *      ref_node = address of the pointer to the first node of the tree
 *      key = NUL-terminated key; must not be empty
 *
 * Returns: address of the pointer to the final node, or null when an argument
 *          is null, the key is empty, or a node could not be allocated
 */
nothrow @nogc
package .romanode** romanode_dig(.romanode** ref_node, const (char)* key)

	do
	{
		if ((ref_node == null) || (key == null) || (key[0] == '\0')) {
			return null;
		}

		while (true) {
			if (*ref_node == null) {
				*ref_node = .romanode_new();

				if (*ref_node == null) {
					return null;
				}

				(*ref_node).key = *key;
			}

			if ((*ref_node).key == *key) {
				// The old value is discarded here; free it first so that
				// re-registering or extending a key does not leak it.
				// Freeing a null pointer does nothing.
				core.memory.pureFree((*ref_node).value);
				(*ref_node).value = null;

				if (!*++key) {
					break;
				}

				ref_node = &(*ref_node).child;
			} else {
				ref_node = &(*ref_node).next;
			}
		}

		// A key that ends here replaces every longer key sharing this prefix.
		if ((*ref_node).child != null) {
			.romanode_delete((*ref_node).child);
			(*ref_node).child = null;
		}

		return ref_node;
	}
113 
/**
 * Searches the tree for the node matching the beginning of `key`.
 *
 * Params:
 *      node = root node
 *      key = search key
 *      skip = receives the number of bytes of `key` to advance (may be null)
 *      char2int = optional callback giving the byte length of the first
 *                 character of `key`; used when nothing matches
 *
 * Returns: the matching node; null when no node was found
 */
nothrow @nogc
package .romanode* romanode_query(.romanode* node, const (char)* key, int* skip, .ROMAJI_PROC_CHAR2INT char2int)

	do
	{
		int consumed = 0;
		const (char)* first = key;
		const bool searchable = (node != null) && (key != null) && (*key != '\0');

		// The flag never changes; the loop is always left through a break.
		while (searchable) {
			if (node.key == *key) {
				++consumed;

				if (node.value != null) {
					// A stored value ends the search.
					break;
				}

				++key;

				if (*key == '\0') {
					// The key ran out before a value was reached.
					consumed = 0;

					break;
				}

				node = node.child;
			} else {
				node = node.next;
			}

			/* No node left to scan: advance the key and return null */
			if (node == null) {
				/* Advance by one character rather than one byte */
				consumed = (char2int != null) ? char2int(first, null) : 0;

				if (consumed < 1) {
					consumed = 1;
				}

				break;
			}
		}

		if (skip != null) {
			*skip = consumed;
		}

		return node;
	}
176 
// Debug printers; `version (none)` keeps this block out of every build.
version (none) {
	/**
	 * Recursively prints "key=value" lines for every node that has a value.
	 *
	 * Params:
	 *      node = node to print; must not be null
	 *      p = position in the shared key buffer where this node's key byte
	 *          is written; null selects the start of the buffer
	 */
	nothrow @nogc
	package void romanode_print_stub(.romanode* node, char* p)

		in
		{
			assert(node != null);
		}

		do
		{
			// Shared key buffer; each recursion level writes one byte of it.
			static char[256] buf;

			if (p == null) {
				p = &(buf[0]);
			}

			p[0] = node.key;
			p[1] = '\0';

			if (node.value != null) {
				core.stdc.stdio.printf("%s=%s\n", &(buf[0]), node.value);
			}

			// Children extend the key by one byte.
			if (node.child != null) {
				.romanode_print_stub(node.child, p + 1);
			}

			// Siblings overwrite the byte at the same position.
			if (node.next != null) {
				.romanode_print_stub(node.next, p);
			}
		}

	/**
	 * Prints the tree starting at `node`; does nothing when `node` is null.
	 */
	nothrow @nogc
	package void romanode_print(.romanode* node)

		do
		{
			if (node == null) {
				return;
			}

			.romanode_print_stub(node, null);
		}
}
222 
223 /**
224  * romaji interface
225  */
/// State of one romaji converter.
extern (C)
struct _romaji
{
	/// Level compared against when printing diagnostics in debug builds.
	int verbose;
	/// First node of the key lookup tree.
	.romanode* node;
	/// Copy of the first value registered for the key "xn", or null.
	char* fixvalue_xn;
	/// Copy of the first value registered for the key "xtu", or null.
	char* fixvalue_xtu;
	/// Callback passed to romanode_query for unmatched characters; may be null.
	.ROMAJI_PROC_CHAR2INT char2int;
}

/// Public name of the converter type.
public alias romaji = ._romaji;
237 
/**
 * Returns a newly allocated copy of `string_` with every byte passed through
 * the C `tolower` function.
 *
 * Params:
 *      string_ = NUL-terminated string to copy; must not be null
 *
 * Returns: the copy, to be released with core.memory.pureFree, or null when
 *          the copy could not be allocated
 */
nothrow @nogc
package char* strdup_lower(const (char)* string_)

	in
	{
		assert(string_ != null);
	}

	do
	{
		char* out_ = core.stdc.string.strdup(string_);

		if (out_ != null) {
			for (char* tmp = out_; *tmp != '\0'; ++tmp) {
				*tmp = cast(char)(core.stdc.ctype.tolower(*tmp));
			}
		}

		return out_;
	}
258 
/**
 * Allocates a zero-filled converter object.
 *
 * Returns: the new object, or the null result of pureCalloc when allocation fails
 */
extern (C)
pure nothrow @trusted @nogc
public .romaji* romaji_open()

	do
	{
		void* storage = core.memory.pureCalloc(1, .romaji.sizeof);

		return cast(.romaji*)(storage);
	}
267 
/**
 * Releases a converter object and everything it owns.
 *
 * Params:
 *      object = converter to destroy; null is ignored
 */
extern (C)
nothrow @nogc
public void romaji_close(.romaji* object)

	do
	{
		if (object == null) {
			return;
		}

		if (object.node != null) {
			.romanode_delete(object.node);
			object.node = null;
		}

		// The remembered "xn" and "xtu" strings are released in that order.
		char**[2] owned = [&object.fixvalue_xn, &object.fixvalue_xtu];

		foreach (slot; owned) {
			if (*slot != null) {
				core.memory.pureFree(*slot);
				*slot = null;
			}
		}

		core.memory.pureFree(object);
	}
293 
/**
 * Registers one key/value pair in the conversion table.
 *
 * The first values registered for the keys "xn" and "xtu" are additionally
 * remembered on the object for use by romaji_convert2.
 *
 * Params:
 *      object = converter object
 *      key = NUL-terminated key
 *      value = NUL-terminated, non-empty value
 *
 * Returns: 0 on success, 1 when an argument is null, 2 when `value` is empty,
 *          4 when romanode_dig fails or the value cannot be copied
 */
extern (C)
nothrow @nogc
public int romaji_add_table(.romaji* object, const (char)* key, const (char)* value)

	do
	{
		if ((object == null) || (key == null) || (value == null)) {
			/* Unexpected error */
			return 1;
		}

		if (value[0] == '\0') {
			/* Too short value string */
			return 2;
		}

		.romanode** ref_node = .romanode_dig(&object.node, key);

		if (ref_node == null) {
			/* Memory exhausted */
			return 4;
		}

		debug {
			if (object.verbose >= 10) {
				core.stdc.stdio.printf("romaji_add_table(\"%s\", \"%s\")\n", key, value);
			}
		}

		char* stored = core.stdc.string.strdup(value);

		if (stored == null) {
			/* Memory exhausted: do not report success with nothing stored */
			return 4;
		}

		(*ref_node).value = stored;

		/* Remember the values for "xn" and "xtu"; a failed copy leaves the
		 * field null, so a later registration can fill it. */
		if ((object.fixvalue_xn == null) && (core.stdc.string.strcmp(key, .ROMAJI_FIXKEY_XN) == 0)) {
			object.fixvalue_xn = core.stdc.string.strdup(value);
		}

		if ((object.fixvalue_xtu == null) && (core.stdc.string.strcmp(key, .ROMAJI_FIXKEY_XTU) == 0)) {
			object.fixvalue_xtu = core.stdc.string.strdup(value);
		}

		return 0;
	}
340 
/**
 * Reads whitespace-separated key/value pairs from `fp` and registers each
 * pair with romaji_add_table.
 *
 * A '#' where a key is expected starts a comment that runs to the end of the
 * line, unless it is immediately followed by a second '#', in which case a
 * key beginning with '#' is read. The result of romaji_add_table is not
 * checked.
 *
 * Params:
 *      object = converter object receiving the entries
 *      fp = open input stream
 *
 * Returns: 0 after the stream has been read to its end, -1 when `fp` is null
 *          or a work buffer could not be opened
 */
nothrow @nogc
int romaji_load_stub(.romaji* object, core.stdc.stdio.FILE* fp)

	do
	{
		/* Parser states */
		enum Mode
		{
			wait_key,
			skip_line,
			read_key,
			wait_value,
			read_value,
		}

		if (fp == null) {
			return -1;
		}

		migemo_d.wordbuf.wordbuf_p buf_key = migemo_d.wordbuf.wordbuf_open();
		migemo_d.wordbuf.wordbuf_p buf_value = migemo_d.wordbuf.wordbuf_open();

		scope (exit) {
			if (buf_key != null) {
				migemo_d.wordbuf.wordbuf_close(buf_key);
				buf_key = null;
			}

			if (buf_value != null) {
				migemo_d.wordbuf.wordbuf_close(buf_value);
				buf_value = null;
			}
		}

		if ((buf_key == null) || (buf_value == null)) {
			return -1;
		}

		Mode mode = Mode.wait_key;
		int ch;

		do {
			ch = core.stdc.stdio.fgetc(fp);

			switch (mode) {
				case Mode.wait_key:
					if (ch == '#') {
						/* Look one character ahead: a second '#' starts a key */
						ch = core.stdc.stdio.fgetc(fp);

						if (ch != '#') {
							core.stdc.stdio.ungetc(ch, fp);

							/* Comment: skip to the end of the line */
							mode = Mode.skip_line;

							break;
						}
					}

					if ((ch != core.stdc.stdio.EOF) && (!core.stdc.ctype.isspace(ch))) {
						migemo_d.wordbuf.wordbuf_reset(buf_key);
						migemo_d.wordbuf.wordbuf_add(buf_key, cast(char)(ch));
						mode = Mode.read_key;
					}

					break;

				case Mode.skip_line:
					if (ch == '\n') {
						mode = Mode.wait_key;
					}

					break;

				case Mode.read_key:
					if (ch == core.stdc.stdio.EOF) {
						/* Input ended inside a key: there is no value to
						 * register, and EOF must not be stored as a key byte. */
						break;
					}

					if (core.stdc.ctype.isspace(ch)) {
						mode = Mode.wait_value;
					} else {
						migemo_d.wordbuf.wordbuf_add(buf_key, cast(char)(ch));
					}

					break;

				case Mode.wait_value:
					if ((ch != core.stdc.stdio.EOF) && (!core.stdc.ctype.isspace(ch))) {
						migemo_d.wordbuf.wordbuf_reset(buf_value);
						migemo_d.wordbuf.wordbuf_add(buf_value, cast(char)(ch));
						mode = Mode.read_value;
					}

					break;

				case Mode.read_value:
					if ((ch != core.stdc.stdio.EOF) && (!core.stdc.ctype.isspace(ch))) {
						migemo_d.wordbuf.wordbuf_add(buf_value, cast(char)(ch));
					} else {
						/* Whitespace or end of input completes the pair */
						char* key = migemo_d.wordbuf.WORDBUF_GET(buf_key);
						char* value = migemo_d.wordbuf.WORDBUF_GET(buf_value);
						.romaji_add_table(object, key, value);
						mode = Mode.wait_key;
					}

					break;

				default:
					break;
			}
		} while (ch != core.stdc.stdio.EOF);

		return 0;
	}
451 
/**
 * Loads a romaji dictionary file.
 *
 * The charset detected for the file selects the char2int callback stored on
 * the object; this happens before the file is opened for reading.
 *
 * Params:
 *      object = romaji object
 *      filename = dictionary file name
 *
 * Returns: 0 on success, non-zero on failure
 */
extern (C)
nothrow @nogc
public int romaji_load(.romaji* object, const (char)* filename)

	do
	{
		if ((object == null) || (filename == null)) {
			return -1;
		}

		int charset = migemo_d.charset.charset_detect_file(filename);
		migemo_d.charset.charset_getproc(charset, &object.char2int, null);

		core.stdc.stdio.FILE* fp = core.stdc.stdio.fopen(filename, "rt");

		if (fp == null) {
			return -1;
		}

		scope (exit) {
			core.stdc.stdio.fclose(fp);
		}

		return .romaji_load_stub(object, fp);
	}
493 
/**
 * Converts `string_` using the table registered on `object`.
 *
 * Params:
 *      object = converter object
 *      string_ = NUL-terminated input
 *      ppstop = when not null, receives the position inside the returned
 *               string where conversion stopped and the remaining input was
 *               appended unchanged; receives null when conversion did not
 *               stop early or when no string is returned
 *      ignorecase = non-zero to match against a lower-cased copy of the input
 *
 * Returns: a newly allocated string to be released with romaji_release, or
 *          null when an argument is null or memory could not be allocated
 */
extern (C)
nothrow @nogc
public char* romaji_convert2(.romaji* object, const (char)* string_, char** ppstop, int ignorecase)

	do
	{
		migemo_d.wordbuf.wordbuf_p buf = null;
		char* lower = null;
		char* answer = null;
		const (char)* input = string_;
		int stop = -1;

		// strdup_lower requires a non-null argument, so only call it for a
		// real string; a null input falls through to the check below.
		if ((ignorecase != 0) && (string_ != null)) {
			lower = .strdup_lower(string_);
			input = lower;
		}

		scope (exit) {
			if (lower != null) {
				core.memory.pureFree(lower);
				lower = null;
			}

			if (buf != null) {
				migemo_d.wordbuf.wordbuf_close(buf);
				buf = null;
			}
		}

		if ((object != null) && (string_ != null) && (input != null)) {
			buf = migemo_d.wordbuf.wordbuf_open();

			if (buf != null) {
				int skip;

				for (int i = 0; string_[i] != '\0';) {
					/* Doubled letter: emit the stored "xtu" value for the first one */
					if ((object.fixvalue_xtu != null) && (input[i] == input[i + 1]) && (core.stdc.string.strchr(.ROMAJI_FIXKEY_NONXTU, input[i]) == null)) {
						++i;
						migemo_d.wordbuf.wordbuf_cat(buf, object.fixvalue_xtu);

						continue;
					}

					.romanode* node = .romanode_query(object.node, &input[i], &skip, object.char2int);

					debug {
						if (object.verbose >= 1) {
							// A node without a value is printed as "null" too,
							// so printf never receives a null string.
							core.stdc.stdio.printf("key=%s value=%s skip=%d\n", &input[i], ((node != null) && (node.value != null)) ? cast(char*)(node.value) : (&("null\0"[0])), skip);
						}
					}

					if (skip == 0) {
						/* Nothing can be converted from here: append the rest unchanged */
						if (string_[i]) {
							stop = migemo_d.wordbuf.WORDBUF_LEN(buf);
							migemo_d.wordbuf.wordbuf_cat(buf, &string_[i]);
						}

						break;
					} else if (node == null) {
						/* Convert "n" followed by a non-matching byte with the stored "xn" value */
						if ((skip == 1) && (input[i] == .ROMAJI_FIXKEY_N) && (object.fixvalue_xn != null)) {
							++i;
							migemo_d.wordbuf.wordbuf_cat(buf, object.fixvalue_xn);
						} else {
							// Copy one unconverted character. The terminator
							// check keeps the index inside the string even if
							// the char2int callback reports more bytes than
							// actually remain.
							for (; (skip > 0) && (string_[i] != '\0'); --skip) {
								migemo_d.wordbuf.wordbuf_add(buf, string_[i++]);
							}
						}
					} else {
						i += skip;
						migemo_d.wordbuf.wordbuf_cat(buf, node.value);
					}
				}

				answer = core.stdc.string.strdup(migemo_d.wordbuf.WORDBUF_GET(buf));
			}
		}

		if (ppstop != null) {
			// Without a result string there is no valid stop position.
			*ppstop = ((answer != null) && (stop >= 0)) ? (answer + stop) : (null);
		}

		return answer;
	}
579 
/**
 * Converts `string_` with case-insensitive matching.
 *
 * Forwards to romaji_convert2 with its `ignorecase` argument set to 1.
 */
extern (C)
nothrow @nogc
public char* romaji_convert(.romaji* object, const (char)* string_, char** ppstop)

	do
	{
		enum int IGNORE_CASE = 1;

		return .romaji_convert2(object, string_, ppstop, IGNORE_CASE);
	}
588 
/**
 * Frees a string with core.memory.pureFree.
 *
 * Params:
 *      object = not used
 *      string_ = string to free; null is ignored
 */
extern (C)
pure nothrow @nogc
public void romaji_release(.romaji* object, char* string_)

	do
	{
		if (string_ == null) {
			return;
		}

		core.memory.pureFree(string_);
	}
599 
/**
 * Stores `proc` as the object's char2int callback.
 *
 * Params:
 *      object = converter object; null is ignored
 *      proc = callback to store
 */
extern (C)
pure nothrow @nogc
public void romaji_setproc_char2int(.romaji* object, .ROMAJI_PROC_CHAR2INT proc)

	do
	{
		if (object == null) {
			return;
		}

		object.char2int = proc;
	}
610 
/**
 * Stores `level` as the object's verbose level.
 *
 * Params:
 *      object = converter object; null is ignored
 *      level = new verbose level
 */
extern (C)
pure nothrow @nogc
public void romaji_set_verbose(.romaji* object, int level)

	do
	{
		if (object == null) {
			return;
		}

		object.verbose = level;
	}