1 /*
2  * rxgen.c - regular expression generator
3  *
4  * Written By:  MURAOKA Taro <koron@tka.att.ne.jp>
5  * Last Change: 19-Sep-2009.
6  */
7 module migemo_d.rxgen;
8 
9 
10 private static import core.memory;
11 private static import core.stdc.stdio;
12 private static import core.stdc.string;
13 private static import migemo_d.wordbuf;
14 
15 public alias rxgen_proc_char2int = extern (C) nothrow @nogc int function(const (char)*, uint*);
16 public alias rxgen_proc_int2char = extern (C) nothrow @nogc int function(uint, char*);
17 public alias RXGEN_PROC_CHAR2INT = .rxgen_proc_char2int;
18 public alias RXGEN_PROC_INT2CHAR = .rxgen_proc_int2char;
19 
20 /* for rxgen_set_operator */
21 public enum RXGEN_OPINDEX_OR = 0;
22 public enum RXGEN_OPINDEX_NEST_IN = 1;
23 public enum RXGEN_OPINDEX_NEST_OUT = 2;
24 public enum RXGEN_OPINDEX_SELECT_IN = 3;
25 public enum RXGEN_OPINDEX_SELECT_OUT = 4;
26 public enum RXGEN_OPINDEX_NEWLINE = 5;
27 version = RXGEN_ENC_SJISTINY;
28 //version = RXGEN_OP_VIM;
29 
30 enum RXGEN_OP_MAXLEN = 8;
31 enum RXGEN_OP_OR = "|\0";
32 enum RXGEN_OP_NEST_IN = "(\0";
33 enum RXGEN_OP_NEST_OUT = ")\0";
34 enum RXGEN_OP_SELECT_IN = "[\0";
35 enum RXGEN_OP_SELECT_OUT = "]\0";
36 enum RXGEN_OP_NEWLINE = "\n\0";
37 
38 public int n_rnode_new = 0;
39 public int n_rnode_delete = 0;
40 
41 extern (C)
42 struct _rxgen
43 {
44 	.rnode* node;
45 	.RXGEN_PROC_CHAR2INT char2int;
46 	.RXGEN_PROC_INT2CHAR int2char;
47 	char[.RXGEN_OP_MAXLEN] op_or;
48 	char[.RXGEN_OP_MAXLEN] op_nest_in;
49 	char[.RXGEN_OP_MAXLEN] op_nest_out;
50 	char[.RXGEN_OP_MAXLEN] op_select_in;
51 	char[.RXGEN_OP_MAXLEN] op_select_out;
52 	char[.RXGEN_OP_MAXLEN] op_newline;
53 }
54 
55 public alias rxgen = ._rxgen;
56 
57 /*
58  * rnode interfaces
59  */
60 
61 struct _rnode
62 {
63 	uint code;
64 	.rnode* child;
65 	.rnode* next;
66 }
67 
68 alias rnode = ._rnode;
69 
70 nothrow @nogc
71 package .rnode* rnode_new()
72 
73 	do
74 	{
75 		++.n_rnode_new;
76 
77 		return cast(.rnode*)(core.memory.pureCalloc(1, .rnode.sizeof));
78 	}
79 
80 nothrow @nogc
81 package void rnode_delete(.rnode* node)
82 
83 	do
84 	{
85 		while (node != null) {
86 			.rnode* child = node.child;
87 
88 			if (node.next != null) {
89 				.rnode_delete(node.next);
90 				node.next = null;
91 			}
92 
93 			core.memory.pureFree(node);
94 			node = child;
95 			++.n_rnode_delete;
96 		}
97 	}
98 
99 /*
100  * rxgen interfaces
101  */
102 
103 extern (C)
104 pure nothrow @trusted @nogc @live
105 package int default_char2int(const char* in_, uint* out_)
106 
107 	in
108 	{
109 		assert(in_ != null);
110 	}
111 
112 	do
113 	{
114 		if (out_ != null) {
115 			*out_ = *in_;
116 		}
117 
118 		return 1;
119 	}
120 
121 extern (C)
122 pure nothrow @trusted @nogc @live
123 package int default_int2char(uint in_, char* out_)
124 
125 	do
126 	{
127 		int len = 0;
128 
129 		/* outは最低でも16バイトはある、という仮定を置く */
130 		switch (in_) {
131 			case '\\':
132 			case '.':
133 			case '*':
134 			case '^':
135 			case '$':
136 			case '/':
137 
138 			version (RXGEN_OP_VIM) {
139 				case '[':
140 				case ']':
141 				case '~':
142 			}
143 
144 				if (out_ != null) {
145 					out_[len] = '\\';
146 				}
147 
148 				++len;
149 
150 				//ToDo: FALLTHROUGH?
151 				goto default;
152 
153 			default:
154 				if (out_ != null) {
155 					out_[len] = cast(char)(in_ & 0xFF);
156 				}
157 
158 				++len;
159 
160 				break;
161 		}
162 
163 		return len;
164 	}
165 
166 extern (C)
167 nothrow @nogc
168 public void rxgen_setproc_char2int(.rxgen* object, .RXGEN_PROC_CHAR2INT proc)
169 
170 	do
171 	{
172 		if (object != null) {
173 			object.char2int = (proc != null) ? (proc) : (&.default_char2int);
174 		}
175 	}
176 
177 extern (C)
178 nothrow @nogc
179 public void rxgen_setproc_int2char(.rxgen* object, .RXGEN_PROC_INT2CHAR proc)
180 
181 	do
182 	{
183 		if (object != null) {
184 			object.int2char = (proc != null) ? (proc) : (&.default_int2char);
185 		}
186 	}
187 
188 nothrow @nogc
189 package int rxgen_call_char2int(.rxgen* object, const (char)* pch, uint* code)
190 
191 	do
192 	{
193 		int len = object.char2int(pch, code);
194 
195 		return (len) ? (len) : (.default_char2int(pch, code));
196 	}
197 
198 nothrow @nogc
199 package int rxgen_call_int2char(.rxgen* object, uint code, char* buf)
200 
201 	do
202 	{
203 		int len = object.int2char(code, buf);
204 
205 		return (len) ? (len) : (.default_int2char(code, buf));
206 	}
207 
208 extern (C)
209 nothrow @nogc
210 public .rxgen* rxgen_open()
211 
212 	do
213 	{
214 		.rxgen* object = cast(.rxgen*)(core.memory.pureCalloc(1, .rxgen.sizeof));
215 
216 		if (object != null) {
217 			.rxgen_setproc_char2int(object, null);
218 			.rxgen_setproc_int2char(object, null);
219 			core.stdc..string.strcpy(&(object.op_or[0]), &(.RXGEN_OP_OR[0]));
220 			core.stdc..string.strcpy(&(object.op_nest_in[0]), &(.RXGEN_OP_NEST_IN[0]));
221 			core.stdc..string.strcpy(&(object.op_nest_out[0]), &(.RXGEN_OP_NEST_OUT[0]));
222 			core.stdc..string.strcpy(&(object.op_select_in[0]), &(.RXGEN_OP_SELECT_IN[0]));
223 			core.stdc..string.strcpy(&(object.op_select_out[0]), &(.RXGEN_OP_SELECT_OUT[0]));
224 			core.stdc..string.strcpy(&(object.op_newline[0]), &(.RXGEN_OP_NEWLINE[0]));
225 		}
226 
227 		return object;
228 	}
229 
230 extern (C)
231 nothrow @nogc
232 public void rxgen_close(.rxgen* object)
233 
234 	do
235 	{
236 		if (object != null) {
237 			.rnode_delete(object.node);
238 			object.node = null;
239 			core.memory.pureFree(object);
240 		}
241 	}
242 
243 pure nothrow @trusted @nogc @live
244 package .rnode* search_rnode(.rnode* node, uint code)
245 
246 	do
247 	{
248 		while ((node != null) && (node.code != code)) {
249 			node = node.next;
250 		}
251 
252 		return node;
253 	}
254 
255 extern (C)
256 nothrow @nogc
257 public int rxgen_add(.rxgen* object, const (char)* word)
258 
259 	do
260 	{
261 		if ((object == null) || (word == null)) {
262 			return 0;
263 		}
264 
265 		.rnode** ppnode = &object.node;
266 		uint code;
267 
268 		while (true) {
269 			int len = .rxgen_call_char2int(object, word, &code);
270 			/*core.stdc.stdio.printf("rxgen_call_char2int: code=%08x\n", code);*/
271 
272 			/* 入力パターンが尽きたら終了 */
273 			if (code == 0) {
274 				/* 入力パターンよりも長い既存パターンは破棄する */
275 				if (*ppnode) {
276 					.rnode_delete(*ppnode);
277 					*ppnode = null;
278 				}
279 
280 				break;
281 			}
282 
283 			.rnode* pnode = .search_rnode(*ppnode, code);
284 
285 			if (pnode == null) {
286 				/* codeを持つノードが無い場合、作成追加する */
287 				pnode = .rnode_new();
288 				pnode.code = code;
289 				pnode.next = *ppnode;
290 				*ppnode = pnode;
291 			} else if (pnode.child == null) {
292 				/*
293 				 * codeを持つノードは有るが、その子供が無い場合、それ以降の入力
294 				 * パターンは破棄する。例:
295 				 *     あかい + あかるい . あか
296 				 *	   たのしい + たのしみ . たのし
297 				 */
298 				break;
299 			}
300 
301 			/* 子ノードを辿って深い方へ注視点を移動 */
302 			ppnode = &pnode.child;
303 			word += len;
304 		}
305 
306 		return 1;
307 	}
308 
309 nothrow @nogc
310 package void rxgen_generate_stub(.rxgen* object, migemo_d.wordbuf.wordbuf_t* buf, .rnode* node)
311 
312 	in
313 	{
314 		assert(node != null);
315 	}
316 
317 	do
318 	{
319 		int haschild = 0;
320 		int brother = 1;
321 		.rnode* tmp;
322 
323 		/* 現在の階層の特性(兄弟の数、子供の数)をチェックする */
324 		for (tmp = node; tmp; tmp = tmp.next) {
325 			if (tmp.next != null) {
326 				++brother;
327 			}
328 
329 			if (tmp.child != null) {
330 				++haschild;
331 			}
332 		}
333 
334 		int nochild = brother - haschild;
335 
336 		/* For debug */
337 		version (none) {
338 			core.stdc.stdio.printf("node=%p code=%04X\n  nochild=%d haschild=%d brother=%d\n", node, node.code, nochild, haschild, brother);
339 		}
340 
341 		/* 必要ならば()によるグルーピング */
342 		if ((brother > 1) && (haschild > 0)) {
343 			migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_nest_in[0]));
344 		}
345 
346 		char[16] ch;
347 
348 		version (all) {
349 			/* 子の無いノードを先に[]によりグルーピング */
350 			if (nochild > 0) {
351 				if (nochild > 1) {
352 					migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_select_in[0]));
353 				}
354 
355 				for (tmp = node; tmp; tmp = tmp.next) {
356 					if (tmp.child != null) {
357 						continue;
358 					}
359 
360 					int chlen = .rxgen_call_int2char(object, tmp.code, &(ch[0]));
361 					ch[chlen] = '\0';
362 					/*core.stdc.stdio.printf("nochild: %s\n", ch);*/
363 					migemo_d.wordbuf.wordbuf_cat(buf, &(ch[0]));
364 				}
365 
366 				if (nochild > 1) {
367 					migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_select_out[0]));
368 				}
369 			}
370 		}
371 
372 		version (all) {
373 			/* 子のあるノードを出力 */
374 			if (haschild > 0) {
375 				/* グループを出力済みならORで繋ぐ */
376 				if (nochild > 0) {
377 					migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_or[0]));
378 				}
379 
380 				for (tmp = node; !tmp.child; tmp = tmp.next) {
381 				}
382 
383 				while (true) {
384 					int chlen = .rxgen_call_int2char(object, tmp.code, &(ch[0]));
385 					/*core.stdc.stdio.printf("code=%04X len=%d\n", tmp.code, chlen);*/
386 					ch[chlen] = '\0';
387 					migemo_d.wordbuf.wordbuf_cat(buf, &(ch[0]));
388 
389 					/* 空白・改行飛ばしのパターンを挿入 */
390 					if (object.op_newline[0]) {
391 						migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_newline[0]));
392 					}
393 
394 					.rxgen_generate_stub(object, buf, tmp.child);
395 
396 					for (tmp = tmp.next; (tmp != null) && (tmp.child == null); tmp = tmp.next) {
397 					}
398 
399 					if (tmp == null) {
400 						break;
401 					}
402 
403 					if (haschild > 1) {
404 						migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_or[0]));
405 					}
406 				}
407 			}
408 		}
409 
410 		/* 必要ならば()によるグルーピング */
411 		if ((brother > 1) && (haschild > 0)) {
412 			migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_nest_out[0]));
413 		}
414 	}
415 
416 extern (C)
417 nothrow @nogc
418 public char* rxgen_generate(.rxgen* object)
419 
420 	do
421 	{
422 		char* answer = null;
423 
424 		if (object != null) {
425 			migemo_d.wordbuf.wordbuf_t* buf = migemo_d.wordbuf.wordbuf_open();
426 
427 			scope (exit) {
428 				if (buf != null) {
429 					migemo_d.wordbuf.wordbuf_close(buf);
430 					buf = null;
431 				}
432 			}
433 
434 			if (buf != null) {
435 				if (object.node != null) {
436 					.rxgen_generate_stub(object, buf, object.node);
437 				}
438 
439 				answer = core.stdc..string.strdup(migemo_d.wordbuf.WORDBUF_GET(buf));
440 			}
441 		}
442 
443 		return answer;
444 	}
445 
446 extern (C)
447 pure nothrow @trusted @nogc
448 public void rxgen_release(.rxgen* object, char* string_)
449 
450 	do
451 	{
452 		if (string_ != null) {
453 			core.memory.pureFree(string_);
454 		}
455 	}
456 
457 /**
458  * rxgen_add()してきたパターンを全てリセット。
459  */
460 extern (C)
461 nothrow @nogc
462 public void rxgen_reset(.rxgen* object)
463 
464 	in
465 	{
466 	}
467 
468 	do
469 	{
470 		if (object != null) {
471 			.rnode_delete(object.node);
472 			object.node = null;
473 		}
474 	}
475 
476 pure nothrow @trusted @nogc @live
477 package char* rxgen_get_operator_stub(.rxgen* object, int index)
478 
479 	in
480 	{
481 		assert(object != null);
482 	}
483 
484 	do
485 	{
486 		switch (index) {
487 			case .RXGEN_OPINDEX_OR:
488 				return &(object.op_or[0]);
489 
490 			case .RXGEN_OPINDEX_NEST_IN:
491 				return &(object.op_nest_in[0]);
492 
493 			case .RXGEN_OPINDEX_NEST_OUT:
494 				return &(object.op_nest_out[0]);
495 
496 			case .RXGEN_OPINDEX_SELECT_IN:
497 				return &(object.op_select_in[0]);
498 
499 			case .RXGEN_OPINDEX_SELECT_OUT:
500 				return &(object.op_select_out[0]);
501 
502 			case .RXGEN_OPINDEX_NEWLINE:
503 				return &(object.op_newline[0]);
504 
505 			default:
506 				return null;
507 		}
508 	}
509 
510 extern (C)
511 pure nothrow @trusted @nogc @live
512 public const (char)* rxgen_get_operator(.rxgen* object, int index)
513 
514 	in
515 	{
516 	}
517 
518 	do
519 	{
520 		return cast(const (char)*)((object != null) ? (.rxgen_get_operator_stub(object, index)) : (null));
521 	}
522 
523 extern (C)
524 pure nothrow @nogc
525 public int rxgen_set_operator(.rxgen* object, int index, const (char)* op)
526 
527 	in
528 	{
529 		assert(op != null);
530 	}
531 
532 	do
533 	{
534 		if (object == null) {
535 			/* Invalid object */
536 			return 1;
537 		}
538 
539 		if (core.stdc..string.strlen(op) >= .RXGEN_OP_MAXLEN) {
540 			/* Too long operator */
541 			return 2;
542 		}
543 
544 		char* dest = .rxgen_get_operator_stub(object, index);
545 
546 		if (dest == null) {
547 			/* No such an operator */
548 			return 3;
549 		}
550 
551 		core.stdc..string.strcpy(dest, op);
552 
553 		return 0;
554 	}