1 /* 2 * rxgen.c - regular expression generator 3 * 4 * Written By: MURAOKA Taro <koron@tka.att.ne.jp> 5 * Last Change: 19-Sep-2009. 6 */ 7 module migemo_d.rxgen; 8 9 10 private static import core.memory; 11 private static import core.stdc.stdio; 12 private static import core.stdc.string; 13 private static import migemo_d.wordbuf; 14 15 public alias rxgen_proc_char2int = extern (C) nothrow @nogc int function(const (char)*, uint*); 16 public alias rxgen_proc_int2char = extern (C) nothrow @nogc int function(uint, char*); 17 public alias RXGEN_PROC_CHAR2INT = .rxgen_proc_char2int; 18 public alias RXGEN_PROC_INT2CHAR = .rxgen_proc_int2char; 19 20 /* for rxgen_set_operator */ 21 public enum RXGEN_OPINDEX_OR = 0; 22 public enum RXGEN_OPINDEX_NEST_IN = 1; 23 public enum RXGEN_OPINDEX_NEST_OUT = 2; 24 public enum RXGEN_OPINDEX_SELECT_IN = 3; 25 public enum RXGEN_OPINDEX_SELECT_OUT = 4; 26 public enum RXGEN_OPINDEX_NEWLINE = 5; 27 version = RXGEN_ENC_SJISTINY; 28 //version = RXGEN_OP_VIM; 29 30 enum RXGEN_OP_MAXLEN = 8; 31 enum RXGEN_OP_OR = "|\0"; 32 enum RXGEN_OP_NEST_IN = "(\0"; 33 enum RXGEN_OP_NEST_OUT = ")\0"; 34 enum RXGEN_OP_SELECT_IN = "[\0"; 35 enum RXGEN_OP_SELECT_OUT = "]\0"; 36 enum RXGEN_OP_NEWLINE = "\n\0"; 37 38 public int n_rnode_new = 0; 39 public int n_rnode_delete = 0; 40 41 extern (C) 42 struct _rxgen 43 { 44 .rnode* node; 45 .RXGEN_PROC_CHAR2INT char2int; 46 .RXGEN_PROC_INT2CHAR int2char; 47 char[.RXGEN_OP_MAXLEN] op_or; 48 char[.RXGEN_OP_MAXLEN] op_nest_in; 49 char[.RXGEN_OP_MAXLEN] op_nest_out; 50 char[.RXGEN_OP_MAXLEN] op_select_in; 51 char[.RXGEN_OP_MAXLEN] op_select_out; 52 char[.RXGEN_OP_MAXLEN] op_newline; 53 } 54 55 public alias rxgen = ._rxgen; 56 57 /* 58 * rnode interfaces 59 */ 60 61 struct _rnode 62 { 63 uint code; 64 .rnode* child; 65 .rnode* next; 66 } 67 68 alias rnode = ._rnode; 69 70 nothrow @nogc 71 package .rnode* rnode_new() 72 73 do 74 { 75 ++.n_rnode_new; 76 77 return cast(.rnode*)(core.memory.pureCalloc(1, .rnode.sizeof)); 78 } 79 80 nothrow @nogc 81 package void rnode_delete(.rnode* node) 82 83 do 84 { 85 while (node != null) { 86 .rnode* child = node.child; 87 88 if (node.next != null) { 89 .rnode_delete(node.next); 90 node.next = null; 91 } 92 93 core.memory.pureFree(node); 94 node = child; 95 ++.n_rnode_delete; 96 } 97 } 98 99 /* 100 * rxgen interfaces 101 */ 102 103 extern (C) 104 pure nothrow @trusted @nogc @live 105 package int default_char2int(const char* in_, uint* out_) 106 107 in 108 { 109 assert(in_ != null); 110 } 111 112 do 113 { 114 if (out_ != null) { 115 *out_ = *in_; 116 } 117 118 return 1; 119 } 120 121 extern (C) 122 pure nothrow @trusted @nogc @live 123 package int default_int2char(uint in_, char* out_) 124 125 do 126 { 127 int len = 0; 128 129 /* outは最低でも16バイトはある、という仮定を置く */ 130 switch (in_) { 131 case '\\': 132 case '.': 133 case '*': 134 case '^': 135 case '$': 136 case '/': 137 138 version (RXGEN_OP_VIM) { 139 case '[': 140 case ']': 141 case '~': 142 } 143 144 if (out_ != null) { 145 out_[len] = '\\'; 146 } 147 148 ++len; 149 150 //ToDo: FALLTHROUGH? 151 goto default; 152 153 default: 154 if (out_ != null) { 155 out_[len] = cast(char)(in_ & 0xFF); 156 } 157 158 ++len; 159 160 break; 161 } 162 163 return len; 164 } 165 166 extern (C) 167 nothrow @nogc 168 public void rxgen_setproc_char2int(.rxgen* object, .RXGEN_PROC_CHAR2INT proc) 169 170 do 171 { 172 if (object != null) { 173 object.char2int = (proc != null) ? (proc) : (&.default_char2int); 174 } 175 } 176 177 extern (C) 178 nothrow @nogc 179 public void rxgen_setproc_int2char(.rxgen* object, .RXGEN_PROC_INT2CHAR proc) 180 181 do 182 { 183 if (object != null) { 184 object.int2char = (proc != null) ? (proc) : (&.default_int2char); 185 } 186 } 187 188 nothrow @nogc 189 package int rxgen_call_char2int(.rxgen* object, const (char)* pch, uint* code) 190 191 do 192 { 193 int len = object.char2int(pch, code); 194 195 return (len) ? (len) : (.default_char2int(pch, code)); 196 } 197 198 nothrow @nogc 199 package int rxgen_call_int2char(.rxgen* object, uint code, char* buf) 200 201 do 202 { 203 int len = object.int2char(code, buf); 204 205 return (len) ? (len) : (.default_int2char(code, buf)); 206 } 207 208 extern (C) 209 nothrow @nogc 210 public .rxgen* rxgen_open() 211 212 do 213 { 214 .rxgen* object = cast(.rxgen*)(core.memory.pureCalloc(1, .rxgen.sizeof)); 215 216 if (object != null) { 217 .rxgen_setproc_char2int(object, null); 218 .rxgen_setproc_int2char(object, null); 219 core.stdc..string.strcpy(&(object.op_or[0]), &(.RXGEN_OP_OR[0])); 220 core.stdc..string.strcpy(&(object.op_nest_in[0]), &(.RXGEN_OP_NEST_IN[0])); 221 core.stdc..string.strcpy(&(object.op_nest_out[0]), &(.RXGEN_OP_NEST_OUT[0])); 222 core.stdc..string.strcpy(&(object.op_select_in[0]), &(.RXGEN_OP_SELECT_IN[0])); 223 core.stdc..string.strcpy(&(object.op_select_out[0]), &(.RXGEN_OP_SELECT_OUT[0])); 224 core.stdc..string.strcpy(&(object.op_newline[0]), &(.RXGEN_OP_NEWLINE[0])); 225 } 226 227 return object; 228 } 229 230 extern (C) 231 nothrow @nogc 232 public void rxgen_close(.rxgen* object) 233 234 do 235 { 236 if (object != null) { 237 .rnode_delete(object.node); 238 object.node = null; 239 core.memory.pureFree(object); 240 } 241 } 242 243 pure nothrow @trusted @nogc @live 244 package .rnode* search_rnode(.rnode* node, uint code) 245 246 do 247 { 248 while ((node != null) && (node.code != code)) { 249 node = node.next; 250 } 251 252 return node; 253 } 254 255 extern (C) 256 nothrow @nogc 257 public int rxgen_add(.rxgen* object, const (char)* word) 258 259 do 260 { 261 if ((object == null) || (word == null)) { 262 return 0; 263 } 264 265 .rnode** ppnode = &object.node; 266 uint code; 267 268 while (true) { 269 int len = .rxgen_call_char2int(object, word, &code); 270 /*core.stdc.stdio.printf("rxgen_call_char2int: code=%08x\n", code);*/ 271 272 /* 入力パターンが尽きたら終了 */ 273 if (code == 0) { 274 /* 入力パターンよりも長い既存パターンは破棄する */ 275 if (*ppnode) { 276 .rnode_delete(*ppnode); 277 *ppnode = null; 278 } 279 280 break; 281 } 282 283 .rnode* pnode = .search_rnode(*ppnode, code); 284 285 if (pnode == null) { 286 /* codeを持つノードが無い場合、作成追加する */ 287 pnode = .rnode_new(); 288 pnode.code = code; 289 pnode.next = *ppnode; 290 *ppnode = pnode; 291 } else if (pnode.child == null) { 292 /* 293 * codeを持つノードは有るが、その子供が無い場合、それ以降の入力 294 * パターンは破棄する。例: 295 * あかい + あかるい . あか 296 * たのしい + たのしみ . たのし 297 */ 298 break; 299 } 300 301 /* 子ノードを辿って深い方へ注視点を移動 */ 302 ppnode = &pnode.child; 303 word += len; 304 } 305 306 return 1; 307 } 308 309 nothrow @nogc 310 package void rxgen_generate_stub(.rxgen* object, migemo_d.wordbuf.wordbuf_t* buf, .rnode* node) 311 312 in 313 { 314 assert(node != null); 315 } 316 317 do 318 { 319 int haschild = 0; 320 int brother = 1; 321 .rnode* tmp; 322 323 /* 現在の階層の特性(兄弟の数、子供の数)をチェックする */ 324 for (tmp = node; tmp; tmp = tmp.next) { 325 if (tmp.next != null) { 326 ++brother; 327 } 328 329 if (tmp.child != null) { 330 ++haschild; 331 } 332 } 333 334 int nochild = brother - haschild; 335 336 /* For debug */ 337 version (none) { 338 core.stdc.stdio.printf("node=%p code=%04X\n nochild=%d haschild=%d brother=%d\n", node, node.code, nochild, haschild, brother); 339 } 340 341 /* 必要ならば()によるグルーピング */ 342 if ((brother > 1) && (haschild > 0)) { 343 migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_nest_in[0])); 344 } 345 346 char[16] ch; 347 348 version (all) { 349 /* 子の無いノードを先に[]によりグルーピング */ 350 if (nochild > 0) { 351 if (nochild > 1) { 352 migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_select_in[0])); 353 } 354 355 for (tmp = node; tmp; tmp = tmp.next) { 356 if (tmp.child != null) { 357 continue; 358 } 359 360 int chlen = .rxgen_call_int2char(object, tmp.code, &(ch[0])); 361 ch[chlen] = '\0'; 362 /*core.stdc.stdio.printf("nochild: %s\n", ch);*/ 363 migemo_d.wordbuf.wordbuf_cat(buf, &(ch[0])); 364 } 365 366 if (nochild > 1) { 367 migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_select_out[0])); 368 } 369 } 370 } 371 372 version (all) { 373 /* 子のあるノードを出力 */ 374 if (haschild > 0) { 375 /* グループを出力済みならORで繋ぐ */ 376 if (nochild > 0) { 377 migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_or[0])); 378 } 379 380 for (tmp = node; !tmp.child; tmp = tmp.next) { 381 } 382 383 while (true) { 384 int chlen = .rxgen_call_int2char(object, tmp.code, &(ch[0])); 385 /*core.stdc.stdio.printf("code=%04X len=%d\n", tmp.code, chlen);*/ 386 ch[chlen] = '\0'; 387 migemo_d.wordbuf.wordbuf_cat(buf, &(ch[0])); 388 389 /* 空白・改行飛ばしのパターンを挿入 */ 390 if (object.op_newline[0]) { 391 migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_newline[0])); 392 } 393 394 .rxgen_generate_stub(object, buf, tmp.child); 395 396 for (tmp = tmp.next; (tmp != null) && (tmp.child == null); tmp = tmp.next) { 397 } 398 399 if (tmp == null) { 400 break; 401 } 402 403 if (haschild > 1) { 404 migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_or[0])); 405 } 406 } 407 } 408 } 409 410 /* 必要ならば()によるグルーピング */ 411 if ((brother > 1) && (haschild > 0)) { 412 migemo_d.wordbuf.wordbuf_cat(buf, &(object.op_nest_out[0])); 413 } 414 } 415 416 extern (C) 417 nothrow @nogc 418 public char* rxgen_generate(.rxgen* object) 419 420 do 421 { 422 char* answer = null; 423 424 if (object != null) { 425 migemo_d.wordbuf.wordbuf_t* buf = migemo_d.wordbuf.wordbuf_open(); 426 427 scope (exit) { 428 if (buf != null) { 429 migemo_d.wordbuf.wordbuf_close(buf); 430 buf = null; 431 } 432 } 433 434 if (buf != null) { 435 if (object.node != null) { 436 .rxgen_generate_stub(object, buf, object.node); 437 } 438 439 answer = core.stdc..string.strdup(migemo_d.wordbuf.WORDBUF_GET(buf)); 440 } 441 } 442 443 return answer; 444 } 445 446 extern (C) 447 pure nothrow @trusted @nogc 448 public void rxgen_release(.rxgen* object, char* string_) 449 450 do 451 { 452 if (string_ != null) { 453 core.memory.pureFree(string_); 454 } 455 } 456 457 /** 458 * rxgen_add()してきたパターンを全てリセット。 459 */ 460 extern (C) 461 nothrow @nogc 462 public void rxgen_reset(.rxgen* object) 463 464 in 465 { 466 } 467 468 do 469 { 470 if (object != null) { 471 .rnode_delete(object.node); 472 object.node = null; 473 } 474 } 475 476 pure nothrow @trusted @nogc @live 477 package char* rxgen_get_operator_stub(.rxgen* object, int index) 478 479 in 480 { 481 assert(object != null); 482 } 483 484 do 485 { 486 switch (index) { 487 case .RXGEN_OPINDEX_OR: 488 return &(object.op_or[0]); 489 490 case .RXGEN_OPINDEX_NEST_IN: 491 return &(object.op_nest_in[0]); 492 493 case .RXGEN_OPINDEX_NEST_OUT: 494 return &(object.op_nest_out[0]); 495 496 case .RXGEN_OPINDEX_SELECT_IN: 497 return &(object.op_select_in[0]); 498 499 case .RXGEN_OPINDEX_SELECT_OUT: 500 return &(object.op_select_out[0]); 501 502 case .RXGEN_OPINDEX_NEWLINE: 503 return &(object.op_newline[0]); 504 505 default: 506 return null; 507 } 508 } 509 510 extern (C) 511 pure nothrow @trusted @nogc @live 512 public const (char)* rxgen_get_operator(.rxgen* object, int index) 513 514 in 515 { 516 } 517 518 do 519 { 520 return cast(const (char)*)((object != null) ? (.rxgen_get_operator_stub(object, index)) : (null)); 521 } 522 523 extern (C) 524 pure nothrow @nogc 525 public int rxgen_set_operator(.rxgen* object, int index, const (char)* op) 526 527 in 528 { 529 assert(op != null); 530 } 531 532 do 533 { 534 if (object == null) { 535 /* Invalid object */ 536 return 1; 537 } 538 539 if (core.stdc..string.strlen(op) >= .RXGEN_OP_MAXLEN) { 540 /* Too long operator */ 541 return 2; 542 } 543 544 char* dest = .rxgen_get_operator_stub(object, index); 545 546 if (dest == null) { 547 /* No such an operator */ 548 return 3; 549 } 550 551 core.stdc..string.strcpy(dest, op); 552 553 return 0; 554 }