34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
39#ifndef USE_TOKEN_THREADED_VM
41# define USE_TOKEN_THREADED_VM 1
43# define USE_TOKEN_THREADED_VM 0
/* Bit 24 of an encoding's ruby_encoding_index; the expression below rejects encodings that carry it. */
48# define ENC_DUMMY_FLAG (1<<24)
/* Non-zero only when ONIGENC_MBC_MINLEN(enc) is 1 and ENC_DUMMY_FLAG is clear in ruby_encoding_index. */
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
/*
 * Replace any earlier ONIGENC_IS_MBC_ASCII_WORD definition.
 * When rb_enc_asciicompat(enc) holds, only *s is examined (ISALNUM or '_').
 * Otherwise the character is decoded with ONIGENC_MBC_TO_CODE and tested
 * against ONIGENC_CTYPE_WORD through onigenc_ascii_is_code_ctype().
 * NOTE(review): s is expanded without parentheses in "*s", so callers should
 * pass a plain pointer expression.
 */
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
/*
 * CR LF detector: the code point at p must be 13 and the code point of the
 * following character (p advanced by enclen) must be 10.
 */
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
/* Extended newline test: forwards every argument to is_mbc_newline_ex(). */
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
/*
 * Newline test that takes the CRLF newline option into account.
 * enc/p/end identify the character under test; start bounds the backward
 * search for the preceding character. option is checked with
 * IS_NEWLINE_CRLF. The use of check_prev is not visible in this excerpt.
 */
68is_mbc_newline_ex(
OnigEncoding enc,
const UChar *p,
const UChar *start,
69 const UChar *end, OnigOptionType option,
int check_prev)
/* CRLF mode is selected by the option flags. */
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
/* p is on LF (0x0a): fetch the previous character and see whether it is CR (0x0d). */
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
/* pnext is the character after p; the test below recognises CR at p followed by LF. */
84 const UChar *pnext = p + enclen(enc, p, end);
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
/* Defer to the encoding's own newline test (presumably the non-CRLF path -- confirm against the full file). */
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
103#ifdef USE_CAPTURE_HISTORY
/* Forward declaration: history_tree_clear() and history_tree_free() call each other. */
104static void history_tree_free(OnigCaptureTreeNode* node);
/*
 * Empty a capture-history node: free every child subtree, null the child
 * slots and reset the counters and positions. A null node is ignored.
 */
107history_tree_clear(OnigCaptureTreeNode* node)
111 if (IS_NOT_NULL(node)) {
/* Release each populated child subtree. */
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
/* Null every slot up to node->allocated, not only the first num_childs. */
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
/* NOTE(review): the statement that releases the childs array itself is not visible in this excerpt -- confirm it precedes this reset. */
125 node->childs = (OnigCaptureTreeNode** )0;
/* Dispose of a capture-history subtree; the node's contents are cleared first via history_tree_clear(). */
130history_tree_free(OnigCaptureTreeNode* node)
132 history_tree_clear(node);
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
/*
 * Allocate a childless capture-history node with xmalloc().
 * Returns the new node; CHECK_NULL_RETURN bails out when allocation fails.
 * beg and end start out as ONIG_REGION_NOTPOS.
 */
145static OnigCaptureTreeNode*
146history_node_new(
void)
148 OnigCaptureTreeNode* node;
150 node = (OnigCaptureTreeNode* )
xmalloc(
sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
/* No child array yet; history_tree_add_child() allocates it on first use. */
152 node->childs = (OnigCaptureTreeNode** )0;
154 node->num_childs = 0;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
/*
 * Append child to parent's child array, growing the array when num_childs
 * has reached allocated. The first allocation holds
 * HISTORY_TREE_INIT_ALLOC_SIZE pointers; later growth doubles the capacity.
 * Returns ONIGERR_MEMORY when the array cannot be grown.
 */
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
167 if (parent->num_childs >= parent->allocated) {
/* First use: no array exists yet. */
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
173 (OnigCaptureTreeNode** )
xmalloc(
sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
/* Grow through a temporary so parent->childs is only replaced after xrealloc() succeeds. */
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
180 (OnigCaptureTreeNode** )
xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
/* Failure path: the parent is cleared before the error is reported. */
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
186 parent->childs = tmp;
/* Null the slots added by this growth step. */
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
191 parent->allocated = n;
/* Store the child in the next free slot. */
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
/*
 * Recursively copy the subtree rooted at node.
 * Returns the copy, or a null pointer when a node cannot be created or a
 * child cannot be attached; on the visible failure paths the partly built
 * copy is freed first.
 */
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
203 OnigCaptureTreeNode *clone, *child;
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
/* Child copy failed: discard what has been built so far. */
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
216 r = history_tree_add_child(clone, child);
/* Attach failed: the child is not owned by clone yet, so both are freed. */
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
227extern OnigCaptureTreeNode*
230 return region->history_root;
239 for (i = 0; i < region->num_regs; i++) {
240 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
242#ifdef USE_CAPTURE_HISTORY
243 history_root_free(region);
250 region->num_regs = n;
252 if (n < ONIG_NREGION)
255 if (region->allocated == 0) {
256 region->beg = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
257 if (region->beg == 0)
258 return ONIGERR_MEMORY;
260 region->end = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
261 if (region->end == 0) {
263 return ONIGERR_MEMORY;
266 region->allocated = n;
268 else if (region->allocated < n) {
271 region->allocated = 0;
272 tmp = (OnigPosition* )
xrealloc(region->beg, n *
sizeof(OnigPosition));
276 return ONIGERR_MEMORY;
279 tmp = (OnigPosition* )
xrealloc(region->end, n *
sizeof(OnigPosition));
283 return ONIGERR_MEMORY;
287 region->allocated = n;
/*
 * Resize region to n registers and then reset it with onig_region_clear().
 * A non-zero result from onig_region_resize() is returned unchanged and the
 * clear step is skipped.
 */
294onig_region_resize_clear(
OnigRegion* region,
int n)
298 r = onig_region_resize(region, n);
299 if (r != 0)
return r;
300 onig_region_clear(region);
/*
 * Store beg/end for register index at.
 * A negative index yields ONIGERR_INVALID_ARGUMENT. When at is not below
 * region->allocated the region is first resized to at + 1 registers.
 * NOTE(review): beg and end are int here, while the region arrays are
 * allocated as OnigPosition elsewhere in this file.
 */
305onig_region_set(
OnigRegion* region,
int at,
int beg,
int end)
307 if (at < 0)
return ONIGERR_INVALID_ARGUMENT;
/* Grow on demand so that index at becomes valid. */
309 if (at >= region->allocated) {
310 int r = onig_region_resize(region, at + 1);
314 region->beg[at] = beg;
315 region->end[at] = end;
322 region->num_regs = 0;
323 region->allocated = 0;
324 region->beg = (OnigPosition* )0;
325 region->end = (OnigPosition* )0;
326#ifdef USE_CAPTURE_HISTORY
327 region->history_root = (OnigCaptureTreeNode* )0;
346 if (r->allocated > 0) {
347 if (r->beg)
xfree(r->beg);
348 if (r->end)
xfree(r->end);
351#ifdef USE_CAPTURE_HISTORY
352 history_root_free(r);
354 if (free_self)
xfree(r);
361#define RREGC_SIZE (sizeof(int) * from->num_regs)
364 if (to == from)
return;
366 r = onig_region_resize(to, from->num_regs);
369 for (i = 0; i < from->num_regs; i++) {
370 to->beg[i] = from->beg[i];
371 to->end[i] = from->end[i];
373 to->num_regs = from->num_regs;
375#ifdef USE_CAPTURE_HISTORY
376 history_root_free(to);
378 if (IS_NOT_NULL(from->history_root)) {
379 to->history_root = history_tree_clone(from->history_root);
/* Marker stored in mem_start_stk/mem_end_stk slots that hold no stack index. */
386#define INVALID_STACK_INDEX -1
/*
 * Stack entry type codes.
 * The first three values occupy the low byte; STACK_POP stops at any entry
 * whose type has a bit in STK_MASK_POP_USED (0x00ff).
 */
390#define STK_ALT 0x0001
391#define STK_LOOK_BEHIND_NOT 0x0002
392#define STK_POS_NOT 0x0003
/* Remaining types use the high byte only. */
394#define STK_MEM_START 0x0100
395#define STK_MEM_END 0x8200
396#define STK_REPEAT_INC 0x0300
397#define STK_STATE_CHECK_MARK 0x1000
399#define STK_NULL_CHECK_START 0x3000
400#define STK_NULL_CHECK_END 0x5000
401#define STK_MEM_END_MARK 0x8400
402#define STK_POS 0x0500
403#define STK_STOP_BT 0x0600
404#define STK_REPEAT 0x0700
405#define STK_CALL_FRAME 0x0800
406#define STK_RETURN 0x0900
407#define STK_VOID 0x0a00
408#define STK_ABSENT_POS 0x0b00
409#define STK_ABSENT 0x0c00
/*
 * Type masks.
 * STK_MASK_TO_VOID_TARGET covers the low byte plus bit 0x1000, which is set
 * in STK_STATE_CHECK_MARK, STK_NULL_CHECK_START and STK_NULL_CHECK_END.
 * STK_MASK_MEM_END_OR_MARK (0x8000) is set only in STK_MEM_END and
 * STK_MEM_END_MARK.
 */
412#define STK_MASK_POP_USED 0x00ff
413#define STK_MASK_TO_VOID_TARGET 0x10ff
414#define STK_MASK_MEM_END_OR_MARK 0x8000
416#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
417# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
418 (msa).stack_p = (void* )0;\
419 (msa).options = (arg_option);\
420 (msa).region = (arg_region);\
421 (msa).start = (arg_start);\
422 (msa).gpos = (arg_gpos);\
423 (msa).best_len = ONIG_MISMATCH;\
426# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
427 (msa).stack_p = (void* )0;\
428 (msa).options = (arg_option);\
429 (msa).region = (arg_region);\
430 (msa).start = (arg_start);\
431 (msa).gpos = (arg_gpos);\
435#ifdef USE_COMBINATION_EXPLOSION_CHECK
437# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
439# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
440 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
441 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
442 offset = ((offset) * (state_num)) >> 3;\
443 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
444 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
445 (msa).state_check_buff = (void* )xmalloc(size);\
446 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
449 (msa).state_check_buff = (void* )xalloca(size);\
450 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
451 (size_t )(size - (offset))); \
452 (msa).state_check_buff_size = size;\
455 (msa).state_check_buff = (void* )0;\
456 (msa).state_check_buff_size = 0;\
460 (msa).state_check_buff = (void* )0;\
461 (msa).state_check_buff_size = 0;\
465# define MATCH_ARG_FREE(msa) do {\
466 if ((msa).stack_p) xfree((msa).stack_p);\
467 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
468 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
472# define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
/* Largest ptr_num for which STACK_INIT still places the index array with xalloca(). */
477#define MAX_PTR_NUM 100
/*
 * Set up the index array (alloc_addr) and the match stack (stk_alloc,
 * stk_base, stk_end) for one match. Relies on msa and the stk_* locals
 * being in scope at the expansion site.
 *  - ptr_num > MAX_PTR_NUM: the index array comes from xmalloc() and is also
 *    recorded in heap_addr; the stack is msa->stack_p when one is saved,
 *    otherwise an xalloca() block of stack_num entries.
 *  - otherwise, with a saved msa->stack_p: the index array is xalloca()'d
 *    and the saved stack is reused with msa->stack_n entries.
 *  - otherwise: one xalloca() block holds the index array followed by the
 *    stack.
 * NOTE(review): the xmalloc() result is not checked in the visible lines.
 */
479#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
480 if (ptr_num > MAX_PTR_NUM) {\
481 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
482 heap_addr = alloc_addr;\
484 stk_alloc = (OnigStackType* )(msa->stack_p);\
485 stk_base = stk_alloc;\
487 stk_end = stk_base + msa->stack_n;\
489 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
490 stk_base = stk_alloc;\
492 stk_end = stk_base + (stack_num);\
494 } else if (msa->stack_p) {\
495 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
497 stk_alloc = (OnigStackType* )(msa->stack_p);\
498 stk_base = stk_alloc;\
500 stk_end = stk_base + msa->stack_n;\
503 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
504 + sizeof(OnigStackType) * (stack_num));\
506 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
507 stk_base = stk_alloc;\
509 stk_end = stk_base + (stack_num);\
513#define STACK_SAVE do{\
514 if (stk_base != stk_alloc) {\
515 msa->stack_p = stk_base;\
516 msa->stack_n = stk_end - stk_base; \
/*
 * File-scope cap on the match stack size, initialised from
 * DEFAULT_MATCH_STACK_LIMIT_SIZE. The stack-growth code in this file skips
 * the limit test when the value is 0.
 */
520static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
/* Return the current match stack limit. */
523onig_get_match_stack_limit_size(
void)
525 return MatchStackLimitSize;
/* Replace the match stack limit with size; no validation is applied. */
529onig_set_match_stack_limit_size(
unsigned int size)
531 MatchStackLimitSize = size;
542 stk_base = *arg_stk_base;
543 stk_end = *arg_stk_end;
546 n = stk_end - stk_base;
547 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
551 return ONIGERR_MEMORY;
557 unsigned int limit_size = MatchStackLimitSize;
559 if (limit_size != 0 && n > limit_size) {
560 if ((
unsigned int )(stk_end - stk_base) == limit_size)
561 return ONIGERR_MATCH_STACK_LIMIT_OVER;
568 return ONIGERR_MEMORY;
571 *arg_stk = x + (stk - stk_base);
573 *arg_stk_end = x + n;
577#define STACK_ENSURE(n) do {\
578 if (stk_end - stk < (n)) {\
579 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
582 if (xmalloc_base) xfree(xmalloc_base);\
588#define STACK_AT(index) (stk_base + (index))
589#define GET_STACK_INDEX(stk) ((stk) - stk_base)
591#define STACK_PUSH_TYPE(stack_type) do {\
593 stk->type = (stack_type);\
597#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
599#ifdef USE_COMBINATION_EXPLOSION_CHECK
600# define STATE_CHECK_POS(s,snum) \
601 (((s) - str) * num_comb_exp_check + ((snum) - 1))
602# define STATE_CHECK_VAL(v,snum) do {\
603 if (state_check_buff != NULL) {\
604 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
605 (v) = state_check_buff[x/8] & (1<<(x%8));\
611# define ELSE_IF_STATE_CHECK_MARK(stk) \
612 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
613 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
614 state_check_buff[x/8] |= (1<<(x%8)); \
617# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
619 stk->type = (stack_type);\
620 stk->u.state.pcode = (pat);\
621 stk->u.state.pstr = (s);\
622 stk->u.state.pstr_prev = (sprev);\
623 stk->u.state.state_check = 0;\
624 stk->u.state.pkeep = (keep);\
628# define STACK_PUSH_ENSURED(stack_type,pat) do {\
629 stk->type = (stack_type);\
630 stk->u.state.pcode = (pat);\
631 stk->u.state.state_check = 0;\
635# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
637 stk->type = STK_ALT;\
638 stk->u.state.pcode = (pat);\
639 stk->u.state.pstr = (s);\
640 stk->u.state.pstr_prev = (sprev);\
641 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
642 stk->u.state.pkeep = (keep);\
646# define STACK_PUSH_STATE_CHECK(s,snum) do {\
647 if (state_check_buff != NULL) {\
649 stk->type = STK_STATE_CHECK_MARK;\
650 stk->u.state.pstr = (s);\
651 stk->u.state.state_check = (snum);\
658# define ELSE_IF_STATE_CHECK_MARK(stk)
660# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
662 stk->type = (stack_type);\
663 stk->u.state.pcode = (pat);\
664 stk->u.state.pstr = (s);\
665 stk->u.state.pstr_prev = (sprev);\
666 stk->u.state.pkeep = (keep);\
670# define STACK_PUSH_ENSURED(stack_type,pat) do {\
671 stk->type = (stack_type);\
672 stk->u.state.pcode = (pat);\
677#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
678#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
679#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
680#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
681#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
682#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
683 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
685#define STACK_PUSH_REPEAT(id, pat) do {\
687 stk->type = STK_REPEAT;\
688 stk->u.repeat.num = (id);\
689 stk->u.repeat.pcode = (pat);\
690 stk->u.repeat.count = 0;\
694#define STACK_PUSH_REPEAT_INC(sindex) do {\
696 stk->type = STK_REPEAT_INC;\
697 stk->u.repeat_inc.si = (sindex);\
701#define STACK_PUSH_MEM_START(mnum, s) do {\
703 stk->type = STK_MEM_START;\
704 stk->u.mem.num = (mnum);\
705 stk->u.mem.pstr = (s);\
706 stk->u.mem.start = mem_start_stk[mnum];\
707 stk->u.mem.end = mem_end_stk[mnum];\
708 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
709 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
713#define STACK_PUSH_MEM_END(mnum, s) do {\
715 stk->type = STK_MEM_END;\
716 stk->u.mem.num = (mnum);\
717 stk->u.mem.pstr = (s);\
718 stk->u.mem.start = mem_start_stk[mnum];\
719 stk->u.mem.end = mem_end_stk[mnum];\
720 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
724#define STACK_PUSH_MEM_END_MARK(mnum) do {\
726 stk->type = STK_MEM_END_MARK;\
727 stk->u.mem.num = (mnum);\
731#define STACK_GET_MEM_START(mnum, k) do {\
734 while (k > stk_base) {\
736 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
737 && k->u.mem.num == (mnum)) {\
740 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
741 if (level == 0) break;\
747#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
750 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
751 if (level == 0) (start) = k->u.mem.pstr;\
754 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
757 (end) = k->u.mem.pstr;\
765#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
767 stk->type = STK_NULL_CHECK_START;\
768 stk->u.null_check.num = (cnum);\
769 stk->u.null_check.pstr = (s);\
773#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
775 stk->type = STK_NULL_CHECK_END;\
776 stk->u.null_check.num = (cnum);\
780#define STACK_PUSH_CALL_FRAME(pat) do {\
782 stk->type = STK_CALL_FRAME;\
783 stk->u.call_frame.ret_addr = (pat);\
787#define STACK_PUSH_RETURN do {\
789 stk->type = STK_RETURN;\
793#define STACK_PUSH_ABSENT_POS(start, end) do {\
795 stk->type = STK_ABSENT_POS;\
796 stk->u.absent_pos.abs_pstr = (start);\
797 stk->u.absent_pos.end_pstr = (end);\
803# define STACK_BASE_CHECK(p, at) \
804 if ((p) < stk_base) {\
805 fprintf(stderr, "at %s\n", at);\
809# define STACK_BASE_CHECK(p, at)
812#define STACK_POP_ONE do {\
814 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
817#define STACK_POP do {\
818 switch (pop_level) {\
819 case STACK_POP_LEVEL_FREE:\
822 STACK_BASE_CHECK(stk, "STACK_POP"); \
823 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
824 ELSE_IF_STATE_CHECK_MARK(stk);\
827 case STACK_POP_LEVEL_MEM_START:\
830 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
831 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
832 else if (stk->type == STK_MEM_START) {\
833 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
834 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
836 ELSE_IF_STATE_CHECK_MARK(stk);\
842 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
843 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
844 else if (stk->type == STK_MEM_START) {\
845 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
846 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
848 else if (stk->type == STK_REPEAT_INC) {\
849 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
851 else if (stk->type == STK_MEM_END) {\
852 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
853 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
855 ELSE_IF_STATE_CHECK_MARK(stk);\
861#define STACK_POP_TIL_POS_NOT do {\
864 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
865 if (stk->type == STK_POS_NOT) break;\
866 else if (stk->type == STK_MEM_START) {\
867 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
868 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
870 else if (stk->type == STK_REPEAT_INC) {\
871 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
873 else if (stk->type == STK_MEM_END) {\
874 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
875 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
877 ELSE_IF_STATE_CHECK_MARK(stk);\
881#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
884 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
885 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
886 else if (stk->type == STK_MEM_START) {\
887 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
888 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
890 else if (stk->type == STK_REPEAT_INC) {\
891 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
893 else if (stk->type == STK_MEM_END) {\
894 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
895 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
897 ELSE_IF_STATE_CHECK_MARK(stk);\
901#define STACK_POP_TIL_ABSENT do {\
904 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
905 if (stk->type == STK_ABSENT) break;\
906 else if (stk->type == STK_MEM_START) {\
907 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
908 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
910 else if (stk->type == STK_REPEAT_INC) {\
911 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
913 else if (stk->type == STK_MEM_END) {\
914 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
915 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
917 ELSE_IF_STATE_CHECK_MARK(stk);\
921#define STACK_POP_ABSENT_POS(start, end) do {\
923 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
924 (start) = stk->u.absent_pos.abs_pstr;\
925 (end) = stk->u.absent_pos.end_pstr;\
928#define STACK_POS_END(k) do {\
932 STACK_BASE_CHECK(k, "STACK_POS_END"); \
933 if (IS_TO_VOID_TARGET(k)) {\
936 else if (k->type == STK_POS) {\
943#define STACK_STOP_BT_END do {\
944 OnigStackType *k = stk;\
947 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
948 if (IS_TO_VOID_TARGET(k)) {\
951 else if (k->type == STK_STOP_BT) {\
958#define STACK_NULL_CHECK(isnull,id,s) do {\
959 OnigStackType* k = stk;\
962 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
963 if (k->type == STK_NULL_CHECK_START) {\
964 if (k->u.null_check.num == (id)) {\
965 (isnull) = (k->u.null_check.pstr == (s));\
972#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
974 OnigStackType* k = stk;\
977 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
978 if (k->type == STK_NULL_CHECK_START) {\
979 if (k->u.null_check.num == (id)) {\
981 (isnull) = (k->u.null_check.pstr == (s));\
987 else if (k->type == STK_NULL_CHECK_END) {\
993#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
994 OnigStackType* k = stk;\
997 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
998 if (k->type == STK_NULL_CHECK_START) {\
999 if (k->u.null_check.num == (id)) {\
1000 if (k->u.null_check.pstr != (s)) {\
1008 if (k->type == STK_MEM_START) {\
1009 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1010 (isnull) = 0; break;\
1012 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1013 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1015 endp = (UChar* )k->u.mem.end;\
1016 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1017 (isnull) = 0; break;\
1019 else if (endp != s) {\
1032#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1034 OnigStackType* k = stk;\
1037 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1038 if (k->type == STK_NULL_CHECK_START) {\
1039 if (k->u.null_check.num == (id)) {\
1041 if (k->u.null_check.pstr != (s)) {\
1049 if (k->type == STK_MEM_START) {\
1050 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1051 (isnull) = 0; break;\
1053 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1054 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1056 endp = (UChar* )k->u.mem.end;\
1057 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1058 (isnull) = 0; break;\
1060 else if (endp != s) {\
1074 else if (k->type == STK_NULL_CHECK_END) {\
1075 if (k->u.null_check.num == (id)) level++;\
1080#define STACK_GET_REPEAT(id, k) do {\
1085 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1086 if (k->type == STK_REPEAT) {\
1088 if (k->u.repeat.num == (id)) {\
1093 else if (k->type == STK_CALL_FRAME) level--;\
1094 else if (k->type == STK_RETURN) level++;\
1098#define STACK_RETURN(addr) do {\
1100 OnigStackType* k = stk;\
1103 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1104 if (k->type == STK_CALL_FRAME) {\
1106 (addr) = k->u.call_frame.ret_addr;\
1111 else if (k->type == STK_RETURN)\
1117#define STRING_CMP(s1,s2,len) do {\
1118 while (len-- > 0) {\
1119 if (*s1++ != *s2++) goto fail;\
1123#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1124 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/*
 * Case-insensitive comparison of s1 against the text at *ps2.
 * Both sides are case-folded with ONIGENC_MBC_CASE_FOLD into buf1/buf2 and
 * the folded bytes are compared. Returns 0 as soon as the folded lengths or
 * any folded byte differ; callers in this file treat 0 as "no match".
 * NOTE(review): the success return and any update of *ps2 are not visible
 * in this excerpt.
 */
1128static int string_cmp_ic(
OnigEncoding enc,
int case_fold_flag,
1129 UChar* s1, UChar** ps2, OnigDistance mblen,
const UChar* text_end)
/* Scratch buffers sized for the longest possible case-fold result. */
1131 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1132 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1133 UChar *p1, *p2, *end1, *s2;
/* The fold calls receive &s1 / &s2, so they can move the source pointers. */
1139 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1140 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
/* Different folded lengths can never be equal. */
1141 if (len1 != len2)
return 0;
1144 while (len1-- > 0) {
1145 if (*p1 != *p2)
return 0;
1155#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1157 while (len-- > 0) {\
1158 if (*s1++ != *s2++) {\
1159 is_fail = 1; break;\
1164#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1165 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/* Position predicates; they rely on the locals str and end being in scope where expanded. */
1172#define IS_EMPTY_STR (str == end)
1173#define ON_STR_BEGIN(s) ((s) == str)
1174#define ON_STR_END(s) ((s) == end)
/*
 * Remaining-input checks against the local s.
 * With USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE the bound is
 * right_range; the second set of definitions uses end instead.
 * DATA_ENSURE jumps to the fail label and DATA_ENSURE_CONTINUE continues the
 * enclosing loop when fewer than n units remain.
 * NOTE(review): both expand to a bare "if", so do not use them directly
 * before an "else".
 */
1175#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1176# define DATA_ENSURE_CHECK1 (s < right_range)
1177# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1178# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1179# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1180# define ABSENT_END_POS right_range
1182# define DATA_ENSURE_CHECK1 (s < end)
1183# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1184# define DATA_ENSURE(n) if (s + (n) > end) goto fail
1185# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1186# define ABSENT_END_POS end
1190#ifdef USE_CAPTURE_HISTORY
/*
 * Build the capture-history subtree under node by walking stack entries
 * from *kp up to stk_top. Each STK_MEM_START of a group flagged in
 * reg->capture_history opens a child node and recurses; the matching
 * STK_MEM_END closes the current node. Offsets are stored relative to str.
 * Non-zero results from helpers are propagated to the caller.
 */
1192make_capture_history_tree(OnigCaptureTreeNode* node,
OnigStackType** kp,
1196 OnigCaptureTreeNode* child;
1199 while (k < stk_top) {
1200 if (k->type == STK_MEM_START) {
/* Only groups within the history limit and flagged in capture_history get a node. */
1202 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1203 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1204 child = history_node_new();
1205 CHECK_NULL_RETURN_MEMERR(child);
1207 child->beg = k->u.mem.pstr - str;
1208 r = history_tree_add_child(node, child);
/* The child was not attached, so it is freed here. */
1210 history_tree_free(child);
/* Descend: nested captures become children of child. */
1214 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1215 if (r != 0)
return r;
1218 child->end = k->u.mem.pstr - str;
1221 else if (k->type == STK_MEM_END) {
/* End marker for this node's own group: record where the capture ends. */
1222 if (k->u.mem.num == node->group) {
1223 node->end = k->u.mem.pstr - str;
1235#ifdef USE_BACKREF_WITH_LEVEL
/*
 * Return 1 when mem equals one of the num memory numbers read in sequence
 * from memp with GET_MEMNUM_INC. The not-found return is not visible in
 * this excerpt.
 */
1236static int mem_is_in_memp(
int mem,
int num, UChar* memp)
1241 for (i = 0; i < num; i++) {
1242 GET_MEMNUM_INC(m, memp);
1243 if (mem == (
int )m)
return 1;
/*
 * Match a back-reference against the capture made at call-nesting level
 * nest. The stack is scanned downwards to stk_base; STK_CALL_FRAME and
 * STK_RETURN entries adjust the running level. At the requested level a
 * STK_MEM_END for one of the mem_num groups listed in memp records pend,
 * and a later-visited STK_MEM_START for such a group supplies pstart. The
 * captured text pstart..pend is then compared with the subject at *s,
 * case-insensitively when ignore_case is non-zero.
 * Returns 0 when the capture is longer than the text left before send or
 * when the comparison fails. Other return values are not visible here.
 */
1248static int backref_match_at_nested_level(
regex_t* reg,
1250 int ignore_case,
int case_fold_flag,
1251 int nest,
int mem_num, UChar* memp, UChar** s,
const UChar* send)
/* pend stays null until a matching STK_MEM_END has been seen. */
1253 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1260 while (k >= stk_base) {
1261 if (k->type == STK_CALL_FRAME) {
1264 else if (k->type == STK_RETURN) {
1267 else if (level == nest) {
1268 if (k->type == STK_MEM_START) {
1269 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1270 pstart = k->u.mem.pstr;
1271 if (pend != NULL_UCHARP) {
/* The capture must fit in the remaining subject text. */
1272 if (pend - pstart > send - *s)
return 0;
1276 if (ignore_case != 0) {
1277 if (string_cmp_ic(reg->enc, case_fold_flag,
1278 pstart, &ss, pend - pstart, send) == 0)
/* Case-sensitive path: plain byte-by-byte comparison. */
1283 if (*p++ != *ss++)
return 0;
1292 else if (k->type == STK_MEM_END) {
1293 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1294 pend = k->u.mem.pstr;
1306#ifdef ONIG_DEBUG_STATISTICS
1309# include <windows.h>
1310static LARGE_INTEGER ts, te, freq;
1311# define GETTIME(t) QueryPerformanceCounter(&(t))
1312# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1313 * 1000000 / freq.QuadPart)
1316# define USE_TIMEOFDAY
1318# ifdef USE_TIMEOFDAY
1319# ifdef HAVE_SYS_TIME_H
1320# include <sys/time.h>
1322# ifdef HAVE_UNISTD_H
1326# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1327# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1328 (((te).tv_sec - (ts).tv_sec)*1000000))
1330# ifdef HAVE_SYS_TIMES_H
1331# include <sys/times.h>
1333static struct tms ts, te;
1334# define GETTIME(t) times(&(t))
1335# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1340static int OpCounter[256];
1341static int OpPrevCounter[256];
1342static unsigned long OpTime[256];
1343static int OpCurr = OP_FINISH;
1344static int OpPrevTarget = OP_FAIL;
1345static int MaxStackDepth = 0;
1347# define MOP_IN(opcode) do {\
1348 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1350 OpCounter[opcode]++;\
1354# define MOP_OUT do {\
1356 OpTime[OpCurr] += TIMEDIFF(te, ts);\
/* Zero all 256 slots of OpCounter, OpPrevCounter and OpTime; the QueryPerformanceFrequency call fills freq for TIMEDIFF. */
1360onig_statistics_init(
void)
1363 for (i = 0; i < 256; i++) {
1364 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1368 QueryPerformanceFrequency(&freq);
/*
 * Write the per-opcode counters to f, one line per OnigOpInfo entry, until
 * an entry with a negative opcode is reached, then the maximum stack depth.
 * NOTE(review): the counters are indexed by table position i, not by
 * OnigOpInfo[i].opcode -- this assumes the table is ordered by opcode value.
 */
1373onig_print_statistics(
FILE* f)
1376 fprintf(f,
" count prev time\n");
1377 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1378 fprintf(f,
"%8d: %8d: %10lu: %s\n",
1379 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1381 fprintf(f,
"\nmax stack depth: %d\n", MaxStackDepth);
1384# define STACK_INC do {\
1386 if (stk - stk_base > MaxStackDepth) \
1387 MaxStackDepth = stk - stk_base;\
1391# define STACK_INC stk++
1393# define MOP_IN(opcode)
1398#ifdef ONIG_DEBUG_MATCH
/*
 * Debug helper (compiled under ONIG_DEBUG_MATCH): map a stack entry type
 * code to a short label for the match trace. Unrecognised codes fall
 * through to the blank default label.
 */
1400stack_type_str(
int stack_type)
1402 switch (stack_type) {
1403 case STK_ALT:
return "Alt ";
1404 case STK_LOOK_BEHIND_NOT:
return "LBNot ";
1405 case STK_POS_NOT:
return "PosNot";
1406 case STK_MEM_START:
return "MemS ";
1407 case STK_MEM_END:
return "MemE ";
1408 case STK_REPEAT_INC:
return "RepInc";
1409 case STK_STATE_CHECK_MARK:
return "StChMk";
1410 case STK_NULL_CHECK_START:
return "NulChS";
1411 case STK_NULL_CHECK_END:
return "NulChE";
1412 case STK_MEM_END_MARK:
return "MemEMk";
1413 case STK_POS:
return "Pos ";
1414 case STK_STOP_BT:
return "StopBt";
1415 case STK_REPEAT:
return "Rep ";
1416 case STK_CALL_FRAME:
return "Call ";
1417 case STK_RETURN:
return "Ret ";
1418 case STK_VOID:
return "Void ";
1419 case STK_ABSENT_POS:
return "AbsPos";
1420 case STK_ABSENT:
return "Absent";
/* Any other value gets a blank label. */
1421 default:
return " ";
1429match_at(
regex_t* reg,
const UChar* str,
const UChar* end,
1430#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1431 const UChar* right_range,
1435 static const UChar FinishCode[] = { OP_FINISH };
1437 int i, num_mem, pop_level;
1438 ptrdiff_t n, best_len;
1439 LengthType tlen, tlen2;
1442 OnigOptionType option = reg->options;
1444 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1445 UChar *s, *q, *sbegin;
1449 char *xmalloc_base = NULL;
1453 OnigStackIndex *repeat_stk;
1454 OnigStackIndex *mem_start_stk, *mem_end_stk;
1455#ifdef USE_COMBINATION_EXPLOSION_CHECK
1457 unsigned char* state_check_buff = msa->state_check_buff;
1458 int num_comb_exp_check = reg->num_comb_exp_check;
1461#if USE_TOKEN_THREADED_VM
1463# define VM_LOOP JUMP;
1465# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
1466# define DEFAULT L_DEFAULT:
1467# define NEXT sprev = sbegin; JUMP
1468# define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
1494 &&L_OP_CCLASS_MB_NOT,
1495 &&L_OP_CCLASS_MIX_NOT,
1499 &&L_OP_ANYCHAR_STAR,
1500 &&L_OP_ANYCHAR_ML_STAR,
1501 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
1502 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
1507 &&L_OP_NOT_WORD_BOUND,
1508# ifdef USE_WORD_BEGIN_END
1516 &&L_OP_NOT_ASCII_WORD,
1517 &&L_OP_ASCII_WORD_BOUND,
1518 &&L_OP_NOT_ASCII_WORD_BOUND,
1519# ifdef USE_WORD_BEGIN_END
1520 &&L_OP_ASCII_WORD_BEGIN,
1521 &&L_OP_ASCII_WORD_END,
1531 &&L_OP_SEMI_END_BUF,
1532 &&L_OP_BEGIN_POSITION,
1538 &&L_OP_BACKREF_MULTI,
1539 &&L_OP_BACKREF_MULTI_IC,
1540# ifdef USE_BACKREF_WITH_LEVEL
1541 &&L_OP_BACKREF_WITH_LEVEL,
1545 &&L_OP_MEMORY_START,
1546 &&L_OP_MEMORY_START_PUSH,
1547 &&L_OP_MEMORY_END_PUSH,
1548# ifdef USE_SUBEXP_CALL
1549 &&L_OP_MEMORY_END_PUSH_REC,
1554# ifdef USE_SUBEXP_CALL
1555 &&L_OP_MEMORY_END_REC,
1566# ifdef USE_OP_PUSH_OR_JUMP_EXACT
1567 &&L_OP_PUSH_OR_JUMP_EXACT1,
1571 &&L_OP_PUSH_IF_PEEK_NEXT,
1575 &&L_OP_REPEAT_INC_NG,
1576 &&L_OP_REPEAT_INC_SG,
1577 &&L_OP_REPEAT_INC_NG_SG,
1578 &&L_OP_NULL_CHECK_START,
1579 &&L_OP_NULL_CHECK_END,
1580# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1581 &&L_OP_NULL_CHECK_END_MEMST,
1585# ifdef USE_SUBEXP_CALL
1586 &&L_OP_NULL_CHECK_END_MEMST_PUSH,
1593 &&L_OP_PUSH_POS_NOT,
1595 &&L_OP_PUSH_STOP_BT,
1598 &&L_OP_PUSH_LOOK_BEHIND_NOT,
1599 &&L_OP_FAIL_LOOK_BEHIND_NOT,
1600 &&L_OP_PUSH_ABSENT_POS,
1604# ifdef USE_SUBEXP_CALL
1613# ifdef USE_COMBINATION_EXPLOSION_CHECK
1614 &&L_OP_STATE_CHECK_PUSH,
1615 &&L_OP_STATE_CHECK_PUSH_OR_JUMP,
1622# ifdef USE_COMBINATION_EXPLOSION_CHECK
1623 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
1624 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
1631 &&L_OP_SET_OPTION_PUSH,
1646# define VM_LOOP_END } sprev = sbegin; }
1647# define CASE(x) case x:
1648# define DEFAULT default:
1650# define JUMP continue; break
1654#ifdef USE_SUBEXP_CALL
1657# define ADD_NUMMEM 1
1660# define ADD_NUMMEM 0
1663 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
1665 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
1666 pop_level = reg->stack_pop_level;
1667 num_mem = reg->num_mem;
1668 repeat_stk = (OnigStackIndex* )alloca_base;
1670 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1671 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
1673 OnigStackIndex *pp = mem_start_stk;
1674 for (; pp < repeat_stk + n; pp += 2) {
1675 pp[0] = INVALID_STACK_INDEX;
1676 pp[1] = INVALID_STACK_INDEX;
1679#ifndef USE_SUBEXP_CALL
1686#ifdef ONIG_DEBUG_MATCH
1687 fprintf(stderr,
"match_at: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), start: %"PRIuPTR
" (%p), sprev: %"PRIuPTR
" (%p)\n",
1688 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
1689 fprintf(stderr,
"size: %d, start offset: %d\n",
1690 (
int )(end - str), (
int )(sstart - str));
1691 fprintf(stderr,
"\n ofs> str stk:type addr:opcode\n");
1694 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode);
1695 best_len = ONIG_MISMATCH;
1696 s = (UChar* )sstart;
1697 pkeep = (UChar* )sstart;
1700#ifdef ONIG_DEBUG_MATCH
1701# define OPCODE_EXEC_HOOK \
1703 UChar *op, *q, *bp, buf[50]; \
1705 op = p - OP_OFFSET; \
1706 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
1709 if (*op != OP_FINISH) {
\
1710 for (i = 0; i < 7 && q < end; i++) { \
1711 len = enclen(encode, q, end); \
1712 while (len-- > 0) *bp++ = *q++; \
1714 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
1716 xmemcpy(bp, "\"", 1); bp += 1; \
1718 fputs((char* )buf, stderr); \
1719 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
1720 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
1721 stk - stk_base - 1, \
1722 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
1723 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
1724 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
1725 fprintf(stderr, "\n"); \
1728# define OPCODE_EXEC_HOOK ((void) 0)
1733 CASE(OP_END) MOP_IN(OP_END);
1737#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1738 if (IS_FIND_LONGEST(option)) {
1739 if (n > msa->best_len) {
1741 msa->best_s = (UChar* )sstart;
1748 region = msa->region;
1750 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
1751 region->end[0] = s - str;
1752 for (i = 1; i <= num_mem; i++) {
1753 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1754 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1755 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1757 region->beg[i] = (UChar* )((
void* )mem_start_stk[i]) - str;
1759 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1760 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1761 : (UChar* )((
void* )mem_end_stk[i])) - str;
1764 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1768#ifdef USE_CAPTURE_HISTORY
1769 if (reg->capture_history != 0) {
1771 OnigCaptureTreeNode* node;
1773 if (IS_NULL(region->history_root)) {
1774 region->history_root = node = history_node_new();
1775 CHECK_NULL_RETURN_MEMERR(node);
1778 node = region->history_root;
1779 history_tree_clear(node);
1783 node->beg = ((pkeep > s) ? s : pkeep) - str;
1784 node->end = s - str;
1787 r = make_capture_history_tree(region->history_root, &stkp,
1788 stk, (UChar* )str, reg);
1798#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1803 if (IS_FIND_CONDITION(option)) {
1804 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1805 best_len = ONIG_MISMATCH;
1808 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1817 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
1819 if (*p != *s)
goto fail;
1824 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
1827 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1830 len = ONIGENC_MBC_CASE_FOLD(encode,
1846 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
1848 if (*p != *s)
goto fail;
1850 if (*p != *s)
goto fail;
1856 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
1858 if (*p != *s)
goto fail;
1860 if (*p != *s)
goto fail;
1862 if (*p != *s)
goto fail;
1868 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
1870 if (*p != *s)
goto fail;
1872 if (*p != *s)
goto fail;
1874 if (*p != *s)
goto fail;
1876 if (*p != *s)
goto fail;
1882 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
1884 if (*p != *s)
goto fail;
1886 if (*p != *s)
goto fail;
1888 if (*p != *s)
goto fail;
1890 if (*p != *s)
goto fail;
1892 if (*p != *s)
goto fail;
1898 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
1899 GET_LENGTH_INC(tlen, p);
1901 while (tlen-- > 0) {
1902 if (*p++ != *s++)
goto fail;
1908 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
1911 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1913 GET_LENGTH_INC(tlen, p);
1919 len = ONIGENC_MBC_CASE_FOLD(encode,
1926 if (*p != *q)
goto fail;
1935 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
1937 if (*p != *s)
goto fail;
1939 if (*p != *s)
goto fail;
1944 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
1946 if (*p != *s)
goto fail;
1948 if (*p != *s)
goto fail;
1951 if (*p != *s)
goto fail;
1953 if (*p != *s)
goto fail;
1958 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
1960 if (*p != *s)
goto fail;
1962 if (*p != *s)
goto fail;
1964 if (*p != *s)
goto fail;
1966 if (*p != *s)
goto fail;
1969 if (*p != *s)
goto fail;
1971 if (*p != *s)
goto fail;
1976 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
1977 GET_LENGTH_INC(tlen, p);
1978 DATA_ENSURE(tlen * 2);
1979 while (tlen-- > 0) {
1980 if (*p != *s)
goto fail;
1982 if (*p != *s)
goto fail;
1989 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
1990 GET_LENGTH_INC(tlen, p);
1991 DATA_ENSURE(tlen * 3);
1992 while (tlen-- > 0) {
1993 if (*p != *s)
goto fail;
1995 if (*p != *s)
goto fail;
1997 if (*p != *s)
goto fail;
2004 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2005 GET_LENGTH_INC(tlen, p);
2006 GET_LENGTH_INC(tlen2, p);
2009 while (tlen2-- > 0) {
2010 if (*p != *s)
goto fail;
2017 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2019 if (BITSET_AT(((BitSetRef )p), *s) == 0)
goto fail;
2021 s += enclen(encode, s, end);
2025 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2026 if (! ONIGENC_IS_MBC_HEAD(encode, s, end))
goto fail;
2029 GET_LENGTH_INC(tlen, p);
2036 mb_len = enclen(encode, s, end);
2037 DATA_ENSURE(mb_len);
2040 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2042#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2043 if (! onig_is_in_code_range(p, code))
goto fail;
2047 if (! onig_is_in_code_range(q, code))
goto fail;
2054 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2056 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2061 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2065 GET_LENGTH_INC(tlen, p);
2072 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2074 if (BITSET_AT(((BitSetRef )p), *s) != 0)
goto fail;
2076 s += enclen(encode, s, end);
2080 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2082 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2084 GET_LENGTH_INC(tlen, p);
2086 goto cc_mb_not_success;
2090 GET_LENGTH_INC(tlen, p);
2094 int mb_len = enclen(encode, s, end);
2096 if (! DATA_ENSURE_CHECK(mb_len)) {
2100 goto cc_mb_not_success;
2105 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2107#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2108 if (onig_is_in_code_range(p, code))
goto fail;
2112 if (onig_is_in_code_range(q, code))
goto fail;
2121 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2123 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2128 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2132 GET_LENGTH_INC(tlen, p);
2139 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2141 n = enclen(encode, s, end);
2143 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2148 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2150 n = enclen(encode, s, end);
2156 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2157 while (DATA_ENSURE_CHECK1) {
2158 STACK_PUSH_ALT(p, s, sprev, pkeep);
2159 n = enclen(encode, s, end);
2161 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2168 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2169 while (DATA_ENSURE_CHECK1) {
2170 STACK_PUSH_ALT(p, s, sprev, pkeep);
2171 n = enclen(encode, s, end);
2185 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2186 while (DATA_ENSURE_CHECK1) {
2188 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2190 n = enclen(encode, s, end);
2192 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2200 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2201 while (DATA_ENSURE_CHECK1) {
2203 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2205 n = enclen(encode, s, end);
2220#ifdef USE_COMBINATION_EXPLOSION_CHECK
2221 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2222 GET_STATE_CHECK_NUM_INC(mem, p);
2223 while (DATA_ENSURE_CHECK1) {
2224 STATE_CHECK_VAL(scv, mem);
2227 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2228 n = enclen(encode, s, end);
2230 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2237 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2238 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2240 GET_STATE_CHECK_NUM_INC(mem, p);
2241 while (DATA_ENSURE_CHECK1) {
2242 STATE_CHECK_VAL(scv, mem);
2245 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2246 n = enclen(encode, s, end);
2261 CASE(OP_WORD) MOP_IN(OP_WORD);
2263 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2266 s += enclen(encode, s, end);
2270 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2272 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2275 s += enclen(encode, s, end);
2279 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2281 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2284 s += enclen(encode, s, end);
2288 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2290 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2293 s += enclen(encode, s, end);
2297 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2298 if (ON_STR_BEGIN(s)) {
2300 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2303 else if (ON_STR_END(s)) {
2304 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2308 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2309 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2315 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2316 if (ON_STR_BEGIN(s)) {
2318 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2321 else if (ON_STR_END(s)) {
2322 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2326 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2327 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2333 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2334 if (ON_STR_BEGIN(s)) {
2335 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2338 else if (ON_STR_END(s)) {
2339 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2343 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2344 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2350 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
2351 if (ON_STR_BEGIN(s)) {
2352 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2355 else if (ON_STR_END(s)) {
2356 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2360 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2361 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2367#ifdef USE_WORD_BEGIN_END
2368 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
2369 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2370 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2378 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
2379 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2380 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2388 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
2389 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2390 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2398 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
2399 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2400 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2409 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
2410 if (! ON_STR_BEGIN(s))
goto fail;
2411 if (IS_NOTBOS(msa->options))
goto fail;
2416 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
2417 if (! ON_STR_END(s))
goto fail;
2418 if (IS_NOTEOS(msa->options))
goto fail;
2423 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
2424 if (ON_STR_BEGIN(s)) {
2425 if (IS_NOTBOL(msa->options))
goto fail;
2429 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
2430#ifdef USE_CRNL_AS_LINE_TERMINATOR
2431 && !(IS_NEWLINE_CRLF(option)
2432 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
2434 && !ON_STR_END(s)) {
2441 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
2442 if (ON_STR_END(s)) {
2443#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2444 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2446 if (IS_NOTEOL(msa->options))
goto fail;
2449#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2453 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2460 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
2461 if (ON_STR_END(s)) {
2462#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2463 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2465 if (IS_NOTEOL(msa->options))
goto fail;
2468#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2472 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2473 UChar* ss = s + enclen(encode, s, end);
2474 if (ON_STR_END(ss)) {
2478#ifdef USE_CRNL_AS_LINE_TERMINATOR
2479 else if (IS_NEWLINE_CRLF(option)
2480 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2481 ss += enclen(encode, ss, end);
2482 if (ON_STR_END(ss)) {
2492 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
2499 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
2500 GET_MEMNUM_INC(mem, p);
2501 STACK_PUSH_MEM_START(mem, s);
2505 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
2506 GET_MEMNUM_INC(mem, p);
2507 mem_start_stk[mem] = (OnigStackIndex )((
void* )s);
2508 mem_end_stk[mem] = INVALID_STACK_INDEX;
2512 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
2513 GET_MEMNUM_INC(mem, p);
2514 STACK_PUSH_MEM_END(mem, s);
2518 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
2519 GET_MEMNUM_INC(mem, p);
2520 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
2524 CASE(OP_KEEP) MOP_IN(OP_KEEP);
2529#ifdef USE_SUBEXP_CALL
2530 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
2531 GET_MEMNUM_INC(mem, p);
2532 STACK_GET_MEM_START(mem, stkp);
2533 STACK_PUSH_MEM_END(mem, s);
2534 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2538 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
2539 GET_MEMNUM_INC(mem, p);
2540 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
2541 STACK_GET_MEM_START(mem, stkp);
2543 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2544 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2546 mem_start_stk[mem] = (OnigStackIndex )((
void* )stkp->u.mem.pstr);
2548 STACK_PUSH_MEM_END_MARK(mem);
2553 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
2558 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
2563 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
2564 GET_MEMNUM_INC(mem, p);
2568 UChar *pstart, *pend;
2572 if (mem > num_mem)
goto fail;
2573 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
2574 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
2576 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2577 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2579 pstart = (UChar* )((
void* )mem_start_stk[mem]);
2581 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2582 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2583 : (UChar* )((
void* )mem_end_stk[mem]));
2587 STRING_CMP(pstart, s, n);
2588 while (sprev + (len = enclen(encode, sprev, end)) < s)
2595 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
2596 GET_MEMNUM_INC(mem, p);
2599 UChar *pstart, *pend;
2603 if (mem > num_mem)
goto fail;
2604 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
2605 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
2607 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2608 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2610 pstart = (UChar* )((
void* )mem_start_stk[mem]);
2612 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2613 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2614 : (UChar* )((
void* )mem_end_stk[mem]));
2618 STRING_CMP_IC(case_fold_flag, pstart, &s, (
int)n, end);
2619 while (sprev + (len = enclen(encode, sprev, end)) < s)
2627 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
2630 UChar *pstart, *pend, *swork;
2632 GET_LENGTH_INC(tlen, p);
2633 for (i = 0; i < tlen; i++) {
2634 GET_MEMNUM_INC(mem, p);
2636 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
2637 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
2639 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2640 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2642 pstart = (UChar* )((
void* )mem_start_stk[mem]);
2644 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2645 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2646 : (UChar* )((
void* )mem_end_stk[mem]));
2648 DATA_ENSURE_CONTINUE(n);
2651 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2652 if (is_fail)
continue;
2654 while (sprev + (len = enclen(encode, sprev, end)) < s)
2657 p += (SIZE_MEMNUM * (tlen - i - 1));
2660 if (i == tlen)
goto fail;
2666 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
2669 UChar *pstart, *pend, *swork;
2671 GET_LENGTH_INC(tlen, p);
2672 for (i = 0; i < tlen; i++) {
2673 GET_MEMNUM_INC(mem, p);
2675 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
2676 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
2678 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2679 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2681 pstart = (UChar* )((
void* )mem_start_stk[mem]);
2683 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2684 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2685 : (UChar* )((
void* )mem_end_stk[mem]));
2687 DATA_ENSURE_CONTINUE(n);
2690 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
2691 if (is_fail)
continue;
2693 while (sprev + (len = enclen(encode, sprev, end)) < s)
2696 p += (SIZE_MEMNUM * (tlen - i - 1));
2699 if (i == tlen)
goto fail;
2704#ifdef USE_BACKREF_WITH_LEVEL
2705 CASE(OP_BACKREF_WITH_LEVEL)
2711 GET_OPTION_INC(ic, p);
2712 GET_LENGTH_INC(level, p);
2713 GET_LENGTH_INC(tlen, p);
2716 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
2717 case_fold_flag, (
int )level, (
int )tlen, p, &s, end)) {
2718 while (sprev + (len = enclen(encode, sprev, end)) < s)
2721 p += (SIZE_MEMNUM * tlen);
2733 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
2734 GET_OPTION_INC(option, p);
2735 STACK_PUSH_ALT(p, s, sprev, pkeep);
2736 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2740 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
2741 GET_OPTION_INC(option, p);
2746 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
2747 GET_MEMNUM_INC(mem, p);
2748 STACK_PUSH_NULL_CHECK_START(mem, s);
2752 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
2756 GET_MEMNUM_INC(mem, p);
2757 STACK_NULL_CHECK(isnull, mem, s);
2759#ifdef ONIG_DEBUG_MATCH
2760 fprintf(stderr,
"NULL_CHECK_END: skip id:%d, s:%"PRIuPTR
" (%p)\n",
2761 (
int )mem, (uintptr_t )s, s);
2771 case OP_REPEAT_INC_NG:
2772 case OP_REPEAT_INC_SG:
2773 case OP_REPEAT_INC_NG_SG:
2777 goto unexpected_bytecode_error;
2785#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2786 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
2790 GET_MEMNUM_INC(mem, p);
2791 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2793# ifdef ONIG_DEBUG_MATCH
2794 fprintf(stderr,
"NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR
" (%p)\n",
2795 (
int )mem, (uintptr_t )s, s);
2797 if (isnull == -1)
goto fail;
2798 goto null_check_found;
2805#ifdef USE_SUBEXP_CALL
2806 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
2807 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2811 GET_MEMNUM_INC(mem, p);
2812# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2813 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2815 STACK_NULL_CHECK_REC(isnull, mem, s);
2818# ifdef ONIG_DEBUG_MATCH
2819 fprintf(stderr,
"NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR
" (%p)\n",
2820 (
int )mem, (uintptr_t )s, s);
2822 if (isnull == -1)
goto fail;
2823 goto null_check_found;
2826 STACK_PUSH_NULL_CHECK_END(mem);
2833 CASE(OP_JUMP) MOP_IN(OP_JUMP);
2834 GET_RELADDR_INC(addr, p);
2837 CHECK_INTERRUPT_IN_MATCH_AT;
2840 CASE(OP_PUSH) MOP_IN(OP_PUSH);
2841 GET_RELADDR_INC(addr, p);
2842 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2846#ifdef USE_COMBINATION_EXPLOSION_CHECK
2847 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
2848 GET_STATE_CHECK_NUM_INC(mem, p);
2849 STATE_CHECK_VAL(scv, mem);
2852 GET_RELADDR_INC(addr, p);
2853 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2857 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2858 GET_STATE_CHECK_NUM_INC(mem, p);
2859 GET_RELADDR_INC(addr, p);
2860 STATE_CHECK_VAL(scv, mem);
2865 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2870 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
2871 GET_STATE_CHECK_NUM_INC(mem, p);
2872 STATE_CHECK_VAL(scv, mem);
2875 STACK_PUSH_STATE_CHECK(s, mem);
2880 CASE(OP_POP) MOP_IN(OP_POP);
2885#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2886 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2887 GET_RELADDR_INC(addr, p);
2888 if (*p == *s && DATA_ENSURE_CHECK1) {
2890 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2899 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2900 GET_RELADDR_INC(addr, p);
2903 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2911 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
2913 GET_MEMNUM_INC(mem, p);
2914 GET_RELADDR_INC(addr, p);
2917 repeat_stk[mem] = GET_STACK_INDEX(stk);
2918 STACK_PUSH_REPEAT(mem, p);
2920 if (reg->repeat_range[mem].lower == 0) {
2921 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2927 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
2929 GET_MEMNUM_INC(mem, p);
2930 GET_RELADDR_INC(addr, p);
2933 repeat_stk[mem] = GET_STACK_INDEX(stk);
2934 STACK_PUSH_REPEAT(mem, p);
2936 if (reg->repeat_range[mem].lower == 0) {
2937 STACK_PUSH_ALT(p, s, sprev, pkeep);
2944 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
2945 GET_MEMNUM_INC(mem, p);
2946 si = repeat_stk[mem];
2947 stkp = STACK_AT(si);
2950 stkp->u.repeat.count++;
2951 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2954 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2955 STACK_PUSH_ALT(p, s, sprev, pkeep);
2956 p = STACK_AT(si)->u.repeat.pcode;
2959 p = stkp->u.repeat.pcode;
2961 STACK_PUSH_REPEAT_INC(si);
2963 CHECK_INTERRUPT_IN_MATCH_AT;
2966 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
2967 GET_MEMNUM_INC(mem, p);
2968 STACK_GET_REPEAT(mem, stkp);
2969 si = GET_STACK_INDEX(stkp);
2973 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
2974 GET_MEMNUM_INC(mem, p);
2975 si = repeat_stk[mem];
2976 stkp = STACK_AT(si);
2979 stkp->u.repeat.count++;
2980 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2981 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2982 UChar* pcode = stkp->u.repeat.pcode;
2984 STACK_PUSH_REPEAT_INC(si);
2985 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
2988 p = stkp->u.repeat.pcode;
2989 STACK_PUSH_REPEAT_INC(si);
2992 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2993 STACK_PUSH_REPEAT_INC(si);
2996 CHECK_INTERRUPT_IN_MATCH_AT;
2999 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3000 GET_MEMNUM_INC(mem, p);
3001 STACK_GET_REPEAT(mem, stkp);
3002 si = GET_STACK_INDEX(stkp);
3006 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3007 STACK_PUSH_POS(s, sprev, pkeep);
3011 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3013 STACK_POS_END(stkp);
3014 s = stkp->u.state.pstr;
3015 sprev = stkp->u.state.pstr_prev;
3020 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3021 GET_RELADDR_INC(addr, p);
3022 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3026 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3027 STACK_POP_TIL_POS_NOT;
3031 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3036 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3041 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3042 GET_LENGTH_INC(tlen, p);
3043 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
3044 if (IS_NULL(s))
goto fail;
3045 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3049 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3050 GET_RELADDR_INC(addr, p);
3051 GET_LENGTH_INC(tlen, p);
3052 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
3060 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3062 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3067 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3068 STACK_POP_TIL_LOOK_BEHIND_NOT;
3072 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3074 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3078 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3080 const UChar* aend = ABSENT_END_POS;
3082 UChar* selfp = p - 1;
3084 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS);
3085 GET_RELADDR_INC(addr, p);
3086#ifdef ONIG_DEBUG_MATCH
3087 fprintf(stderr,
"ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3089 if ((absent > aend) && (s > absent)) {
3095 else if ((s >= aend) && (s > absent)) {
3106 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3107 n = enclen(encode, s, end);
3108 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS);
3109 STACK_PUSH_ALT(selfp, s + n, s, pkeep);
3111 ABSENT_END_POS = aend;
3117 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3120 if (sprev < ABSENT_END_POS)
3121 ABSENT_END_POS = sprev;
3122#ifdef ONIG_DEBUG_MATCH
3123 fprintf(stderr,
"ABSENT_END: end:%p\n", ABSENT_END_POS);
3125 STACK_POP_TIL_ABSENT;
3129#ifdef USE_SUBEXP_CALL
3130 CASE(OP_CALL) MOP_IN(OP_CALL);
3131 GET_ABSADDR_INC(addr, p);
3132 STACK_PUSH_CALL_FRAME(p);
3137 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3144 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3145 GET_MEMNUM_INC(mem, p);
3146 GET_RELADDR_INC(addr, p);
3147 if ((mem > num_mem) ||
3148 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3149 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3167 p = stk->u.state.pcode;
3168 s = stk->u.state.pstr;
3169 sprev = stk->u.state.pstr_prev;
3170 pkeep = stk->u.state.pkeep;
3172#ifdef USE_COMBINATION_EXPLOSION_CHECK
3173 if (stk->u.state.state_check != 0) {
3174 stk->type = STK_STATE_CHECK_MARK;
3183 goto bytecode_error;
3188 if (xmalloc_base)
xfree(xmalloc_base);
3194 if (xmalloc_base)
xfree(xmalloc_base);
3195 return ONIGERR_STACK_BUG;
3200 if (xmalloc_base)
xfree(xmalloc_base);
3201 return ONIGERR_UNDEFINED_BYTECODE;
3203 unexpected_bytecode_error:
3205 if (xmalloc_base)
xfree(xmalloc_base);
3206 return ONIGERR_UNEXPECTED_BYTECODE;
/*
 * slow_search: forward scan of the text for the byte sequence
 * [target, target_end). Both loops shown below end in a NULL return when
 * nothing matched.
 * NOTE(review): only part of the body is visible in this excerpt; the
 * comments describe the visible statements only.
 */
3211slow_search(
OnigEncoding enc, UChar* target, UChar* target_end,
3212 const UChar* text,
const UChar* text_end, UChar* text_range)
3214 UChar *t, *p, *s, *end;
/* end = text_end - (target length - 1): the first start position at which
 * the whole target would no longer fit before text_end. */
3216 end = (UChar* )text_end;
3217 end -= target_end - target - 1;
/* NOTE(review): the statement guarded by this test is not visible here;
 * presumably it clamps end to text_range -- confirm. */
3218 if (end > text_range)
/* Encodings whose minimum and maximum encoded lengths coincide: n is that
 * common byte width. */
3223 if (enc->max_enc_len == enc->min_enc_len) {
3224 int n = enc->max_enc_len;
/* Cheap first-byte test; memcmp then covers the bytes from t up to
 * target_end (target_end == t means nothing is left to compare). */
3227 if (*s == *target) {
3230 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3235 return (UChar* )NULL;
/* Second scan (presumably the variable-width case -- confirm): same
 * comparison, with s advanced by the encoded length of the character at s. */
3238 if (*s == *target) {
3241 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3244 s += enclen(enc, s, text_end);
3247 return (UChar* )NULL;
/*
 * str_lower_case_match: compares the bytes starting at t (bounded by tend)
 * against the case-folded form of the text starting at p (bounded by end).
 * Returns 0 as soon as a target byte differs from a folded byte.
 * NOTE(review): the success return and the outer loop are not visible in
 * this excerpt.
 */
3251str_lower_case_match(
OnigEncoding enc,
int case_fold_flag,
3252 const UChar* t,
const UChar* tend,
3253 const UChar* p,
const UChar* end)
/* lowbuf receives the folded bytes of one character; it is sized by
 * ONIGENC_MBC_CASE_FOLD_MAXLEN. */
3256 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
/* p is passed by address, so the fold macro can move it (presumably past
 * the character it consumed -- confirm); lowlen is the folded byte count. */
3259 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
/* Byte-by-byte comparison of the target against the folded output. */
3261 while (lowlen > 0) {
3262 if (*t++ != *q++)
return 0;
3272 UChar* target, UChar* target_end,
3273 const UChar* text,
const UChar* text_end, UChar* text_range)
/* NOTE(review): the opening line of this signature is not part of the
 * excerpt. Given the call to str_lower_case_match below, this appears to be
 * the case-insensitive counterpart of slow_search -- confirm. */
/* end = text_end - (target length - 1): the first start position at which
 * the whole target would no longer fit before text_end. */
3277 end = (UChar* )text_end;
3278 end -= target_end - target - 1;
/* NOTE(review): the guarded statement is not visible; presumably it clamps
 * end to text_range -- confirm. */
3279 if (end > text_range)
/* Each candidate position is tested with the case-folding comparison. */
3285 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Advance by the encoded length of the character at s. */
3289 s += enclen(enc, s, text_end);
/* No candidate matched. */
3292 return (UChar* )NULL;
/*
 * slow_search_backward: scans the text from high addresses toward low ones
 * for the byte sequence [target, target_end), returning NULL when the scan
 * finds nothing.
 * NOTE(review): only part of the body is visible in this excerpt.
 */
3296slow_search_backward(
OnigEncoding enc, UChar* target, UChar* target_end,
3297 const UChar* text,
const UChar* adjust_text,
3298 const UChar* text_end,
const UChar* text_start)
/* First candidate: text_end minus the target length, i.e. the last position
 * at which the whole target still fits. */
3302 s = (UChar* )text_end;
3303 s -= (target_end - target);
/* NOTE(review): the conditions selecting between the next two assignments
 * are not visible; s either becomes text_start or is moved back to a
 * character head relative to adjust_text. */
3305 s = (UChar* )text_start;
3307 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
/* First-byte test, then a byte loop over the target; t reaching target_end
 * means every byte compared equal. */
3310 if (*s == *target) {
3313 while (t < target_end) {
3318 if (t == target_end)
/* Step back to the head of the previous character. */
3321 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3324 return (UChar* )NULL;
/*
 * slow_search_backward_ic: backward scan like slow_search_backward, but each
 * candidate is tested with str_lower_case_match (case-folded comparison).
 * Returns NULL when the scan finds nothing.
 * NOTE(review): only part of the body is visible in this excerpt.
 */
3328slow_search_backward_ic(
OnigEncoding enc,
int case_fold_flag,
3329 UChar* target, UChar* target_end,
3330 const UChar* text,
const UChar* adjust_text,
3331 const UChar* text_end,
const UChar* text_start)
/* First candidate: text_end minus the target length. */
3335 s = (UChar* )text_end;
3336 s -= (target_end - target);
/* NOTE(review): the conditions selecting between the next two assignments
 * are not visible; s either becomes text_start or is moved back to a
 * character head relative to adjust_text. */
3338 s = (UChar* )text_start;
3340 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
/* Case-folded comparison of the target against the text at s. */
3343 if (str_lower_case_match(enc, case_fold_flag,
3344 target, target_end, s, text_end))
/* Step back to the head of the previous character. */
3347 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3350 return (UChar* )NULL;
3353#ifndef USE_SUNDAY_QUICK_SEARCH
/*
 * bm_search_notrev (variant compiled when USE_SUNDAY_QUICK_SEARCH is not
 * defined): forward search for [target, target_end) driven by a skip table,
 * advancing s one whole character at a time. Returns the match position s,
 * or NULL.
 * NOTE(review): only part of the body is visible in this excerpt.
 */
3356bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3357 const UChar* text,
const UChar* text_end,
3358 const UChar* text_range)
3360 const UChar *s, *se, *t, *p, *end;
3362 ptrdiff_t skip, tlen1;
/* Debug trace of the three text pointers, printed as integers and as %p. */
3364# ifdef ONIG_DEBUG_SEARCH
3365 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3366 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail = last byte of the target; tlen1 = target length minus one. */
3369 tail = target_end - 1;
3370 tlen1 = tail - target;
/* Pull end back so that end + tlen1 does not pass text_end. */
3372 if (end + tlen1 > text_end)
3373 end = text_end - tlen1;
/* A NULL int_map selects the reg->map table; otherwise int_map is used. */
3377 if (IS_NULL(reg->int_map)) {
/* t having walked back to target means every byte compared equal. */
3382 if (t == target)
return (UChar* )s;
/* Skip distance is looked up by the text byte at se. */
3385 skip = reg->map[*se];
/* Advance by whole characters until the skip distance is covered or end is
 * reached (t is presumably the position before skipping -- confirm). */
3388 s += enclen(reg->enc, s, end);
3389 }
while ((s - t) < skip && s < end);
3393# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3398 if (t == target)
return (UChar* )s;
/* Same scheme as above, with the skip read from int_map. */
3401 skip = reg->int_map[*se];
3404 s += enclen(reg->enc, s, end);
3405 }
while ((s - t) < skip && s < end);
3410 return (UChar* )NULL;
/*
 * bm_search (variant compiled when USE_SUNDAY_QUICK_SEARCH is not defined):
 * forward search for [target, target_end) in which s tracks the last byte of
 * the current window and is advanced by a table lookup on the text byte at s.
 * Returns the window start p on a match, or NULL.
 * NOTE(review): only part of the body is visible in this excerpt.
 */
3415bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3416 const UChar* text,
const UChar* text_end,
const UChar* text_range)
3418 const UChar *s, *t, *p, *end;
/* Debug trace of the three text pointers, printed as integers and as %p. */
3421# ifdef ONIG_DEBUG_SEARCH
3422 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3423 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* end = text_range + (target length - 1): text_range shifted to the
 * window's last-byte coordinate used by s. */
3426 end = text_range + (target_end - target) - 1;
/* tail = last byte of the target; s starts on the last byte of the first
 * window, text + (target length - 1). */
3430 tail = target_end - 1;
3431 s = text + (target_end - target) - 1;
/* A NULL int_map selects the first loop; otherwise int_map is used. */
3432 if (IS_NULL(reg->int_map)) {
/* NOTE(review): %s prints from s up to the next NUL byte, so this trace
 * relies on the text being NUL-terminated -- confirm. */
3436# ifdef ONIG_DEBUG_SEARCH
3437 fprintf(stderr,
"bm_search_loop: pos: %"PRIdPTR
" %s\n",
3438 (intptr_t )(s - text), s);
/* t having walked back to target means every byte compared equal. */
3441 if (t == target)
return (UChar* )p;
3448# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3453 if (t == target)
return (UChar* )p;
/* Shift the window by the int_map entry for the text byte at s. */
3456 s += reg->int_map[*s];
3460 return (UChar* )NULL;
/*
 * bm_search_notrev_ic (variant compiled when USE_SUNDAY_QUICK_SEARCH is not
 * defined): case-insensitive forward search for [target, target_end). Each
 * candidate is tested with str_lower_case_match; on a mismatch s advances by
 * whole characters until the skip-table distance is covered. Returns NULL
 * when nothing matched.
 * NOTE(review): only part of the body is visible in this excerpt.
 */
3465bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3466 const UChar* text,
const UChar* text_end,
3467 const UChar* text_range)
3469 const UChar *s, *se, *t, *end;
3471 ptrdiff_t skip, tlen1;
3473 int case_fold_flag = reg->case_fold_flag;
/* Debug trace. Addresses go through uintptr_t/PRIuPTR (as in the
 * case-sensitive searchers) rather than an (int) cast with %d, which
 * truncates pointers wherever they are wider than int. */
3475# ifdef ONIG_DEBUG_SEARCH
3476 fprintf(stderr,
"bm_search_notrev_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3477 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail = last byte of the target; tlen1 = target length minus one. */
3480 tail = target_end - 1;
3481 tlen1 = tail - target;
/* Pull end back so that end + tlen1 does not pass text_end. */
3483 if (end + tlen1 > text_end)
3484 end = text_end - tlen1;
/* A NULL int_map selects the reg->map table; otherwise int_map is used. */
3488 if (IS_NULL(reg->int_map)) {
3491 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Skip distance is looked up by the text byte at se. */
3494 skip = reg->map[*se];
/* Advance by whole characters until the skip distance is covered or end is
 * reached (t is presumably the position before skipping -- confirm). */
3497 s += enclen(reg->enc, s, end);
3498 }
while ((s - t) < skip && s < end);
3502# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3505 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Same scheme as above, with the skip read from int_map. */
3508 skip = reg->int_map[*se];
3511 s += enclen(reg->enc, s, end);
3512 }
while ((s - t) < skip && s < end);
3517 return (UChar* )NULL;
/*
 * bm_search_ic (variant compiled when USE_SUNDAY_QUICK_SEARCH is not
 * defined): case-insensitive forward search for [target, target_end). s
 * tracks the last byte of the current window; the window start p is tested
 * with str_lower_case_match, and s is advanced by a skip-table lookup.
 * Returns NULL when nothing matched.
 * NOTE(review): only part of the body is visible in this excerpt.
 */
3522bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3523 const UChar* text,
const UChar* text_end,
const UChar* text_range)
3525 const UChar *s, *p, *end;
3528 int case_fold_flag = reg->case_fold_flag;
/* Debug trace. Addresses go through uintptr_t/PRIuPTR (as in the
 * case-sensitive searchers) rather than an (int) cast with %d, which
 * truncates pointers wherever they are wider than int. */
3530# ifdef ONIG_DEBUG_SEARCH
3531 fprintf(stderr,
"bm_search_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3532 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* end = text_range + (target length - 1): text_range shifted to the
 * window's last-byte coordinate used by s. */
3535 end = text_range + (target_end - target) - 1;
/* tail = last byte of the target; s starts on the last byte of the first
 * window, text + (target length - 1). */
3539 tail = target_end - 1;
3540 s = text + (target_end - target) - 1;
/* A NULL int_map selects the first loop; otherwise int_map is used. */
3541 if (IS_NULL(reg->int_map)) {
/* p = first byte of the window whose last byte is s. */
3543 p = s - (target_end - target) + 1;
3544 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3551# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3553 p = s - (target_end - target) + 1;
3554 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Shift the window by the int_map entry for the text byte at s. */
3557 s += reg->int_map[*s];
3561 return (UChar* )NULL;
/*
 * bm_search_notrev (second definition in this excerpt; presumably the
 * USE_SUNDAY_QUICK_SEARCH build -- confirm): forward search for
 * [target, target_end) in which the skip is looked up by the byte following
 * se (se[1]) and s advances one whole character at a time. Returns the
 * match position s, or NULL.
 * NOTE(review): only part of the body is visible in this excerpt.
 */
3568bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3569 const UChar* text,
const UChar* text_end,
3570 const UChar* text_range)
3572 const UChar *s, *se, *t, *p, *end;
3574 ptrdiff_t skip, tlen1;
/* Debug trace of the three text pointers, printed as integers and as %p. */
3577# ifdef ONIG_DEBUG_SEARCH
3578 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3579 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail = last byte of the target; tlen1 = target length minus one. */
3582 tail = target_end - 1;
3583 tlen1 = tail - target;
/* Pull end back so that end + tlen1 does not pass text_end. */
3585 if (end + tlen1 > text_end)
3586 end = text_end - tlen1;
/* A NULL int_map selects the reg->map table; otherwise int_map is used. */
3590 if (IS_NULL(reg->int_map)) {
/* t having walked back to target means every byte compared equal. */
3595 if (t == target)
return (UChar* )s;
/* Stop before looking up the next skip once s + 1 has reached end. */
3598 if (s + 1 >= end)
break;
/* Skip distance is looked up by the byte after se. */
3599 skip = reg->map[se[1]];
/* Advance by whole characters until the skip distance is covered or end is
 * reached (t is presumably the position before skipping -- confirm). */
3602 s += enclen(enc, s, end);
3603 }
while ((s - t) < skip && s < end);
3607# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3612 if (t == target)
return (UChar* )s;
3615 if (s + 1 >= end)
break;
/* Same scheme as above, with the skip read from int_map. */
3616 skip = reg->int_map[se[1]];
3619 s += enclen(enc, s, end);
3620 }
while ((s - t) < skip && s < end);
3625 return (UChar* )NULL;
/*
 * bm_search: skip-table search for [target, target_end) in text.
 *
 * NOTE: this listing is an extract; loop headers and the inner comparison
 * loop are not shown.
 *
 * What the visible lines establish:
 *  - tlen1 is the target length minus one and end = text_range + tlen1.
 *  - when t has walked back to target the function returns p.
 *  - after the s + 1 >= end test, s jumps by reg->map[s[1]]
 *    (reg->int_map[s[1]] in the OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
 *    section), i.e. the table is indexed by the byte after s.
 *  - the last visible statement returns NULL.
 */
3630bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3631 const UChar* text,
const UChar* text_end,
const UChar* text_range)
3633 const UChar *s, *t, *p, *end;
3637# ifdef ONIG_DEBUG_SEARCH
3638 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3639 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3642 tail = target_end - 1;
3643 tlen1 = tail - target;
3644 end = text_range + tlen1;
3649 if (IS_NULL(reg->int_map)) {
3654 if (t == target)
return (UChar* )p;
3657 if (s + 1 >= end)
break;
3658 s += reg->map[s[1]];
3662# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3667 if (t == target)
return (UChar* )p;
3670 if (s + 1 >= end)
break;
3671 s += reg->int_map[s[1]];
3675 return (UChar* )NULL;
/*
 * bm_search_notrev_ic: case-insensitive counterpart of the character-wise
 * skip-table search.
 *
 * NOTE: this listing is an extract; loop headers and the remaining
 * arguments of str_lower_case_match() are not shown.
 *
 * What the visible lines establish:
 *  - tlen1 is the target length minus one.
 *  - end is clamped so that end + tlen1 does not pass text_end.
 *  - candidates are tested with str_lower_case_match() using
 *    reg->case_fold_flag.
 *  - the skip distance is read from reg->map[se[1]] (reg->int_map[se[1]] in
 *    the OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE section) after the
 *    s + 1 >= end test.
 *  - s is advanced with enclen() while (s - t) < skip and s < end.
 *  - the last visible statement returns NULL.
 */
3680bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3681 const UChar* text,
const UChar* text_end,
3682 const UChar* text_range)
3684 const UChar *s, *se, *t, *end;
3686 ptrdiff_t skip, tlen1;
3688 int case_fold_flag = reg->case_fold_flag;
3690# ifdef ONIG_DEBUG_SEARCH
3691 fprintf(stderr,
"bm_search_notrev_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3692 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3695 tail = target_end - 1;
3696 tlen1 = tail - target;
/* Keep end far enough from text_end that a full target still fits. */
3698 if (end + tlen1 > text_end)
3699 end = text_end - tlen1;
3703 if (IS_NULL(reg->int_map)) {
3706 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3709 if (s + 1 >= end)
break;
3710 skip = reg->map[se[1]];
3713 s += enclen(enc, s, end);
3714 }
while ((s - t) < skip && s < end);
3718# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3721 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3724 if (s + 1 >= end)
break;
3725 skip = reg->int_map[se[1]];
3728 s += enclen(enc, s, end);
3729 }
while ((s - t) < skip && s < end);
3734 return (UChar* )NULL;
/*
 * bm_search_ic: case-insensitive skip-table search for [target, target_end)
 * in text (second definition of this name in the file; presumably selected
 * by a preprocessor branch that is not visible here -- TODO confirm).
 *
 * NOTE: this listing is an extract; loop headers and the remaining
 * arguments of str_lower_case_match() are not shown.
 *
 * What the visible lines establish:
 *  - tlen1 is the target length minus one and end = text_range + tlen1.
 *  - candidates are tested with str_lower_case_match() using
 *    reg->case_fold_flag.
 *  - after the s + 1 >= end test, s jumps by reg->map[s[1]]
 *    (reg->int_map[s[1]] in the OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
 *    section).
 *  - the last visible statement returns NULL.
 */
3739bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3740 const UChar* text,
const UChar* text_end,
const UChar* text_range)
3742 const UChar *s, *p, *end;
3746 int case_fold_flag = reg->case_fold_flag;
3748# ifdef ONIG_DEBUG_SEARCH
3749 fprintf(stderr,
"bm_search_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
3750 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3753 tail = target_end - 1;
3754 tlen1 = tail - target;
3755 end = text_range + tlen1;
3760 if (IS_NULL(reg->int_map)) {
3763 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3766 if (s + 1 >= end)
break;
3767 s += reg->map[s[1]];
3771# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3774 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3777 if (s + 1 >= end)
break;
3778 s += reg->int_map[s[1]];
3782 return (UChar* )NULL;
3786#ifdef USE_INT_MAP_BACKWARD
/*
 * set_bm_backward_skip: prepare the int skip table used by the backward
 * skip-table search for the pattern bytes [s, end).
 *
 * NOTE: this listing is an extract; the rest of the parameter list and the
 * bodies of both for-loops are not shown.
 *
 * What the visible lines establish:
 *  - if *skip is NULL a table of ONIG_CHAR_TABLE_SIZE ints is allocated
 *    with xmalloc(); allocation failure returns ONIGERR_MEMORY.
 *  - len is the pattern length in bytes, narrowed to int.
 *  - one loop visits every table slot, a second walks i from len - 1 down
 *    to 1.
 *  - enc is marked ARG_UNUSED.
 */
3788set_bm_backward_skip(UChar* s, UChar* end,
OnigEncoding enc ARG_UNUSED,
3793 if (IS_NULL(*skip)) {
3794 *skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
3795 if (IS_NULL(*skip))
return ONIGERR_MEMORY;
3798 len = (int )(end - s);
3799 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3802 for (i = len - 1; i > 0; i--)
/*
 * bm_search_backward: backward skip-table search for [target, target_end).
 *
 * NOTE: this listing is an extract; loop headers and the success return are
 * not shown.
 *
 * What the visible lines establish:
 *  - s starts one target length before text_end and is aligned to a
 *    character head with ONIGENC_LEFT_ADJUST_CHAR_HEAD().
 *  - p and t are compared byte by byte while t < target_end; reaching
 *    t == target_end is tested right after that loop.
 *  - on the visible fall-through path s moves back by
 *    reg->int_map_backward[*s] and is re-aligned to a character head.
 *  - the last visible statement returns NULL.
 */
3809bm_search_backward(
regex_t* reg,
const UChar* target,
const UChar* target_end,
3810 const UChar* text,
const UChar* adjust_text,
3811 const UChar* text_end,
const UChar* text_start)
3813 const UChar *s, *t, *p;
3815 s = text_end - (target_end - target);
3819 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3824 while (t < target_end && *p == *t) {
3827 if (t == target_end)
3830 s -= reg->int_map_backward[*s];
3831 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3834 return (UChar* )NULL;
/*
 * Forward scan over a byte map. The line carrying the function name and
 * the enc/map parameters is not part of this extract; presumably this is
 * map_search(), judging by the call map_search(reg->enc, reg->map, p,
 * range, end) elsewhere in the file -- TODO confirm.
 *
 * Visible behaviour: starting at text, return the first position s below
 * text_range whose lead byte has a non-zero entry in map; otherwise step to
 * the next character with enclen() and finally return NULL.
 */
3840 const UChar* text,
const UChar* text_range,
const UChar* text_end)
3842 const UChar *s = text;
3844 while (s < text_range) {
3845 if (map[*s])
return (UChar* )s;
3847 s += enclen(enc, s, text_end);
3849 return (UChar* )NULL;
/*
 * Backward scan over a byte map. The line carrying the function name and
 * the enc/map parameters, and the loop header, are not part of this
 * extract; presumably this is map_search_backward(), judging by the call
 * map_search_backward(reg->enc, reg->map, range, adjrange, p, end)
 * elsewhere in the file -- TODO confirm.
 *
 * Visible behaviour: starting at text_start, return s when map[*s] is
 * non-zero; otherwise move to the previous character head via
 * onigenc_get_prev_char_head(); the last visible statement returns NULL.
 */
3854 const UChar* text,
const UChar* adjust_text,
3855 const UChar* text_start,
const UChar* text_end)
3857 const UChar *s = text_start;
3860 if (map[*s])
return (UChar* )s;
3862 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3864 return (UChar* )NULL;
/*
 * onig_match: run the matcher at the single position `at` inside
 * [str, end).
 *
 * NOTE: this listing is an extract; the return type, local declarations and
 * several statements are not shown.
 *
 * What the visible lines establish:
 *  - match state is set up with MATCH_ARG_INIT(msa, option, region, at, at)
 *    and released with MATCH_ARG_FREE(msa).
 *  - with USE_COMBINATION_EXPLOSION_CHECK the state-check buffer is sized
 *    from end - str with offset at - str.
 *  - the region is resized and cleared to reg->num_mem + 1 entries.
 *  - prev is the head of the character preceding `at`, and match_at() is
 *    then called.
 */
3868onig_match(
regex_t* reg,
const UChar* str,
const UChar* end,
const UChar* at,
OnigRegion* region,
3869 OnigOptionType option)
3875 MATCH_ARG_INIT(msa, option, region, at, at);
3876#ifdef USE_COMBINATION_EXPLOSION_CHECK
3878 ptrdiff_t offset = at - str;
3879 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3884 r = onig_region_resize_clear(region, reg->num_mem + 1);
3890 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
3891 r = match_at(reg, str, end,
3892#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3898 MATCH_ARG_FREE(msa);
/*
 * forward_search_range: use the pattern's optimization info to find the
 * next candidate window [*low, *high] at or after s, before `range`.
 *
 * NOTE: this listing is an extract; labels, gotos, braces and some guards
 * are not shown.
 *
 * What the visible lines establish:
 *  - when reg->dmin > 0 the probe position is moved forward; for
 *    multi-byte encodings q = p + reg->dmin, the function returns 0 if
 *    q >= end, and p is stepped with enclen() until it reaches q.
 *  - reg->optimize selects the search routine: slow_search, slow_search_ic,
 *    bm_search, bm_search_notrev, bm_search_ic, bm_search_notrev_ic or
 *    map_search.
 *  - a hit is only considered when p is non-NULL and p < range.
 *  - reg->sub_anchor adds ANCHOR_BEGIN_LINE / ANCHOR_END_LINE checks built
 *    on ONIGENC_IS_MBC_NEWLINE_EX().
 *  - *low and *low_prev are derived from p and reg->dmax; *high is
 *    p - reg->dmin.
 *  - low_prev may be NULL: one visible test reads
 *    `low_prev && IS_NULL(*low_prev)`, and onig_search_gpos passes
 *    (UChar** )NULL for it in one call. The other `*low_prev = ...` stores
 *    are presumably guarded by lines missing from this extract -- verify.
 */
3903forward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end, UChar* s,
3904 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3906 UChar *p, *pprev = (UChar* )NULL;
3908#ifdef ONIG_DEBUG_SEARCH
3909 fprintf(stderr,
"forward_search_range: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), s: %"PRIuPTR
" (%p), range: %"PRIuPTR
" (%p)\n",
3910 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
3914 if (reg->dmin > 0) {
3915 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3919 UChar *q = p + reg->dmin;
3921 if (q >= end)
return 0;
3922 while (p < q) p += enclen(reg->enc, p, end);
/* Dispatch to the search primitive chosen when the pattern was compiled. */
3927 switch (reg->optimize) {
3928 case ONIG_OPTIMIZE_EXACT:
3929 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3931 case ONIG_OPTIMIZE_EXACT_IC:
3932 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3933 reg->exact, reg->exact_end, p, end, range);
3936 case ONIG_OPTIMIZE_EXACT_BM:
3937 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3940 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3941 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3944 case ONIG_OPTIMIZE_EXACT_BM_IC:
3945 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
3948 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
3949 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
3952 case ONIG_OPTIMIZE_MAP:
3953 p = map_search(reg->enc, reg->map, p, range, end);
3957 if (p && p < range) {
3958 if (p - reg->dmin < s) {
3961 p += enclen(reg->enc, p, end);
3965 if (reg->sub_anchor) {
3968 switch (reg->sub_anchor) {
3969 case ANCHOR_BEGIN_LINE:
3970 if (!ON_STR_BEGIN(p)) {
3971 prev = onigenc_get_prev_char_head(reg->enc,
3972 (pprev ? pprev : str), p, end);
3973 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
3978 case ANCHOR_END_LINE:
3979 if (ON_STR_END(p)) {
/* NOTE(review): the call in this #ifndef branch passes three arguments to
 * onigenc_get_prev_char_head(), while every other visible call in this
 * file passes four (the last being `end`). If this branch is ever
 * compiled, `end` most likely needs to be appended -- confirm against the
 * function's prototype. */
3980#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3981 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3982 (pprev ? pprev : str), p);
3983 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
3987 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
3993 if (reg->dmax == 0) {
3997 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
3999 *low_prev = onigenc_get_prev_char_head(reg->enc,
4000 (pprev ? pprev : str), p, end);
4004 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4005 if (p < str + reg->dmax) {
4006 *low = (UChar* )str;
4008 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4011 *low = p - reg->dmax;
4013 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4014 *low, end, (
const UChar** )low_prev);
4015 if (low_prev && IS_NULL(*low_prev))
4016 *low_prev = onigenc_get_prev_char_head(reg->enc,
4017 (pprev ? pprev : s), *low, end);
4021 *low_prev = onigenc_get_prev_char_head(reg->enc,
4022 (pprev ? pprev : str), *low, end);
4028 *high = p - reg->dmin;
4030#ifdef ONIG_DEBUG_SEARCH
4032 "forward_search_range success: low: %"PRIdPTR
", high: %"PRIdPTR
", dmin: %"PRIdPTR
", dmax: %"PRIdPTR
"\n",
4033 *low - str, *high - str, reg->dmin, reg->dmax);
/* Compared against s - range in backward_search_range() just before the
 * backward skip table is set up with set_bm_backward_skip(); the statement
 * controlled by that comparison is not part of this extract. */
4041#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
/*
 * backward_search_range: use the pattern's optimization info to find a
 * candidate window [*low, *high] when searching from s back towards
 * `range`.
 *
 * NOTE: this listing is an extract; labels, gotos, braces and the return
 * statements are not shown.
 *
 * What the visible lines establish:
 *  - reg->optimize selects the routine: slow_search_backward for EXACT;
 *    slow_search_backward_ic for EXACT_IC, EXACT_BM_IC and
 *    EXACT_BM_NOT_REV_IC; for EXACT_BM / EXACT_BM_NOT_REV with
 *    USE_INT_MAP_BACKWARD the table reg->int_map_backward is built on
 *    demand and bm_search_backward is used; map_search_backward for MAP.
 *  - reg->sub_anchor adds ANCHOR_BEGIN_LINE / ANCHOR_END_LINE checks built
 *    on ONIGENC_IS_MBC_NEWLINE_EX(); a NULL previous-character head leads
 *    to the fail label.
 *  - when reg->dmax is finite, *low = p - reg->dmax and *high = p -
 *    reg->dmin, with *high re-aligned to a character head.
 */
4044backward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end,
4045 UChar* s,
const UChar* range, UChar* adjrange,
4046 UChar** low, UChar** high)
4054 switch (reg->optimize) {
4055 case ONIG_OPTIMIZE_EXACT:
4057 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4058 range, adjrange, end, p);
4061 case ONIG_OPTIMIZE_EXACT_IC:
4062 case ONIG_OPTIMIZE_EXACT_BM_IC:
4063 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4064 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4065 reg->exact, reg->exact_end,
4066 range, adjrange, end, p);
4069 case ONIG_OPTIMIZE_EXACT_BM:
4070 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4071#ifdef USE_INT_MAP_BACKWARD
4072 if (IS_NULL(reg->int_map_backward)) {
4074 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4077 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4078 &(reg->int_map_backward));
4081 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4088 case ONIG_OPTIMIZE_MAP:
4089 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4094 if (reg->sub_anchor) {
4097 switch (reg->sub_anchor) {
4098 case ANCHOR_BEGIN_LINE:
4099 if (!ON_STR_BEGIN(p)) {
4100 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4101 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4108 case ANCHOR_END_LINE:
4109 if (ON_STR_END(p)) {
/* NOTE(review): the call in this #ifndef branch passes three arguments to
 * onigenc_get_prev_char_head(), while every other visible call in this
 * file passes four (the last being `end`). If this branch is ever
 * compiled, `end` most likely needs to be appended -- confirm against the
 * function's prototype. */
4110#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4111 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4112 if (IS_NULL(prev))
goto fail;
4113 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4119 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4120 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4121 if (IS_NULL(p))
goto fail;
4129 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4130 *low = p - reg->dmax;
4131 *high = p - reg->dmin;
4132 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4135#ifdef ONIG_DEBUG_SEARCH
4136 fprintf(stderr,
"backward_search_range: low: %d, high: %d\n",
4137 (
int )(*low - str), (
int )(*high - str));
4143#ifdef ONIG_DEBUG_SEARCH
4144 fprintf(stderr,
"backward_search_range: fail.\n");
/*
 * onig_search: thin wrapper around onig_search_gpos(). It forwards every
 * argument unchanged and passes `start` twice, so it serves as both the
 * global_pos and the start argument of onig_search_gpos().
 *
 * NOTE: the line carrying the return type is not part of this extract.
 */
4151onig_search(
regex_t* reg,
const UChar* str,
const UChar* end,
4152 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
4154 return onig_search_gpos(reg, str, end, start, start, range, region, option);
/*
 * onig_search_gpos: search [str, end) for a match whose start lies between
 * `start` and `range`, with `global_pos` supplied separately to the match
 * state.
 *
 * NOTE: this listing is an extract; labels, braces, local declarations and
 * many statements are not shown, so the outline below covers only what the
 * visible lines establish.
 *
 *  1. With USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE the caller's
 *     start/range are saved as orig_start/orig_range before being adjusted.
 *  2. The region is resized and cleared to reg->num_mem + 1 entries; a
 *     non-zero result jumps to finish_no_msa. A start outside [str, end]
 *     jumps to mismatch_no_msa.
 *  3. MATCH_AND_RETURN_CHECK is defined in four variants (depending on
 *     USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE and
 *     USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE); every variant calls match_at()
 *     and tests r != ONIG_MISMATCH.
 *  4. When reg->anchor is set and the subject is non-empty, start/range are
 *     narrowed according to ANCHOR_BEGIN_POSITION, ANCHOR_BEGIN_BUF,
 *     ANCHOR_END_BUF, ANCHOR_SEMI_END_BUF or ANCHOR_ANYCHAR_STAR_ML.
 *  5. An empty subject (str == end) is matched against a static empty
 *     string when reg->threshold_len == 0.
 *  6. Otherwise the match state is initialised with
 *     MATCH_ARG_INIT(msa, option, region, start, global_pos) and the text
 *     is scanned: the `range > start` section advances s with enclen() in
 *     loops bounded by s < range, optionally driven by
 *     forward_search_range(); a later section uses backward_search_range()
 *     in loops bounded by s >= range.
 *  7. On the way out MATCH_ARG_FREE(msa) is called, and the region is
 *     cleared when IS_FIND_NOT_EMPTY(reg->options) holds and region is
 *     non-NULL.
 */
4158onig_search_gpos(
regex_t* reg,
const UChar* str,
const UChar* end,
4159 const UChar* global_pos,
4160 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
4165#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4166 const UChar *orig_start = start;
4167 const UChar *orig_range = range;
/* NOTE(review): the trace below prints pointer differences (end - str,
 * start - str, range - str) with PRIuPTR; those operands are signed
 * pointer differences -- consider PRIdPTR, as the forward_search_range
 * success trace uses. */
4170#ifdef ONIG_DEBUG_SEARCH
4172 "onig_search (entry point): str: %"PRIuPTR
" (%p), end: %"PRIuPTR
", start: %"PRIuPTR
", range: %"PRIuPTR
"\n",
4173 (uintptr_t )str, str, end - str, start - str, range - str);
/* Make room for group 0 plus reg->num_mem capture groups. */
4177 r = onig_region_resize_clear(region, reg->num_mem + 1);
4178 if (r)
goto finish_no_msa;
4181 if (start > end || start < str)
goto mismatch_no_msa;
4184#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4185# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4186# define MATCH_AND_RETURN_CHECK(upper_range) \
4187 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4188 if (r != ONIG_MISMATCH) {\
4190 if (! IS_FIND_LONGEST(reg->options)) {\
4197# define MATCH_AND_RETURN_CHECK(upper_range) \
4198 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4199 if (r != ONIG_MISMATCH) {\
4207# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4208# define MATCH_AND_RETURN_CHECK(none) \
4209 r = match_at(reg, str, end, s, prev, &msa);\
4210 if (r != ONIG_MISMATCH) {\
4212 if (! IS_FIND_LONGEST(reg->options)) {\
4219# define MATCH_AND_RETURN_CHECK(none) \
4220 r = match_at(reg, str, end, s, prev, &msa);\
4221 if (r != ONIG_MISMATCH) {\
/* Anchor-driven narrowing of start/range; only for a non-empty subject. */
4232 if (reg->anchor != 0 && str < end) {
4233 UChar *min_semi_end, *max_semi_end;
4235 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4240 if (global_pos > start)
4242 if (global_pos < range)
4243 range = global_pos + 1;
4251 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4253 if (range > start) {
4254 if (start != str)
goto mismatch_no_msa;
4263 goto mismatch_no_msa;
4266 else if (reg->anchor & ANCHOR_END_BUF) {
4267 min_semi_end = max_semi_end = (UChar* )end;
4270 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
4271 goto mismatch_no_msa;
4273 if (range > start) {
4274 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
4275 start = min_semi_end - reg->anchor_dmax;
4277 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
4279 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4280 range = max_semi_end - reg->anchor_dmin + 1;
4283 if (start > range)
goto mismatch_no_msa;
4288 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
4289 range = min_semi_end - reg->anchor_dmax;
4291 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
4292 start = max_semi_end - reg->anchor_dmin;
4293 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
4295 if (range > start)
goto mismatch_no_msa;
4298 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
4299 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
4301 max_semi_end = (UChar* )end;
4302 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4303 min_semi_end = pre_end;
4305#ifdef USE_CRNL_AS_LINE_TERMINATOR
4306 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
4307 if (IS_NOT_NULL(pre_end) &&
4308 IS_NEWLINE_CRLF(reg->options) &&
4309 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4310 min_semi_end = pre_end;
4313 if (min_semi_end > str && start <= min_semi_end) {
4318 min_semi_end = (UChar* )end;
4322 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
4323 goto begin_position;
/* Empty subject: match against a static empty string so that the pointers
 * handed to the matcher are valid. */
4326 else if (str == end) {
4327 static const UChar address_for_empty_string[] =
"";
4329#ifdef ONIG_DEBUG_SEARCH
4330 fprintf(stderr,
"onig_search: empty string.\n");
4333 if (reg->threshold_len == 0) {
4334 start = end = str = address_for_empty_string;
4336 prev = (UChar* )NULL;
4338 MATCH_ARG_INIT(msa, option, region, start, start);
4339#ifdef USE_COMBINATION_EXPLOSION_CHECK
4340 msa.state_check_buff = (
void* )0;
4341 msa.state_check_buff_size = 0;
4343 MATCH_AND_RETURN_CHECK(end);
4346 goto mismatch_no_msa;
4349#ifdef ONIG_DEBUG_SEARCH
4350 fprintf(stderr,
"onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4351 (
int )(end - str), (
int )(start - str), (
int )(range - str));
4354 MATCH_ARG_INIT(msa, option, region, start, global_pos);
4355#ifdef USE_COMBINATION_EXPLOSION_CHECK
4357 ptrdiff_t offset = (MIN(start, range) - str);
4358 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
/* Section for range > start: s moves towards range with enclen(). */
4363 if (range > start) {
4365 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4367 prev = (UChar* )NULL;
4369 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4370 UChar *sch_range, *low, *high, *low_prev;
4372 sch_range = (UChar* )range;
4373 if (reg->dmax != 0) {
4374 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4375 sch_range = (UChar* )end;
4377 sch_range += reg->dmax;
4378 if (sch_range > end) sch_range = (UChar* )end;
4382 if ((end - start) < reg->threshold_len)
4385 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4387 if (! forward_search_range(reg, str, end, s, sch_range,
4388 &low, &high, &low_prev))
goto mismatch;
4394 MATCH_AND_RETURN_CHECK(orig_range);
4396 s += enclen(reg->enc, s, end);
4398 }
while (s < range);
4402 if (! forward_search_range(reg, str, end, s, sch_range,
4403 &low, &high, (UChar** )NULL))
goto mismatch;
4405 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
4407 MATCH_AND_RETURN_CHECK(orig_range);
4409 s += enclen(reg->enc, s, end);
4411 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4412 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
4415 s += enclen(reg->enc, s, end);
4418 }
while (s < range);
4425 MATCH_AND_RETURN_CHECK(orig_range);
4427 s += enclen(reg->enc, s, end);
4428 }
while (s < range);
4431 MATCH_AND_RETURN_CHECK(orig_range);
/* Section built on backward_search_range(); its loops run while
 * s >= range. */
4435 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4436 UChar *low, *high, *adjrange, *sch_start;
4439 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
4441 adjrange = (UChar* )end;
4443 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
4444 (end - range) >= reg->threshold_len) {
4446 sch_start = s + reg->dmax;
4447 if (sch_start > end) sch_start = (UChar* )end;
4448 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4456 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4457 MATCH_AND_RETURN_CHECK(orig_start);
4460 }
while (s >= range);
4464 if ((end - range) < reg->threshold_len)
goto mismatch;
4467 if (reg->dmax != 0) {
4468 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4469 sch_start = (UChar* )end;
4471 sch_start += reg->dmax;
4472 if (sch_start > end) sch_start = (UChar* )end;
4474 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4475 start, sch_start, end);
4478 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4479 &low, &high) <= 0)
goto mismatch;
4484 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4485 MATCH_AND_RETURN_CHECK(orig_start);
4487 }
while (s >= range);
4491#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4492 if (IS_FIND_LONGEST(reg->options)) {
4493 if (msa.best_len >= 0) {
4502 MATCH_ARG_FREE(msa);
4506 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
4507 onig_region_clear(region);
4511 if (r != ONIG_MISMATCH)
4512 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
4520 if (r != ONIG_MISMATCH)
4521 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
4526 MATCH_ARG_FREE(msa);
/*
 * onig_scan: repeatedly search [str, end) and report each result to
 * scan_callback.
 *
 * NOTE: this listing is an extract; the return type, the remaining
 * parameters, local declarations and the loop header are not shown.
 *
 * What the visible lines establish:
 *  - each round calls onig_search(reg, str, end, start, end, region,
 *    option).
 *  - scan_callback receives (n, r, region, callback_arg); its result is
 *    stored in rs.
 *  - if the match ends exactly at start (region->end[0] == start - str) the
 *    scan stops when start >= end, otherwise start advances by one
 *    character via enclen(); in the other visible assignment start becomes
 *    str + region->end[0].
 *  - r == ONIG_MISMATCH is handled in its own branch.
 */
4531onig_scan(
regex_t* reg,
const UChar* str,
const UChar* end,
4533 int (*scan_callback)(OnigPosition, OnigPosition,
OnigRegion*,
void*),
4544 r = onig_search(reg, str, end, start, end, region, option);
4546 rs = scan_callback(n, r, region, callback_arg);
4551 if (region->end[0] == start - str) {
4552 if (start >= end)
break;
4553 start += enclen(reg->enc, start, end);
4556 start = str + region->end[0];
4561 else if (r == ONIG_MISMATCH) {
/*
 * onig_get_encoding: accessor taking the compiled pattern by const pointer.
 * The return type and body are not part of this extract; presumably it
 * returns the pattern's encoding (reg->enc) -- TODO confirm against the
 * full source.
 */
4573onig_get_encoding(
const regex_t* reg)
4578extern OnigOptionType
4579onig_get_options(
const regex_t* reg)
4581 return reg->options;
4584extern OnigCaseFoldType
4585onig_get_case_fold_flag(
const regex_t* reg)
4587 return reg->case_fold_flag;
/*
 * onig_get_syntax: accessor taking the compiled pattern by const pointer.
 * The return type and body are not part of this extract; presumably it
 * returns the syntax the pattern was compiled with -- TODO confirm against
 * the full source.
 */
4591onig_get_syntax(
const regex_t* reg)
/*
 * onig_number_of_captures: return reg->num_mem. onig_match and
 * onig_search_gpos size their regions as reg->num_mem + 1.
 *
 * NOTE: the line carrying the return type is not part of this extract.
 */
4597onig_number_of_captures(
const regex_t* reg)
4599 return reg->num_mem;
4603onig_number_of_capture_histories(
const regex_t* reg)
4605#ifdef USE_CAPTURE_HISTORY
4609 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4610 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.