33OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
42onigenc_get_default_encoding(
void)
44 return OnigEncDefaultCharEncoding;
50 OnigEncDefaultCharEncoding = enc;
55onigenc_mbclen_approximate(
const OnigUChar* p,
const OnigUChar* e,
OnigEncoding enc)
57 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
58 if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
59 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
60 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
61 return (
int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
66onigenc_get_right_adjust_char_head(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end)
68 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
70 p += enclen(enc, p, end);
76onigenc_get_right_adjust_char_head_with_prev(
OnigEncoding enc,
77 const UChar* start,
const UChar* s,
const UChar* end,
const UChar** prev)
79 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
82 if (prev) *prev = (
const UChar* )p;
83 p += enclen(enc, p, end);
86 if (prev) *prev = (
const UChar* )NULL;
92onigenc_get_prev_char_head(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end)
97 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
101onigenc_step_back(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end,
int n)
103 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
105 return (UChar* )NULL;
107 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
113onigenc_step(
OnigEncoding enc,
const UChar* p,
const UChar* end,
int n)
115 UChar* q = (UChar* )p;
117 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
119 return (q <= end ? q : NULL);
123onigenc_strlen(
OnigEncoding enc,
const UChar* p,
const UChar* end)
126 UChar* q = (UChar* )p;
129 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
139 UChar* p = (UChar* )s;
145 int len = ONIGENC_MBC_MINLEN(enc);
147 if (len == 1)
return n;
150 if (*q !=
'\0')
break;
154 if (len == 1)
return n;
156 e = p + ONIGENC_MBC_MAXLEN(enc);
157 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
163onigenc_str_bytelen_null(
OnigEncoding enc,
const UChar* s)
165 UChar* start = (UChar* )s;
166 UChar* p = (UChar* )s;
172 int len = ONIGENC_MBC_MINLEN(enc);
174 if (len == 1)
return (
int )(p - start);
177 if (*q !=
'\0')
break;
181 if (len == 1)
return (
int )(p - start);
183 e = p + ONIGENC_MBC_MAXLEN(enc);
184 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
188const UChar OnigEncAsciiToLowerCaseTable[] = {
189 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
190 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
191 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
192 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
193 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
194 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
195 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
196 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
197 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
198 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
199 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
200 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
201 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
202 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
203 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
204 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
205 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
206 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
207 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
208 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
209 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
210 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
211 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
212 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
213 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
214 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
215 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
216 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
217 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
218 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
219 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
220 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
223#ifdef USE_UPPER_CASE_TABLE
224const UChar OnigEncAsciiToUpperCaseTable[256] = {
225 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
226 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
227 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
228 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
229 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
230 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
231 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
232 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
233 '\100',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
234 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
235 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
236 '\130',
'\131',
'\132',
'\133',
'\134',
'\135',
'\136',
'\137',
237 '\140',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
238 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
239 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
240 '\130',
'\131',
'\132',
'\173',
'\174',
'\175',
'\176',
'\177',
241 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
242 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
243 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
244 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
245 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
246 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
247 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
248 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
249 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
250 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
251 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
252 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
253 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
254 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
255 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
256 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
260const unsigned short OnigEncAsciiCtypeTable[256] = {
261 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
262 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
263 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
264 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
265 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
266 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
267 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
268 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
269 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
270 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
271 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
272 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
273 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
274 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
275 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
276 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
277 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
278 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
279 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
280 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
281 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
282 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
283 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
285 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
286 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
287 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
288 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
289 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
290 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
291 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
292 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
295const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
296 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
297 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
298 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
299 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
300 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
301 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
302 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
303 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
304 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
305 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
306 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
307 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
308 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
309 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
310 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
311 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
312 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
313 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
314 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
315 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
316 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
317 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
318 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
319 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
320 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
321 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
322 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\327',
323 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\337',
324 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
325 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
326 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
327 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377'
330#ifdef USE_UPPER_CASE_TABLE
331const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
332 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
333 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
334 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
335 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
336 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
337 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
338 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
339 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
340 '\100',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
341 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
342 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
343 '\130',
'\131',
'\132',
'\133',
'\134',
'\135',
'\136',
'\137',
344 '\140',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
345 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
346 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
347 '\130',
'\131',
'\132',
'\173',
'\174',
'\175',
'\176',
'\177',
348 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
349 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
350 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
351 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
352 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
353 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
354 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
355 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
356 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
357 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
358 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
359 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
360 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
361 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
362 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\367',
363 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\377',
369onigenc_set_default_caseconv_table(
const UChar* table ARG_UNUSED)
377onigenc_get_left_adjust_char_head(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end)
379 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
412onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
413 OnigApplyAllCaseFoldFunc f,
void* arg,
419 for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
420 code = OnigAsciiLowerMap[i].to;
421 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
422 if (r != 0)
return r;
424 code = OnigAsciiLowerMap[i].from;
425 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
426 if (r != 0)
return r;
433onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
434 const OnigUChar* p,
const OnigUChar* end ARG_UNUSED,
437 if (0x41 <= *p && *p <= 0x5a) {
438 items[0].byte_len = 1;
439 items[0].code_len = 1;
440 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
443 else if (0x61 <= *p && *p <= 0x7a) {
444 items[0].byte_len = 1;
445 items[0].code_len = 1;
446 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
454ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
455 OnigApplyAllCaseFoldFunc f,
void* arg)
457 OnigCodePoint ss[] = { 0x73, 0x73 };
459 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
463onigenc_apply_all_case_fold_with_map(
int map_size,
465 int ess_tsett_flag, OnigCaseFoldType flag,
466 OnigApplyAllCaseFoldFunc f,
void* arg)
471 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
472 if (r != 0)
return r;
474 for (i = 0; i < map_size; i++) {
476 r = (*f)(map[i].from, &code, 1, arg);
477 if (r != 0)
return r;
480 r = (*f)(map[i].to, &code, 1, arg);
481 if (r != 0)
return r;
484 if (ess_tsett_flag != 0)
485 return ss_apply_all_case_fold(flag, f, arg);
491onigenc_get_case_fold_codes_by_str_with_map(
int map_size,
493 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
496 if (0x41 <= *p && *p <= 0x5a) {
497 items[0].byte_len = 1;
498 items[0].code_len = 1;
499 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
500 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
501 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
503 items[1].byte_len = 2;
504 items[1].code_len = 1;
505 items[1].code[0] = (OnigCodePoint )0xdf;
511 else if (0x61 <= *p && *p <= 0x7a) {
512 items[0].byte_len = 1;
513 items[0].code_len = 1;
514 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
515 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
516 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
518 items[1].byte_len = 2;
519 items[1].code_len = 1;
520 items[1].code[0] = (OnigCodePoint )0xdf;
526 else if (*p == 0xdf && ess_tsett_flag != 0) {
527 items[0].byte_len = 1;
528 items[0].code_len = 2;
529 items[0].code[0] = (OnigCodePoint )
's';
530 items[0].code[1] = (OnigCodePoint )
's';
532 items[1].byte_len = 1;
533 items[1].code_len = 2;
534 items[1].code[0] = (OnigCodePoint )
'S';
535 items[1].code[1] = (OnigCodePoint )
'S';
537 items[2].byte_len = 1;
538 items[2].code_len = 2;
539 items[2].code[0] = (OnigCodePoint )
's';
540 items[2].code[1] = (OnigCodePoint )
'S';
542 items[3].byte_len = 1;
543 items[3].code_len = 2;
544 items[3].code[0] = (OnigCodePoint )
'S';
545 items[3].code[1] = (OnigCodePoint )
's';
552 for (i = 0; i < map_size; i++) {
553 if (*p == map[i].from) {
554 items[0].byte_len = 1;
555 items[0].code_len = 1;
556 items[0].code[0] = map[i].to;
559 else if (*p == map[i].to) {
560 items[0].byte_len = 1;
561 items[0].code_len = 1;
562 items[0].code[0] = map[i].from;
573onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
574 OnigCodePoint* sb_out ARG_UNUSED,
575 const OnigCodePoint* ranges[] ARG_UNUSED,
578 return ONIG_NO_SUPPORT_CONFIG;
582onigenc_is_mbc_newline_0x0a(
const UChar* p,
const UChar* end,
OnigEncoding enc ARG_UNUSED)
585 if (*p == 0x0a)
return 1;
592onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** p,
593 const UChar* end, UChar* lower,
OnigEncoding enc ARG_UNUSED)
595 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
603onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
604 const UChar** pp,
const UChar* end ARG_UNUSED)
606 const UChar* p = *pp;
609 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
614onigenc_single_byte_mbc_enc_len(
const UChar* p ARG_UNUSED,
const UChar* e ARG_UNUSED,
621onigenc_single_byte_mbc_to_code(
const UChar* p,
const UChar* end ARG_UNUSED,
624 return (OnigCodePoint )(*p);
628onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
634onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf,
OnigEncoding enc ARG_UNUSED)
638 rb_raise(rb_eRangeError,
"%u out of char range", code);
640 *buf = (UChar )(code & 0xff);
645onigenc_single_byte_left_adjust_char_head(
const UChar* start ARG_UNUSED,
647 const UChar* end ARG_UNUSED,
654onigenc_always_true_is_allowed_reverse_match(
const UChar* s ARG_UNUSED,
655 const UChar* end ARG_UNUSED,
662onigenc_always_false_is_allowed_reverse_match(
const UChar* s ARG_UNUSED,
663 const UChar* end ARG_UNUSED,
670onigenc_ascii_is_code_ctype(OnigCodePoint code,
unsigned int ctype,
674 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
680onigenc_mbn_mbc_to_code(
OnigEncoding enc,
const UChar* p,
const UChar* end)
685 len = enclen(enc, p, end);
686 n = (OnigCodePoint )(*p++);
687 if (len == 1)
return n;
689 for (i = 1; i < len; i++) {
698onigenc_mbn_mbc_case_fold(
OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
699 const UChar** pp,
const UChar* end ARG_UNUSED,
703 const UChar *p = *pp;
705 if (ONIGENC_IS_MBC_ASCII(p)) {
706 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
713 len = enclen(enc, p, end);
714 for (i = 0; i < len; i++) {
724onigenc_mbn_is_mbc_ambiguous(
OnigEncoding enc, OnigCaseFoldType flag,
725 const UChar** pp,
const UChar* end ARG_UNUSED)
727 const UChar* p = *pp;
729 if (ONIGENC_IS_MBC_ASCII(p)) {
731 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
734 (*pp) += enclen(enc, p);
740onigenc_mb2_code_to_mbclen(OnigCodePoint code,
OnigEncoding enc ARG_UNUSED)
742 if (code <= 0xff)
return 1;
743 if (code <= 0xffff)
return 2;
744 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
748onigenc_mb4_code_to_mbclen(OnigCodePoint code,
OnigEncoding enc ARG_UNUSED)
750 if ((code & 0xff000000) != 0)
return 4;
751 else if ((code & 0xff0000) != 0)
return 3;
752 else if ((code & 0xff00) != 0)
return 2;
757onigenc_mb2_code_to_mbc(
OnigEncoding enc, OnigCodePoint code, UChar *buf)
761 if ((code & 0xff00) != 0) {
762 *p++ = (UChar )((code >> 8) & 0xff);
764 *p++ = (UChar )(code & 0xff);
767 if (enclen(enc, buf, p) != (p - buf))
768 return ONIGERR_INVALID_CODE_POINT_VALUE;
770 return (
int )(p - buf);
774onigenc_mb4_code_to_mbc(
OnigEncoding enc, OnigCodePoint code, UChar *buf)
778 if ((code & 0xff000000) != 0) {
779 *p++ = (UChar )((code >> 24) & 0xff);
781 if ((code & 0xff0000) != 0 || p != buf) {
782 *p++ = (UChar )((code >> 16) & 0xff);
784 if ((code & 0xff00) != 0 || p != buf) {
785 *p++ = (UChar )((code >> 8) & 0xff);
787 *p++ = (UChar )(code & 0xff);
790 if (enclen(enc, buf, p) != (p - buf))
791 return ONIGERR_INVALID_CODE_POINT_VALUE;
793 return (
int )(p - buf);
797onigenc_minimum_property_name_to_ctype(
OnigEncoding enc,
const UChar* p,
const UChar* end)
800 POSIX_BRACKET_ENTRY_INIT(
"Alnum", ONIGENC_CTYPE_ALNUM),
801 POSIX_BRACKET_ENTRY_INIT(
"Alpha", ONIGENC_CTYPE_ALPHA),
802 POSIX_BRACKET_ENTRY_INIT(
"Blank", ONIGENC_CTYPE_BLANK),
803 POSIX_BRACKET_ENTRY_INIT(
"Cntrl", ONIGENC_CTYPE_CNTRL),
804 POSIX_BRACKET_ENTRY_INIT(
"Digit", ONIGENC_CTYPE_DIGIT),
805 POSIX_BRACKET_ENTRY_INIT(
"Graph", ONIGENC_CTYPE_GRAPH),
806 POSIX_BRACKET_ENTRY_INIT(
"Lower", ONIGENC_CTYPE_LOWER),
807 POSIX_BRACKET_ENTRY_INIT(
"Print", ONIGENC_CTYPE_PRINT),
808 POSIX_BRACKET_ENTRY_INIT(
"Punct", ONIGENC_CTYPE_PUNCT),
809 POSIX_BRACKET_ENTRY_INIT(
"Space", ONIGENC_CTYPE_SPACE),
810 POSIX_BRACKET_ENTRY_INIT(
"Upper", ONIGENC_CTYPE_UPPER),
811 POSIX_BRACKET_ENTRY_INIT(
"XDigit", ONIGENC_CTYPE_XDIGIT),
812 POSIX_BRACKET_ENTRY_INIT(
"ASCII", ONIGENC_CTYPE_ASCII),
813 POSIX_BRACKET_ENTRY_INIT(
"Word", ONIGENC_CTYPE_WORD),
819 len = onigenc_strlen(enc, p, end);
820 for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
821 if (len == pb->len &&
822 onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
826 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
830onigenc_mb2_is_code_ctype(
OnigEncoding enc, OnigCodePoint code,
834 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
836 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
837 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
845onigenc_mb4_is_code_ctype(
OnigEncoding enc, OnigCodePoint code,
849 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
851 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
852 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
860onigenc_with_ascii_strncmp(
OnigEncoding enc,
const UChar* p,
const UChar* end,
861 const UChar* sascii ,
int n)
866 if (p >= end)
return (
int )(*sascii);
868 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
873 p += enclen(enc, p, end);
879onigenc_with_ascii_strnicmp(
OnigEncoding enc,
const UChar* p,
const UChar* end,
880 const UChar* sascii ,
int n)
885 if (p >= end)
return (
int )(*sascii);
887 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
888 if (ONIGENC_IS_ASCII_CODE(c))
889 c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
890 x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
894 p += enclen(enc, p, end);
902resize_property_list(
int new_size,
const OnigCodePoint*** plist,
int* psize)
905 const OnigCodePoint **list = *plist;
907 size =
sizeof(OnigCodePoint*) * new_size;
909 list = (
const OnigCodePoint** )
xmalloc(size);
910 if (IS_NULL(list))
return ONIGERR_MEMORY;
913 const OnigCodePoint **tmp;
914 tmp = (
const OnigCodePoint** )
xrealloc((
void* )list, size);
915 if (IS_NULL(tmp))
return ONIGERR_MEMORY;
926onigenc_property_list_add_property(UChar* name,
const OnigCodePoint* prop,
927 hash_table_type **table,
const OnigCodePoint*** plist,
int *pnum,
930#define PROP_INIT_SIZE 16
934 if (*psize <= *pnum) {
935 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
936 r = resize_property_list(new_size, plist, psize);
937 if (r != 0)
return r;
940 (*plist)[*pnum] = prop;
942 if (ONIG_IS_NULL(*table)) {
943 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
944 if (ONIG_IS_NULL(*table))
return ONIGERR_MEMORY;
948 onig_st_insert_strend(*table, name, name + strlen((
char* )name),
949 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
955onigenc_ascii_only_case_map(OnigCaseFoldType* flagP,
const OnigUChar** pp,
const OnigUChar* end,
959 OnigUChar *to_start = to;
960 OnigCaseFoldType flags = *flagP;
961 int codepoint_length;
963 while (*pp < end && to < to_end) {
964 codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
965 if (codepoint_length < 0)
966 return codepoint_length;
967 code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
968 *pp += codepoint_length;
970 if (code >=
'a' && code <=
'z' && (flags & ONIGENC_CASE_UPCASE)) {
971 flags |= ONIGENC_CASE_MODIFIED;
973 }
else if (code >=
'A' && code <=
'Z' &&
974 (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
975 flags |= ONIGENC_CASE_MODIFIED;
978 to += ONIGENC_CODE_TO_MBC(enc, code, to);
979 if (flags & ONIGENC_CASE_TITLECASE)
980 flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
983 return (
int )(to - to_start);
987onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP,
const OnigUChar** pp,
988 const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
992 OnigUChar *to_start = to;
993 OnigCaseFoldType flags = *flagP;
995 while (*pp < end && to < to_end) {
998 if (code >=
'a' && code <=
'z' && (flags & ONIGENC_CASE_UPCASE)) {
999 flags |= ONIGENC_CASE_MODIFIED;
1001 }
else if (code >=
'A' && code <=
'Z' &&
1002 (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
1003 flags |= ONIGENC_CASE_MODIFIED;
1007 if (flags & ONIGENC_CASE_TITLECASE)
1008 flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
1011 return (
int )(to - to_start);
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.