Ruby 3.1.3p185 (2022-11-24 revision 1a6b16756e0ba6b95ab71a441357ed5484e33498)
pack.c
1/**********************************************************************
2
3 pack.c -
4
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "ruby/internal/config.h"
13
14#include <ctype.h>
15#include <errno.h>
16#include <float.h>
17#include <sys/types.h>
18
19#include "internal.h"
20#include "internal/array.h"
21#include "internal/bits.h"
22#include "internal/string.h"
23#include "internal/symbol.h"
24#include "internal/variable.h"
25#include "ruby/util.h"
26
27#include "builtin.h"
28
/*
 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
 * instead of HAVE_LONG_LONG or LONG_LONG.
 * This means q! and Q! always refer to the standard long long type and
 * raise ArgumentError on platforms that have no long long type,
 * even if the platform has an implementation-specific 64-bit type.
 * This behavior is consistent with the documentation of pack/unpack.
 */
/* Directives that may be followed by an explicit byte-order modifier
 * ('<' or '>'). */
static const char endstr[] = "sSiIlLqQjJ";

/*
 * natstr: directives that may be followed by a native-size modifier
 * ('_' or '!').
 * NATINT_LEN_Q: byte width used for the q/Q directives.
 */
#if defined(HAVE_TRUE_LONG_LONG)
static const char natstr[] = "sSiIlLqQjJ";
/* It is intentional to use long long instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
static const char natstr[] = "sSiIlLjJ";
# define NATINT_LEN_Q 8
#endif

/* NATINT_PACK is defined when at least one native integer type differs in
 * size from the fixed width of its directive, i.e. when the native-size
 * modifier can actually change the packed width. */
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
# define NATINT_PACK
#endif
54
#if defined(DYNAMIC_ENDIAN)
/* for universal binary of NEXTSTEP and MacOS X */
/* useless since autoconf 2.63? */
/*
 * Detects the host byte order at run time and caches the answer.
 * Returns 1 on a big-endian host, 0 otherwise.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached;

    if (!probed) {
        probed = 1;
        /* `probed` now holds 1; its lowest-addressed byte is zero only
         * when the host stores the most significant byte first. */
        cached = (*(char *)&probed) ? 0 : 1;
    }
    return cached;
}
# define BIGENDIAN_P() (is_bigendian())
#elif defined(WORDS_BIGENDIAN)
# define BIGENDIAN_P() 1
#else
# define BIGENDIAN_P() 0
#endif
76
/*
 * NATINT_LEN(type, len): packed width in bytes of an integer directive.
 * Without NATINT_PACK this is always sizeof(type).  With NATINT_PACK it is
 * sizeof(type) when the local `natint` flag is set and the fixed width
 * `len` otherwise.
 */
#ifndef NATINT_PACK
# define NATINT_LEN(type,len) ((int)sizeof(type))
#else
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#endif
82
/*
 * Overlays a float with a 32-bit integer and a raw byte buffer, so that the
 * value can be byte-swapped through `u` and appended through `buf`.
 * Declared via FLOAT_CONVWITH().
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/*
 * Overlays a double with a 64-bit integer and a raw byte buffer, so that the
 * value can be byte-swapped through `u` and appended through `buf`.
 * Declared via DOUBLE_CONVWITH().
 */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
/* Byte-swap the integer image of a float / double. */
#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/* Host <-> network (big-endian) order: swap only on little-endian hosts.
 * Both directions are the same operation. */
#define rb_htonf(x) (BIGENDIAN_P() ? (x) : swapf(x))
#define rb_htond(x) (BIGENDIAN_P() ? (x) : swapd(x))
#define rb_ntohf(x) rb_htonf(x)
#define rb_ntohd(x) rb_htond(x)

/* Host <-> VAX (little-endian) order: swap only on big-endian hosts.
 * Both directions are the same operation. */
#define rb_htovf(x) (BIGENDIAN_P() ? swapf(x) : (x))
#define rb_htovd(x) (BIGENDIAN_P() ? swapd(x) : (x))
#define rb_vtohf(x) rb_htovf(x)
#define rb_vtohd(x) rb_htovd(x)

/* Declare a float swapper union named x, and convert it in place. */
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
#define HTONF(x) ((x).u = rb_htonf((x).u))
#define HTOVF(x) ((x).u = rb_htovf((x).u))
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
#define VTOHF(x) ((x).u = rb_vtohf((x).u))

/* Declare a double swapper union named x, and convert it in place. */
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
#define HTOND(x) ((x).u = rb_htond((x).u))
#define HTOVD(x) ((x).u = rb_htovd((x).u))
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
#define VTOHD(x) ((x).u = rb_vtohd((x).u))

/* Largest integer width, in bytes, that the integer directives may pack. */
#define MAX_INTEGER_PACK_SIZE 8
118
119static const char toofew[] = "too few arguments";
120
121static void encodes(VALUE,const char*,long,int,int);
122static void qpencode(VALUE,VALUE,long);
123
124static unsigned long utf8_to_uv(const char*,long*);
125
126static ID id_associated;
127
128static void
129str_associate(VALUE str, VALUE add)
130{
131 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
132 rb_ivar_set(str, id_associated, add);
133}
134
135static VALUE
136str_associated(VALUE str)
137{
138 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
139 if (!associates)
140 rb_raise(rb_eArgError, "no associated pointer");
141 return associates;
142}
143
144static VALUE
145associated_pointer(VALUE associates, const char *t)
146{
147 const VALUE *p = RARRAY_CONST_PTR(associates);
148 const VALUE *pend = p + RARRAY_LEN(associates);
149 for (; p < pend; p++) {
150 VALUE tmp = *p;
151 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
152 }
153 rb_raise(rb_eArgError, "non associated pointer");
155}
156
157static void
158unknown_directive(const char *mode, char type, VALUE fmt)
159{
160 char unknown[5];
161
162 if (ISPRINT(type)) {
163 unknown[0] = type;
164 unknown[1] = '\0';
165 }
166 else {
167 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
168 }
169 fmt = rb_str_quote_unprintable(fmt);
170 rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
171 mode, unknown, fmt);
172}
173
174static float
175VALUE_to_float(VALUE obj)
176{
177 VALUE v = rb_to_float(obj);
178 double d = RFLOAT_VALUE(v);
179
180 if (isnan(d)) {
181 return NAN;
182 }
183 else if (d < -FLT_MAX) {
184 return -INFINITY;
185 }
186 else if (d <= FLT_MAX) {
187 return d;
188 }
189 else {
190 return INFINITY;
191 }
192}
193
194static VALUE
195pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
196{
197 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
198 static const char spc10[] = " ";
199 const char *p, *pend;
200 VALUE res, from, associates = 0;
201 char type;
202 long len, idx, plen;
203 const char *ptr;
204 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
205#ifdef NATINT_PACK
206 int natint; /* native integer */
207#endif
208 int integer_size, bigendian_p;
209
210 StringValue(fmt);
211 p = RSTRING_PTR(fmt);
212 pend = p + RSTRING_LEN(fmt);
213
214 if (NIL_P(buffer)) {
215 res = rb_str_buf_new(0);
216 }
217 else {
218 if (!RB_TYPE_P(buffer, T_STRING))
219 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
220 rb_str_modify(buffer);
221 res = buffer;
222 }
223
224 idx = 0;
225
226#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
227#define MORE_ITEM (idx < RARRAY_LEN(ary))
228#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
229#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
230
231 while (p < pend) {
232 int explicit_endian = 0;
233 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
234 rb_raise(rb_eRuntimeError, "format string modified");
235 }
236 type = *p++; /* get data type */
237#ifdef NATINT_PACK
238 natint = 0;
239#endif
240
241 if (ISSPACE(type)) continue;
242 if (type == '#') {
243 while ((p < pend) && (*p != '\n')) {
244 p++;
245 }
246 continue;
247 }
248
249 {
250 modifiers:
251 switch (*p) {
252 case '_':
253 case '!':
254 if (strchr(natstr, type)) {
255#ifdef NATINT_PACK
256 natint = 1;
257#endif
258 p++;
259 }
260 else {
261 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
262 }
263 goto modifiers;
264
265 case '<':
266 case '>':
267 if (!strchr(endstr, type)) {
268 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
269 }
270 if (explicit_endian) {
271 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
272 }
273 explicit_endian = *p++;
274 goto modifiers;
275 }
276 }
277
278 if (*p == '*') { /* set data length */
279 len = strchr("@Xxu", type) ? 0
280 : strchr("PMm", type) ? 1
281 : RARRAY_LEN(ary) - idx;
282 p++;
283 }
284 else if (ISDIGIT(*p)) {
285 errno = 0;
286 len = STRTOUL(p, (char**)&p, 10);
287 if (errno) {
288 rb_raise(rb_eRangeError, "pack length too big");
289 }
290 }
291 else {
292 len = 1;
293 }
294
295 switch (type) {
296 case 'U':
297 /* if encoding is US-ASCII, upgrade to UTF-8 */
298 if (enc_info == 1) enc_info = 2;
299 break;
300 case 'm': case 'M': case 'u':
301 /* keep US-ASCII (do nothing) */
302 break;
303 default:
304 /* fall back to BINARY */
305 enc_info = 0;
306 break;
307 }
308 switch (type) {
309 case 'A': case 'a': case 'Z':
310 case 'B': case 'b':
311 case 'H': case 'h':
312 from = NEXTFROM;
313 if (NIL_P(from)) {
314 ptr = "";
315 plen = 0;
316 }
317 else {
318 StringValue(from);
319 ptr = RSTRING_PTR(from);
320 plen = RSTRING_LEN(from);
321 }
322
323 if (p[-1] == '*')
324 len = plen;
325
326 switch (type) {
327 case 'a': /* arbitrary binary string (null padded) */
328 case 'A': /* arbitrary binary string (ASCII space padded) */
329 case 'Z': /* null terminated string */
330 if (plen >= len) {
331 rb_str_buf_cat(res, ptr, len);
332 if (p[-1] == '*' && type == 'Z')
333 rb_str_buf_cat(res, nul10, 1);
334 }
335 else {
336 rb_str_buf_cat(res, ptr, plen);
337 len -= plen;
338 while (len >= 10) {
339 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
340 len -= 10;
341 }
342 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
343 }
344 break;
345
346#define castchar(from) (char)((from) & 0xff)
347
348 case 'b': /* bit string (ascending) */
349 {
350 int byte = 0;
351 long i, j = 0;
352
353 if (len > plen) {
354 j = (len - plen + 1)/2;
355 len = plen;
356 }
357 for (i=0; i++ < len; ptr++) {
358 if (*ptr & 1)
359 byte |= 128;
360 if (i & 7)
361 byte >>= 1;
362 else {
363 char c = castchar(byte);
364 rb_str_buf_cat(res, &c, 1);
365 byte = 0;
366 }
367 }
368 if (len & 7) {
369 char c;
370 byte >>= 7 - (len & 7);
371 c = castchar(byte);
372 rb_str_buf_cat(res, &c, 1);
373 }
374 len = j;
375 goto grow;
376 }
377 break;
378
379 case 'B': /* bit string (descending) */
380 {
381 int byte = 0;
382 long i, j = 0;
383
384 if (len > plen) {
385 j = (len - plen + 1)/2;
386 len = plen;
387 }
388 for (i=0; i++ < len; ptr++) {
389 byte |= *ptr & 1;
390 if (i & 7)
391 byte <<= 1;
392 else {
393 char c = castchar(byte);
394 rb_str_buf_cat(res, &c, 1);
395 byte = 0;
396 }
397 }
398 if (len & 7) {
399 char c;
400 byte <<= 7 - (len & 7);
401 c = castchar(byte);
402 rb_str_buf_cat(res, &c, 1);
403 }
404 len = j;
405 goto grow;
406 }
407 break;
408
409 case 'h': /* hex string (low nibble first) */
410 {
411 int byte = 0;
412 long i, j = 0;
413
414 if (len > plen) {
415 j = (len + 1) / 2 - (plen + 1) / 2;
416 len = plen;
417 }
418 for (i=0; i++ < len; ptr++) {
419 if (ISALPHA(*ptr))
420 byte |= (((*ptr & 15) + 9) & 15) << 4;
421 else
422 byte |= (*ptr & 15) << 4;
423 if (i & 1)
424 byte >>= 4;
425 else {
426 char c = castchar(byte);
427 rb_str_buf_cat(res, &c, 1);
428 byte = 0;
429 }
430 }
431 if (len & 1) {
432 char c = castchar(byte);
433 rb_str_buf_cat(res, &c, 1);
434 }
435 len = j;
436 goto grow;
437 }
438 break;
439
440 case 'H': /* hex string (high nibble first) */
441 {
442 int byte = 0;
443 long i, j = 0;
444
445 if (len > plen) {
446 j = (len + 1) / 2 - (plen + 1) / 2;
447 len = plen;
448 }
449 for (i=0; i++ < len; ptr++) {
450 if (ISALPHA(*ptr))
451 byte |= ((*ptr & 15) + 9) & 15;
452 else
453 byte |= *ptr & 15;
454 if (i & 1)
455 byte <<= 4;
456 else {
457 char c = castchar(byte);
458 rb_str_buf_cat(res, &c, 1);
459 byte = 0;
460 }
461 }
462 if (len & 1) {
463 char c = castchar(byte);
464 rb_str_buf_cat(res, &c, 1);
465 }
466 len = j;
467 goto grow;
468 }
469 break;
470 }
471 break;
472
473 case 'c': /* signed char */
474 case 'C': /* unsigned char */
475 integer_size = 1;
476 bigendian_p = BIGENDIAN_P(); /* not effective */
477 goto pack_integer;
478
479 case 's': /* s for int16_t, s! for signed short */
480 integer_size = NATINT_LEN(short, 2);
481 bigendian_p = BIGENDIAN_P();
482 goto pack_integer;
483
484 case 'S': /* S for uint16_t, S! for unsigned short */
485 integer_size = NATINT_LEN(short, 2);
486 bigendian_p = BIGENDIAN_P();
487 goto pack_integer;
488
489 case 'i': /* i and i! for signed int */
490 integer_size = (int)sizeof(int);
491 bigendian_p = BIGENDIAN_P();
492 goto pack_integer;
493
494 case 'I': /* I and I! for unsigned int */
495 integer_size = (int)sizeof(int);
496 bigendian_p = BIGENDIAN_P();
497 goto pack_integer;
498
499 case 'l': /* l for int32_t, l! for signed long */
500 integer_size = NATINT_LEN(long, 4);
501 bigendian_p = BIGENDIAN_P();
502 goto pack_integer;
503
504 case 'L': /* L for uint32_t, L! for unsigned long */
505 integer_size = NATINT_LEN(long, 4);
506 bigendian_p = BIGENDIAN_P();
507 goto pack_integer;
508
509 case 'q': /* q for int64_t, q! for signed long long */
510 integer_size = NATINT_LEN_Q;
511 bigendian_p = BIGENDIAN_P();
512 goto pack_integer;
513
514 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
515 integer_size = NATINT_LEN_Q;
516 bigendian_p = BIGENDIAN_P();
517 goto pack_integer;
518
519 case 'j': /* j for intptr_t */
520 integer_size = sizeof(intptr_t);
521 bigendian_p = BIGENDIAN_P();
522 goto pack_integer;
523
524 case 'J': /* J for uintptr_t */
525 integer_size = sizeof(uintptr_t);
526 bigendian_p = BIGENDIAN_P();
527 goto pack_integer;
528
529 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
530 integer_size = 2;
531 bigendian_p = 1;
532 goto pack_integer;
533
534 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
535 integer_size = 4;
536 bigendian_p = 1;
537 goto pack_integer;
538
539 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
540 integer_size = 2;
541 bigendian_p = 0;
542 goto pack_integer;
543
544 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
545 integer_size = 4;
546 bigendian_p = 0;
547 goto pack_integer;
548
549 pack_integer:
550 if (explicit_endian) {
551 bigendian_p = explicit_endian == '>';
552 }
553 if (integer_size > MAX_INTEGER_PACK_SIZE)
554 rb_bug("unexpected intger size for pack: %d", integer_size);
555 while (len-- > 0) {
556 char intbuf[MAX_INTEGER_PACK_SIZE];
557
558 from = NEXTFROM;
559 rb_integer_pack(from, intbuf, integer_size, 1, 0,
562 rb_str_buf_cat(res, intbuf, integer_size);
563 }
564 break;
565
566 case 'f': /* single precision float in native format */
567 case 'F': /* ditto */
568 while (len-- > 0) {
569 float f;
570
571 from = NEXTFROM;
572 f = VALUE_to_float(from);
573 rb_str_buf_cat(res, (char*)&f, sizeof(float));
574 }
575 break;
576
577 case 'e': /* single precision float in VAX byte-order */
578 while (len-- > 0) {
579 FLOAT_CONVWITH(tmp);
580
581 from = NEXTFROM;
582 tmp.f = VALUE_to_float(from);
583 HTOVF(tmp);
584 rb_str_buf_cat(res, tmp.buf, sizeof(float));
585 }
586 break;
587
588 case 'E': /* double precision float in VAX byte-order */
589 while (len-- > 0) {
590 DOUBLE_CONVWITH(tmp);
591 from = NEXTFROM;
592 tmp.d = RFLOAT_VALUE(rb_to_float(from));
593 HTOVD(tmp);
594 rb_str_buf_cat(res, tmp.buf, sizeof(double));
595 }
596 break;
597
598 case 'd': /* double precision float in native format */
599 case 'D': /* ditto */
600 while (len-- > 0) {
601 double d;
602
603 from = NEXTFROM;
604 d = RFLOAT_VALUE(rb_to_float(from));
605 rb_str_buf_cat(res, (char*)&d, sizeof(double));
606 }
607 break;
608
609 case 'g': /* single precision float in network byte-order */
610 while (len-- > 0) {
611 FLOAT_CONVWITH(tmp);
612 from = NEXTFROM;
613 tmp.f = VALUE_to_float(from);
614 HTONF(tmp);
615 rb_str_buf_cat(res, tmp.buf, sizeof(float));
616 }
617 break;
618
619 case 'G': /* double precision float in network byte-order */
620 while (len-- > 0) {
621 DOUBLE_CONVWITH(tmp);
622
623 from = NEXTFROM;
624 tmp.d = RFLOAT_VALUE(rb_to_float(from));
625 HTOND(tmp);
626 rb_str_buf_cat(res, tmp.buf, sizeof(double));
627 }
628 break;
629
630 case 'x': /* null byte */
631 grow:
632 while (len >= 10) {
633 rb_str_buf_cat(res, nul10, 10);
634 len -= 10;
635 }
636 rb_str_buf_cat(res, nul10, len);
637 break;
638
639 case 'X': /* back up byte */
640 shrink:
641 plen = RSTRING_LEN(res);
642 if (plen < len)
643 rb_raise(rb_eArgError, "X outside of string");
644 rb_str_set_len(res, plen - len);
645 break;
646
647 case '@': /* null fill to absolute position */
648 len -= RSTRING_LEN(res);
649 if (len > 0) goto grow;
650 len = -len;
651 if (len > 0) goto shrink;
652 break;
653
654 case '%':
655 rb_raise(rb_eArgError, "%% is not supported");
656 break;
657
658 case 'U': /* Unicode character */
659 while (len-- > 0) {
660 SIGNED_VALUE l;
661 char buf[8];
662 int le;
663
664 from = NEXTFROM;
665 from = rb_to_int(from);
666 l = NUM2LONG(from);
667 if (l < 0) {
668 rb_raise(rb_eRangeError, "pack(U): value out of range");
669 }
670 le = rb_uv_to_utf8(buf, l);
671 rb_str_buf_cat(res, (char*)buf, le);
672 }
673 break;
674
675 case 'u': /* uuencoded string */
676 case 'm': /* base64 encoded string */
677 from = NEXTFROM;
678 StringValue(from);
679 ptr = RSTRING_PTR(from);
680 plen = RSTRING_LEN(from);
681
682 if (len == 0 && type == 'm') {
683 encodes(res, ptr, plen, type, 0);
684 ptr += plen;
685 break;
686 }
687 if (len <= 2)
688 len = 45;
689 else if (len > 63 && type == 'u')
690 len = 63;
691 else
692 len = len / 3 * 3;
693 while (plen > 0) {
694 long todo;
695
696 if (plen > len)
697 todo = len;
698 else
699 todo = plen;
700 encodes(res, ptr, todo, type, 1);
701 plen -= todo;
702 ptr += todo;
703 }
704 break;
705
706 case 'M': /* quoted-printable encoded string */
707 from = rb_obj_as_string(NEXTFROM);
708 if (len <= 1)
709 len = 72;
710 qpencode(res, from, len);
711 break;
712
713 case 'P': /* pointer to packed byte string */
714 from = THISFROM;
715 if (!NIL_P(from)) {
716 StringValue(from);
717 if (RSTRING_LEN(from) < len) {
718 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
719 RSTRING_LEN(from), len);
720 }
721 }
722 len = 1;
723 /* FALL THROUGH */
724 case 'p': /* pointer to string */
725 while (len-- > 0) {
726 char *t;
727 from = NEXTFROM;
728 if (NIL_P(from)) {
729 t = 0;
730 }
731 else {
732 t = StringValuePtr(from);
733 }
734 if (!associates) {
735 associates = rb_ary_new();
736 }
737 rb_ary_push(associates, from);
738 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
739 }
740 break;
741
742 case 'w': /* BER compressed integer */
743 while (len-- > 0) {
744 VALUE buf = rb_str_new(0, 0);
745 size_t numbytes;
746 int sign;
747 char *cp;
748
749 from = NEXTFROM;
750 from = rb_to_int(from);
751 numbytes = rb_absint_numwords(from, 7, NULL);
752 if (numbytes == 0)
753 numbytes = 1;
754 buf = rb_str_new(NULL, numbytes);
755
756 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
757
758 if (sign < 0)
759 rb_raise(rb_eArgError, "can't compress negative numbers");
760 if (sign == 2)
761 rb_bug("buffer size problem?");
762
763 cp = RSTRING_PTR(buf);
764 while (1 < numbytes) {
765 *cp |= 0x80;
766 cp++;
767 numbytes--;
768 }
769
770 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
771 }
772 break;
773
774 default: {
775 unknown_directive("pack", type, fmt);
776 break;
777 }
778 }
779 }
780
781 if (associates) {
782 str_associate(res, associates);
783 }
784 switch (enc_info) {
785 case 1:
787 break;
788 case 2:
790 break;
791 default:
792 /* do nothing, keep ASCII-8BIT */
793 break;
794 }
795 return res;
796}
797
798static const char uu_table[] =
799"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
800static const char b64_table[] =
801"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
802
803static void
804encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
805{
806 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
807 char buff[buff_size + 1]; /* +1 for tail_lf */
808 long i = 0;
809 const char *const trans = type == 'u' ? uu_table : b64_table;
810 char padding;
811 const unsigned char *s = (const unsigned char *)s0;
812
813 if (type == 'u') {
814 buff[i++] = (char)len + ' ';
815 padding = '`';
816 }
817 else {
818 padding = '=';
819 }
820 while (len >= input_unit) {
821 while (len >= input_unit && buff_size-i >= encoded_unit) {
822 buff[i++] = trans[077 & (*s >> 2)];
823 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
824 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
825 buff[i++] = trans[077 & s[2]];
826 s += input_unit;
827 len -= input_unit;
828 }
829 if (buff_size-i < encoded_unit) {
830 rb_str_buf_cat(str, buff, i);
831 i = 0;
832 }
833 }
834
835 if (len == 2) {
836 buff[i++] = trans[077 & (*s >> 2)];
837 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
838 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
839 buff[i++] = padding;
840 }
841 else if (len == 1) {
842 buff[i++] = trans[077 & (*s >> 2)];
843 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
844 buff[i++] = padding;
845 buff[i++] = padding;
846 }
847 if (tail_lf) buff[i++] = '\n';
848 rb_str_buf_cat(str, buff, i);
849 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
850}
851
852static const char hex_table[] = "0123456789ABCDEF";
853
854static void
855qpencode(VALUE str, VALUE from, long len)
856{
857 char buff[1024];
858 long i = 0, n = 0, prev = EOF;
859 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
860 unsigned char *send = s + RSTRING_LEN(from);
861
862 while (s < send) {
863 if ((*s > 126) ||
864 (*s < 32 && *s != '\n' && *s != '\t') ||
865 (*s == '=')) {
866 buff[i++] = '=';
867 buff[i++] = hex_table[*s >> 4];
868 buff[i++] = hex_table[*s & 0x0f];
869 n += 3;
870 prev = EOF;
871 }
872 else if (*s == '\n') {
873 if (prev == ' ' || prev == '\t') {
874 buff[i++] = '=';
875 buff[i++] = *s;
876 }
877 buff[i++] = *s;
878 n = 0;
879 prev = *s;
880 }
881 else {
882 buff[i++] = *s;
883 n++;
884 prev = *s;
885 }
886 if (n > len) {
887 buff[i++] = '=';
888 buff[i++] = '\n';
889 n = 0;
890 prev = '\n';
891 }
892 if (i > 1024 - 5) {
893 rb_str_buf_cat(str, buff, i);
894 i = 0;
895 }
896 s++;
897 }
898 if (n > 0) {
899 buff[i++] = '=';
900 buff[i++] = '\n';
901 }
902 if (i > 0) {
903 rb_str_buf_cat(str, buff, i);
904 }
905}
906
907static inline int
908hex2num(char c)
909{
910 int n;
911 n = ruby_digit36_to_number_table[(unsigned char)c];
912 if (16 <= n)
913 n = -1;
914 return n;
915}
916
/*
 * Clamps `len` to the number of whole `sz`-byte items left between `s`
 * and `send`.  Unless the count came from '*' (`star`), the number of
 * items that could not be supplied is left in `tmp_len`; otherwise
 * `tmp_len` is 0.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {			\
    const long pack_avail_ = (long)((send-s)/(sz));		\
    tmp_len = 0;						\
    if (len > pack_avail_) {					\
	if (!star) {						\
	    tmp_len = len - pack_avail_;			\
	}							\
	len = pack_avail_;					\
    }								\
} while (0)

/*
 * In array mode, extends `ary` by `tmp_len` slots (filled with nil) to
 * stand in for the items the input was too short to provide.
 */
#define PACK_ITEM_ADJUST() do {					\
    if (tmp_len > 0) {						\
	if (mode == UNPACK_ARRAY)				\
	    rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil);	\
    }								\
} while (0)
931
/*
 * Oracle Developer Studio (Oracle Solaris Studio) 12.4/12.5/12.6 has a
 * C compiler optimization bug with the "-xO4" option.  AVOID_CC_BUG
 * expands to `volatile` for those compiler versions and to nothing
 * everywhere else.
 */
#if defined(__SUNPRO_C) && __SUNPRO_C >= 0x5130 && __SUNPRO_C <= 0x5150
# define AVOID_CC_BUG volatile
#else
# define AVOID_CC_BUG
#endif

/* unpack mode */
#define UNPACK_ARRAY 0		/* collect every item into an array */
#define UNPACK_BLOCK 1		/* yield every item */
#define UNPACK_1 2		/* return the first item produced */
946
947static VALUE
948pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset)
949{
950#define hexdigits ruby_hexdigits
951 char *s, *send;
952 char *p, *pend;
953 VALUE ary, associates = Qfalse;
954 char type;
955 long len;
956 AVOID_CC_BUG long tmp_len;
957 int star;
958#ifdef NATINT_PACK
959 int natint; /* native integer */
960#endif
961 int signed_p, integer_size, bigendian_p;
962#define UNPACK_PUSH(item) do {\
963 VALUE item_val = (item);\
964 if ((mode) == UNPACK_BLOCK) {\
965 rb_yield(item_val);\
966 }\
967 else if ((mode) == UNPACK_ARRAY) {\
968 rb_ary_push(ary, item_val);\
969 }\
970 else /* if ((mode) == UNPACK_1) { */ {\
971 return item_val; \
972 }\
973 } while (0)
974
975 StringValue(str);
976 StringValue(fmt);
977
978 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
979 len = RSTRING_LEN(str);
980 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
981
982 s = RSTRING_PTR(str);
983 send = s + len;
984 s += offset;
985
986 p = RSTRING_PTR(fmt);
987 pend = p + RSTRING_LEN(fmt);
988
989#define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
990
991 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
992 while (p < pend) {
993 int explicit_endian = 0;
994 type = *p++;
995#ifdef NATINT_PACK
996 natint = 0;
997#endif
998
999 if (ISSPACE(type)) continue;
1000 if (type == '#') {
1001 while ((p < pend) && (*p != '\n')) {
1002 p++;
1003 }
1004 continue;
1005 }
1006
1007 star = 0;
1008 {
1009 modifiers:
1010 switch (*p) {
1011 case '_':
1012 case '!':
1013
1014 if (strchr(natstr, type)) {
1015#ifdef NATINT_PACK
1016 natint = 1;
1017#endif
1018 p++;
1019 }
1020 else {
1021 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1022 }
1023 goto modifiers;
1024
1025 case '<':
1026 case '>':
1027 if (!strchr(endstr, type)) {
1028 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1029 }
1030 if (explicit_endian) {
1031 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1032 }
1033 explicit_endian = *p++;
1034 goto modifiers;
1035 }
1036 }
1037
1038 if (p >= pend)
1039 len = 1;
1040 else if (*p == '*') {
1041 star = 1;
1042 len = send - s;
1043 p++;
1044 }
1045 else if (ISDIGIT(*p)) {
1046 errno = 0;
1047 len = STRTOUL(p, (char**)&p, 10);
1048 if (len < 0 || errno) {
1049 rb_raise(rb_eRangeError, "pack length too big");
1050 }
1051 }
1052 else {
1053 len = (type != '@');
1054 }
1055
1056 switch (type) {
1057 case '%':
1058 rb_raise(rb_eArgError, "%% is not supported");
1059 break;
1060
1061 case 'A':
1062 if (len > send - s) len = send - s;
1063 {
1064 long end = len;
1065 char *t = s + len - 1;
1066
1067 while (t >= s) {
1068 if (*t != ' ' && *t != '\0') break;
1069 t--; len--;
1070 }
1071 UNPACK_PUSH(rb_str_new(s, len));
1072 s += end;
1073 }
1074 break;
1075
1076 case 'Z':
1077 {
1078 char *t = s;
1079
1080 if (len > send-s) len = send-s;
1081 while (t < s+len && *t) t++;
1082 UNPACK_PUSH(rb_str_new(s, t-s));
1083 if (t < send) t++;
1084 s = star ? t : s+len;
1085 }
1086 break;
1087
1088 case 'a':
1089 if (len > send - s) len = send - s;
1090 UNPACK_PUSH(rb_str_new(s, len));
1091 s += len;
1092 break;
1093
1094 case 'b':
1095 {
1096 VALUE bitstr;
1097 char *t;
1098 int bits;
1099 long i;
1100
1101 if (p[-1] == '*' || len > (send - s) * 8)
1102 len = (send - s) * 8;
1103 bits = 0;
1104 bitstr = rb_usascii_str_new(0, len);
1105 t = RSTRING_PTR(bitstr);
1106 for (i=0; i<len; i++) {
1107 if (i & 7) bits >>= 1;
1108 else bits = (unsigned char)*s++;
1109 *t++ = (bits & 1) ? '1' : '0';
1110 }
1111 UNPACK_PUSH(bitstr);
1112 }
1113 break;
1114
1115 case 'B':
1116 {
1117 VALUE bitstr;
1118 char *t;
1119 int bits;
1120 long i;
1121
1122 if (p[-1] == '*' || len > (send - s) * 8)
1123 len = (send - s) * 8;
1124 bits = 0;
1125 bitstr = rb_usascii_str_new(0, len);
1126 t = RSTRING_PTR(bitstr);
1127 for (i=0; i<len; i++) {
1128 if (i & 7) bits <<= 1;
1129 else bits = (unsigned char)*s++;
1130 *t++ = (bits & 128) ? '1' : '0';
1131 }
1132 UNPACK_PUSH(bitstr);
1133 }
1134 break;
1135
1136 case 'h':
1137 {
1138 VALUE bitstr;
1139 char *t;
1140 int bits;
1141 long i;
1142
1143 if (p[-1] == '*' || len > (send - s) * 2)
1144 len = (send - s) * 2;
1145 bits = 0;
1146 bitstr = rb_usascii_str_new(0, len);
1147 t = RSTRING_PTR(bitstr);
1148 for (i=0; i<len; i++) {
1149 if (i & 1)
1150 bits >>= 4;
1151 else
1152 bits = (unsigned char)*s++;
1153 *t++ = hexdigits[bits & 15];
1154 }
1155 UNPACK_PUSH(bitstr);
1156 }
1157 break;
1158
1159 case 'H':
1160 {
1161 VALUE bitstr;
1162 char *t;
1163 int bits;
1164 long i;
1165
1166 if (p[-1] == '*' || len > (send - s) * 2)
1167 len = (send - s) * 2;
1168 bits = 0;
1169 bitstr = rb_usascii_str_new(0, len);
1170 t = RSTRING_PTR(bitstr);
1171 for (i=0; i<len; i++) {
1172 if (i & 1)
1173 bits <<= 4;
1174 else
1175 bits = (unsigned char)*s++;
1176 *t++ = hexdigits[(bits >> 4) & 15];
1177 }
1178 UNPACK_PUSH(bitstr);
1179 }
1180 break;
1181
1182 case 'c':
1183 signed_p = 1;
1184 integer_size = 1;
1185 bigendian_p = BIGENDIAN_P(); /* not effective */
1186 goto unpack_integer;
1187
1188 case 'C':
1189 signed_p = 0;
1190 integer_size = 1;
1191 bigendian_p = BIGENDIAN_P(); /* not effective */
1192 goto unpack_integer;
1193
1194 case 's':
1195 signed_p = 1;
1196 integer_size = NATINT_LEN(short, 2);
1197 bigendian_p = BIGENDIAN_P();
1198 goto unpack_integer;
1199
1200 case 'S':
1201 signed_p = 0;
1202 integer_size = NATINT_LEN(short, 2);
1203 bigendian_p = BIGENDIAN_P();
1204 goto unpack_integer;
1205
1206 case 'i':
1207 signed_p = 1;
1208 integer_size = (int)sizeof(int);
1209 bigendian_p = BIGENDIAN_P();
1210 goto unpack_integer;
1211
1212 case 'I':
1213 signed_p = 0;
1214 integer_size = (int)sizeof(int);
1215 bigendian_p = BIGENDIAN_P();
1216 goto unpack_integer;
1217
1218 case 'l':
1219 signed_p = 1;
1220 integer_size = NATINT_LEN(long, 4);
1221 bigendian_p = BIGENDIAN_P();
1222 goto unpack_integer;
1223
1224 case 'L':
1225 signed_p = 0;
1226 integer_size = NATINT_LEN(long, 4);
1227 bigendian_p = BIGENDIAN_P();
1228 goto unpack_integer;
1229
1230 case 'q':
1231 signed_p = 1;
1232 integer_size = NATINT_LEN_Q;
1233 bigendian_p = BIGENDIAN_P();
1234 goto unpack_integer;
1235
1236 case 'Q':
1237 signed_p = 0;
1238 integer_size = NATINT_LEN_Q;
1239 bigendian_p = BIGENDIAN_P();
1240 goto unpack_integer;
1241
1242 case 'j':
1243 signed_p = 1;
1244 integer_size = sizeof(intptr_t);
1245 bigendian_p = BIGENDIAN_P();
1246 goto unpack_integer;
1247
1248 case 'J':
1249 signed_p = 0;
1250 integer_size = sizeof(uintptr_t);
1251 bigendian_p = BIGENDIAN_P();
1252 goto unpack_integer;
1253
1254 case 'n':
1255 signed_p = 0;
1256 integer_size = 2;
1257 bigendian_p = 1;
1258 goto unpack_integer;
1259
1260 case 'N':
1261 signed_p = 0;
1262 integer_size = 4;
1263 bigendian_p = 1;
1264 goto unpack_integer;
1265
1266 case 'v':
1267 signed_p = 0;
1268 integer_size = 2;
1269 bigendian_p = 0;
1270 goto unpack_integer;
1271
1272 case 'V':
1273 signed_p = 0;
1274 integer_size = 4;
1275 bigendian_p = 0;
1276 goto unpack_integer;
1277
1278 unpack_integer:
1279 if (explicit_endian) {
1280 bigendian_p = explicit_endian == '>';
1281 }
1282 PACK_LENGTH_ADJUST_SIZE(integer_size);
1283 while (len-- > 0) {
1284 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1285 VALUE val;
1286 if (signed_p)
1287 flags |= INTEGER_PACK_2COMP;
1288 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1289 UNPACK_PUSH(val);
1290 s += integer_size;
1291 }
1292 PACK_ITEM_ADJUST();
1293 break;
1294
1295 case 'f':
1296 case 'F':
1297 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1298 while (len-- > 0) {
1299 float tmp;
1300 UNPACK_FETCH(&tmp, float);
1301 UNPACK_PUSH(DBL2NUM((double)tmp));
1302 }
1303 PACK_ITEM_ADJUST();
1304 break;
1305
1306 case 'e':
1307 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1308 while (len-- > 0) {
1309 FLOAT_CONVWITH(tmp);
1310 UNPACK_FETCH(tmp.buf, float);
1311 VTOHF(tmp);
1312 UNPACK_PUSH(DBL2NUM(tmp.f));
1313 }
1314 PACK_ITEM_ADJUST();
1315 break;
1316
1317 case 'E':
1318 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1319 while (len-- > 0) {
1320 DOUBLE_CONVWITH(tmp);
1321 UNPACK_FETCH(tmp.buf, double);
1322 VTOHD(tmp);
1323 UNPACK_PUSH(DBL2NUM(tmp.d));
1324 }
1325 PACK_ITEM_ADJUST();
1326 break;
1327
1328 case 'D':
1329 case 'd':
1330 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1331 while (len-- > 0) {
1332 double tmp;
1333 UNPACK_FETCH(&tmp, double);
1334 UNPACK_PUSH(DBL2NUM(tmp));
1335 }
1336 PACK_ITEM_ADJUST();
1337 break;
1338
1339 case 'g':
1340 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1341 while (len-- > 0) {
1342 FLOAT_CONVWITH(tmp);
1343 UNPACK_FETCH(tmp.buf, float);
1344 NTOHF(tmp);
1345 UNPACK_PUSH(DBL2NUM(tmp.f));
1346 }
1347 PACK_ITEM_ADJUST();
1348 break;
1349
1350 case 'G':
1351 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1352 while (len-- > 0) {
1353 DOUBLE_CONVWITH(tmp);
1354 UNPACK_FETCH(tmp.buf, double);
1355 NTOHD(tmp);
1356 UNPACK_PUSH(DBL2NUM(tmp.d));
1357 }
1358 PACK_ITEM_ADJUST();
1359 break;
1360
1361 case 'U':
1362 if (len > send - s) len = send - s;
1363 while (len > 0 && s < send) {
1364 long alen = send - s;
1365 unsigned long l;
1366
1367 l = utf8_to_uv(s, &alen);
1368 s += alen; len--;
1369 UNPACK_PUSH(ULONG2NUM(l));
1370 }
1371 break;
1372
1373 case 'u':
1374 {
1375 VALUE buf = rb_str_new(0, (send - s)*3/4);
1376 char *ptr = RSTRING_PTR(buf);
1377 long total = 0;
1378
1379 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1380 long a,b,c,d;
1381 char hunk[3];
1382
1383 len = ((unsigned char)*s++ - ' ') & 077;
1384
1385 total += len;
1386 if (total > RSTRING_LEN(buf)) {
1387 len -= total - RSTRING_LEN(buf);
1388 total = RSTRING_LEN(buf);
1389 }
1390
1391 while (len > 0) {
1392 long mlen = len > 3 ? 3 : len;
1393
1394 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1395 a = ((unsigned char)*s++ - ' ') & 077;
1396 else
1397 a = 0;
1398 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1399 b = ((unsigned char)*s++ - ' ') & 077;
1400 else
1401 b = 0;
1402 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1403 c = ((unsigned char)*s++ - ' ') & 077;
1404 else
1405 c = 0;
1406 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1407 d = ((unsigned char)*s++ - ' ') & 077;
1408 else
1409 d = 0;
1410 hunk[0] = (char)(a << 2 | b >> 4);
1411 hunk[1] = (char)(b << 4 | c >> 2);
1412 hunk[2] = (char)(c << 6 | d);
1413 memcpy(ptr, hunk, mlen);
1414 ptr += mlen;
1415 len -= mlen;
1416 }
1417 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1418 s++; /* possible checksum byte */
1419 if (s < send && *s == '\r') s++;
1420 if (s < send && *s == '\n') s++;
1421 }
1422
1423 rb_str_set_len(buf, total);
1424 UNPACK_PUSH(buf);
1425 }
1426 break;
1427
1428 case 'm':
1429 {
1430 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1431 char *ptr = RSTRING_PTR(buf);
1432 int a = -1,b = -1,c = 0,d = 0;
1433 static signed char b64_xtable[256];
1434
1435 if (b64_xtable['/'] <= 0) {
1436 int i;
1437
1438 for (i = 0; i < 256; i++) {
1439 b64_xtable[i] = -1;
1440 }
1441 for (i = 0; i < 64; i++) {
1442 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1443 }
1444 }
1445 if (len == 0) {
1446 while (s < send) {
1447 a = b = c = d = -1;
1448 a = b64_xtable[(unsigned char)*s++];
1449 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1450 b = b64_xtable[(unsigned char)*s++];
1451 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1452 if (*s == '=') {
1453 if (s + 2 == send && *(s + 1) == '=') break;
1454 rb_raise(rb_eArgError, "invalid base64");
1455 }
1456 c = b64_xtable[(unsigned char)*s++];
1457 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1458 if (s + 1 == send && *s == '=') break;
1459 d = b64_xtable[(unsigned char)*s++];
1460 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1461 *ptr++ = castchar(a << 2 | b >> 4);
1462 *ptr++ = castchar(b << 4 | c >> 2);
1463 *ptr++ = castchar(c << 6 | d);
1464 }
1465 if (c == -1) {
1466 *ptr++ = castchar(a << 2 | b >> 4);
1467 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1468 }
1469 else if (d == -1) {
1470 *ptr++ = castchar(a << 2 | b >> 4);
1471 *ptr++ = castchar(b << 4 | c >> 2);
1472 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1473 }
1474 }
1475 else {
1476 while (s < send) {
1477 a = b = c = d = -1;
1478 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1479 if (s >= send) break;
1480 s++;
1481 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1482 if (s >= send) break;
1483 s++;
1484 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1485 if (*s == '=' || s >= send) break;
1486 s++;
1487 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1488 if (*s == '=' || s >= send) break;
1489 s++;
1490 *ptr++ = castchar(a << 2 | b >> 4);
1491 *ptr++ = castchar(b << 4 | c >> 2);
1492 *ptr++ = castchar(c << 6 | d);
1493 a = -1;
1494 }
1495 if (a != -1 && b != -1) {
1496 if (c == -1)
1497 *ptr++ = castchar(a << 2 | b >> 4);
1498 else {
1499 *ptr++ = castchar(a << 2 | b >> 4);
1500 *ptr++ = castchar(b << 4 | c >> 2);
1501 }
1502 }
1503 }
1504 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1505 UNPACK_PUSH(buf);
1506 }
1507 break;
1508
1509 case 'M':
1510 {
1511 VALUE buf = rb_str_new(0, send - s);
1512 char *ptr = RSTRING_PTR(buf), *ss = s;
1513 int csum = 0;
1514 int c1, c2;
1515
1516 while (s < send) {
1517 if (*s == '=') {
1518 if (++s == send) break;
1519 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1520 s++;
1521 if (*s != '\n') {
1522 if ((c1 = hex2num(*s)) == -1) break;
1523 if (++s == send) break;
1524 if ((c2 = hex2num(*s)) == -1) break;
1525 csum |= *ptr++ = castchar(c1 << 4 | c2);
1526 }
1527 }
1528 else {
1529 csum |= *ptr++ = *s;
1530 }
1531 s++;
1532 ss = s;
1533 }
1534 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1535 rb_str_buf_cat(buf, ss, send-ss);
1538 UNPACK_PUSH(buf);
1539 }
1540 break;
1541
1542 case '@':
1543 if (len > RSTRING_LEN(str))
1544 rb_raise(rb_eArgError, "@ outside of string");
1545 s = RSTRING_PTR(str) + len;
1546 break;
1547
1548 case 'X':
1549 if (len > s - RSTRING_PTR(str))
1550 rb_raise(rb_eArgError, "X outside of string");
1551 s -= len;
1552 break;
1553
1554 case 'x':
1555 if (len > send - s)
1556 rb_raise(rb_eArgError, "x outside of string");
1557 s += len;
1558 break;
1559
1560 case 'P':
1561 if (sizeof(char *) <= (size_t)(send - s)) {
1562 VALUE tmp = Qnil;
1563 char *t;
1564
1565 UNPACK_FETCH(&t, char *);
1566 if (t) {
1567 if (!associates) associates = str_associated(str);
1568 tmp = associated_pointer(associates, t);
1569 if (len < RSTRING_LEN(tmp)) {
1570 tmp = rb_str_new(t, len);
1571 str_associate(tmp, associates);
1572 }
1573 }
1574 UNPACK_PUSH(tmp);
1575 }
1576 break;
1577
1578 case 'p':
1579 if (len > (long)((send - s) / sizeof(char *)))
1580 len = (send - s) / sizeof(char *);
1581 while (len-- > 0) {
1582 if ((size_t)(send - s) < sizeof(char *))
1583 break;
1584 else {
1585 VALUE tmp = Qnil;
1586 char *t;
1587
1588 UNPACK_FETCH(&t, char *);
1589 if (t) {
1590 if (!associates) associates = str_associated(str);
1591 tmp = associated_pointer(associates, t);
1592 }
1593 UNPACK_PUSH(tmp);
1594 }
1595 }
1596 break;
1597
1598 case 'w':
1599 {
1600 char *s0 = s;
1601 while (len > 0 && s < send) {
1602 if (*s & 0x80) {
1603 s++;
1604 }
1605 else {
1606 s++;
1607 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1608 len--;
1609 s0 = s;
1610 }
1611 }
1612 }
1613 break;
1614
1615 default:
1616 unknown_directive("unpack", type, fmt);
1617 break;
1618 }
1619 }
1620
1621 return ary;
1622}
1623
1624static VALUE
1625pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1626{
1627 int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1628 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1629}
1630
1631static VALUE
1632pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1633{
1634 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1635}
1636
1637int
1638rb_uv_to_utf8(char buf[6], unsigned long uv)
1639{
1640 if (uv <= 0x7f) {
1641 buf[0] = (char)uv;
1642 return 1;
1643 }
1644 if (uv <= 0x7ff) {
1645 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1646 buf[1] = castchar((uv&0x3f)|0x80);
1647 return 2;
1648 }
1649 if (uv <= 0xffff) {
1650 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1651 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1652 buf[2] = castchar((uv&0x3f)|0x80);
1653 return 3;
1654 }
1655 if (uv <= 0x1fffff) {
1656 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1657 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1658 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1659 buf[3] = castchar((uv&0x3f)|0x80);
1660 return 4;
1661 }
1662 if (uv <= 0x3ffffff) {
1663 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1664 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1665 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1666 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1667 buf[4] = castchar((uv&0x3f)|0x80);
1668 return 5;
1669 }
1670 if (uv <= 0x7fffffff) {
1671 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1672 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1673 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1674 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1675 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1676 buf[5] = castchar((uv&0x3f)|0x80);
1677 return 6;
1678 }
1679 rb_raise(rb_eRangeError, "pack(U): value out of range");
1680
1682}
1683
/*
 * Lower bounds per sequence length: entry i is the smallest value that
 * needs i+1 bytes.  utf8_to_uv() indexes this table with (byte count - 1)
 * and reports a "redundant UTF-8 sequence" when the decoded value is
 * below the bound.
 */
static const unsigned long utf8_limits[] = {
    0UL,            /* 1 byte  */
    1UL << 7,       /* 2 bytes: 0x80 */
    1UL << 11,      /* 3 bytes: 0x800 */
    1UL << 16,      /* 4 bytes: 0x10000 */
    1UL << 21,      /* 5 bytes: 0x200000 */
    1UL << 26,      /* 6 bytes: 0x4000000 */
    1UL << 31,      /* 7 bytes: 0x80000000 */
};
1693
1694static unsigned long
1695utf8_to_uv(const char *p, long *lenp)
1696{
1697 int c = *p++ & 0xff;
1698 unsigned long uv = c;
1699 long n;
1700
1701 if (!(uv & 0x80)) {
1702 *lenp = 1;
1703 return uv;
1704 }
1705 if (!(uv & 0x40)) {
1706 *lenp = 1;
1707 rb_raise(rb_eArgError, "malformed UTF-8 character");
1708 }
1709
1710 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1711 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1712 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1713 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1714 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1715 else {
1716 *lenp = 1;
1717 rb_raise(rb_eArgError, "malformed UTF-8 character");
1718 }
1719 if (n > *lenp) {
1720 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1721 n, *lenp);
1722 }
1723 *lenp = n--;
1724 if (n != 0) {
1725 while (n--) {
1726 c = *p++ & 0xff;
1727 if ((c & 0xc0) != 0x80) {
1728 *lenp -= n + 1;
1729 rb_raise(rb_eArgError, "malformed UTF-8 character");
1730 }
1731 else {
1732 c &= 0x3f;
1733 uv = uv << 6 | c;
1734 }
1735 }
1736 }
1737 n = *lenp - 1;
1738 if (uv < utf8_limits[n]) {
1739 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1740 }
1741 return uv;
1742}
1743
1744#include "pack.rbinc"
1745
/*
 * Initializer for this file: stores the ID returned by
 * rb_make_internal_id() in id_associated.
 *
 * NOTE(review): id_associated is presumably the key under which the
 * str_associate()/str_associated() helpers used by the 'p'/'P' directives
 * keep their data -- confirm against those definitions.
 */
1746void
1747Init_pack(void)
1748{
1749 id_associated = rb_make_internal_id();
1750}
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition: eval.c:850
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition: coderange.h:180
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
Definition: coderange.h:181
#define ISSPACE
Old name of rb_isspace.
Definition: ctype.h:88
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition: double.h:28
#define T_STRING
Old name of RUBY_T_STRING.
Definition: value_type.h:78
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition: long.h:60
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition: assume.h:31
#define STRTOUL
Old name of ruby_strtoul.
Definition: ctype.h:104
#define ISDIGIT
Old name of rb_isdigit.
Definition: ctype.h:93
#define ISALPHA
Old name of rb_isalpha.
Definition: ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition: ctype.h:85
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define NIL_P
Old name of RB_NIL_P.
#define DBL2NUM
Old name of rb_float_new.
Definition: double.h:29
#define ISPRINT
Old name of rb_isprint.
Definition: ctype.h:86
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition: long.h:51
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
Definition: coderange.h:189
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition: error.c:3021
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition: error.c:802
void rb_warning(const char *fmt,...)
Issues a warning.
Definition: error.c:449
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1533
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1521
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
Definition: encoding.c:1030
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1545
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new(void)
Allocates a new, empty array.
Definition: array.c:750
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
Definition: array.c:1308
int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Exports an integer into a buffer.
Definition: bignum.c:3559
size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
Calculates the number of words needed to represent the absolute value of the passed integer.
Definition: bignum.c:3393
VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Import an integer from a buffer.
Definition: bignum.c:3645
#define INTEGER_PACK_LITTLE_ENDIAN
Little endian combination.
Definition: bignum.h:567
#define INTEGER_PACK_BIG_ENDIAN
Big endian combination.
Definition: bignum.h:572
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition: pack.c:1638
#define INTEGER_PACK_2COMP
Uses 2's complement representation.
Definition: bignum.h:549
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
Definition: string.c:2459
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
Definition: string.c:924
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition: string.c:3039
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
Definition: string.c:918
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition: string.c:1506
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition: string.c:1657
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
Definition: variable.c:1575
const signed char ruby_digit36_to_number_table[]
Character to number mapping like 'a'->10, 'b'->11 etc.
Definition: util.c:76
#define RB_NUM2LONG
Just another name of rb_num2long_inline.
Definition: long.h:57
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
#define RARRAY_LEN
Just another name of rb_array_len.
Definition: rarray.h:68
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
Definition: rarray.h:69
#define StringValue(v)
Ensures that the parameter object is a String.
Definition: rstring.h:72
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition: rstring.h:82
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
Definition: rstring.h:483
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
Definition: rstring.h:497
const char * rb_obj_classname(VALUE obj)
Queries the name of the class of the passed object.
Definition: variable.c:309
intptr_t SIGNED_VALUE
A signed integer type that has the same width with VALUE.
Definition: value.h:63
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition: value_type.h:375