Line data Source code
1 : /* base64.c -- Encode binary data using printable characters.
2 : Copyright (C) 1999, 2000, 2001, 2004, 2005, 2006, 2007 Free Software
3 : Foundation, Inc.
4 :
5 : This program is free software: you can redistribute it and/or modify
6 : it under the terms of the GNU General Public License as published by
7 : the Free Software Foundation, either version 3 of the License, or
8 : (at your option) any later version.
9 :
10 : This program is distributed in the hope that it will be useful,
11 : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : GNU General Public License for more details.
14 :
15 : You should have received a copy of the GNU General Public License
16 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 :
18 : /* Written by Simon Josefsson. Partially adapted from GNU MailUtils
19 : * (mailbox/filter_trans.c, as of 2004-11-28). Improved by review
20 : * from Paul Eggert, Bruno Haible, and Stepan Kasal.
21 : *
22 : * See also RFC 3548 <http://www.ietf.org/rfc/rfc3548.txt>.
23 : *
24 : * Be careful with error checking. Here is how you would typically
25 : * use these functions:
26 : * base64_decode_ctx_init (&ctx);
27 : * bool ok = base64_decode_alloc (&ctx, in, inlen, &out, &outlen);
28 : * if (!ok)
29 : * FAIL: input was not valid base64
30 : * if (out == NULL)
31 : * FAIL: memory allocation error
32 : * OK: data in OUT/OUTLEN
33 : *
34 : * size_t outlen = base64_encode_alloc (in, inlen, &out);
35 : * if (out == NULL && outlen == 0 && inlen != 0)
36 : * FAIL: input too long
37 : * if (out == NULL)
38 : * FAIL: memory allocation error
39 : * OK: data in OUT/OUTLEN.
40 : *
41 : */
42 :
43 : #include <config.h>
44 :
45 : /* Get prototype. */
46 : #include "base64.h"
47 :
48 : /* Get malloc. */
49 : #include <stdlib.h>
50 :
51 : /* Get UCHAR_MAX. */
52 : #include <limits.h>
53 :
54 : #include <string.h>
55 :
/* Return CH converted to 'unsigned char'.  The conversion is done by
   plain assignment, which is valid even in C89, so that callers can
   safely use the result as an array index or in shifts.  */
static inline unsigned char
to_uchar (char ch)
{
  unsigned char as_unsigned = ch;

  return as_unsigned;
}
62 :
/* Base64-encode the INLEN bytes at IN into OUT, which has room for
   OUTLEN bytes.

   Input is consumed in groups of up to three bytes; each group yields
   four output characters, with '=' standing in for positions that have
   no input byte behind them.  Output stops as soon as OUT is full, so a
   short buffer receives a truncated encoding.  If any room is left in
   OUT once all input has been encoded, a terminating zero byte is
   stored as well.  */
void
base64_encode (const char *restrict in, size_t inlen,
               char *restrict out, size_t outlen)
{
  static const char alphabet[64] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  while (inlen != 0 && outlen != 0)
    {
      /* Number of input bytes feeding this group: 1, 2 or 3.  */
      size_t const take = inlen < 3 ? inlen : 3;
      /* Missing trailing bytes contribute zero bits.  */
      unsigned char const b0 = in[0];
      unsigned char const b1 = take > 1 ? in[1] : 0;
      unsigned char const b2 = take > 2 ? in[2] : 0;
      char quad[4];
      size_t emit;
      size_t k;

      quad[0] = alphabet[(b0 >> 2) & 0x3f];
      quad[1] = alphabet[((b0 << 4) + (b1 >> 4)) & 0x3f];
      quad[2] = take > 1 ? alphabet[((b1 << 2) + (b2 >> 6)) & 0x3f] : '=';
      quad[3] = take > 2 ? alphabet[b2 & 0x3f] : '=';

      /* Copy only as much of the group as still fits in OUT.  */
      emit = outlen < 4 ? outlen : 4;
      for (k = 0; k < emit; k++)
        *out++ = quad[k];
      outlen -= emit;

      in += take;
      inlen -= take;
    }

  if (outlen != 0)
    *out = '\0';
}
104 :
/* Encode the INLEN bytes at IN as a zero-terminated base64 string in
   newly malloc'd memory and store the pointer in *OUT; the caller is
   responsible for freeing it.

   Return values:
     - On success, the length of the encoded text (terminating zero not
       counted), i.e. BASE64_LENGTH (INLEN).
     - If the buffer size cannot be represented (size computation
       wrapped around), 0, with *OUT set to NULL.
     - If malloc fails, the number of bytes that were requested,
       i.e. BASE64_LENGTH (INLEN) + 1, with *OUT set to NULL.  */
size_t
base64_encode_alloc (const char *in, size_t inlen, char **out)
{
  /* Encoded text plus one byte for the terminating zero.  */
  size_t const bufsize = 1 + BASE64_LENGTH (inlen);
  char *buf;

  /* Without wraparound the encoded form is never shorter than the
     input, so a buffer size below INLEN can only mean that the size
     arithmetic overflowed.  */
  if (bufsize < inlen)
    {
      *out = NULL;
      return 0;
    }

  buf = malloc (bufsize);
  *out = buf;
  if (buf == NULL)
    return bufsize;

  base64_encode (in, inlen, buf, bufsize);

  return bufsize - 1;
}
145 :
/* Map a character code to its 6-bit base64 value, or to -1 when the
   code is not in the alphabet A-Za-z0-9+/.

   The mapping is spelled out as comparisons against character
   constants instead of relying on numeric codes, so the table below is
   correct whatever the execution character set is (EBCDIC included).
   It does assume that every alphabet character has a code in 0..255;
   POSIX 1003.1-2001 guarantees this by requiring 8-bit chars, but a
   non-POSIX C99 platform might not.

   The formal parameter is named "_" rather than "x" because IBM C V6
   for AIX mishandles a macro whose parameter also appears as 'x'.  */
#define B64(_)                                                          \
  ((_) == 'A' ? 0 : (_) == 'B' ? 1 : (_) == 'C' ? 2 : (_) == 'D' ? 3    \
   : (_) == 'E' ? 4 : (_) == 'F' ? 5 : (_) == 'G' ? 6 : (_) == 'H' ? 7  \
   : (_) == 'I' ? 8 : (_) == 'J' ? 9 : (_) == 'K' ? 10 : (_) == 'L' ? 11 \
   : (_) == 'M' ? 12 : (_) == 'N' ? 13 : (_) == 'O' ? 14 : (_) == 'P' ? 15 \
   : (_) == 'Q' ? 16 : (_) == 'R' ? 17 : (_) == 'S' ? 18 : (_) == 'T' ? 19 \
   : (_) == 'U' ? 20 : (_) == 'V' ? 21 : (_) == 'W' ? 22 : (_) == 'X' ? 23 \
   : (_) == 'Y' ? 24 : (_) == 'Z' ? 25 : (_) == 'a' ? 26 : (_) == 'b' ? 27 \
   : (_) == 'c' ? 28 : (_) == 'd' ? 29 : (_) == 'e' ? 30 : (_) == 'f' ? 31 \
   : (_) == 'g' ? 32 : (_) == 'h' ? 33 : (_) == 'i' ? 34 : (_) == 'j' ? 35 \
   : (_) == 'k' ? 36 : (_) == 'l' ? 37 : (_) == 'm' ? 38 : (_) == 'n' ? 39 \
   : (_) == 'o' ? 40 : (_) == 'p' ? 41 : (_) == 'q' ? 42 : (_) == 'r' ? 43 \
   : (_) == 's' ? 44 : (_) == 't' ? 45 : (_) == 'u' ? 46 : (_) == 'v' ? 47 \
   : (_) == 'w' ? 48 : (_) == 'x' ? 49 : (_) == 'y' ? 50 : (_) == 'z' ? 51 \
   : (_) == '0' ? 52 : (_) == '1' ? 53 : (_) == '2' ? 54 : (_) == '3' ? 55 \
   : (_) == '4' ? 56 : (_) == '5' ? 57 : (_) == '6' ? 58 : (_) == '7' ? 59 \
   : (_) == '8' ? 60 : (_) == '9' ? 61 : (_) == '+' ? 62 : (_) == '/' ? 63 \
   : -1)

/* Decoding table indexed by character code: the entry is the 6-bit
   value of that character, or -1 if the character is not part of the
   base64 alphabet.  */
static const signed char b64[0x100] = {
  B64 (0), B64 (1), B64 (2), B64 (3), B64 (4), B64 (5), B64 (6), B64 (7),
  B64 (8), B64 (9), B64 (10), B64 (11), B64 (12), B64 (13), B64 (14), B64 (15),
  B64 (16), B64 (17), B64 (18), B64 (19), B64 (20), B64 (21), B64 (22), B64 (23),
  B64 (24), B64 (25), B64 (26), B64 (27), B64 (28), B64 (29), B64 (30), B64 (31),
  B64 (32), B64 (33), B64 (34), B64 (35), B64 (36), B64 (37), B64 (38), B64 (39),
  B64 (40), B64 (41), B64 (42), B64 (43), B64 (44), B64 (45), B64 (46), B64 (47),
  B64 (48), B64 (49), B64 (50), B64 (51), B64 (52), B64 (53), B64 (54), B64 (55),
  B64 (56), B64 (57), B64 (58), B64 (59), B64 (60), B64 (61), B64 (62), B64 (63),
  B64 (64), B64 (65), B64 (66), B64 (67), B64 (68), B64 (69), B64 (70), B64 (71),
  B64 (72), B64 (73), B64 (74), B64 (75), B64 (76), B64 (77), B64 (78), B64 (79),
  B64 (80), B64 (81), B64 (82), B64 (83), B64 (84), B64 (85), B64 (86), B64 (87),
  B64 (88), B64 (89), B64 (90), B64 (91), B64 (92), B64 (93), B64 (94), B64 (95),
  B64 (96), B64 (97), B64 (98), B64 (99),
  B64 (100), B64 (101), B64 (102), B64 (103),
  B64 (104), B64 (105), B64 (106), B64 (107),
  B64 (108), B64 (109), B64 (110), B64 (111),
  B64 (112), B64 (113), B64 (114), B64 (115),
  B64 (116), B64 (117), B64 (118), B64 (119),
  B64 (120), B64 (121), B64 (122), B64 (123),
  B64 (124), B64 (125), B64 (126), B64 (127),
  B64 (128), B64 (129), B64 (130), B64 (131),
  B64 (132), B64 (133), B64 (134), B64 (135),
  B64 (136), B64 (137), B64 (138), B64 (139),
  B64 (140), B64 (141), B64 (142), B64 (143),
  B64 (144), B64 (145), B64 (146), B64 (147),
  B64 (148), B64 (149), B64 (150), B64 (151),
  B64 (152), B64 (153), B64 (154), B64 (155),
  B64 (156), B64 (157), B64 (158), B64 (159),
  B64 (160), B64 (161), B64 (162), B64 (163),
  B64 (164), B64 (165), B64 (166), B64 (167),
  B64 (168), B64 (169), B64 (170), B64 (171),
  B64 (172), B64 (173), B64 (174), B64 (175),
  B64 (176), B64 (177), B64 (178), B64 (179),
  B64 (180), B64 (181), B64 (182), B64 (183),
  B64 (184), B64 (185), B64 (186), B64 (187),
  B64 (188), B64 (189), B64 (190), B64 (191),
  B64 (192), B64 (193), B64 (194), B64 (195),
  B64 (196), B64 (197), B64 (198), B64 (199),
  B64 (200), B64 (201), B64 (202), B64 (203),
  B64 (204), B64 (205), B64 (206), B64 (207),
  B64 (208), B64 (209), B64 (210), B64 (211),
  B64 (212), B64 (213), B64 (214), B64 (215),
  B64 (216), B64 (217), B64 (218), B64 (219),
  B64 (220), B64 (221), B64 (222), B64 (223),
  B64 (224), B64 (225), B64 (226), B64 (227),
  B64 (228), B64 (229), B64 (230), B64 (231),
  B64 (232), B64 (233), B64 (234), B64 (235),
  B64 (236), B64 (237), B64 (238), B64 (239),
  B64 (240), B64 (241), B64 (242), B64 (243),
  B64 (244), B64 (245), B64 (246), B64 (247),
  B64 (248), B64 (249), B64 (250), B64 (251),
  B64 (252), B64 (253), B64 (254), B64 (255)
};
288 :
289 : #if UCHAR_MAX == 255
290 : # define uchar_in_range(c) true
291 : #else
292 : # define uchar_in_range(c) ((c) <= 255)
293 : #endif
294 :
295 : /* Return true if CH is a character from the Base64 alphabet, and
296 : false otherwise. Note that '=' is padding and not considered to be
297 : part of the alphabet. */
298 : bool
299 181 : isbase64 (char ch)
300 : {
301 181 : return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)];
302 : }
303 :
304 : /* Initialize decode-context buffer, CTX. */
305 : void
306 25 : base64_decode_ctx_init (struct base64_decode_context *ctx)
307 : {
308 25 : ctx->i = 0;
309 25 : }
310 :
311 : /* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and
312 : none of those four is a newline, then return *IN. Otherwise, copy up to
313 : 4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
314 : index CTX->i and setting CTX->i to reflect the number of bytes copied,
315 : and return CTX->buf. In either case, advance *IN to point to the byte
316 : after the last one processed, and set *N_NON_NEWLINE to the number of
317 : verified non-newline bytes accessible through the returned pointer. */
318 : static inline char *
319 36 : get_4 (struct base64_decode_context *ctx,
320 : char const *restrict *in, char const *restrict in_end,
321 : size_t *n_non_newline)
322 : {
323 36 : if (ctx->i == 4)
324 1 : ctx->i = 0;
325 :
326 36 : if (ctx->i == 0)
327 : {
328 25 : char const *t = *in;
329 25 : if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL)
330 : {
331 : /* This is the common case: no newline. */
332 12 : *in += 4;
333 12 : *n_non_newline = 4;
334 12 : return (char *) t;
335 : }
336 : }
337 :
338 : {
339 : /* Copy non-newline bytes into BUF. */
340 24 : char const *p = *in;
341 119 : while (p < in_end)
342 : {
343 72 : char c = *p++;
344 72 : if (c != '\n')
345 : {
346 33 : ctx->buf[ctx->i++] = c;
347 33 : if (ctx->i == 4)
348 1 : break;
349 : }
350 : }
351 :
352 24 : *in = p;
353 24 : *n_non_newline = ctx->i;
354 24 : return ctx->buf;
355 : }
356 : }
357 :
358 : #define return_false \
359 : do \
360 : { \
361 : *outp = out; \
362 : return false; \
363 : } \
364 : while (false)
365 :
366 : /* Decode up to four bytes of base64-encoded data, IN, of length INLEN
367 : into the output buffer, *OUT, of size *OUTLEN bytes. Return true if
368 : decoding is successful, false otherwise. If *OUTLEN is too small,
369 : as many bytes as possible are written to *OUT. On return, advance
370 : *OUT to point to the byte after the last one written, and decrement
371 : *OUTLEN to reflect the number of bytes remaining in *OUT. */
372 : static inline bool
373 68 : decode_4 (char const *restrict in, size_t inlen,
374 : char *restrict *outp, size_t *outleft)
375 : {
376 68 : char *out = *outp;
377 68 : if (inlen < 2)
378 9 : return false;
379 :
380 59 : if (!isbase64 (in[0]) || !isbase64 (in[1]))
381 6 : return false;
382 :
383 53 : if (*outleft)
384 : {
385 159 : *out++ = ((b64[to_uchar (in[0])] << 2)
386 106 : | (b64[to_uchar (in[1])] >> 4));
387 53 : --*outleft;
388 : }
389 :
390 53 : if (inlen == 2)
391 2 : return_false;
392 :
393 51 : if (in[2] == '=')
394 : {
395 30 : if (inlen != 4)
396 17 : return_false;
397 :
398 13 : if (in[3] != '=')
399 2 : return_false;
400 : }
401 : else
402 : {
403 21 : if (!isbase64 (in[2]))
404 2 : return_false;
405 :
406 19 : if (*outleft)
407 : {
408 57 : *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
409 38 : | (b64[to_uchar (in[2])] >> 2));
410 19 : --*outleft;
411 : }
412 :
413 19 : if (inlen == 3)
414 3 : return_false;
415 :
416 16 : if (in[3] == '=')
417 : {
418 9 : if (inlen != 4)
419 4 : return_false;
420 : }
421 : else
422 : {
423 7 : if (!isbase64 (in[3]))
424 1 : return_false;
425 :
426 6 : if (*outleft)
427 : {
428 18 : *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
429 12 : | b64[to_uchar (in[3])]);
430 6 : --*outleft;
431 : }
432 : }
433 : }
434 :
435 22 : *outp = out;
436 22 : return true;
437 : }
438 :
439 : /* Decode base64-encoded input array IN of length INLEN to output array
440 : OUT that can hold *OUTLEN bytes. The input data may be interspersed
441 : with newlines. Return true if decoding was successful, i.e. if the
442 : input was valid base64 data, false otherwise. If *OUTLEN is too
443 : small, as many bytes as possible will be written to OUT. On return,
444 : *OUTLEN holds the length of decoded bytes in OUT. Note that as soon
445 : as any non-alphabet, non-newline character is encountered, decoding
446 : is stopped and false is returned. If INLEN is zero, then process
447 : only whatever data is stored in CTX.
448 :
449 : Initially, CTX must have been initialized via base64_decode_ctx_init.
450 : Subsequent calls to this function must reuse whatever state is recorded
451 : in that buffer. It is necessary for when a quadruple of base64 input
452 : bytes spans two input buffers. */
453 :
/* Implementation outline: a fast path feeds IN to decode_4 directly,
   four bytes at a time; when that stops, a slow path uses get_4 to
   gather the next group of non-newline bytes (possibly through
   CTX->buf) and decodes that group instead. */
454 : bool
455 34 : base64_decode (struct base64_decode_context *ctx,
456 : const char *restrict in, size_t inlen,
457 : char *restrict out, size_t *outlen)
458 : {
/* OUTLEFT counts the bytes of OUT that are still unused. */
459 34 : size_t outleft = *outlen;
/* A call with INLEN == 0 asks us to process what is buffered in CTX. */
460 34 : bool flush_ctx = inlen == 0;
461 :
462 : while (true)
463 13 : {
464 47 : size_t outleft_save = outleft;
/* The fast path is only used when no bytes are pending in CTX. */
465 47 : if (ctx->i == 0 && !flush_ctx)
466 : {
467 : while (true)
468 : {
469 : /* Save a copy of outleft, in case we need to re-parse this
470 : block of four bytes. */
471 55 : outleft_save = outleft;
472 44 : if (!decode_4 (in, inlen, &out, &outleft))
473 33 : break;
474 :
475 11 : in += 4;
476 11 : inlen -= 4;
477 : }
478 : }
479 :
/* The fast path consumed all of the input: nothing more to do. */
480 47 : if (inlen == 0 && !flush_ctx)
481 9 : break;
482 :
483 : /* Handle the common case of 72-byte wrapped lines.
484 : This also handles any other multiple-of-4-byte wrapping. */
485 38 : if (inlen && *in == '\n')
486 : {
487 2 : ++in;
488 2 : --inlen;
489 2 : continue;
490 : }
491 :
/* decode_4 may already have stored bytes for the group it went on to
   reject; discard them so the slow path can decode that group again.
   This is a no-op when the fast path was not taken. */
492 : /* Restore OUT and OUTLEFT. */
493 36 : out -= outleft_save - outleft;
494 36 : outleft = outleft_save;
495 :
496 : {
497 36 : char const *in_end = in + inlen;
/* From here on INLEN is reused: get_4 overwrites it with the number of
   non-newline bytes available through NON_NL, and advances IN. */
498 36 : char const *non_nl = get_4 (ctx, &in, in_end, &inlen);
499 :
500 : /* If the input is empty or consists solely of newlines (0 non-newlines),
501 : then we're done. Likewise if there are fewer than 4 bytes when not
502 : flushing context. */
503 36 : if (inlen == 0 || (inlen < 4 && !flush_ctx))
504 : {
/* Zero INLEN so that this exit is reported as success below. */
505 12 : inlen = 0;
506 12 : break;
507 : }
/* On failure INLEN is still nonzero here, which makes the function
   return false. */
508 24 : if (!decode_4 (non_nl, inlen, &out, &outleft))
509 13 : break;
510 :
/* Switch INLEN back to the count of raw bytes still unread in IN. */
511 11 : inlen = in_end - in;
512 : }
513 : }
514 :
/* Convert "space remaining" into "number of bytes written". */
515 34 : *outlen -= outleft;
516 :
/* The only exit that leaves INLEN nonzero is a decode_4 failure in the
   slow path. */
517 34 : return inlen == 0;
518 : }
519 :
/* Allocate an output buffer in *OUT, and decode the base64 encoded
   data stored in IN of size INLEN to the *OUT buffer, using and
   updating the decoding state in CTX (which must have been set up with
   base64_decode_ctx_init).  The caller must free *OUT.

   On success, return true; *OUT points to the decoded data and, if
   OUTLEN is not NULL, *OUTLEN holds its size.

   If memory allocation fails, also return true, but with *OUT set to
   NULL; if OUTLEN is not NULL, *OUTLEN is set to the size of the memory
   block that was requested.  Callers must therefore check *OUT to tell
   success from an out-of-memory condition.

   If the input is not valid base64, return false with *OUT set to NULL;
   *OUTLEN is then left untouched.  */
bool
base64_decode_alloc (struct base64_decode_context *ctx,
                     const char *in, size_t inlen, char **out,
                     size_t *outlen)
{
  /* This may allocate a few bytes too many, depending on input,
     but it's not worth the extra CPU time to compute the exact size.
     Up to three bytes left over from a previous call may be pending in
     CTX, so as many as INLEN / 4 + 1 four-byte groups can be decoded,
     each yielding at most three bytes.  Dividing before multiplying
     avoids the possibility of overflow.  */
  size_t needlen = 3 * (inlen / 4 + 1);

  *out = malloc (needlen);
  if (!*out)
    {
      /* Report how much memory was needed, as documented.  */
      if (outlen)
        *outlen = needlen;
      return true;
    }

  /* base64_decode replaces NEEDLEN with the number of bytes decoded.  */
  if (!base64_decode (ctx, in, inlen, *out, &needlen))
    {
      free (*out);
      *out = NULL;
      return false;
    }

  if (outlen)
    *outlen = needlen;

  return true;
}
|