1 /* printf - format and print data
2 Copyright (C) 1990-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Usage: printf format [argument...]
18
19 A front end to the printf function that lets it be used from the shell.
20
21 Backslash escapes:
22
23 \" = double quote
24 \\ = backslash
25 \a = alert (bell)
26 \b = backspace
27 \c = produce no further output
28 \e = escape
29 \f = form feed
30 \n = new line
31 \r = carriage return
32 \t = horizontal tab
33 \v = vertical tab
34 \ooo = octal number (ooo is 1 to 3 digits)
\xhh = hexadecimal number (hh is 1 to 2 digits)
36 \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
37 \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
38
39 Additional directive:
40
41 %b = print an argument string, interpreting backslash escapes,
42 except that octal escapes are of the form \0 or \0ooo.
43
44 %q = print an argument string in a format that can be
reused as shell input. Escaped characters use the proposed
46 POSIX $'' syntax supported by most shells.
47
48 The 'format' argument is re-used as many times as necessary
49 to convert all of the given arguments.
50
51 David MacKenzie <djm@gnu.ai.mit.edu> */
52
53 #include <config.h>
54 #include <stdio.h>
55 #include <sys/types.h>
56 #include <wchar.h>
57
58 #include "system.h"
59 #include "cl-strtod.h"
60 #include "quote.h"
61 #include "unicodeio.h"
62 #include "xprintf.h"
63
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "printf"

#define AUTHORS proper_name ("David MacKenzie")

/* Digit helpers used while parsing backslash escapes.  Each macro may
   evaluate its argument more than once, so pass only expressions
   without side effects.  */

/* Nonzero if C is one of the octal digits '0' through '7'.  */
#define isodigit(c) ('0' <= (c) && (c) <= '7')

/* Numeric value of the hexadecimal digit C.  Letters in either case
   map to 10..15; anything else is treated as a decimal digit, so the
   caller is expected to have checked C beforehand.  */
#define hextobin(c) \
  ('a' <= (c) && (c) <= 'f' ? (c) - 'a' + 10 \
   : 'A' <= (c) && (c) <= 'F' ? (c) - 'A' + 10 \
   : (c) - '0')

/* Numeric value of the octal digit C.  */
#define octtobin(c) ((c) - '0')
73
74 /* The value to return to the calling program. */
75 static int exit_status;
76
77 /* True if the POSIXLY_CORRECT environment variable is set. */
78 static bool posixly_correct;
79
80 /* This message appears in N_() here rather than just in _() below because
81 the sole use would have been in a #define. */
82 static char const *const cfcc_msg =
83 N_("warning: %s: character(s) following character constant have been ignored");
84
85 void
usage(int status)86 usage (int status)
87 {
88 if (status != EXIT_SUCCESS)
89 emit_try_help ();
90 else
91 {
92 printf (_("\
93 Usage: %s FORMAT [ARGUMENT]...\n\
94 or: %s OPTION\n\
95 "),
96 program_name, program_name);
97 fputs (_("\
98 Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
99 \n\
100 "), stdout);
101 fputs (HELP_OPTION_DESCRIPTION, stdout);
102 fputs (VERSION_OPTION_DESCRIPTION, stdout);
103 fputs (_("\
104 \n\
105 FORMAT controls the output as in C printf. Interpreted sequences are:\n\
106 \n\
107 \\\" double quote\n\
108 "), stdout);
109 fputs (_("\
110 \\\\ backslash\n\
111 \\a alert (BEL)\n\
112 \\b backspace\n\
113 \\c produce no further output\n\
114 \\e escape\n\
115 \\f form feed\n\
116 \\n new line\n\
117 \\r carriage return\n\
118 \\t horizontal tab\n\
119 \\v vertical tab\n\
120 "), stdout);
121 fputs (_("\
122 \\NNN byte with octal value NNN (1 to 3 digits)\n\
123 \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\
124 \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
125 \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\
126 "), stdout);
127 fputs (_("\
128 %% a single %\n\
129 %b ARGUMENT as a string with '\\' escapes interpreted,\n\
130 except that octal escapes are of the form \\0 or \\0NNN\n\
131 %q ARGUMENT is printed in a format that can be reused as shell input,\n\
132 escaping non-printable characters with the proposed POSIX $'' syntax.\
133 \n\n\
134 and all C format specifications ending with one of diouxXfeEgGcs, with\n\
135 ARGUMENTs converted to proper type first. Variable widths are handled.\n\
136 "), stdout);
137 printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
138 emit_ancillary_info (PROGRAM_NAME);
139 }
140 exit (status);
141 }
142
143 static void
verify_numeric(char const * s,char const * end)144 verify_numeric (char const *s, char const *end)
145 {
146 if (errno)
147 {
148 error (0, errno, "%s", quote (s));
149 exit_status = EXIT_FAILURE;
150 }
151 else if (*end)
152 {
153 if (s == end)
154 error (0, 0, _("%s: expected a numeric value"), quote (s));
155 else
156 error (0, 0, _("%s: value not completely converted"), quote (s));
157 exit_status = EXIT_FAILURE;
158 }
159 }
160
161 #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \
162 static TYPE \
163 FUNC_NAME (char const *s) \
164 { \
165 char *end; \
166 TYPE val; \
167 \
168 if ((*s == '\"' || *s == '\'') && *(s + 1)) \
169 { \
170 unsigned char ch = *++s; \
171 val = ch; \
172 \
173 if (MB_CUR_MAX > 1 && *(s + 1)) \
174 { \
175 mbstate_t mbstate; mbszero (&mbstate); \
176 wchar_t wc; \
177 size_t slen = strlen (s); \
178 ssize_t bytes; \
179 /* Use mbrtowc not mbrtoc32, as per POSIX. */ \
180 bytes = mbrtowc (&wc, s, slen, &mbstate); \
181 if (0 < bytes) \
182 { \
183 val = wc; \
184 s += bytes - 1; \
185 } \
186 } \
187 \
188 /* If POSIXLY_CORRECT is not set, then give a warning that there \
189 are characters following the character constant and that GNU \
190 printf is ignoring those characters. If POSIXLY_CORRECT *is* \
191 set, then don't give the warning. */ \
192 if (*++s != 0 && !posixly_correct) \
193 error (0, 0, _(cfcc_msg), s); \
194 } \
195 else \
196 { \
197 errno = 0; \
198 val = (LIB_FUNC_EXPR); \
199 verify_numeric (s, end); \
200 } \
201 return val; \
202 } \
203
204 STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0))
205 STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0))
STRTOX(long double,vstrtold,cl_strtold (s,& end))206 STRTOX (long double, vstrtold, cl_strtold (s, &end))
207
208 /* Output a single-character \ escape. */
209
210 static void
211 print_esc_char (char c)
212 {
213 switch (c)
214 {
215 case 'a': /* Alert. */
216 putchar ('\a');
217 break;
218 case 'b': /* Backspace. */
219 putchar ('\b');
220 break;
221 case 'c': /* Cancel the rest of the output. */
222 exit (EXIT_SUCCESS);
223 break;
224 case 'e': /* Escape. */
225 putchar ('\x1B');
226 break;
227 case 'f': /* Form feed. */
228 putchar ('\f');
229 break;
230 case 'n': /* New line. */
231 putchar ('\n');
232 break;
233 case 'r': /* Carriage return. */
234 putchar ('\r');
235 break;
236 case 't': /* Horizontal tab. */
237 putchar ('\t');
238 break;
239 case 'v': /* Vertical tab. */
240 putchar ('\v');
241 break;
242 default:
243 putchar (c);
244 break;
245 }
246 }
247
/* Print the backslash escape sequence that begins at ESCSTART, which
   points at the backslash itself.
   Return the number of characters in the sequence, not counting the
   backslash, so the caller can skip past them.
   If OCTAL_0 is true, octal escapes have the form \0ooo, where o is an
   octal digit; otherwise they have the form \ooo.
   A \x with no hex digit, a \u or \U with too few hex digits, or a
   \u or \U naming a code point in D800..DFFF is a fatal error.  An
   unrecognized escape is printed literally, backslash included.  */

static int
print_esc (char const *escstart, bool octal_0)
{
  char const *cur = escstart + 1;

  if (*cur == 'x')
    {
      /* \xhh: one or two hexadecimal digits.  */
      int byte = 0;
      int ndigits = 0;

      ++cur;
      while (ndigits < 2 && isxdigit (to_uchar (*cur)))
        {
          byte = byte * 16 + hextobin (*cur);
          ++ndigits;
          ++cur;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (byte);
    }
  else if (isodigit (*cur))
    {
      /* Parse \0ooo (if octal_0 && *cur == '0') or \ooo (otherwise).
         Allow \ooo if octal_0 && *cur != '0'; this is an undocumented
         extension to POSIX that is compatible with Bash 2.05b.  */
      int byte = 0;
      int ndigits = 0;

      if (octal_0 && *cur == '0')
        ++cur;
      while (ndigits < 3 && isodigit (*cur))
        {
          byte = byte * 8 + octtobin (*cur);
          ++ndigits;
          ++cur;
        }
      putchar (byte);
    }
  else if (*cur && strchr ("\"\\abcefnrtv", *cur))
    {
      print_esc_char (*cur);
      ++cur;
    }
  else if (*cur == 'u' || *cur == 'U')
    {
      /* \uhhhh takes exactly 4 hex digits, \Uhhhhhhhh exactly 8.  */
      char esc_char = *cur;
      int remaining = (esc_char == 'u' ? 4 : 8);
      unsigned int uni_value = 0;

      ++cur;
      while (remaining > 0)
        {
          if (! isxdigit (to_uchar (*cur)))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          uni_value = uni_value * 16 + hextobin (*cur);
          --remaining;
          ++cur;
        }

      /* Error for invalid code points 0000D800 through 0000DFFF inclusive.
         Note print_unicode_char() would print the literal \u.. in this case.  */
      if (uni_value >= 0xd800 && uni_value <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               esc_char, (esc_char == 'u' ? 4 : 8), uni_value);

      print_unicode_char (stdout, uni_value, 0);
    }
  else
    {
      /* Unknown escape: emit the backslash and, unless the string ends
         here, the character after it.  */
      putchar ('\\');
      if (*cur)
        {
          putchar (*cur);
          ++cur;
        }
    }

  return cur - escstart - 1;
}
319
/* Print string STR, evaluating \ escapes.  Octal escapes are parsed
   in the \0ooo form, as used for the %b directive.  */

static void
print_esc_string (char const *str)
{
  char const *cur = str;

  while (*cur)
    {
      if (*cur == '\\')
        /* print_esc reports how many characters follow the backslash;
           the increment below then steps over the last of them.  */
        cur += print_esc (cur, true);
      else
        putchar (*cur);
      cur++;
    }
}
331
/* Call xprintf with format FMT and operand ARG, passing FIELD_WIDTH
   and/or PRECISION ahead of ARG when the matching HAVE_* flag is set.
   It relies on the local names used inside print_direc.  */
#define PRINT_WITH_STARS(ARG) \
  do \
    { \
      if (have_field_width && have_precision) \
        xprintf (fmt, field_width, precision, ARG); \
      else if (have_field_width) \
        xprintf (fmt, field_width, ARG); \
      else if (have_precision) \
        xprintf (fmt, precision, ARG); \
      else \
        xprintf (fmt, ARG); \
    } \
  while (0)

/* Evaluate a printf conversion specification.  START is the start of
   the directive, LENGTH is its length, and CONVERSION specifies the
   type of conversion.  LENGTH does not include any length modifier or
   the conversion specifier itself.  FIELD_WIDTH and PRECISION are the
   field width and precision for '*' values, if HAVE_FIELD_WIDTH and
   HAVE_PRECISION are true, respectively.  ARGUMENT is the argument to
   be formatted.  */

static void
print_direc (char const *start, size_t length, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* Pick the length modifier that matches the type the operand is
     converted to: intmax_t/uintmax_t for integers, long double for
     floating point, none for anything else.  */
  char const *modifier;
  size_t modifier_len;

  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      modifier = "j";
      modifier_len = 1;
      break;

    case 'a': case 'e': case 'f': case 'g':
    case 'A': case 'E': case 'F': case 'G':
      modifier = "L";
      modifier_len = 1;
      break;

    default:
      modifier = "";
      modifier_len = 0;
      break;
    }

  /* Build a null-terminated copy of the directive:
     START[0..LENGTH) + modifier + CONVERSION.  */
  char *fmt = xmalloc (length + modifier_len + 2);
  memcpy (fmt, start, length);
  memcpy (fmt + length, modifier, modifier_len);
  fmt[length + modifier_len] = conversion;
  fmt[length + modifier_len + 1] = '\0';

  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t value = vstrtoimax (argument);
        PRINT_WITH_STARS (value);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t value = vstrtoumax (argument);
        PRINT_WITH_STARS (value);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double value = vstrtold (argument);
        PRINT_WITH_STARS (value);
      }
      break;

    case 'c':
      /* Only a field width is passed for %c; precision is ignored.  */
      if (have_field_width)
        xprintf (fmt, field_width, *argument);
      else
        xprintf (fmt, *argument);
      break;

    case 's':
      PRINT_WITH_STARS (argument);
      break;
    }

  free (fmt);
}

#undef PRINT_WITH_STARS
482
483 /* Print the text in FORMAT, using ARGV (with ARGC elements) for
484 arguments to any '%' directives.
485 Return the number of elements of ARGV used. */
486
487 static int
print_formatted(char const * format,int argc,char ** argv)488 print_formatted (char const *format, int argc, char **argv)
489 {
490 int save_argc = argc; /* Preserve original value. */
491 char const *f; /* Pointer into 'format'. */
492 char const *direc_start; /* Start of % directive. */
493 size_t direc_length; /* Length of % directive. */
494 bool have_field_width; /* True if FIELD_WIDTH is valid. */
495 int field_width = 0; /* Arg to first '*'. */
496 bool have_precision; /* True if PRECISION is valid. */
497 int precision = 0; /* Arg to second '*'. */
498 char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */
499
500 for (f = format; *f; ++f)
501 {
502 switch (*f)
503 {
504 case '%':
505 direc_start = f++;
506 direc_length = 1;
507 have_field_width = have_precision = false;
508 if (*f == '%')
509 {
510 putchar ('%');
511 break;
512 }
513 if (*f == 'b')
514 {
515 /* FIXME: Field width and precision are not supported
516 for %b, even though POSIX requires it. */
517 if (argc > 0)
518 {
519 print_esc_string (*argv);
520 ++argv;
521 --argc;
522 }
523 break;
524 }
525
526 if (*f == 'q')
527 {
528 if (argc > 0)
529 {
530 fputs (quotearg_style (shell_escape_quoting_style, *argv),
531 stdout);
532 ++argv;
533 --argc;
534 }
535 break;
536 }
537
538 memset (ok, 0, sizeof ok);
539 ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
540 ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
541 ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
542
543 for (;; f++, direc_length++)
544 switch (*f)
545 {
546 #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
547 case 'I':
548 #endif
549 case '\'':
550 ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
551 ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
552 break;
553 case '-': case '+': case ' ':
554 break;
555 case '#':
556 ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
557 break;
558 case '0':
559 ok['c'] = ok['s'] = 0;
560 break;
561 default:
562 goto no_more_flag_characters;
563 }
564 no_more_flag_characters:
565
566 if (*f == '*')
567 {
568 ++f;
569 ++direc_length;
570 if (argc > 0)
571 {
572 intmax_t width = vstrtoimax (*argv);
573 if (INT_MIN <= width && width <= INT_MAX)
574 field_width = width;
575 else
576 error (EXIT_FAILURE, 0, _("invalid field width: %s"),
577 quote (*argv));
578 ++argv;
579 --argc;
580 }
581 else
582 field_width = 0;
583 have_field_width = true;
584 }
585 else
586 while (ISDIGIT (*f))
587 {
588 ++f;
589 ++direc_length;
590 }
591 if (*f == '.')
592 {
593 ++f;
594 ++direc_length;
595 ok['c'] = 0;
596 if (*f == '*')
597 {
598 ++f;
599 ++direc_length;
600 if (argc > 0)
601 {
602 intmax_t prec = vstrtoimax (*argv);
603 if (prec < 0)
604 {
605 /* A negative precision is taken as if the
606 precision were omitted, so -1 is safe
607 here even if prec < INT_MIN. */
608 precision = -1;
609 }
610 else if (INT_MAX < prec)
611 error (EXIT_FAILURE, 0, _("invalid precision: %s"),
612 quote (*argv));
613 else
614 precision = prec;
615 ++argv;
616 --argc;
617 }
618 else
619 precision = 0;
620 have_precision = true;
621 }
622 else
623 while (ISDIGIT (*f))
624 {
625 ++f;
626 ++direc_length;
627 }
628 }
629
630 while (*f == 'l' || *f == 'L' || *f == 'h'
631 || *f == 'j' || *f == 't' || *f == 'z')
632 ++f;
633
634 {
635 unsigned char conversion = *f;
636 int speclen = MIN (f + 1 - direc_start, INT_MAX);
637 if (! ok[conversion])
638 error (EXIT_FAILURE, 0,
639 _("%.*s: invalid conversion specification"),
640 speclen, direc_start);
641 }
642
643 print_direc (direc_start, direc_length, *f,
644 have_field_width, field_width,
645 have_precision, precision,
646 (argc <= 0 ? "" : (argc--, *argv++)));
647 break;
648
649 case '\\':
650 f += print_esc (f, false);
651 break;
652
653 default:
654 putchar (*f);
655 }
656 }
657
658 return save_argc - argc;
659 }
660
661 int
main(int argc,char ** argv)662 main (int argc, char **argv)
663 {
664 char *format;
665 int args_used;
666
667 initialize_main (&argc, &argv);
668 set_program_name (argv[0]);
669 setlocale (LC_ALL, "");
670 bindtextdomain (PACKAGE, LOCALEDIR);
671 textdomain (PACKAGE);
672
673 atexit (close_stdout);
674
675 exit_status = EXIT_SUCCESS;
676
677 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
678
679 /* We directly parse options, rather than use parse_long_options, in
680 order to avoid accepting abbreviations. */
681 if (argc == 2)
682 {
683 if (STREQ (argv[1], "--help"))
684 usage (EXIT_SUCCESS);
685
686 if (STREQ (argv[1], "--version"))
687 {
688 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
689 (char *) nullptr);
690 return EXIT_SUCCESS;
691 }
692 }
693
694 /* The above handles --help and --version.
695 Since there is no other invocation of getopt, handle '--' here. */
696 if (1 < argc && STREQ (argv[1], "--"))
697 {
698 --argc;
699 ++argv;
700 }
701
702 if (argc <= 1)
703 {
704 error (0, 0, _("missing operand"));
705 usage (EXIT_FAILURE);
706 }
707
708 format = argv[1];
709 argc -= 2;
710 argv += 2;
711
712 do
713 {
714 args_used = print_formatted (format, argc, argv);
715 argc -= args_used;
716 argv += args_used;
717 }
718 while (args_used > 0 && argc > 0);
719
720 if (argc > 0)
721 error (0, 0,
722 _("warning: ignoring excess arguments, starting with %s"),
723 quote (argv[0]));
724
725 return exit_status;
726 }
727