1 /* printf - format and print data
2    Copyright (C) 1990-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Usage: printf format [argument...]
18 
19    A front end to the printf function that lets it be used from the shell.
20 
21    Backslash escapes:
22 
23    \" = double quote
24    \\ = backslash
25    \a = alert (bell)
26    \b = backspace
27    \c = produce no further output
28    \e = escape
29    \f = form feed
30    \n = new line
31    \r = carriage return
32    \t = horizontal tab
33    \v = vertical tab
34    \ooo = octal number (ooo is 1 to 3 digits)
   \xhh = hexadecimal number (hh is 1 to 2 digits)
36    \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
37    \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
38 
39    Additional directive:
40 
41    %b = print an argument string, interpreting backslash escapes,
42      except that octal escapes are of the form \0 or \0ooo.
43 
44    %q = print an argument string in a format that can be
     reused as shell input.  Escaped characters use the proposed
46      POSIX $'' syntax supported by most shells.
47 
48    The 'format' argument is re-used as many times as necessary
49    to convert all of the given arguments.
50 
51    David MacKenzie <djm@gnu.ai.mit.edu> */
52 
53 #include <config.h>
54 #include <stdio.h>
55 #include <sys/types.h>
56 #include <wchar.h>
57 
58 #include "system.h"
59 #include "cl-strtod.h"
60 #include "quote.h"
61 #include "unicodeio.h"
62 #include "xprintf.h"
63 
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "printf"

/* Author list handed to version_etc by main.  */
#define AUTHORS proper_name ("David MacKenzie")

/* True if C is one of the characters '0' through '7'.
   C is evaluated twice, so it must not have side effects.  */
#define isodigit(c) ((c) >= '0' && (c) <= '7')
/* Numeric value of the hexadecimal digit C, in either letter case.
   Anything that is not a letter a-f or A-F is treated as a decimal
   digit, so the caller must already know that C is a hex digit.
   C is evaluated several times, so it must not have side effects.  */
#define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : \
                     (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0')
/* Numeric value of the octal digit C; the caller must already know
   that C is an octal digit.  */
#define octtobin(c) ((c) - '0')

/* The value to return to the calling program.  It is set to
   EXIT_FAILURE when an operand cannot be converted cleanly.  */
static int exit_status;

/* True if the POSIXLY_CORRECT environment variable is set.  */
static bool posixly_correct;

/* This message appears in N_() here rather than just in _() below because
   the sole use would have been in a #define.  */
static char const *const cfcc_msg =
 N_("warning: %s: character(s) following character constant have been ignored");
84 
85 void
usage(int status)86 usage (int status)
87 {
88   if (status != EXIT_SUCCESS)
89     emit_try_help ();
90   else
91     {
92       printf (_("\
93 Usage: %s FORMAT [ARGUMENT]...\n\
94   or:  %s OPTION\n\
95 "),
96               program_name, program_name);
97       fputs (_("\
98 Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
99 \n\
100 "), stdout);
101       fputs (HELP_OPTION_DESCRIPTION, stdout);
102       fputs (VERSION_OPTION_DESCRIPTION, stdout);
103       fputs (_("\
104 \n\
105 FORMAT controls the output as in C printf.  Interpreted sequences are:\n\
106 \n\
107   \\\"      double quote\n\
108 "), stdout);
109       fputs (_("\
110   \\\\      backslash\n\
111   \\a      alert (BEL)\n\
112   \\b      backspace\n\
113   \\c      produce no further output\n\
114   \\e      escape\n\
115   \\f      form feed\n\
116   \\n      new line\n\
117   \\r      carriage return\n\
118   \\t      horizontal tab\n\
119   \\v      vertical tab\n\
120 "), stdout);
121       fputs (_("\
122   \\NNN    byte with octal value NNN (1 to 3 digits)\n\
123   \\xHH    byte with hexadecimal value HH (1 to 2 digits)\n\
124   \\uHHHH  Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
125   \\UHHHHHHHH  Unicode character with hex value HHHHHHHH (8 digits)\n\
126 "), stdout);
127       fputs (_("\
128   %%      a single %\n\
129   %b      ARGUMENT as a string with '\\' escapes interpreted,\n\
130           except that octal escapes are of the form \\0 or \\0NNN\n\
131   %q      ARGUMENT is printed in a format that can be reused as shell input,\n\
132           escaping non-printable characters with the proposed POSIX $'' syntax.\
133 \n\n\
134 and all C format specifications ending with one of diouxXfeEgGcs, with\n\
135 ARGUMENTs converted to proper type first.  Variable widths are handled.\n\
136 "), stdout);
137       printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
138       emit_ancillary_info (PROGRAM_NAME);
139     }
140   exit (status);
141 }
142 
143 static void
verify_numeric(char const * s,char const * end)144 verify_numeric (char const *s, char const *end)
145 {
146   if (errno)
147     {
148       error (0, errno, "%s", quote (s));
149       exit_status = EXIT_FAILURE;
150     }
151   else if (*end)
152     {
153       if (s == end)
154         error (0, 0, _("%s: expected a numeric value"), quote (s));
155       else
156         error (0, 0, _("%s: value not completely converted"), quote (s));
157       exit_status = EXIT_FAILURE;
158     }
159 }
160 
161 #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR)				 \
162 static TYPE								 \
163 FUNC_NAME (char const *s)						 \
164 {									 \
165   char *end;								 \
166   TYPE val;								 \
167                                                                          \
168   if ((*s == '\"' || *s == '\'') && *(s + 1))				 \
169     {									 \
170       unsigned char ch = *++s;						 \
171       val = ch;								 \
172                                                                          \
173       if (MB_CUR_MAX > 1 && *(s + 1))					 \
174         {								 \
175           mbstate_t mbstate; mbszero (&mbstate);			 \
176           wchar_t wc;							 \
177           size_t slen = strlen (s);					 \
178           ssize_t bytes;						 \
179           /* Use mbrtowc not mbrtoc32, as per POSIX.  */		 \
180           bytes = mbrtowc (&wc, s, slen, &mbstate);			 \
181           if (0 < bytes)						 \
182             {								 \
183               val = wc;							 \
184               s += bytes - 1;						 \
185             }								 \
186         }								 \
187                                                                          \
188       /* If POSIXLY_CORRECT is not set, then give a warning that there	 \
189          are characters following the character constant and that GNU	 \
190          printf is ignoring those characters.  If POSIXLY_CORRECT *is*	 \
191          set, then don't give the warning.  */				 \
192       if (*++s != 0 && !posixly_correct)				 \
193         error (0, 0, _(cfcc_msg), s);					 \
194     }									 \
195   else									 \
196     {									 \
197       errno = 0;							 \
198       val = (LIB_FUNC_EXPR);						 \
199       verify_numeric (s, end);						 \
200     }									 \
201   return val;								 \
202 }									 \
203 
204 STRTOX (intmax_t,    vstrtoimax, strtoimax (s, &end, 0))
205 STRTOX (uintmax_t,   vstrtoumax, strtoumax (s, &end, 0))
STRTOX(long double,vstrtold,cl_strtold (s,& end))206 STRTOX (long double, vstrtold,   cl_strtold (s, &end))
207 
208 /* Output a single-character \ escape.  */
209 
210 static void
211 print_esc_char (char c)
212 {
213   switch (c)
214     {
215     case 'a':			/* Alert. */
216       putchar ('\a');
217       break;
218     case 'b':			/* Backspace. */
219       putchar ('\b');
220       break;
221     case 'c':			/* Cancel the rest of the output. */
222       exit (EXIT_SUCCESS);
223       break;
224     case 'e':			/* Escape. */
225       putchar ('\x1B');
226       break;
227     case 'f':			/* Form feed. */
228       putchar ('\f');
229       break;
230     case 'n':			/* New line. */
231       putchar ('\n');
232       break;
233     case 'r':			/* Carriage return. */
234       putchar ('\r');
235       break;
236     case 't':			/* Horizontal tab. */
237       putchar ('\t');
238       break;
239     case 'v':			/* Vertical tab. */
240       putchar ('\v');
241       break;
242     default:
243       putchar (c);
244       break;
245     }
246 }
247 
/* Interpret and print the backslash escape that begins at ESCSTART,
   which points at the backslash itself.
   Return how many characters after the backslash belong to the
   escape, so that the caller can skip over them.
   When OCTAL_0 is true, octal escapes are written \0ooo (o being an
   octal digit); when it is false they are written \ooo.
   A malformed \x, \u or \U escape, or a \u/\U value in the range
   D800..DFFF, is a fatal error.  */

static int
print_esc (char const *escstart, bool octal_0)
{
  char const *cur = escstart + 1;	/* First character after the '\'.  */

  if (*cur == 'x')
    {
      /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.  */
      int value = 0;
      int ndigits = 0;

      ++cur;
      while (ndigits < 2 && isxdigit (to_uchar (*cur)))
        {
          value = value * 16 + hextobin (*cur);
          ++cur;
          ++ndigits;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (value);
    }
  else if (isodigit (*cur))
    {
      /* Parse \0ooo (if octal_0 && *cur == '0') or \ooo (otherwise).
         Allow \ooo if octal_0 && *cur != '0'; this is an undocumented
         extension to POSIX that is compatible with Bash 2.05b.  */
      int value = 0;
      int ndigits = 0;

      if (octal_0 && *cur == '0')
        ++cur;			/* The leading 0 is not a value digit.  */
      while (ndigits < 3 && isodigit (*cur))
        {
          value = value * 8 + octtobin (*cur);
          ++cur;
          ++ndigits;
        }
      putchar (value);
    }
  else if (*cur && strchr ("\"\\abcefnrtv", *cur))
    {
      /* Step past the escape letter before printing it, as
         print_esc_char does not return for \c.  */
      char letter = *cur;
      ++cur;
      print_esc_char (letter);
    }
  else if (*cur == 'u' || *cur == 'U')
    {
      char esc_char = *cur;
      int ndigits = esc_char == 'u' ? 4 : 8;	/* Exact digit count.  */
      unsigned int uni_value = 0;

      ++cur;
      for (int remaining = ndigits; remaining > 0; --remaining)
        {
          if (! isxdigit (to_uchar (*cur)))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          uni_value = uni_value * 16 + hextobin (*cur);
          ++cur;
        }

      /* Error for invalid code points 0000D800 through 0000DFFF inclusive.
         Note print_unicode_char() would print the literal \u.. in this case. */
      if (0xd800 <= uni_value && uni_value <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               esc_char, ndigits, uni_value);

      print_unicode_char (stdout, uni_value, 0);
    }
  else
    {
      /* Unknown escape: print the backslash and the character after
         it (if there is one) unchanged.  */
      putchar ('\\');
      if (*cur)
        {
          putchar (*cur);
          ++cur;
        }
    }

  return cur - escstart - 1;
}
319 
/* Write STR to standard output, expanding backslash escapes on the
   way.  Octal escapes use the \0ooo form, since print_esc is called
   with OCTAL_0 set.  */

static void
print_esc_string (char const *str)
{
  char const *s = str;

  while (*s != '\0')
    {
      if (*s == '\\')
        /* Skip whatever the escape consumed besides the backslash.  */
        s += print_esc (s, true);
      else
        putchar (*s);
      s++;
    }
}
331 
/* Format and print ARGUMENT according to one conversion specification.
   START points at the '%' that opens the directive and LENGTH counts
   its bytes up to, but not including, any length modifier and the
   conversion specifier.  CONVERSION is the conversion specifier.
   When HAVE_FIELD_WIDTH is true the directive contains a '*' width
   whose value is FIELD_WIDTH; likewise HAVE_PRECISION and PRECISION
   for a '*' precision.  Numeric conversions parse ARGUMENT with
   vstrtoimax, vstrtoumax or vstrtold; %c prints its first byte and
   %s prints it as a string.  */

static void
print_direc (char const *start, size_t length, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* Pick the length modifier that matches the widest type of the
     conversion's family; it replaces whatever modifier was given.  */
  char const *modifier;
  size_t modifier_len;

  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      modifier = "j";		/* intmax_t or uintmax_t */
      modifier_len = 1;
      break;

    case 'a': case 'e': case 'f': case 'g':
    case 'A': case 'E': case 'F': case 'G':
      modifier = "L";		/* long double */
      modifier_len = 1;
      break;

    default:
      modifier = start;		/* Any valid pointer will do.  */
      modifier_len = 0;
      break;
    }

  /* Build a null-terminated directive: the first LENGTH bytes at
     START, then the modifier, then the conversion specifier.  The 2
     extra bytes hold the specifier and the terminating NUL.  */
  char *directive = xmalloc (length + modifier_len + 2);
  char *tail = mempcpy (directive, start, length);
  tail = mempcpy (tail, modifier, modifier_len);
  *tail++ = conversion;
  *tail = '\0';

  /* Each '*' in the directive needs a matching int argument ahead of
     the value, hence four call shapes per value type.  */
  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t value = vstrtoimax (argument);
        if (have_field_width && have_precision)
          xprintf (directive, field_width, precision, value);
        else if (have_field_width)
          xprintf (directive, field_width, value);
        else if (have_precision)
          xprintf (directive, precision, value);
        else
          xprintf (directive, value);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t value = vstrtoumax (argument);
        if (have_field_width && have_precision)
          xprintf (directive, field_width, precision, value);
        else if (have_field_width)
          xprintf (directive, field_width, value);
        else if (have_precision)
          xprintf (directive, precision, value);
        else
          xprintf (directive, value);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double value = vstrtold (argument);
        if (have_field_width && have_precision)
          xprintf (directive, field_width, precision, value);
        else if (have_field_width)
          xprintf (directive, field_width, value);
        else if (have_precision)
          xprintf (directive, precision, value);
        else
          xprintf (directive, value);
      }
      break;

    case 'c':
      /* Only a '*' width is passed on for %c.  */
      if (have_field_width)
        xprintf (directive, field_width, *argument);
      else
        xprintf (directive, *argument);
      break;

    case 's':
      if (have_field_width && have_precision)
        xprintf (directive, field_width, precision, argument);
      else if (have_field_width)
        xprintf (directive, field_width, argument);
      else if (have_precision)
        xprintf (directive, precision, argument);
      else
        xprintf (directive, argument);
      break;
    }

  free (directive);
}
482 
/* Print the text in FORMAT, using ARGV (with ARGC elements) for
   arguments to any '%' directives.
   Ordinary characters are copied to standard output, backslash
   escapes are expanded with print_esc, and each '%' directive takes
   its arguments from the front of ARGV: first any '*' field width,
   then any '*' precision, then the value to convert.  A directive
   that finds no argument left gets a width or precision of 0 or an
   empty string as its value.  An invalid conversion specification or
   an out-of-range '*' argument is a fatal error.
   Return the number of elements of ARGV used.  */

static int
print_formatted (char const *format, int argc, char **argv)
{
  int save_argc = argc;		/* Preserve original value.  */
  char const *f;		/* Pointer into 'format'.  */
  char const *direc_start;	/* Start of % directive.  */
  size_t direc_length;		/* Length of % directive.  */
  bool have_field_width;	/* True if FIELD_WIDTH is valid.  */
  int field_width = 0;		/* Arg to first '*'.  */
  bool have_precision;		/* True if PRECISION is valid.  */
  int precision = 0;		/* Arg to second '*'.  */
  char ok[UCHAR_MAX + 1];	/* ok['x'] is true if %x is allowed.  */

  for (f = format; *f; ++f)
    {
      switch (*f)
        {
        case '%':
          /* Remember where the directive starts, then look at the
             character after the '%'.  */
          direc_start = f++;
          direc_length = 1;
          have_field_width = have_precision = false;
          if (*f == '%')
            {
              /* "%%" stands for a literal percent sign.  */
              putchar ('%');
              break;
            }
          if (*f == 'b')
            {
              /* FIXME: Field width and precision are not supported
                 for %b, even though POSIX requires it.  */
              if (argc > 0)
                {
                  print_esc_string (*argv);
                  ++argv;
                  --argc;
                }
              break;
            }

          if (*f == 'q')
            {
              /* Print the next argument, if any, quoted for the shell.  */
              if (argc > 0)
                {
                  fputs (quotearg_style (shell_escape_quoting_style, *argv),
                         stdout);
                  ++argv;
                  --argc;
                }
              break;
            }

          /* Start with every supported conversion allowed; the flags
             and the precision seen below rule some of them out.  */
          memset (ok, 0, sizeof ok);
          ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
            ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
            ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;

          /* Consume flag characters, counting each in DIREC_LENGTH.
             The loop is left through the goto at the first character
             that is not a flag.  */
          for (;; f++, direc_length++)
            switch (*f)
              {
#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
              case 'I':
#endif
              case '\'':
                ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
                  ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
                break;
              case '-': case '+': case ' ':
                break;
              case '#':
                ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
                break;
              case '0':
                ok['c'] = ok['s'] = 0;
                break;
              default:
                goto no_more_flag_characters;
              }
        no_more_flag_characters:

          /* Field width: either '*', whose value comes from the next
             argument, or a run of digits that stays in the directive.  */
          if (*f == '*')
            {
              ++f;
              ++direc_length;
              if (argc > 0)
                {
                  intmax_t width = vstrtoimax (*argv);
                  if (INT_MIN <= width && width <= INT_MAX)
                    field_width = width;
                  else
                    error (EXIT_FAILURE, 0, _("invalid field width: %s"),
                           quote (*argv));
                  ++argv;
                  --argc;
                }
              else
                field_width = 0;
              have_field_width = true;
            }
          else
            while (ISDIGIT (*f))
              {
                ++f;
                ++direc_length;
              }
          /* Optional precision, introduced by '.'.  A directive that
             has one cannot be %c.  */
          if (*f == '.')
            {
              ++f;
              ++direc_length;
              ok['c'] = 0;
              if (*f == '*')
                {
                  ++f;
                  ++direc_length;
                  if (argc > 0)
                    {
                      intmax_t prec = vstrtoimax (*argv);
                      if (prec < 0)
                        {
                          /* A negative precision is taken as if the
                             precision were omitted, so -1 is safe
                             here even if prec < INT_MIN.  */
                          precision = -1;
                        }
                      else if (INT_MAX < prec)
                        error (EXIT_FAILURE, 0, _("invalid precision: %s"),
                               quote (*argv));
                      else
                        precision = prec;
                      ++argv;
                      --argc;
                    }
                  else
                    precision = 0;
                  have_precision = true;
                }
              else
                while (ISDIGIT (*f))
                  {
                    ++f;
                    ++direc_length;
                  }
            }

          /* Skip any length modifiers without counting them in
             DIREC_LENGTH, so they are not part of the text handed to
             print_direc, which supplies its own modifier.  */
          while (*f == 'l' || *f == 'L' || *f == 'h'
                 || *f == 'j' || *f == 't' || *f == 'z')
            ++f;

          {
            /* Reject a conversion character that is unknown or that
               was ruled out above.  This includes the NUL at the end
               of a format that stops in the middle of a directive.  */
            unsigned char conversion = *f;
            int speclen = MIN (f + 1 - direc_start, INT_MAX);
            if (! ok[conversion])
              error (EXIT_FAILURE, 0,
                     _("%.*s: invalid conversion specification"),
                     speclen, direc_start);
          }

          /* Convert the next argument, or an empty string if none is
             left.  */
          print_direc (direc_start, direc_length, *f,
                       have_field_width, field_width,
                       have_precision, precision,
                       (argc <= 0 ? "" : (argc--, *argv++)));
          break;

        case '\\':
          /* Expand the escape and step over the characters it used;
             the loop increment then steps over the last of them.  */
          f += print_esc (f, false);
          break;

        default:
          putchar (*f);
        }
    }

  return save_argc - argc;
}
660 
661 int
main(int argc,char ** argv)662 main (int argc, char **argv)
663 {
664   char *format;
665   int args_used;
666 
667   initialize_main (&argc, &argv);
668   set_program_name (argv[0]);
669   setlocale (LC_ALL, "");
670   bindtextdomain (PACKAGE, LOCALEDIR);
671   textdomain (PACKAGE);
672 
673   atexit (close_stdout);
674 
675   exit_status = EXIT_SUCCESS;
676 
677   posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
678 
679   /* We directly parse options, rather than use parse_long_options, in
680      order to avoid accepting abbreviations.  */
681   if (argc == 2)
682     {
683       if (STREQ (argv[1], "--help"))
684         usage (EXIT_SUCCESS);
685 
686       if (STREQ (argv[1], "--version"))
687         {
688           version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
689                        (char *) nullptr);
690           return EXIT_SUCCESS;
691         }
692     }
693 
694   /* The above handles --help and --version.
695      Since there is no other invocation of getopt, handle '--' here.  */
696   if (1 < argc && STREQ (argv[1], "--"))
697     {
698       --argc;
699       ++argv;
700     }
701 
702   if (argc <= 1)
703     {
704       error (0, 0, _("missing operand"));
705       usage (EXIT_FAILURE);
706     }
707 
708   format = argv[1];
709   argc -= 2;
710   argv += 2;
711 
712   do
713     {
714       args_used = print_formatted (format, argc, argv);
715       argc -= args_used;
716       argv += args_used;
717     }
718   while (args_used > 0 && argc > 0);
719 
720   if (argc > 0)
721     error (0, 0,
722            _("warning: ignoring excess arguments, starting with %s"),
723            quote (argv[0]));
724 
725   return exit_status;
726 }
727